mirror of
https://github.com/photoprism/photoprism.git
synced 2025-12-11 16:24:11 +01:00
AI: Refactor face package configuration and reporting #5167
Signed-off-by: Michael Mayer <michael@photoprism.app>
This commit is contained in:
@@ -2,15 +2,57 @@ package face
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"os"
|
"os"
|
||||||
|
|
||||||
|
"github.com/photoprism/photoprism/internal/thumb/crop"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
SampleRadius = 0.35
|
// CropSize is the face image crop size used when generating FaceNet embeddings.
// Its initial value is the crop.Sizes entry selected by crop.Tile160.
|
||||||
Epsilon = 0.01
|
CropSize = crop.Sizes[crop.Tile160]
|
||||||
SkipChildren = true
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
// OverlapThreshold defines the minimum face area overlap percentage required to treat detections as identical.
|
||||||
|
OverlapThreshold = 42
|
||||||
|
// OverlapThresholdFloor is the relaxed overlap threshold used to avoid rounding inconsistencies.
// Its initial value is OverlapThreshold - 1.
|
||||||
|
OverlapThresholdFloor = OverlapThreshold - 1
|
||||||
|
// ScoreThreshold is the base minimum face score accepted by the detector.
// PigoQualityThreshold uses it as the base value before adding a scale-dependent offset.
|
||||||
|
ScoreThreshold = 9.0
|
||||||
|
// ClusterScoreThreshold is the minimum score required for faces that contribute to automatic clustering.
|
||||||
|
ClusterScoreThreshold = 15
|
||||||
|
// SizeThreshold is the minimum detected face size, in pixels.
|
||||||
|
SizeThreshold = 25
|
||||||
|
// ClusterSizeThreshold is the minimum face size, in pixels, for faces considered when forming clusters.
|
||||||
|
ClusterSizeThreshold = 50
|
||||||
|
// ClusterDist is the similarity distance threshold that defines the cluster core.
|
||||||
|
ClusterDist = 0.64
|
||||||
|
// MatchDist is the distance offset threshold used to match new faces with existing clusters.
|
||||||
|
MatchDist = 0.46
|
||||||
|
// ClusterCore is the minimum number of faces required to seed a cluster core.
|
||||||
|
ClusterCore = 4
|
||||||
|
// SampleThreshold is the number of faces required before automatic clustering begins.
// Its initial value is twice ClusterCore.
|
||||||
|
SampleThreshold = 2 * ClusterCore
|
||||||
|
// SampleRadius is the maximum normalized distance for cluster samples.
|
||||||
|
SampleRadius = 0.35
|
||||||
|
// Epsilon is the numeric tolerance used during cluster comparisons.
|
||||||
|
Epsilon = 0.01
|
||||||
|
// SkipChildren controls whether the clustering step omits faces from child samples by default.
|
||||||
|
SkipChildren = true
|
||||||
|
// IgnoreBackground determines whether background faces are ignored when generating matches.
// NOTE(review): init assigns false to this variable, apparently when a legacy environment variable is set — confirm the exact condition.
|
||||||
IgnoreBackground = true
|
IgnoreBackground = true
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// Settings for the landmark-assisted quality fallback.
// The Pigo detector's Faces method reads LandmarkQualityScaleMin and LandmarkQualityScaleMax for each detection it evaluates.
var (
|
||||||
|
// LandmarkQualityFloor is the minimum score accepted when both eyes are located by the landmark detector.
|
||||||
|
LandmarkQualityFloor = float32(5.0)
|
||||||
|
// LandmarkQualityScaleMin is the minimum face size eligible for the landmark-assisted quality fallback.
|
||||||
|
LandmarkQualityScaleMin = 60
|
||||||
|
// LandmarkQualityScaleMax is the maximum face size eligible for the landmark-assisted quality fallback.
|
||||||
|
LandmarkQualityScaleMax = 90
|
||||||
|
// LandmarkQualitySlack is the maximum allowed difference between the quality threshold and the detected score.
|
||||||
|
LandmarkQualitySlack = float32(4.0)
|
||||||
|
)
|
||||||
|
|
||||||
func init() {
|
func init() {
|
||||||
// Disable ignore/skip for background and children if legacy env variables are set.
|
// Disable ignore/skip for background and children if legacy env variables are set.
|
||||||
if os.Getenv("PHOTOPRISM_FACE_CHILDREN_DIST") != "" || os.Getenv("PHOTOPRISM_FACE_KIDS_DIST") != "" {
|
if os.Getenv("PHOTOPRISM_FACE_CHILDREN_DIST") != "" || os.Getenv("PHOTOPRISM_FACE_KIDS_DIST") != "" {
|
||||||
@@ -20,3 +62,26 @@ func init() {
|
|||||||
IgnoreBackground = false
|
IgnoreBackground = false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// PigoQualityThreshold returns the scale-adjusted minimum Pigo quality score threshold for the provided detection scale.
//
// The result starts at ScoreThreshold, converted to float32, and is raised for smaller scales:
//
//	scale < 26:  +12
//	scale < 32:  +8
//	scale < 40:  +6
//	scale < 50:  +4
//	scale < 80:  +2
//	scale < 110: +1
//
// Scales of 110 and above return the base threshold unchanged.
|
||||||
|
func PigoQualityThreshold(scale int) (score float32) {
|
||||||
|
score = float32(ScoreThreshold)
|
||||||
|
|
||||||
|
// Smaller faces require higher quality.
// Only the first matching case applies, so the offsets are not cumulative.
|
||||||
|
switch {
|
||||||
|
case scale < 26:
|
||||||
|
score += 12.0
|
||||||
|
case scale < 32:
|
||||||
|
score += 8.0
|
||||||
|
case scale < 40:
|
||||||
|
score += 6.0
|
||||||
|
case scale < 50:
|
||||||
|
score += 4.0
|
||||||
|
case scale < 80:
|
||||||
|
score += 2.0
|
||||||
|
case scale < 110:
|
||||||
|
score += 1.0
|
||||||
|
}
|
||||||
|
|
||||||
|
return score
|
||||||
|
}
|
||||||
|
|||||||
31
internal/ai/face/config_test.go
Normal file
31
internal/ai/face/config_test.go
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
package face
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestPigoQualityThreshold checks PigoQualityThreshold with one sample scale per tier,
// from XXS (scale 21) up to XXL (scale 250, which matches no tier and yields the base threshold).
// The expected values assume ScoreThreshold still has its initial value of 9.0.
func TestPigoQualityThreshold(t *testing.T) {
|
||||||
|
t.Run("XXS", func(t *testing.T) {
|
||||||
|
assert.Equal(t, float32(21), PigoQualityThreshold(21))
|
||||||
|
})
|
||||||
|
t.Run("XS", func(t *testing.T) {
|
||||||
|
assert.Equal(t, float32(17), PigoQualityThreshold(27))
|
||||||
|
})
|
||||||
|
t.Run("S", func(t *testing.T) {
|
||||||
|
assert.Equal(t, float32(15), PigoQualityThreshold(33))
|
||||||
|
})
|
||||||
|
t.Run("M", func(t *testing.T) {
|
||||||
|
assert.Equal(t, float32(13), PigoQualityThreshold(45))
|
||||||
|
})
|
||||||
|
t.Run("L", func(t *testing.T) {
|
||||||
|
assert.Equal(t, float32(11), PigoQualityThreshold(75))
|
||||||
|
})
|
||||||
|
t.Run("XL", func(t *testing.T) {
|
||||||
|
assert.Equal(t, float32(10), PigoQualityThreshold(100))
|
||||||
|
})
|
||||||
|
t.Run("XXL", func(t *testing.T) {
|
||||||
|
assert.Equal(t, float32(9), PigoQualityThreshold(250))
|
||||||
|
})
|
||||||
|
}
|
||||||
@@ -227,7 +227,7 @@ func (d *pigoDetector) Faces(det []pigo.Detection, params pigo.CascadeParams, fi
|
|||||||
for _, face := range det {
|
for _, face := range det {
|
||||||
score := face.Q
|
score := face.Q
|
||||||
scale := face.Scale
|
scale := face.Scale
|
||||||
requiredScore := QualityThreshold(scale)
|
requiredScore := PigoQualityThreshold(scale)
|
||||||
scaleMin := LandmarkQualityScaleMin
|
scaleMin := LandmarkQualityScaleMin
|
||||||
scaleMax := LandmarkQualityScaleMax
|
scaleMax := LandmarkQualityScaleMax
|
||||||
fallbackCandidate := false
|
fallbackCandidate := false
|
||||||
|
|||||||
@@ -163,7 +163,7 @@ func TestDetectQualityFallback(t *testing.T) {
|
|||||||
found := false
|
found := false
|
||||||
|
|
||||||
for _, face := range faces {
|
for _, face := range faces {
|
||||||
if face.Score < int(QualityThreshold(face.Area.Scale)) {
|
if face.Score < int(PigoQualityThreshold(face.Area.Scale)) {
|
||||||
found = true
|
found = true
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,44 +0,0 @@
|
|||||||
package face
|
|
||||||
|
|
||||||
import (
|
|
||||||
"github.com/photoprism/photoprism/internal/thumb/crop"
|
|
||||||
)
|
|
||||||
|
|
||||||
var CropSize = crop.Sizes[crop.Tile160] // Face image crop size for FaceNet.
|
|
||||||
var OverlapThreshold = 42 // Face area overlap threshold in percent.
|
|
||||||
var OverlapThresholdFloor = OverlapThreshold - 1 // Reduced overlap area to avoid rounding inconsistencies.
|
|
||||||
var ScoreThreshold = 9.0 // Min face score.
|
|
||||||
var LandmarkQualityFloor = float32(5.0) // Min score when both eyes are located.
|
|
||||||
var LandmarkQualityScaleMin = 60 // Min face size eligible for landmark-based quality fallback.
|
|
||||||
var LandmarkQualityScaleMax = 90 // Max face size eligible for landmark-based quality fallback.
|
|
||||||
var LandmarkQualitySlack = float32(4.0) // Max allowed gap between quality threshold and score.
|
|
||||||
var ClusterScoreThreshold = 15 // Min score for faces forming a cluster.
|
|
||||||
var SizeThreshold = 25 // Min face size in pixels.
|
|
||||||
var ClusterSizeThreshold = 50 // Min size for faces forming a cluster in pixels.
|
|
||||||
var ClusterDist = 0.64 // Similarity distance threshold of faces forming a cluster core.
|
|
||||||
var MatchDist = 0.46 // Dist offset threshold for matching new faces with clusters.
|
|
||||||
var ClusterCore = 4 // Min number of faces forming a cluster core.
|
|
||||||
var SampleThreshold = 2 * ClusterCore // Threshold for automatic clustering to start.
|
|
||||||
|
|
||||||
// QualityThreshold returns the scale adjusted quality score threshold.
|
|
||||||
func QualityThreshold(scale int) (score float32) {
|
|
||||||
score = float32(ScoreThreshold)
|
|
||||||
|
|
||||||
// Smaller faces require higher quality.
|
|
||||||
switch {
|
|
||||||
case scale < 26:
|
|
||||||
score += 12.0
|
|
||||||
case scale < 32:
|
|
||||||
score += 8.0
|
|
||||||
case scale < 40:
|
|
||||||
score += 6.0
|
|
||||||
case scale < 50:
|
|
||||||
score += 4.0
|
|
||||||
case scale < 80:
|
|
||||||
score += 2.0
|
|
||||||
case scale < 110:
|
|
||||||
score += 1.0
|
|
||||||
}
|
|
||||||
|
|
||||||
return score
|
|
||||||
}
|
|
||||||
@@ -1,31 +0,0 @@
|
|||||||
package face
|
|
||||||
|
|
||||||
import (
|
|
||||||
"testing"
|
|
||||||
|
|
||||||
"github.com/stretchr/testify/assert"
|
|
||||||
)
|
|
||||||
|
|
||||||
func TestQualityThreshold(t *testing.T) {
|
|
||||||
t.Run("XXS", func(t *testing.T) {
|
|
||||||
assert.Equal(t, float32(21), QualityThreshold(21))
|
|
||||||
})
|
|
||||||
t.Run("XS", func(t *testing.T) {
|
|
||||||
assert.Equal(t, float32(17), QualityThreshold(27))
|
|
||||||
})
|
|
||||||
t.Run("S", func(t *testing.T) {
|
|
||||||
assert.Equal(t, float32(15), QualityThreshold(33))
|
|
||||||
})
|
|
||||||
t.Run("M", func(t *testing.T) {
|
|
||||||
assert.Equal(t, float32(13), QualityThreshold(45))
|
|
||||||
})
|
|
||||||
t.Run("L", func(t *testing.T) {
|
|
||||||
assert.Equal(t, float32(11), QualityThreshold(75))
|
|
||||||
})
|
|
||||||
t.Run("XL", func(t *testing.T) {
|
|
||||||
assert.Equal(t, float32(10), QualityThreshold(100))
|
|
||||||
})
|
|
||||||
t.Run("XXL", func(t *testing.T) {
|
|
||||||
assert.Equal(t, float32(9), QualityThreshold(250))
|
|
||||||
})
|
|
||||||
}
|
|
||||||
@@ -6,6 +6,7 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
"unicode/utf8"
|
"unicode/utf8"
|
||||||
|
|
||||||
|
"github.com/photoprism/photoprism/internal/ai/face"
|
||||||
"github.com/photoprism/photoprism/internal/ai/vision"
|
"github.com/photoprism/photoprism/internal/ai/vision"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -14,6 +15,7 @@ func (c *Config) Report() (rows [][]string, cols []string) {
|
|||||||
cols = []string{"Name", "Value"}
|
cols = []string{"Name", "Value"}
|
||||||
|
|
||||||
reportDatabaseDSN := c.ReportDatabaseDSN()
|
reportDatabaseDSN := c.ReportDatabaseDSN()
|
||||||
|
faceEngine := c.FaceEngine()
|
||||||
|
|
||||||
rows = [][]string{
|
rows = [][]string{
|
||||||
// Authentication.
|
// Authentication.
|
||||||
@@ -279,7 +281,7 @@ func (c *Config) Report() (rows [][]string, cols []string) {
|
|||||||
{"jpeg-size", fmt.Sprintf("%d", c.JpegSize())},
|
{"jpeg-size", fmt.Sprintf("%d", c.JpegSize())},
|
||||||
{"png-size", fmt.Sprintf("%d", c.PngSize())},
|
{"png-size", fmt.Sprintf("%d", c.PngSize())},
|
||||||
|
|
||||||
// Computer Vision.
|
// Computer Vision & Facial Recognition.
|
||||||
{"vision-yaml", c.VisionYaml()},
|
{"vision-yaml", c.VisionYaml()},
|
||||||
{"vision-api", fmt.Sprintf("%t", c.VisionApi())},
|
{"vision-api", fmt.Sprintf("%t", c.VisionApi())},
|
||||||
{"vision-uri", c.VisionUri()},
|
{"vision-uri", c.VisionUri()},
|
||||||
@@ -290,14 +292,32 @@ func (c *Config) Report() (rows [][]string, cols []string) {
|
|||||||
{"facenet-model-path", c.FacenetModelPath()},
|
{"facenet-model-path", c.FacenetModelPath()},
|
||||||
{"nsfw-model-path", c.NsfwModelPath()},
|
{"nsfw-model-path", c.NsfwModelPath()},
|
||||||
{"detect-nsfw", fmt.Sprintf("%t", c.DetectNSFW())},
|
{"detect-nsfw", fmt.Sprintf("%t", c.DetectNSFW())},
|
||||||
|
{"face-engine", faceEngine},
|
||||||
// Facial Recognition.
|
|
||||||
{"face-engine", c.FaceEngine()},
|
|
||||||
{"face-engine-run", vision.ReportRunType(c.FaceEngineRunType())},
|
{"face-engine-run", vision.ReportRunType(c.FaceEngineRunType())},
|
||||||
{"face-engine-threads", fmt.Sprintf("%d", c.FaceEngineThreads())},
|
}...)
|
||||||
{"face-size", fmt.Sprintf("%d", c.FaceSize())},
|
|
||||||
{"face-score", fmt.Sprintf("%f", c.FaceScore())},
|
if faceEngine == face.EngineONNX {
|
||||||
{"face-angle", fmt.Sprintf("%v", c.FaceAngles())},
|
rows = append(rows, [][]string{
|
||||||
|
{"face-engine-threads", fmt.Sprintf("%d", c.FaceEngineThreads())},
|
||||||
|
{"face-size", fmt.Sprintf("%d", c.FaceSize())},
|
||||||
|
{"face-score", fmt.Sprintf("%f", c.FaceScore())},
|
||||||
|
}...)
|
||||||
|
} else if faceEngine == face.EnginePigo {
|
||||||
|
rows = append(rows, [][]string{
|
||||||
|
{"face-size", fmt.Sprintf("%d", c.FaceSize())},
|
||||||
|
{"face-score", fmt.Sprintf("%f", c.FaceScore())},
|
||||||
|
{"face-angle", fmt.Sprintf("%v", c.FaceAngles())},
|
||||||
|
}...)
|
||||||
|
} else {
|
||||||
|
rows = append(rows, [][]string{
|
||||||
|
{"face-engine-threads", fmt.Sprintf("%d", c.FaceEngineThreads())},
|
||||||
|
{"face-size", fmt.Sprintf("%d", c.FaceSize())},
|
||||||
|
{"face-score", fmt.Sprintf("%f", c.FaceScore())},
|
||||||
|
{"face-angle", fmt.Sprintf("%v", c.FaceAngles())},
|
||||||
|
}...)
|
||||||
|
}
|
||||||
|
|
||||||
|
rows = append(rows, [][]string{
|
||||||
{"face-overlap", fmt.Sprintf("%d", c.FaceOverlap())},
|
{"face-overlap", fmt.Sprintf("%d", c.FaceOverlap())},
|
||||||
{"face-cluster-size", fmt.Sprintf("%d", c.FaceClusterSize())},
|
{"face-cluster-size", fmt.Sprintf("%d", c.FaceClusterSize())},
|
||||||
{"face-cluster-score", fmt.Sprintf("%d", c.FaceClusterScore())},
|
{"face-cluster-score", fmt.Sprintf("%d", c.FaceClusterScore())},
|
||||||
|
|||||||
Reference in New Issue
Block a user