AI: Refactor face package configuration and reporting #5167

Signed-off-by: Michael Mayer <michael@photoprism.app>
This commit is contained in:
Michael Mayer
2025-10-26 09:02:11 +01:00
parent f94219404e
commit 3236a32a2b
7 changed files with 129 additions and 88 deletions

View File

@@ -2,15 +2,57 @@ package face
import (
"os"
"github.com/photoprism/photoprism/internal/thumb/crop"
)
var (
SampleRadius = 0.35
Epsilon = 0.01
SkipChildren = true
// CropSize is the face image crop size used when generating FaceNet embeddings.
CropSize = crop.Sizes[crop.Tile160]
)
var (
	// OverlapThreshold defines the minimum face area overlap percentage required to treat detections as identical.
	OverlapThreshold = 42
	// OverlapThresholdFloor is the relaxed overlap threshold (OverlapThreshold - 1) used to avoid rounding inconsistencies.
	OverlapThresholdFloor = OverlapThreshold - 1
	// ScoreThreshold is the base minimum face score accepted by the detector;
	// it also seeds the scale-adjusted threshold returned by PigoQualityThreshold.
	ScoreThreshold = 9.0
	// ClusterScoreThreshold is the minimum score required for faces that contribute to automatic clustering.
	ClusterScoreThreshold = 15
	// SizeThreshold is the minimum detected face size, in pixels.
	SizeThreshold = 25
	// ClusterSizeThreshold is the minimum face size, in pixels, for faces considered when forming clusters.
	ClusterSizeThreshold = 50
	// ClusterDist is the similarity distance threshold that defines the cluster core.
	ClusterDist = 0.64
	// MatchDist is the distance offset threshold used to match new faces with existing clusters.
	MatchDist = 0.46
	// ClusterCore is the minimum number of faces required to seed a cluster core.
	ClusterCore = 4
	// SampleThreshold is the number of faces (2 * ClusterCore) required before automatic clustering begins.
	SampleThreshold = 2 * ClusterCore
	// SampleRadius is the maximum normalized distance for cluster samples.
	SampleRadius = 0.35
	// Epsilon is the numeric tolerance used during cluster comparisons.
	Epsilon = 0.01
	// SkipChildren controls whether the clustering step omits faces from child samples by default.
	SkipChildren = true
	// IgnoreBackground determines whether background faces are ignored when generating matches.
	IgnoreBackground = true
)
var (
	// Tuning knobs for the landmark-assisted quality fallback used by the Pigo detector
	// (scale bounds presumably in pixels, matching SizeThreshold — TODO confirm).

	// LandmarkQualityFloor is the minimum score accepted when both eyes are located by the landmark detector.
	LandmarkQualityFloor = float32(5.0)
	// LandmarkQualityScaleMin is the minimum face size eligible for the landmark-assisted quality fallback.
	LandmarkQualityScaleMin = 60
	// LandmarkQualityScaleMax is the maximum face size eligible for the landmark-assisted quality fallback.
	LandmarkQualityScaleMax = 90
	// LandmarkQualitySlack is the maximum allowed difference between the quality threshold and the detected score.
	LandmarkQualitySlack = float32(4.0)
)
func init() {
// Disable ignore/skip for background and children if legacy env variables are set.
if os.Getenv("PHOTOPRISM_FACE_CHILDREN_DIST") != "" || os.Getenv("PHOTOPRISM_FACE_KIDS_DIST") != "" {
@@ -20,3 +62,26 @@ func init() {
IgnoreBackground = false
}
}
// PigoQualityThreshold returns the scale-adjusted minimum Pigo quality score threshold for the provided detection scale.
//
// The base threshold is ScoreThreshold; smaller detection scales receive a larger
// boost because small faces require higher quality to be trustworthy.
func PigoQualityThreshold(scale int) (score float32) {
	// Boost table, ordered by ascending scale limit: the first matching
	// entry applies, mirroring the original cascaded comparisons.
	limits := []struct {
		below int
		boost float32
	}{
		{26, 12.0},
		{32, 8.0},
		{40, 6.0},
		{50, 4.0},
		{80, 2.0},
		{110, 1.0},
	}

	score = float32(ScoreThreshold)

	for _, l := range limits {
		if scale < l.below {
			score += l.boost
			break
		}
	}

	return score
}

View File

@@ -0,0 +1,31 @@
package face
import (
"testing"
"github.com/stretchr/testify/assert"
)
// TestPigoQualityThreshold verifies the scale-adjusted quality thresholds
// across all boost bands, from very small (XXS) to very large (XXL) faces.
func TestPigoQualityThreshold(t *testing.T) {
	cases := []struct {
		name  string
		scale int
		want  float32
	}{
		{"XXS", 21, 21},
		{"XS", 27, 17},
		{"S", 33, 15},
		{"M", 45, 13},
		{"L", 75, 11},
		{"XL", 100, 10},
		{"XXL", 250, 9},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			assert.Equal(t, tc.want, PigoQualityThreshold(tc.scale))
		})
	}
}

View File

@@ -227,7 +227,7 @@ func (d *pigoDetector) Faces(det []pigo.Detection, params pigo.CascadeParams, fi
for _, face := range det {
score := face.Q
scale := face.Scale
requiredScore := QualityThreshold(scale)
requiredScore := PigoQualityThreshold(scale)
scaleMin := LandmarkQualityScaleMin
scaleMax := LandmarkQualityScaleMax
fallbackCandidate := false

View File

@@ -163,7 +163,7 @@ func TestDetectQualityFallback(t *testing.T) {
found := false
for _, face := range faces {
if face.Score < int(QualityThreshold(face.Area.Scale)) {
if face.Score < int(PigoQualityThreshold(face.Area.Scale)) {
found = true
break
}

View File

@@ -1,44 +0,0 @@
package face
import (
"github.com/photoprism/photoprism/internal/thumb/crop"
)
var CropSize = crop.Sizes[crop.Tile160] // Face image crop size for FaceNet.
var OverlapThreshold = 42 // Face area overlap threshold in percent.
var OverlapThresholdFloor = OverlapThreshold - 1 // Reduced overlap area to avoid rounding inconsistencies.
var ScoreThreshold = 9.0 // Min face score.
var LandmarkQualityFloor = float32(5.0) // Min score when both eyes are located.
var LandmarkQualityScaleMin = 60 // Min face size eligible for landmark-based quality fallback.
var LandmarkQualityScaleMax = 90 // Max face size eligible for landmark-based quality fallback.
var LandmarkQualitySlack = float32(4.0) // Max allowed gap between quality threshold and score.
var ClusterScoreThreshold = 15 // Min score for faces forming a cluster.
var SizeThreshold = 25 // Min face size in pixels.
var ClusterSizeThreshold = 50 // Min size for faces forming a cluster in pixels.
var ClusterDist = 0.64 // Similarity distance threshold of faces forming a cluster core.
var MatchDist = 0.46 // Dist offset threshold for matching new faces with clusters.
var ClusterCore = 4 // Min number of faces forming a cluster core.
var SampleThreshold = 2 * ClusterCore // Threshold for automatic clustering to start.
// QualityThreshold returns the scale adjusted quality score threshold.
//
// Starts from ScoreThreshold and adds a boost for small detection scales,
// since smaller faces require higher quality to be accepted.
func QualityThreshold(scale int) (score float32) {
	base := float32(ScoreThreshold)

	// Guard clauses, smallest scale band first; each returns the
	// base threshold plus the band's quality boost.
	if scale < 26 {
		return base + 12.0
	}
	if scale < 32 {
		return base + 8.0
	}
	if scale < 40 {
		return base + 6.0
	}
	if scale < 50 {
		return base + 4.0
	}
	if scale < 80 {
		return base + 2.0
	}
	if scale < 110 {
		return base + 1.0
	}

	return base
}

View File

@@ -1,31 +0,0 @@
package face
import (
"testing"
"github.com/stretchr/testify/assert"
)
// TestQualityThreshold verifies the scale-adjusted quality thresholds
// across all boost bands, from very small (XXS) to very large (XXL) faces.
func TestQualityThreshold(t *testing.T) {
	cases := []struct {
		name  string
		scale int
		want  float32
	}{
		{"XXS", 21, 21},
		{"XS", 27, 17},
		{"S", 33, 15},
		{"M", 45, 13},
		{"L", 75, 11},
		{"XL", 100, 10},
		{"XXL", 250, 9},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			assert.Equal(t, tc.want, QualityThreshold(tc.scale))
		})
	}
}

View File

@@ -6,6 +6,7 @@ import (
"time"
"unicode/utf8"
"github.com/photoprism/photoprism/internal/ai/face"
"github.com/photoprism/photoprism/internal/ai/vision"
)
@@ -14,6 +15,7 @@ func (c *Config) Report() (rows [][]string, cols []string) {
cols = []string{"Name", "Value"}
reportDatabaseDSN := c.ReportDatabaseDSN()
faceEngine := c.FaceEngine()
rows = [][]string{
// Authentication.
@@ -279,7 +281,7 @@ func (c *Config) Report() (rows [][]string, cols []string) {
{"jpeg-size", fmt.Sprintf("%d", c.JpegSize())},
{"png-size", fmt.Sprintf("%d", c.PngSize())},
// Computer Vision.
// Computer Vision & Facial Recognition.
{"vision-yaml", c.VisionYaml()},
{"vision-api", fmt.Sprintf("%t", c.VisionApi())},
{"vision-uri", c.VisionUri()},
@@ -290,14 +292,32 @@ func (c *Config) Report() (rows [][]string, cols []string) {
{"facenet-model-path", c.FacenetModelPath()},
{"nsfw-model-path", c.NsfwModelPath()},
{"detect-nsfw", fmt.Sprintf("%t", c.DetectNSFW())},
// Facial Recognition.
{"face-engine", c.FaceEngine()},
{"face-engine", faceEngine},
{"face-engine-run", vision.ReportRunType(c.FaceEngineRunType())},
{"face-engine-threads", fmt.Sprintf("%d", c.FaceEngineThreads())},
{"face-size", fmt.Sprintf("%d", c.FaceSize())},
{"face-score", fmt.Sprintf("%f", c.FaceScore())},
{"face-angle", fmt.Sprintf("%v", c.FaceAngles())},
}...)
if faceEngine == face.EngineONNX {
rows = append(rows, [][]string{
{"face-engine-threads", fmt.Sprintf("%d", c.FaceEngineThreads())},
{"face-size", fmt.Sprintf("%d", c.FaceSize())},
{"face-score", fmt.Sprintf("%f", c.FaceScore())},
}...)
} else if faceEngine == face.EnginePigo {
rows = append(rows, [][]string{
{"face-size", fmt.Sprintf("%d", c.FaceSize())},
{"face-score", fmt.Sprintf("%f", c.FaceScore())},
{"face-angle", fmt.Sprintf("%v", c.FaceAngles())},
}...)
} else {
rows = append(rows, [][]string{
{"face-engine-threads", fmt.Sprintf("%d", c.FaceEngineThreads())},
{"face-size", fmt.Sprintf("%d", c.FaceSize())},
{"face-score", fmt.Sprintf("%f", c.FaceScore())},
{"face-angle", fmt.Sprintf("%v", c.FaceAngles())},
}...)
}
rows = append(rows, [][]string{
{"face-overlap", fmt.Sprintf("%d", c.FaceOverlap())},
{"face-cluster-size", fmt.Sprintf("%d", c.FaceClusterSize())},
{"face-cluster-score", fmt.Sprintf("%d", c.FaceClusterScore())},