AI: Refactor face package configuration and reporting #5167

Signed-off-by: Michael Mayer <michael@photoprism.app>
This commit is contained in:
Michael Mayer
2025-10-26 09:02:11 +01:00
parent f94219404e
commit 3236a32a2b
7 changed files with 129 additions and 88 deletions

View File

@@ -2,15 +2,57 @@ package face
import (
"os"
"github.com/photoprism/photoprism/internal/thumb/crop"
)
var (
SampleRadius = 0.35
Epsilon = 0.01
SkipChildren = true
// CropSize is the face image crop size used when generating FaceNet embeddings.
CropSize = crop.Sizes[crop.Tile160]
)
var (
	// OverlapThreshold defines the minimum face area overlap percentage required to treat detections as identical.
	OverlapThreshold = 42
	// OverlapThresholdFloor is the relaxed overlap threshold (OverlapThreshold - 1) used to avoid rounding inconsistencies.
	OverlapThresholdFloor = OverlapThreshold - 1
	// ScoreThreshold is the base minimum face score accepted by the detector;
	// it also seeds the scale-adjusted threshold returned by PigoQualityThreshold.
	ScoreThreshold = 9.0
	// ClusterScoreThreshold is the minimum score required for faces that contribute to automatic clustering.
	ClusterScoreThreshold = 15
	// SizeThreshold is the minimum detected face size, in pixels.
	SizeThreshold = 25
	// ClusterSizeThreshold is the minimum face size, in pixels, for faces considered when forming clusters.
	ClusterSizeThreshold = 50
	// ClusterDist is the similarity distance threshold that defines the cluster core.
	ClusterDist = 0.64
	// MatchDist is the distance offset threshold used to match new faces with existing clusters.
	MatchDist = 0.46
	// ClusterCore is the minimum number of faces required to seed a cluster core.
	ClusterCore = 4
	// SampleThreshold is the number of faces (2 * ClusterCore) required before automatic clustering begins.
	SampleThreshold = 2 * ClusterCore
	// SampleRadius is the maximum normalized distance for cluster samples.
	SampleRadius = 0.35
	// Epsilon is the numeric tolerance used during cluster comparisons.
	Epsilon = 0.01
	// SkipChildren controls whether the clustering step omits faces from child samples by default.
	SkipChildren = true
	// IgnoreBackground determines whether background faces are ignored when generating matches.
	IgnoreBackground = true
)
var (
	// Tuning knobs for the landmark-assisted quality fallback used by the Pigo detector
	// (scale bounds presumably in pixels, matching SizeThreshold — TODO confirm).

	// LandmarkQualityFloor is the minimum score accepted when both eyes are located by the landmark detector.
	LandmarkQualityFloor = float32(5.0)
	// LandmarkQualityScaleMin is the minimum face size eligible for the landmark-assisted quality fallback.
	LandmarkQualityScaleMin = 60
	// LandmarkQualityScaleMax is the maximum face size eligible for the landmark-assisted quality fallback.
	LandmarkQualityScaleMax = 90
	// LandmarkQualitySlack is the maximum allowed difference between the quality threshold and the detected score.
	LandmarkQualitySlack = float32(4.0)
)
func init() {
// Disable ignore/skip for background and children if legacy env variables are set.
if os.Getenv("PHOTOPRISM_FACE_CHILDREN_DIST") != "" || os.Getenv("PHOTOPRISM_FACE_KIDS_DIST") != "" {
@@ -20,3 +62,26 @@ func init() {
IgnoreBackground = false
}
}
// PigoQualityThreshold returns the scale-adjusted minimum Pigo quality score threshold for the provided detection scale.
//
// The base threshold is ScoreThreshold; smaller detection scales receive a larger
// boost because small faces require higher quality to be trustworthy.
func PigoQualityThreshold(scale int) (score float32) {
	// Boost table, ordered by ascending scale limit: the first matching
	// entry applies, mirroring the original cascaded comparisons.
	limits := []struct {
		below int
		boost float32
	}{
		{26, 12.0},
		{32, 8.0},
		{40, 6.0},
		{50, 4.0},
		{80, 2.0},
		{110, 1.0},
	}

	score = float32(ScoreThreshold)

	for _, l := range limits {
		if scale < l.below {
			score += l.boost
			break
		}
	}

	return score
}

View File

@@ -0,0 +1,31 @@
package face
import (
"testing"
"github.com/stretchr/testify/assert"
)
// TestPigoQualityThreshold verifies the scale-adjusted quality thresholds
// across all boost bands, from very small (XXS) to very large (XXL) faces.
func TestPigoQualityThreshold(t *testing.T) {
	cases := []struct {
		name  string
		scale int
		want  float32
	}{
		{"XXS", 21, 21},
		{"XS", 27, 17},
		{"S", 33, 15},
		{"M", 45, 13},
		{"L", 75, 11},
		{"XL", 100, 10},
		{"XXL", 250, 9},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			assert.Equal(t, tc.want, PigoQualityThreshold(tc.scale))
		})
	}
}

View File

@@ -227,7 +227,7 @@ func (d *pigoDetector) Faces(det []pigo.Detection, params pigo.CascadeParams, fi
for _, face := range det {
score := face.Q
scale := face.Scale
requiredScore := QualityThreshold(scale)
requiredScore := PigoQualityThreshold(scale)
scaleMin := LandmarkQualityScaleMin
scaleMax := LandmarkQualityScaleMax
fallbackCandidate := false

View File

@@ -163,7 +163,7 @@ func TestDetectQualityFallback(t *testing.T) {
found := false
for _, face := range faces {
if face.Score < int(QualityThreshold(face.Area.Scale)) {
if face.Score < int(PigoQualityThreshold(face.Area.Scale)) {
found = true
break
}

View File

@@ -1,44 +0,0 @@
package face
import (
"github.com/photoprism/photoprism/internal/thumb/crop"
)
var CropSize = crop.Sizes[crop.Tile160] // Face image crop size for FaceNet.
var OverlapThreshold = 42 // Face area overlap threshold in percent.
var OverlapThresholdFloor = OverlapThreshold - 1 // Reduced overlap area to avoid rounding inconsistencies.
var ScoreThreshold = 9.0 // Min face score.
var LandmarkQualityFloor = float32(5.0) // Min score when both eyes are located.
var LandmarkQualityScaleMin = 60 // Min face size eligible for landmark-based quality fallback.
var LandmarkQualityScaleMax = 90 // Max face size eligible for landmark-based quality fallback.
var LandmarkQualitySlack = float32(4.0) // Max allowed gap between quality threshold and score.
var ClusterScoreThreshold = 15 // Min score for faces forming a cluster.
var SizeThreshold = 25 // Min face size in pixels.
var ClusterSizeThreshold = 50 // Min size for faces forming a cluster in pixels.
var ClusterDist = 0.64 // Similarity distance threshold of faces forming a cluster core.
var MatchDist = 0.46 // Dist offset threshold for matching new faces with clusters.
var ClusterCore = 4 // Min number of faces forming a cluster core.
var SampleThreshold = 2 * ClusterCore // Threshold for automatic clustering to start.
// QualityThreshold returns the scale adjusted quality score threshold.
//
// Starts from ScoreThreshold and adds a boost for small detection scales,
// since smaller faces require higher quality to be accepted.
func QualityThreshold(scale int) (score float32) {
	base := float32(ScoreThreshold)

	// Guard clauses, smallest scale band first; each returns the
	// base threshold plus the band's quality boost.
	if scale < 26 {
		return base + 12.0
	}
	if scale < 32 {
		return base + 8.0
	}
	if scale < 40 {
		return base + 6.0
	}
	if scale < 50 {
		return base + 4.0
	}
	if scale < 80 {
		return base + 2.0
	}
	if scale < 110 {
		return base + 1.0
	}

	return base
}

View File

@@ -1,31 +0,0 @@
package face
import (
"testing"
"github.com/stretchr/testify/assert"
)
// TestQualityThreshold verifies the scale-adjusted quality thresholds
// across all boost bands, from very small (XXS) to very large (XXL) faces.
func TestQualityThreshold(t *testing.T) {
	cases := []struct {
		name  string
		scale int
		want  float32
	}{
		{"XXS", 21, 21},
		{"XS", 27, 17},
		{"S", 33, 15},
		{"M", 45, 13},
		{"L", 75, 11},
		{"XL", 100, 10},
		{"XXL", 250, 9},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			assert.Equal(t, tc.want, QualityThreshold(tc.scale))
		})
	}
}

View File

@@ -6,6 +6,7 @@ import (
"time"
"unicode/utf8"
"github.com/photoprism/photoprism/internal/ai/face"
"github.com/photoprism/photoprism/internal/ai/vision"
)
@@ -14,6 +15,7 @@ func (c *Config) Report() (rows [][]string, cols []string) {
cols = []string{"Name", "Value"}
reportDatabaseDSN := c.ReportDatabaseDSN()
faceEngine := c.FaceEngine()
rows = [][]string{
// Authentication.
@@ -279,7 +281,7 @@ func (c *Config) Report() (rows [][]string, cols []string) {
{"jpeg-size", fmt.Sprintf("%d", c.JpegSize())},
{"png-size", fmt.Sprintf("%d", c.PngSize())},
// Computer Vision.
// Computer Vision & Facial Recognition.
{"vision-yaml", c.VisionYaml()},
{"vision-api", fmt.Sprintf("%t", c.VisionApi())},
{"vision-uri", c.VisionUri()},
@@ -290,14 +292,32 @@ func (c *Config) Report() (rows [][]string, cols []string) {
{"facenet-model-path", c.FacenetModelPath()},
{"nsfw-model-path", c.NsfwModelPath()},
{"detect-nsfw", fmt.Sprintf("%t", c.DetectNSFW())},
// Facial Recognition.
{"face-engine", c.FaceEngine()},
{"face-engine", faceEngine},
{"face-engine-run", vision.ReportRunType(c.FaceEngineRunType())},
{"face-engine-threads", fmt.Sprintf("%d", c.FaceEngineThreads())},
{"face-size", fmt.Sprintf("%d", c.FaceSize())},
{"face-score", fmt.Sprintf("%f", c.FaceScore())},
{"face-angle", fmt.Sprintf("%v", c.FaceAngles())},
}...)
if faceEngine == face.EngineONNX {
rows = append(rows, [][]string{
{"face-engine-threads", fmt.Sprintf("%d", c.FaceEngineThreads())},
{"face-size", fmt.Sprintf("%d", c.FaceSize())},
{"face-score", fmt.Sprintf("%f", c.FaceScore())},
}...)
} else if faceEngine == face.EnginePigo {
rows = append(rows, [][]string{
{"face-size", fmt.Sprintf("%d", c.FaceSize())},
{"face-score", fmt.Sprintf("%f", c.FaceScore())},
{"face-angle", fmt.Sprintf("%v", c.FaceAngles())},
}...)
} else {
rows = append(rows, [][]string{
{"face-engine-threads", fmt.Sprintf("%d", c.FaceEngineThreads())},
{"face-size", fmt.Sprintf("%d", c.FaceSize())},
{"face-score", fmt.Sprintf("%f", c.FaceScore())},
{"face-angle", fmt.Sprintf("%v", c.FaceAngles())},
}...)
}
rows = append(rows, [][]string{
{"face-overlap", fmt.Sprintf("%d", c.FaceOverlap())},
{"face-cluster-size", fmt.Sprintf("%d", c.FaceClusterSize())},
{"face-cluster-score", fmt.Sprintf("%d", c.FaceClusterScore())},