Faces: Optimized midpoint computation and performance #4669 #5167

Signed-off-by: Michael Mayer <michael@photoprism.app>
This commit is contained in:
Michael Mayer
2025-10-02 19:21:29 +02:00
parent d19d59632d
commit dcb3dd6f18
20 changed files with 491 additions and 160 deletions

View File

@@ -3,9 +3,10 @@ package face
import (
"testing"
"github.com/photoprism/photoprism/internal/thumb/crop"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/photoprism/photoprism/internal/thumb/crop"
)
var area1 = NewArea("face1", 400, 250, 200)

View File

@@ -4,7 +4,6 @@ import (
_ "embed"
"fmt"
_ "image/jpeg"
"io"
"os"
"path/filepath"
"runtime/debug"
@@ -28,6 +27,12 @@ var (
flpcs map[string][]*FlpCascade
)
// DefaultAngles contains the canonical detection angles in radians.
var DefaultAngles = []float64{-0.3, 0, 0.3}
// DetectionAngles holds the active detection angles configured at runtime.
// It is initialized as an independent copy of DefaultAngles (append onto a nil
// slice allocates new storage), so changing its elements leaves DefaultAngles intact.
var DetectionAngles = append([]float64(nil), DefaultAngles...)
func init() {
var err error
@@ -62,11 +67,12 @@ var (
// Detector struct contains Pigo face detector general settings.
type Detector struct {
minSize int
angle float64
shiftFactor float64
scaleFactor float64
iouThreshold float64
perturb int
landmarkAngle float64
angles []float64
}
// Detect runs the detection algorithm over the provided source image.
@@ -81,13 +87,16 @@ func Detect(fileName string, findLandmarks bool, minSize int) (faces Faces, err
minSize = 20
}
angles := append([]float64(nil), DetectionAngles...)
d := &Detector{
minSize: minSize,
angle: 0.0,
shiftFactor: 0.1,
scaleFactor: 1.1,
iouThreshold: float64(OverlapThresholdFloor) / 100,
perturb: 63,
landmarkAngle: 0.0,
angles: angles,
}
if !fs.FileExists(fileName) {
@@ -100,8 +109,8 @@ func Detect(fileName string, findLandmarks bool, minSize int) (faces Faces, err
return faces, fmt.Errorf("faces: %s (detect faces)", err)
}
if det == nil {
return faces, fmt.Errorf("faces: no result")
if len(det) == 0 {
return faces, nil
}
faces, err = d.Faces(det, params, findLandmarks)
@@ -115,7 +124,10 @@ func Detect(fileName string, findLandmarks bool, minSize int) (faces Faces, err
// Detect runs the detection algorithm over the provided source image.
func (d *Detector) Detect(fileName string) (faces []pigo.Detection, params pigo.CascadeParams, err error) {
var srcFile io.Reader
if len(d.angles) == 0 {
// Fallback to defaults when the detector is constructed manually (e.g. tests).
d.angles = append([]float64(nil), DetectionAngles...)
}
file, err := os.Open(fileName)
@@ -123,13 +135,13 @@ func (d *Detector) Detect(fileName string) (faces []pigo.Detection, params pigo.
return faces, params, err
}
defer func(file *os.File) {
err = file.Close()
}(file)
defer func() {
if cerr := file.Close(); err == nil && cerr != nil {
err = cerr
}
}()
srcFile = file
src, err := pigo.DecodeImage(srcFile)
src, err := pigo.DecodeImage(file)
if err != nil {
return faces, params, err
@@ -148,7 +160,7 @@ func (d *Detector) Detect(fileName string) (faces []pigo.Detection, params pigo.
maxSize = rows - 4
}
imageParams := &pigo.ImageParams{
imageParams := pigo.ImageParams{
Pixels: pixels,
Rows: rows,
Cols: cols,
@@ -160,17 +172,28 @@ func (d *Detector) Detect(fileName string) (faces []pigo.Detection, params pigo.
MaxSize: maxSize,
ShiftFactor: d.shiftFactor,
ScaleFactor: d.scaleFactor,
ImageParams: *imageParams,
ImageParams: imageParams,
}
log.Tracef("faces: image size %dx%d, face size min %d, max %d", cols, rows, params.MinSize, params.MaxSize)
// Run the classifier over the obtained leaf nodes and return the Face results.
// The result contains quadruplets representing the row, column, scale and Face score.
faces = classifier.RunCascade(params, d.angle)
// Run the classifier over the obtained leaf nodes for each configured angle and merge the results.
var detections []pigo.Detection
for _, angle := range d.angles {
result := classifier.RunCascade(params, angle)
if len(result) == 0 {
continue
}
detections = append(detections, result...)
}
if len(detections) == 0 {
return detections, params, nil
}
// Calculate the intersection over union (IoU) of two clusters.
faces = classifier.ClusterDetections(faces, d.iouThreshold)
faces = classifier.ClusterDetections(detections, d.iouThreshold)
return faces, params, nil
}
@@ -182,16 +205,14 @@ func (d *Detector) Faces(det []pigo.Detection, params pigo.CascadeParams, findLa
return det[i].Scale > det[j].Scale
})
results = make(Faces, 0, len(det))
for _, face := range det {
// Skip result if quality is too low.
if face.Q < QualityThreshold(face.Scale) {
continue
}
var eyesCoords []Area
var landmarkCoords []Area
var puploc *pigo.Puploc
faceCoord := NewArea(
"face",
face.Row,
@@ -199,19 +220,23 @@ func (d *Detector) Faces(det []pigo.Detection, params pigo.CascadeParams, findLa
face.Scale,
)
// Detect additional face landmarks?
if face.Scale > 50 && findLandmarks {
// Find left eye.
puploc = &pigo.Puploc{
Row: face.Row - int(0.075*float32(face.Scale)),
Col: face.Col - int(0.175*float32(face.Scale)),
Scale: float32(face.Scale) * 0.25,
var eyesCoords []Area
var landmarkCoords []Area
if findLandmarks && face.Scale > 50 {
eyesCoords = make([]Area, 0, 2)
scale := float32(face.Scale)
leftCandidate := pigo.Puploc{
Row: face.Row - int(0.075*scale),
Col: face.Col - int(0.175*scale),
Scale: scale * 0.25,
Perturbs: d.perturb,
}
leftEye := plc.RunDetector(*puploc, params.ImageParams, d.angle, false)
if leftEye.Row > 0 && leftEye.Col > 0 {
leftEye := plc.RunDetector(leftCandidate, params.ImageParams, d.landmarkAngle, false)
leftEyeFound := leftEye.Row > 0 && leftEye.Col > 0
if leftEyeFound {
eyesCoords = append(eyesCoords, NewArea(
"eye_l",
leftEye.Row,
@@ -220,17 +245,16 @@ func (d *Detector) Faces(det []pigo.Detection, params pigo.CascadeParams, findLa
))
}
// Find right eye.
puploc = &pigo.Puploc{
Row: face.Row - int(0.075*float32(face.Scale)),
Col: face.Col + int(0.185*float32(face.Scale)),
Scale: float32(face.Scale) * 0.25,
rightCandidate := pigo.Puploc{
Row: face.Row - int(0.075*scale),
Col: face.Col + int(0.185*scale),
Scale: scale * 0.25,
Perturbs: d.perturb,
}
rightEye := plc.RunDetector(*puploc, params.ImageParams, d.angle, false)
if rightEye.Row > 0 && rightEye.Col > 0 {
rightEye := plc.RunDetector(rightCandidate, params.ImageParams, d.landmarkAngle, false)
rightEyeFound := rightEye.Row > 0 && rightEye.Col > 0
if rightEyeFound {
eyesCoords = append(eyesCoords, NewArea(
"eye_r",
rightEye.Row,
@@ -239,6 +263,10 @@ func (d *Detector) Faces(det []pigo.Detection, params pigo.CascadeParams, findLa
))
}
if leftEyeFound && rightEyeFound {
landmarkCapacity := len(eyeCascades)*2 + len(mouthCascades) + 1
landmarkCoords = make([]Area, 0, landmarkCapacity)
for _, eye := range eyeCascades {
for _, flpc := range flpcs[eye] {
if flpc == nil {
@@ -267,7 +295,6 @@ func (d *Detector) Faces(det []pigo.Detection, params pigo.CascadeParams, findLa
}
}
// Find mouth.
for _, mouth := range mouthCascades {
for _, flpc := range flpcs[mouth] {
if flpc == nil {
@@ -286,9 +313,8 @@ func (d *Detector) Faces(det []pigo.Detection, params pigo.CascadeParams, findLa
}
}
flpc := flpcs["lp84"][0]
if flpc != nil {
if cascades := flpcs["lp84"]; len(cascades) > 0 {
if flpc := cascades[0]; flpc != nil {
flp := flpc.GetLandmarkPoint(leftEye, rightEye, params.ImageParams, d.perturb, true)
if flp.Row > 0 && flp.Col > 0 {
landmarkCoords = append(landmarkCoords, NewArea(
@@ -300,6 +326,8 @@ func (d *Detector) Faces(det []pigo.Detection, params pigo.CascadeParams, findLa
}
}
}
}
}
// Create face.
f := Face{
@@ -313,11 +341,10 @@ func (d *Detector) Faces(det []pigo.Detection, params pigo.CascadeParams, findLa
// Does the face significantly overlap with previous results?
if results.Contains(f) {
// Ignore face.
} else {
// Append face.
results.Append(f)
continue
}
results.Append(f)
}
return results, nil

View File

@@ -1,12 +1,15 @@
package face
import (
"fmt"
"os"
"path/filepath"
"testing"
"github.com/dustin/go-humanize/english"
pigo "github.com/esimov/pigo/core"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/photoprism/photoprism/pkg/fs/fastwalk"
)
@@ -29,9 +32,9 @@ func TestDetect(t *testing.T) {
"14.jpg": 0,
"15.jpg": 0,
"16.jpg": 1,
"17.jpg": 1,
"17.jpg": 2,
"18.jpg": 2,
"19.jpg": 0,
"19.jpg": 1,
}
if err := fastwalk.Walk("testdata", func(fileName string, info os.FileMode) error {
@@ -131,3 +134,61 @@ func TestDetectOverlap(t *testing.T) {
t.Fatal(err)
}
}
// TestDetectLandmarkCounts checks how many eyes and landmarks are reported for
// each of the two faces detected in testdata/18.jpg with landmarks enabled.
func TestDetectLandmarkCounts(t *testing.T) {
	faces, err := Detect("testdata/18.jpg", true, 20)
	require.NoError(t, err)
	require.Equal(t, 2, faces.Count())

	// Expected counts per face, indexed in the order Detect returns the faces.
	want := []struct{ eyes, landmarks int }{
		{eyes: 2, landmarks: 15},
		{eyes: 0, landmarks: 0},
	}

	for idx, detected := range faces {
		name := fmt.Sprintf("face-%d", idx)
		t.Run(name, func(t *testing.T) {
			eyeCount, landmarkCount := len(detected.Eyes), len(detected.Landmarks)
			t.Logf("eyes=%d landmarks=%d", eyeCount, landmarkCount)
			require.Equal(t, want[idx].eyes, eyeCount)
			require.Equal(t, want[idx].landmarks, landmarkCount)
		})
	}
}
// benchmarkFacesCount receives the face count produced by each benchmark
// iteration; presumably it acts as a package-level sink for the result.
var benchmarkFacesCount int
// BenchmarkDetectorFacesLandmarks measures Detector.Faces with landmark
// detection enabled, using detections computed once from testdata/18.jpg.
func BenchmarkDetectorFacesLandmarks(b *testing.B) {
const sample = "testdata/18.jpg"
d := &Detector{
minSize: 20,
shiftFactor: 0.1,
scaleFactor: 1.1,
iouThreshold: float64(OverlapThresholdFloor) / 100,
perturb: 63,
landmarkAngle: 0.0,
angles: append([]float64(nil), DetectionAngles...),
}
// Run the detection step once up front; only Faces is called inside the loop.
det, params, err := d.Detect(sample)
if err != nil {
b.Fatal(err)
}
// Without detections there is nothing for Faces to process.
if len(det) == 0 {
b.Fatalf("no detections found for %s", sample)
}
b.ReportAllocs()
b.ResetTimer()
// Scratch slice that is refilled from det at the start of every iteration.
detections := make([]pigo.Detection, len(det))
for b.Loop() {
// Hand Faces a fresh copy each time so det itself is never passed to it
// (NOTE(review): Faces appears to sort its input in place — confirm).
copy(detections, det)
faces, err := d.Faces(detections, params, true)
if err != nil {
b.Fatal(err)
}
benchmarkFacesCount = faces.Count()
}
}

View File

@@ -3,9 +3,8 @@ package face
import (
"encoding/json"
"fmt"
"math"
"strings"
"github.com/photoprism/photoprism/pkg/vector/alg"
)
// Embedding represents a face embedding.
@@ -24,6 +23,8 @@ func NewEmbedding(inference []float32) Embedding {
result[i] = float64(v)
}
normalizeEmbedding(result)
return result
}
@@ -54,7 +55,27 @@ func (m Embedding) Dist(other Embedding) float64 {
return -1
}
return alg.EuclideanDist(m, other)
var sum float64
var diff0, diff1, diff2, diff3 float64
i := 0
limit := len(m)
for ; i+4 <= limit; i += 4 {
diff0 = m[i] - other[i]
diff1 = m[i+1] - other[i+1]
diff2 = m[i+2] - other[i+2]
diff3 = m[i+3] - other[i+3]
sum += diff0*diff0 + diff1*diff1 + diff2*diff2 + diff3*diff3
}
for ; i < limit; i++ {
diff := m[i] - other[i]
sum += diff * diff
}
return math.Sqrt(sum)
}
// Magnitude returns the face embedding vector length (magnitude).
@@ -62,6 +83,24 @@ func (m Embedding) Magnitude() float64 {
return m.Dist(NullEmbedding)
}
// normalizeEmbedding rescales e in place so that its Euclidean length is 1.
// An embedding whose squared components sum to zero (which includes an empty
// or nil embedding) is left unchanged.
func normalizeEmbedding(e Embedding) {
	sumSquares := 0.0
	for i := 0; i < len(e); i++ {
		sumSquares += e[i] * e[i]
	}

	if sumSquares == 0 {
		return
	}

	// Multiply every component by the reciprocal of the vector length.
	scale := 1 / math.Sqrt(sumSquares)
	for i, v := range e {
		e[i] = v * scale
	}
}
// JSON returns the face embedding as JSON-encoded bytes.
func (m Embedding) JSON() []byte {
var noResult = []byte("")
@@ -87,5 +126,7 @@ func UnmarshalEmbedding(s string) (result Embedding, err error) {
err = json.Unmarshal([]byte(s), &result)
normalizeEmbedding(result)
return result, err
}

File diff suppressed because one or more lines are too long

View File

@@ -3,11 +3,8 @@ package face
import (
"encoding/json"
"fmt"
"math"
"strings"
"github.com/montanaflynn/stats"
"github.com/photoprism/photoprism/pkg/vector/alg"
)
// Embeddings represents a face embedding cluster.
@@ -149,24 +146,38 @@ func EmbeddingsMidpoint(embeddings Embeddings) (result Embedding, radius float64
result = make(Embedding, dim)
// The mean of a set of vectors is calculated component-wise.
invCount := 1.0 / float64(count)
for i := range embeddings {
emb := embeddings[i]
if len(emb) != dim {
continue
}
normalizeEmbedding(emb)
for j := 0; j < dim; j++ {
result[j] += emb[j]
}
}
for i := 0; i < dim; i++ {
values := make(stats.Float64Data, count)
for j := 0; j < count; j++ {
values[j] = embeddings[j][i]
result[i] *= invCount
}
if m, err := stats.Mean(values); err != nil {
log.Warnf("embeddings: %s", err)
} else {
result[i] = m
}
}
normalizeEmbedding(result)
// Radius is the max embedding distance + 0.01 from result.
for _, emb := range embeddings {
if d := alg.EuclideanDist(result, emb); d > radius {
var dist float64
for i := 0; i < dim; i++ {
diff := result[i] - emb[i]
dist += diff * diff
}
if d := math.Sqrt(dist); d > radius {
radius = d + 0.01
}
}
@@ -184,5 +195,9 @@ func UnmarshalEmbeddings(s string) (result Embeddings, err error) {
err = json.Unmarshal([]byte(s), &result)
for i := range result {
normalizeEmbedding(result[i])
}
return result, err
}

View File

@@ -22,6 +22,10 @@ func init() {
} else {
log.Warnf("faces: PHOTOPRISM_FACE_IGNORED_DIST is out of range (0.1-1.5; -1 to disable)")
}
for i := range IgnoredEmbeddings {
normalizeEmbedding(IgnoredEmbeddings[i])
}
}
// Ignored tests whether the embedding is generally unsuitable for matching.

View File

@@ -22,6 +22,10 @@ func init() {
} else {
log.Warnf("faces: PHOTOPRISM_FACE_KIDS_DIST is out of range (0.1-1.5; -1 to disable)")
}
for i := range KidsEmbeddings {
normalizeEmbedding(KidsEmbeddings[i])
}
}
// KidsFace tests if the embedded face belongs to a baby or young child.

View File

@@ -62,6 +62,8 @@ func RandomEmbedding() (result Embedding) {
}
}
normalizeEmbedding(result)
return result
}
@@ -77,6 +79,8 @@ func RandomKidsEmbedding() (result Embedding) {
result[i] = RandomFloat64(e[i], d)
}
normalizeEmbedding(result)
return result
}
@@ -92,5 +96,7 @@ func RandomIgnoredEmbedding() (result Embedding) {
result[i] = RandomFloat64(e[i], d)
}
normalizeEmbedding(result)
return result
}

File diff suppressed because one or more lines are too long

View File

@@ -30,9 +30,9 @@ func TestNet(t *testing.T) {
"14.jpg": 0,
"15.jpg": 0,
"16.jpg": 1,
"17.jpg": 1,
"17.jpg": 2,
"18.jpg": 2,
"19.jpg": 0,
"19.jpg": 1,
}
faceIndices := map[string][]int{
@@ -75,12 +75,17 @@ func TestNet(t *testing.T) {
// }
if len(faces) > 0 {
indices, ok := faceIndices[baseName]
for i, f := range faces {
if !ok || i >= len(indices) {
continue
}
if len(f.Embeddings) > 0 {
// t.Logf("FACE %d IN %s: %#v", i, fileName, f.Embeddings)
embeddings[faceIndices[baseName][i]] = f.Embeddings[0]
embeddings[indices[i]] = f.Embeddings[0]
} else {
embeddings[faceIndices[baseName][i]] = nil
embeddings[indices[i]] = nil
}
}
}

View File

@@ -23,17 +23,17 @@ func QualityThreshold(scale int) (score float32) {
// Smaller faces require higher quality.
switch {
case scale < 26:
score += 26.0
score += 12.0
case scale < 32:
score += 16.0
score += 8.0
case scale < 40:
score += 11.0
case scale < 50:
score += 9.0
case scale < 80:
score += 6.0
case scale < 110:
case scale < 50:
score += 4.0
case scale < 80:
score += 2.0
case scale < 110:
score += 1.0
}
return score

View File

@@ -8,22 +8,22 @@ import (
func TestQualityThreshold(t *testing.T) {
t.Run("XXS", func(t *testing.T) {
assert.Equal(t, float32(35), QualityThreshold(21))
assert.Equal(t, float32(21), QualityThreshold(21))
})
t.Run("XS", func(t *testing.T) {
assert.Equal(t, float32(25), QualityThreshold(27))
assert.Equal(t, float32(17), QualityThreshold(27))
})
t.Run("S", func(t *testing.T) {
assert.Equal(t, float32(20), QualityThreshold(33))
assert.Equal(t, float32(15), QualityThreshold(33))
})
t.Run("M", func(t *testing.T) {
assert.Equal(t, float32(18), QualityThreshold(45))
assert.Equal(t, float32(13), QualityThreshold(45))
})
t.Run("L", func(t *testing.T) {
assert.Equal(t, float32(15), QualityThreshold(75))
assert.Equal(t, float32(11), QualityThreshold(75))
})
t.Run("XL", func(t *testing.T) {
assert.Equal(t, float32(11), QualityThreshold(100))
assert.Equal(t, float32(10), QualityThreshold(100))
})
t.Run("XXL", func(t *testing.T) {
assert.Equal(t, float32(9), QualityThreshold(250))

View File

@@ -349,6 +349,7 @@ func (c *Config) Propagate() {
face.ClusterCore = c.FaceClusterCore()
face.ClusterDist = c.FaceClusterDist()
face.MatchDist = c.FaceMatchDist()
face.DetectionAngles = c.FaceAngles()
// Set default theme and locale.
customize.DefaultTheme = c.DefaultTheme()

View File

@@ -1,6 +1,10 @@
package config
import "github.com/photoprism/photoprism/internal/ai/face"
import (
"math"
"github.com/photoprism/photoprism/internal/ai/face"
)
// FaceSize returns the face size threshold in pixels.
func (c *Config) FaceSize() int {
@@ -73,3 +77,36 @@ func (c *Config) FaceMatchDist() float64 {
return c.options.FaceMatchDist
}
// FaceAngles returns the face detection angles in radians. Configured values
// that are NaN, infinite, outside [-Pi, Pi], or repeated are dropped while the
// order of the remaining values is kept. A copy of face.DefaultAngles is
// returned when nothing is configured or no configured value is usable.
func (c *Config) FaceAngles() []float64 {
	configured := c.options.FaceAngles

	if len(configured) == 0 {
		return append([]float64(nil), face.DefaultAngles...)
	}

	result := make([]float64, 0, len(configured))
	known := make(map[float64]struct{}, len(configured))

	for _, a := range configured {
		switch {
		case math.IsNaN(a), math.IsInf(a, 0):
			// Not a usable number.
			continue
		case a < -math.Pi, a > math.Pi:
			// Outside the accepted range.
			continue
		}

		if _, dup := known[a]; dup {
			continue
		}

		known[a] = struct{}{}
		result = append(result, a)
	}

	if len(result) == 0 {
		return append([]float64(nil), face.DefaultAngles...)
	}

	return result
}

View File

@@ -1,9 +1,12 @@
package config
import (
"math"
"testing"
"github.com/stretchr/testify/assert"
"github.com/photoprism/photoprism/internal/ai/face"
)
func TestConfig_FaceSize(t *testing.T) {
@@ -77,3 +80,14 @@ func TestConfig_FaceMatchDist(t *testing.T) {
c.options.FaceMatchDist = 0.01
assert.Equal(t, 0.46, c.FaceMatchDist())
}
// TestConfig_FaceAngles verifies the default angles, pass-through of valid
// angles, the inclusive range boundaries, removal of duplicate and invalid
// entries, and the fallback to the defaults when no usable angle remains.
func TestConfig_FaceAngles(t *testing.T) {
	c := NewConfig(CliTestContext())

	// The test context is expected to yield the default angles.
	assert.Equal(t, face.DefaultAngles, c.FaceAngles())

	// Valid angles are returned unchanged and in order.
	c.options.FaceAngles = []float64{-0.5, 0, 0.5}
	assert.Equal(t, []float64{-0.5, 0, 0.5}, c.FaceAngles())

	// The range limits -Pi and Pi themselves are accepted.
	c.options.FaceAngles = []float64{-math.Pi, math.Pi}
	assert.Equal(t, []float64{-math.Pi, math.Pi}, c.FaceAngles())

	// Duplicates and invalid entries are dropped; the remaining order is kept.
	c.options.FaceAngles = []float64{0.2, 0.2, math.Pi + 0.1, math.Inf(1), math.NaN(), -0.2}
	assert.Equal(t, []float64{0.2, -0.2}, c.FaceAngles())

	// Only invalid entries: fall back to the defaults.
	c.options.FaceAngles = []float64{math.Pi + 0.1, math.NaN(), 4}
	assert.Equal(t, face.DefaultAngles, c.FaceAngles())

	// No configured angles: the defaults are returned as a copy, so modifying
	// the result must not change face.DefaultAngles.
	c.options.FaceAngles = nil
	defaults := append([]float64(nil), face.DefaultAngles...)
	result := c.FaceAngles()
	assert.Equal(t, defaults, result)
	if len(result) > 0 {
		result[0] += 1
		assert.Equal(t, defaults, face.DefaultAngles)
	}
}

View File

@@ -1179,6 +1179,12 @@ var Flags = CliFlags{
Value: face.ScoreThreshold,
EnvVars: EnvVars("FACE_SCORE"),
}}, {
Flag: &cli.Float64SliceFlag{
Name: "face-angle",
Usage: "face detection `ANGLE` in radians (repeatable)",
Value: cli.NewFloat64Slice(face.DefaultAngles...),
EnvVars: EnvVars("FACE_ANGLE"),
}}, {
Flag: &cli.IntFlag{
Name: "face-overlap",
Usage: "face area overlap threshold in `PERCENT` (1-100)",

View File

@@ -231,6 +231,7 @@ type Options struct {
DetectNSFW bool `yaml:"DetectNSFW" json:"DetectNSFW" flag:"detect-nsfw"`
FaceSize int `yaml:"-" json:"-" flag:"face-size"`
FaceScore float64 `yaml:"-" json:"-" flag:"face-score"`
FaceAngles []float64 `yaml:"-" json:"-" flag:"face-angle"`
FaceOverlap int `yaml:"-" json:"-" flag:"face-overlap"`
FaceClusterSize int `yaml:"-" json:"-" flag:"face-cluster-size"`
FaceClusterScore int `yaml:"-" json:"-" flag:"face-cluster-score"`

View File

@@ -285,6 +285,7 @@ func (c *Config) Report() (rows [][]string, cols []string) {
// Facial Recognition.
{"face-size", fmt.Sprintf("%d", c.FaceSize())},
{"face-score", fmt.Sprintf("%f", c.FaceScore())},
{"face-angle", fmt.Sprintf("%v", c.FaceAngles())},
{"face-overlap", fmt.Sprintf("%d", c.FaceOverlap())},
{"face-cluster-size", fmt.Sprintf("%d", c.FaceClusterSize())},
{"face-cluster-score", fmt.Sprintf("%d", c.FaceClusterScore())},

View File

@@ -186,11 +186,11 @@ func TestMergeFaces(t *testing.T) {
t.Fatal(err)
}
assert.Equal(t, "5LH5E35ZGUMF5AYLM42BIZH4DGQHJDAV", result.ID)
assert.Equal(t, "4FD6YTOMWTDU5JKD3SS2MTRUTKZRZT7O", result.ID)
assert.Equal(t, entity.SrcManual, result.FaceSrc)
assert.Equal(t, "jqynvsf28rhn6b0c", result.SubjUID)
assert.Equal(t, 2, result.Samples)
assert.Equal(t, 0.03948165743305488, result.SampleRadius)
assert.InDelta(t, 0.040200777224183845, result.SampleRadius, 1e-9)
assert.Equal(t, 0, result.Collisions)
assert.Equal(t, float64(0), result.CollisionRadius)
@@ -222,7 +222,7 @@ func TestMergeFaces(t *testing.T) {
assert.Nil(t, result)
})
t.Run("OneSubject", func(t *testing.T) {
result, err := MergeFaces(entity.Faces{entity.Face{ID: "5LH5E35ZGUMF5AYLM42BIZH4DGQHJDAV"}}, false)
result, err := MergeFaces(entity.Faces{entity.Face{ID: "4FD6YTOMWTDU5JKD3SS2MTRUTKZRZT7O"}}, false)
assert.EqualError(t, err, "faces: two or more clusters required for merging")
assert.Nil(t, result)