Files
photoprism/internal/workers/vision.go
2025-11-22 10:22:05 +01:00

329 lines
9.6 KiB
Go

package workers
import (
"errors"
"fmt"
"runtime/debug"
"slices"
"strings"
"time"
"github.com/dustin/go-humanize/english"
"github.com/photoprism/photoprism/internal/ai/vision"
"github.com/photoprism/photoprism/internal/config"
"github.com/photoprism/photoprism/internal/entity"
"github.com/photoprism/photoprism/internal/entity/query"
"github.com/photoprism/photoprism/internal/entity/search"
"github.com/photoprism/photoprism/internal/entity/sortby"
"github.com/photoprism/photoprism/internal/form"
"github.com/photoprism/photoprism/internal/mutex"
"github.com/photoprism/photoprism/internal/photoprism"
"github.com/photoprism/photoprism/pkg/clean"
"github.com/photoprism/photoprism/pkg/enum"
"github.com/photoprism/photoprism/pkg/txt"
)
// Vision is the background worker that runs computer-vision models
// (labels, NSFW detection, captions, and face detection) on indexed
// pictures. It consults its configuration to decide which models may run
// in a given context.
type Vision struct {
// conf is the application configuration used for model scheduling
// decisions and passed on to the follow-up workers (faces, moments).
conf *config.Config
}
// NewVision returns a new Vision worker that uses conf for all of its
// configuration lookups.
func NewVision(conf *config.Config) *Vision {
	worker := new(Vision)
	worker.conf = conf

	return worker
}
// StartScheduled runs the worker for the scheduler. It only considers the
// models that are allowed to run in the vision.RunOnSchedule context and
// returns without doing anything if there are none. Errors returned by
// Start are logged rather than returned.
func (w *Vision) StartScheduled() {
	scheduled := w.scheduledModels()

	// Nothing to do if no model is configured to run on schedule.
	if len(scheduled) < 1 {
		return
	}

	// Use the configured filter, the default result count, the automatic
	// metadata source, and do not force regeneration of existing metadata.
	err := w.Start(w.conf.VisionFilter(), 0, scheduled, entity.SrcAuto, false, vision.RunOnSchedule)

	if err == nil {
		return
	}

	log.Errorf("scheduler: %s (vision)", err)
}
// scheduledModels lists the model types that the configuration allows to
// run in the vision.RunOnSchedule context, in the order labels, nsfw,
// caption, face. It returns nil if the worker has no configuration.
func (w *Vision) scheduledModels() []string {
	if w.conf == nil {
		return nil
	}

	selected := make([]string, 0, 4)

	// enable adds the model type to the selection if it is allowed to run.
	enable := func(name string, allowed bool) {
		if allowed {
			selected = append(selected, name)
		}
	}

	enable(vision.ModelTypeLabels, w.conf.VisionModelShouldRun(vision.ModelTypeLabels, vision.RunOnSchedule))
	enable(vision.ModelTypeNsfw, w.conf.VisionModelShouldRun(vision.ModelTypeNsfw, vision.RunOnSchedule))
	enable(vision.ModelTypeCaption, w.conf.VisionModelShouldRun(vision.ModelTypeCaption, vision.RunOnSchedule))
	enable(vision.ModelTypeFace, w.conf.VisionModelShouldRun(vision.ModelTypeFace, vision.RunOnSchedule))

	return selected
}
// Start runs the requested vision models against the pictures matching the
// search filter and persists any metadata they produce.
//
// Parameters:
//   - filter: search query used to select the pictures to process.
//   - count: maximum number of pictures to load; values below 1 or above
//     search.MaxResults are replaced by search.MaxResults.
//   - models: model types to run; they are narrowed down with
//     vision.FilterModels to those the configuration allows for runType.
//   - customSrc: overrides the metadata source string passed to the label
//     and caption generators (normalized with clean.ShortTypeLower).
//   - force: regenerates metadata regardless of existing values.
//   - runType: the scheduling context (manual, scheduled, etc.).
//
// The global mutex.VisionWorker guards the run; if it cannot be started, its
// error is returned and nothing is processed. Failures that affect a single
// picture (loading, opening, detection, saving) are logged and the picture
// is skipped. After the loop, face recognition runs if new face markers were
// saved, and moments, counts, and covers are refreshed if at least one
// picture was updated.
//
// Start returns an error if the worker mutex cannot be acquired, the search
// query fails, the worker is canceled, or a panic is recovered; otherwise it
// returns nil.
func (w *Vision) Start(filter string, count int, models []string, customSrc string, force bool, runType vision.RunType) (err error) {
	defer func() {
		if r := recover(); r != nil {
			// The recovered value is not necessarily a string or an error,
			// so it is formatted with %v instead of %s.
			err = fmt.Errorf("vision: %v (worker panic)\nstack: %s", r, debug.Stack())
			log.Error(err)
		}
	}()

	if err = mutex.VisionWorker.Start(); err != nil {
		return err
	}

	defer mutex.VisionWorker.Stop()

	// Keep only the models that are allowed to run in this context.
	models = vision.FilterModels(models, runType, func(mt vision.ModelType, when vision.RunType) bool {
		return w.conf.VisionModelShouldRun(mt, when)
	})

	if len(models) == 0 {
		log.Warnf("vision: no models were specified")
		return nil
	}

	log.Infof("vision: running %s models", txt.JoinAnd(models))

	updateLabels := slices.Contains(models, vision.ModelTypeLabels)
	updateNsfw := slices.Contains(models, vision.ModelTypeNsfw)
	updateCaptions := slices.Contains(models, vision.ModelTypeCaption)
	detectFaces := slices.Contains(models, vision.ModelTypeFace)

	customSrc = clean.ShortTypeLower(customSrc)

	// Remember if we saved new face markers so recognition can run after the loop.
	updateFaces := false

	start := time.Now()
	done := make(map[string]bool)
	updated := 0
	processed := 0

	// Make sure count is within the supported range.
	if count < 1 || count > search.MaxResults {
		count = search.MaxResults
	}

	frm := form.SearchPhotos{
		Query:   filter,
		Primary: true,
		Merged:  false,
		Count:   count,
		Offset:  0,
		Order:   sortby.Added,
	}

	// Find photos without captions when only captions are updated without
	// the force flag. The restriction must not apply when any other model
	// (labels, NSFW, or faces) also runs, as those need to see pictures
	// that already have a caption.
	if updateCaptions && !updateLabels && !updateNsfw && !detectFaces && !force {
		frm.Caption = enum.False
	}

	photos, _, queryErr := search.Photos(frm)

	if queryErr != nil {
		return queryErr
	}

	if len(photos) == 0 {
		log.Info("vision: no pictures to process")
		return nil
	}

	log.Infof("vision: processing %s", english.Plural(len(photos), "picture", "pictures"))

	for _, photo := range photos {
		if mutex.VisionWorker.Canceled() {
			return errors.New("vision: worker canceled")
		}

		// Process each picture only once, even if the search returns it repeatedly.
		if done[photo.PhotoUID] {
			continue
		}

		done[photo.PhotoUID] = true

		logName := photo.String()

		m, loadErr := query.PhotoByUID(photo.PhotoUID)

		if loadErr != nil {
			log.Errorf("vision: failed to load %s (%s)", logName, loadErr)
			continue
		}

		generateLabels := updateLabels && m.ShouldGenerateLabels(force)
		generateCaptions := updateCaptions && m.ShouldGenerateCaption(customSrc, force)
		detectNsfw := updateNsfw && (!photo.PhotoPrivate || force)

		if !generateLabels && !generateCaptions && !detectNsfw && !detectFaces {
			continue
		}

		processed++

		fileName := photoprism.FileName(photo.FileRoot, photo.FileName)
		file, fileErr := photoprism.NewMediaFile(fileName)

		if fileErr != nil {
			log.Errorf("vision: failed to open %s (%s)", logName, fileErr)
			continue
		}

		// Track whether this iteration produced metadata that needs persisting.
		changed := false

		// Detect faces and store them as markers of the primary file.
		if detectFaces {
			if primaryFile, primaryErr := m.PrimaryFile(); primaryErr != nil {
				log.Debugf("vision: photo %s has invalid primary file (%s)", logName, clean.Error(primaryErr))
			} else if primaryFile == nil {
				log.Debugf("vision: missing primary file for %s", logName)
			} else if markers := primaryFile.Markers(); markers == nil {
				log.Errorf("vision: failed loading markers for %s", logName)
			} else {
				expected := markers.DetectedFaceCount()
				faces, detectErr := photoprism.DetectFaces(file, expected)

				if detectErr != nil {
					log.Debugf("vision: %s in %s (detect faces)", detectErr, clean.Log(file.BaseName()))
				} else if saved, faceCount, applyErr := photoprism.ApplyDetectedFaces(primaryFile, faces); applyErr != nil {
					log.Warnf("vision: %s in %s (save faces)", clean.Error(applyErr), logName)
				} else if saved {
					m.PhotoFaces = faceCount
					updateFaces = true
					changed = true
				}
			}
		}

		// Generate labels.
		if generateLabels {
			if labels := file.GenerateLabels(customSrc); len(labels) > 0 {
				// Flag the picture as private if the labels indicate NSFW content.
				if w.conf.DetectNSFW() && !m.PhotoPrivate {
					if labels.IsNSFW(vision.Config.Thresholds.GetNSFW()) {
						m.PhotoPrivate = true
						log.Infof("vision: changed private flag of %s to %t (labels)", logName, m.PhotoPrivate)
					}
				}

				m.AddLabels(labels)
				changed = true
			}
		}

		// Detect NSFW content.
		if detectNsfw {
			if isNsfw := file.DetectNSFW(); m.PhotoPrivate != isNsfw {
				m.PhotoPrivate = isNsfw
				changed = true
				log.Infof("vision: changed private flag of %s to %t", logName, m.PhotoPrivate)
			}
		}

		// Generate a caption if none exists or the force flag is used,
		// and only if no caption was set or removed by a higher-priority source.
		if generateCaptions {
			if caption, captionErr := file.GenerateCaption(customSrc); captionErr != nil {
				log.Warnf("vision: %s in %s (generate caption)", clean.Error(captionErr), logName)
			} else if text := strings.TrimSpace(caption.Text); text != "" {
				m.SetCaption(text, caption.Source)

				if updateErr := m.UpdateCaptionLabels(); updateErr != nil {
					log.Warnf("vision: %s in %s (update caption labels)", clean.Error(updateErr), logName)
				}

				changed = true
				log.Infof("vision: changed caption of %s to %s", logName, clean.Log(m.PhotoCaption))
			}
		}

		// Persist the changes; a failed save is reported and not counted as an update.
		if changed {
			if saveErr := m.SaveVision(); saveErr != nil {
				log.Errorf("vision: failed to save %s (%s)", logName, clean.Error(saveErr))
			} else {
				updated++
			}
		}

		if mutex.VisionWorker.Canceled() {
			return errors.New("vision: worker canceled")
		}
	}

	elapsed := time.Since(start)

	switch {
	case processed == 0:
		log.Infof("vision: no pictures required processing [%s]", elapsed)
	case updated == processed:
		log.Infof("vision: updated %s [%s]", english.Plural(updated, "picture", "pictures"), elapsed)
	case updated == 0:
		log.Infof("vision: processed %s (no metadata changes detected) [%s]", english.Plural(processed, "picture", "pictures"), elapsed)
	default:
		log.Infof("vision: updated %s out of %s [%s]", english.Plural(updated, "picture", "pictures"), english.Plural(processed, "picture", "pictures"), elapsed)
	}

	if updateFaces {
		// Perform face recognition after saving new face markers.
		log.Debugf("vision: running face recognition")

		if faces := photoprism.NewFaces(w.conf); faces.Disabled() {
			log.Debugf("vision: skipping face recognition")
		} else if facesErr := faces.Start(photoprism.FacesOptions{}); facesErr != nil {
			log.Warn(facesErr)
		}
	}

	// Only refresh derived data if the metadata of at least one picture has
	// changed. These steps are best-effort: failures are logged as warnings
	// and do not affect the return value.
	if updated > 0 {
		// Run moments worker.
		if moments := photoprism.NewMoments(w.conf); moments == nil {
			log.Errorf("vision: failed to update moments")
		} else if momentsErr := moments.Start(); momentsErr != nil {
			log.Warnf("moments: %s in vision worker", momentsErr)
		}

		// Update precalculated photo and file counts.
		if countsErr := entity.UpdateCounts(); countsErr != nil {
			log.Warnf("vision: %s (update counts)", countsErr)
		}

		// Update album, subject, and label cover thumbs.
		if coversErr := query.UpdateCovers(); coversErr != nil {
			log.Warnf("vision: %s (update covers)", coversErr)
		}
	}

	return nil
}