From 4b5e3b574a2a305ce8c336ad20575c23f26317ec Mon Sep 17 00:00:00 2001 From: Michael Mayer Date: Tue, 22 Jul 2025 22:09:00 +0200 Subject: [PATCH] Index: Improve generation, caching and logging of labels #5011 #5123 Signed-off-by: Michael Mayer --- internal/ai/vision/api_response.go | 14 ++++++------ internal/ai/vision/caption.go | 2 +- internal/ai/vision/labels.go | 8 +++---- internal/ai/vision/labels_test.go | 9 ++++---- internal/ai/vision/nsfw.go | 6 ++--- internal/api/vision_labels.go | 3 ++- internal/commands/find.go | 4 +++- internal/commands/vision_run.go | 3 ++- internal/config/config.go | 2 +- internal/entity/label_cache.go | 31 +++++++++++++++++--------- internal/entity/label_cache_test.go | 2 +- internal/entity/photo.go | 8 ++++++- internal/entity/photo_caption.go | 17 ++++++++++++-- internal/photoprism/index.go | 13 ++++++----- internal/photoprism/index_labels.go | 4 ++-- internal/photoprism/index_mediafile.go | 2 +- internal/workers/vision.go | 20 ++++++++++++----- 17 files changed, 97 insertions(+), 51 deletions(-) diff --git a/internal/ai/vision/api_response.go b/internal/ai/vision/api_response.go index e18165647..1acdf7242 100644 --- a/internal/ai/vision/api_response.go +++ b/internal/ai/vision/api_response.go @@ -86,7 +86,7 @@ type LabelResult struct { } // ToClassify returns the label results as classify.Label. -func (r LabelResult) ToClassify() classify.Label { +func (r LabelResult) ToClassify(labelSrc string) classify.Label { // Calculate uncertainty from confidence or assume a default of 20%. var uncertainty int @@ -97,18 +97,18 @@ func (r LabelResult) ToClassify() classify.Label { } // Default to "image" of no source name is provided. - var source string - - if r.Source != "" { - source = r.Source + if labelSrc != entity.SrcAuto { + labelSrc = clean.ShortTypeLower(labelSrc) + } else if r.Source != "" { + labelSrc = clean.ShortTypeLower(r.Source) } else { - source = entity.SrcImage + labelSrc = entity.SrcImage } // Return label. return classify.Label{ Name: r.Name, - Source: source, + Source: labelSrc, Priority: r.Priority, Uncertainty: uncertainty, Categories: r.Categories} diff --git a/internal/ai/vision/caption.go b/internal/ai/vision/caption.go index b148d61cd..5f642e861 100644 --- a/internal/ai/vision/caption.go +++ b/internal/ai/vision/caption.go @@ -19,7 +19,7 @@ var CaptionPromptDefault = `Create an interesting caption that sounds natural an var CaptionModelDefault = "qwen2.5vl" // Caption returns generated captions for the specified images. -func Caption(images Files, src media.Src) (result *CaptionResult, model *Model, err error) { +func Caption(images Files, mediaSrc media.Src) (result *CaptionResult, model *Model, err error) { // Return if there is no configuration or no image classification models are configured. if Config == nil { return result, model, errors.New("vision service is not configured") diff --git a/internal/ai/vision/labels.go b/internal/ai/vision/labels.go index d5626333b..8af129b86 100644 --- a/internal/ai/vision/labels.go +++ b/internal/ai/vision/labels.go @@ -11,7 +11,7 @@ import ( ) // Labels finds matching labels for the specified image. -func Labels(images Files, src media.Src) (result classify.Labels, err error) { +func Labels(images Files, mediaSrc media.Src, labelSrc string) (result classify.Labels, err error) { // Return if no thumbnail filenames were given. if len(images) == 0 { return result, errors.New("at least one image required") @@ -53,20 +53,20 @@ func Labels(images Files, src media.Src) (result classify.Labels, err error) { } for _, label := range apiResponse.Result.Labels { - result = append(result, label.ToClassify()) + result = append(result, label.ToClassify(labelSrc)) } } else if tf := model.ClassifyModel(); tf != nil { // Predict labels with local TensorFlow model. for i := range images { var labels classify.Labels - switch src { + switch mediaSrc { case media.SrcLocal: labels, err = tf.File(images[i], Config.Thresholds.Confidence) case media.SrcRemote: labels, err = tf.Url(images[i], Config.Thresholds.Confidence) default: - return result, fmt.Errorf("invalid image source %s", clean.Log(src)) + return result, fmt.Errorf("invalid media source %s", clean.Log(mediaSrc)) } if err != nil { diff --git a/internal/ai/vision/labels_test.go b/internal/ai/vision/labels_test.go index 89de5f9ed..d68418a0b 100644 --- a/internal/ai/vision/labels_test.go +++ b/internal/ai/vision/labels_test.go @@ -6,6 +6,7 @@ import ( "github.com/stretchr/testify/assert" "github.com/photoprism/photoprism/internal/ai/classify" + "github.com/photoprism/photoprism/internal/entity" "github.com/photoprism/photoprism/pkg/fs" "github.com/photoprism/photoprism/pkg/media" ) @@ -15,7 +16,7 @@ func TestLabels(t *testing.T) { var examplesPath = assetsPath + "/examples" t.Run("Success", func(t *testing.T) { - result, err := Labels(Files{examplesPath + "/chameleon_lime.jpg"}, media.SrcLocal) + result, err := Labels(Files{examplesPath + "/chameleon_lime.jpg"}, media.SrcLocal, entity.SrcAuto) assert.NoError(t, err) assert.IsType(t, classify.Labels{}, result) @@ -27,7 +28,7 @@ func TestLabels(t *testing.T) { assert.Equal(t, 7, result[0].Uncertainty) }) t.Run("Cat224", func(t *testing.T) { - result, err := Labels(Files{examplesPath + "/cat_224.jpeg"}, media.SrcLocal) + result, err := Labels(Files{examplesPath + "/cat_224.jpeg"}, media.SrcLocal, entity.SrcAuto) assert.NoError(t, err) assert.IsType(t, classify.Labels{}, result) @@ -40,7 +41,7 @@ func TestLabels(t *testing.T) { assert.InDelta(t, float32(0.41), result[0].Confidence(), 0.1) }) t.Run("Cat720", func(t *testing.T) { - result, err := Labels(Files{examplesPath + "/cat_720.jpeg"}, media.SrcLocal) + result, err := Labels(Files{examplesPath + "/cat_720.jpeg"}, media.SrcLocal, entity.SrcAuto) assert.NoError(t, err) assert.IsType(t, classify.Labels{}, result) @@ -53,7 +54,7 @@ func TestLabels(t *testing.T) { assert.InDelta(t, float32(0.4), result[0].Confidence(), 0.1) }) t.Run("InvalidFile", func(t *testing.T) { - _, err := Labels(Files{examplesPath + "/notexisting.jpg"}, media.SrcLocal) + _, err := Labels(Files{examplesPath + "/notexisting.jpg"}, media.SrcLocal, entity.SrcAuto) assert.Error(t, err) }) } diff --git a/internal/ai/vision/nsfw.go b/internal/ai/vision/nsfw.go index bd662c05c..41a15ae57 100644 --- a/internal/ai/vision/nsfw.go +++ b/internal/ai/vision/nsfw.go @@ -10,7 +10,7 @@ import ( ) // Nsfw checks the specified images for inappropriate content. -func Nsfw(images Files, src media.Src) (result []nsfw.Result, err error) { +func Nsfw(images Files, mediaSrc media.Src) (result []nsfw.Result, err error) { // Return if no thumbnail filenames were given. if len(images) == 0 { return result, errors.New("at least one image required") @@ -59,13 +59,13 @@ func Nsfw(images Files, src media.Src) (result []nsfw.Result, err error) { for i := range images { var labels nsfw.Result - switch src { + switch mediaSrc { case media.SrcLocal: labels, err = tf.File(images[i]) case media.SrcRemote: labels, err = tf.Url(images[i]) default: - return result, fmt.Errorf("invalid image source %s", clean.Log(src)) + return result, fmt.Errorf("invalid media source %s", clean.Log(mediaSrc)) } if err != nil { diff --git a/internal/api/vision_labels.go b/internal/api/vision_labels.go index 4e430bb1d..8d09ed820 100644 --- a/internal/api/vision_labels.go +++ b/internal/api/vision_labels.go @@ -7,6 +7,7 @@ import ( "github.com/photoprism/photoprism/internal/ai/vision" "github.com/photoprism/photoprism/internal/auth/acl" + "github.com/photoprism/photoprism/internal/entity" "github.com/photoprism/photoprism/internal/photoprism/get" "github.com/photoprism/photoprism/pkg/media" "github.com/photoprism/photoprism/pkg/media/http/header" @@ -54,7 +55,7 @@ func PostVisionLabels(router *gin.RouterGroup) { } // Run inference to find matching labels. - labels, err := vision.Labels(request.Images, media.SrcRemote) + labels, err := vision.Labels(request.Images, media.SrcRemote, entity.SrcAuto) if err != nil { log.Errorf("vision: %s (run labels)", err) diff --git a/internal/commands/find.go b/internal/commands/find.go index 953995e87..e1b4b437b 100644 --- a/internal/commands/find.go +++ b/internal/commands/find.go @@ -41,8 +41,10 @@ func findAction(ctx *cli.Context) error { defer conf.Shutdown() + filter := strings.TrimSpace(strings.Join(ctx.Args().Slice(), " ")) + frm := form.SearchPhotos{ - Query: strings.TrimSpace(ctx.Args().First()), + Query: filter, Primary: false, Merged: false, Count: ctx.Int("count"), diff --git a/internal/commands/vision_run.go b/internal/commands/vision_run.go index d70dd76b6..ad74743ee 100644 --- a/internal/commands/vision_run.go +++ b/internal/commands/vision_run.go @@ -41,8 +41,9 @@ var VisionRunCommand = &cli.Command{ func visionRunAction(ctx *cli.Context) error { return CallWithDependencies(ctx, func(conf *config.Config) error { worker := workers.NewVision(conf) + filter := strings.TrimSpace(strings.Join(ctx.Args().Slice(), " ")) return worker.Start( - strings.TrimSpace(ctx.Args().First()), + filter, vision.ParseTypes(ctx.String("models")), ctx.String("source"), ctx.Bool("force"), diff --git a/internal/config/config.go b/internal/config/config.go index 4395ae2d1..e37edbd78 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -91,7 +91,7 @@ func init() { // Disable entity cache if requested. if txt.Bool(os.Getenv(EnvVar("disable-photolabelcache"))) { - entity.CachePhotoLabels = false + entity.UsePhotoLabelsCache = false } initThumbs() diff --git a/internal/entity/label_cache.go b/internal/entity/label_cache.go index e673e6058..bf7f9a010 100644 --- a/internal/entity/label_cache.go +++ b/internal/entity/label_cache.go @@ -6,6 +6,7 @@ import ( "sync" "time" + "github.com/dustin/go-humanize/english" gc "github.com/patrickmn/go-cache" "github.com/photoprism/photoprism/pkg/clean" @@ -17,12 +18,12 @@ const ( labelCacheDefaultExpiration = 15 * time.Minute labelCacheErrorExpiration = 5 * time.Minute labelCacheCleanupInterval = 10 * time.Minute - photoLabelCacheExpiration = time.Hour + photoLabelCacheExpiration = 24 * time.Hour ) // Cache Label and PhotoLabel entities for faster indexing. var ( - CachePhotoLabels = true + UsePhotoLabelsCache = true labelCache = gc.New(labelCacheDefaultExpiration, labelCacheCleanupInterval) photoLabelCache = gc.New(photoLabelCacheExpiration, labelCacheCleanupInterval) photoLabelCacheMutex = sync.Mutex{} @@ -40,34 +41,40 @@ func FlushLabelCache() { // FlushPhotoLabelCache removes all cached PhotoLabel entities from the cache. func FlushPhotoLabelCache() { - if !CachePhotoLabels { + if !UsePhotoLabelsCache { return } photoLabelCacheMutex.Lock() defer photoLabelCacheMutex.Unlock() + start := time.Now() + photoLabelCache.Flush() + + log.Debugf("index: flushed photo labels cache [%s]", time.Since(start)) } // FlushCachedPhotoLabel deletes a cached PhotoLabel entity from the cache. func FlushCachedPhotoLabel(m *PhotoLabel) { - if m == nil || !CachePhotoLabels { + if m == nil || !UsePhotoLabelsCache { return } else if m.HasID() { photoLabelCache.Delete(photoLabelCacheKey(m.PhotoID, m.LabelID)) } } -// WarmPhotoLabelCache warms up the PhotoLabel cache. -func WarmPhotoLabelCache() (err error) { - if !CachePhotoLabels { +// CachePhotoLabels warms up the PhotoLabel cache. +func CachePhotoLabels() (err error) { + if !UsePhotoLabelsCache { return nil } photoLabelCacheMutex.Lock() defer photoLabelCacheMutex.Unlock() + start := time.Now() + var photoLabels []PhotoLabel // Find photo label assignments. @@ -82,6 +89,8 @@ func WarmPhotoLabelCache() (err error) { photoLabelCache.SetDefault(m.CacheKey(), m) } + log.Debugf("index: cached %s [%s]", english.Plural(len(photoLabels), "photo label", "photo labels"), time.Since(start)) + return nil } @@ -145,7 +154,7 @@ func FindPhotoLabel(photoId, labelId uint, cached bool) (*PhotoLabel, error) { } // Return cached label, if found. - if cached && CachePhotoLabels { + if cached && UsePhotoLabelsCache { if cacheData, ok := photoLabelCache.Get(cacheKey); ok { log.Tracef("photo-label: cache hit for %s", cacheKey) @@ -164,16 +173,16 @@ func FindPhotoLabel(photoId, labelId uint, cached bool) (*PhotoLabel, error) { result := &PhotoLabel{} if find := Db().First(result, "photo_id = ? AND label_id = ?", photoId, labelId); find.RecordNotFound() { - if CachePhotoLabels { + if UsePhotoLabelsCache { photoLabelCache.Set(cacheKey, *result, labelCacheErrorExpiration) } return result, fmt.Errorf("photo-label not found") } else if find.Error != nil { - if CachePhotoLabels { + if UsePhotoLabelsCache { photoLabelCache.Set(cacheKey, *result, labelCacheErrorExpiration) } return result, find.Error - } else if CachePhotoLabels { + } else if UsePhotoLabelsCache { photoLabelCache.SetDefault(cacheKey, *result) } diff --git a/internal/entity/label_cache_test.go b/internal/entity/label_cache_test.go index 572c0135d..f58528fb5 100644 --- a/internal/entity/label_cache_test.go +++ b/internal/entity/label_cache_test.go @@ -41,7 +41,7 @@ func TestFindLabel(t *testing.T) { func TestFindPhotoLabel(t *testing.T) { t.Run("Success", func(t *testing.T) { - if err := WarmPhotoLabelCache(); err != nil { + if err := CachePhotoLabels(); err != nil { t.Fatal(err) } diff --git a/internal/entity/photo.go b/internal/entity/photo.go index ee063330c..1cc757f67 100644 --- a/internal/entity/photo.go +++ b/internal/entity/photo.go @@ -779,9 +779,15 @@ func (m *Photo) AddLabels(labels classify.Labels) { } if photoLabel.HasID() && photoLabel.Uncertainty > classifyLabel.Uncertainty && photoLabel.Uncertainty < 100 { + var labelSrc string + if classifyLabel.Source == "" { + labelSrc = SrcImage + } else { + labelSrc = clean.ShortTypeLower(classifyLabel.Source) + } if err := photoLabel.Updates(map[string]interface{}{ "Uncertainty": classifyLabel.Uncertainty, - "LabelSrc": classifyLabel.Source, + "LabelSrc": labelSrc, }); err != nil { log.Errorf("index: %s", err) } diff --git a/internal/entity/photo_caption.go b/internal/entity/photo_caption.go index 7992b441b..3add2b23b 100644 --- a/internal/entity/photo_caption.go +++ b/internal/entity/photo_caption.go @@ -1,6 +1,11 @@ package entity import ( + "strings" + "time" + + "github.com/dustin/go-humanize/english" + "github.com/photoprism/photoprism/pkg/txt" ) @@ -11,7 +16,7 @@ func (m *Photo) HasCaption() bool { // NoCaption returns true if the photo has no caption. func (m *Photo) NoCaption() bool { - return m.GetCaption() == "" + return strings.TrimSpace(m.GetCaption()) == "" } // GetCaption returns the photo caption, if any. @@ -68,6 +73,8 @@ func (m *Photo) UpdateCaptionLabels() error { return nil } + start := time.Now() + var uncertainty int if captionSrcPriority < SrcPriority[SrcMeta] { @@ -91,5 +98,11 @@ func (m *Photo) UpdateCaptionLabels() error { } } - return Db().Where("label_src = ? AND photo_id = ? AND label_id NOT IN (?)", SrcCaption, m.ID, labelIds).Delete(&PhotoLabel{}).Error + if err := Db().Where("label_src = ? AND photo_id = ? AND label_id NOT IN (?)", SrcCaption, m.ID, labelIds).Delete(&PhotoLabel{}).Error; err != nil { + return err + } + + log.Debugf("index: updated %s [%s]", english.Plural(len(labelIds), "caption label", "caption labels"), time.Since(start)) + + return nil } diff --git a/internal/photoprism/index.go b/internal/photoprism/index.go index 10eeacdb5..a3c8c43ca 100644 --- a/internal/photoprism/index.go +++ b/internal/photoprism/index.go @@ -50,11 +50,6 @@ func NewIndex(conf *config.Config, convert *Convert, files *Files, photos *Photo findLabels: !conf.DisableClassification(), } - // Warm up the cache. - if err := entity.WarmPhotoLabelCache(); err != nil { - log.Warnf("index: %s (cache warm-up)", err) - } - return i } @@ -123,6 +118,13 @@ func (ind *Index) Start(o IndexOptions) (found fs.Done, updated int) { defer ind.files.Done() + // Cache photo labels to reduce number of database queries. + if o.FacesOnly { + // Skip labels cache warmup if only faces are indexed. + } else if err := entity.CachePhotoLabels(); err != nil { + log.Warnf("index: %s (cache photo labels)", err) + } + skipRaw := ind.conf.DisableRaw() ignore := fs.NewIgnoreList(fs.PPIgnoreFilename, true, false) @@ -320,6 +322,7 @@ func (ind *Index) Start(o IndexOptions) (found fs.Done, updated int) { } config.FlushUsageCache() + entity.FlushPhotoLabelCache() runtime.GC() ind.lastRun = entity.Now() diff --git a/internal/photoprism/index_labels.go b/internal/photoprism/index_labels.go index e7e66e108..c34197793 100644 --- a/internal/photoprism/index_labels.go +++ b/internal/photoprism/index_labels.go @@ -13,7 +13,7 @@ import ( ) // Labels classifies a JPEG image and returns matching labels. -func (ind *Index) Labels(file *MediaFile) (labels classify.Labels) { +func (ind *Index) Labels(file *MediaFile, labelSrc string) (labels classify.Labels) { start := time.Now() var err error @@ -42,7 +42,7 @@ func (ind *Index) Labels(file *MediaFile) (labels classify.Labels) { } // Get matching labels from computer vision model. - if labels, err = vision.Labels(thumbnails, media.SrcLocal); err != nil { + if labels, err = vision.Labels(thumbnails, media.SrcLocal, labelSrc); err != nil { log.Debugf("labels: %s in %s", err, clean.Log(file.BaseName())) return labels } diff --git a/internal/photoprism/index_mediafile.go b/internal/photoprism/index_mediafile.go index c25663972..60b099ce1 100644 --- a/internal/photoprism/index_mediafile.go +++ b/internal/photoprism/index_mediafile.go @@ -815,7 +815,7 @@ func (ind *Index) UserMediaFile(m *MediaFile, o IndexOptions, originalName, phot // Classify images with TensorFlow? if ind.findLabels { - labels = ind.Labels(m) + labels = ind.Labels(m, entity.SrcImage) // Append labels from other sources such as face detection. if len(extraLabels) > 0 { diff --git a/internal/workers/vision.go b/internal/workers/vision.go index 3173ddfeb..9ed35f4d0 100644 --- a/internal/workers/vision.go +++ b/internal/workers/vision.go @@ -22,6 +22,7 @@ import ( "github.com/photoprism/photoprism/internal/photoprism" "github.com/photoprism/photoprism/internal/photoprism/get" "github.com/photoprism/photoprism/pkg/clean" + "github.com/photoprism/photoprism/pkg/txt" ) // Vision represents a computer vision worker. @@ -39,8 +40,8 @@ func (w *Vision) originalsPath() string { return w.conf.OriginalsPath() } -// Start runs the specified model types for the photos that match the search query. -func (w *Vision) Start(q string, models []string, customSrc string, force bool) (err error) { +// Start runs the specified model types for photos matching the search query filter string. +func (w *Vision) Start(filter string, models []string, customSrc string, force bool) (err error) { defer func() { if r := recover(); r != nil { err = fmt.Errorf("vision: %s (worker panic)\nstack: %s", r, debug.Stack()) @@ -90,12 +91,18 @@ func (w *Vision) Start(q string, models []string, customSrc string, force bool) for { frm := form.SearchPhotos{ - Query: strings.TrimSpace(q), + Query: filter, Primary: true, Merged: false, Count: limit, Offset: offset, - Order: sortby.Oldest, + Order: sortby.Added, + } + + // Find photos without captions when only + // captions are updated without force flag. + if !updateLabels && !updateNsfw && !force { + frm.Caption = txt.False } photos, _, queryErr := search.Photos(frm) @@ -139,7 +146,7 @@ func (w *Vision) Start(q string, models []string, customSrc string, force bool) // Generate labels. if updateLabels && (len(m.Labels) == 0 || force) { - if labels := ind.Labels(file); len(labels) > 0 { + if labels := ind.Labels(file, dataSrc); len(labels) > 0 { m.AddLabels(labels) changed = true } @@ -161,6 +168,9 @@ func (w *Vision) Start(q string, models []string, customSrc string, force bool) log.Warnf("vision: %s in %s (generate caption)", clean.Error(captionErr), photoName) } else if caption.Text = strings.TrimSpace(caption.Text); caption.Text != "" { m.SetCaption(caption.Text, dataSrc) + if updateErr := m.UpdateCaptionLabels(); updateErr != nil { + log.Warnf("vision: %s in %s (update caption labels)", clean.Error(updateErr), photoName) + } changed = true log.Infof("vision: changed caption of %s to %s", photoName, clean.Log(m.PhotoCaption)) }