Indexer: Merge identical photos (stack files) #576

This commit is contained in:
Michael Mayer
2020-12-04 19:51:51 +01:00
parent d09f22b202
commit 902aacece9
27 changed files with 316 additions and 34 deletions

View File

@@ -48,7 +48,8 @@ func StartIndexing(router *gin.RouterGroup) {
indOpt := photoprism.IndexOptions{
Rescan: f.Rescan,
Convert: f.Convert && conf.SidecarWritable(),
Convert: conf.Settings().Index.Convert && conf.SidecarWritable(),
Stack: conf.Settings().Index.Stack,
Path: filepath.Clean(f.Path),
}

View File

@@ -131,7 +131,7 @@ func PhotoUnstack(router *gin.RouterGroup) {
ind := service.Index()
// Index new, unstacked file.
if res := ind.File(mediaFile.FileName()); res.Failed() {
if res := ind.SingleFile(mediaFile.FileName()); res.Failed() {
log.Errorf("photo: %s (unstack %s)", res.Err, txt.Quote(baseName))
AbortSaveFailed(c)
return
@@ -148,7 +148,7 @@ func PhotoUnstack(router *gin.RouterGroup) {
oldPrimaryName := photoprism.FileName(oldPrimary.FileRoot, oldPrimary.FileName)
// Re-index old, existing primary file.
if res := ind.File(oldPrimaryName); res.Failed() {
if res := ind.SingleFile(oldPrimaryName); res.Failed() {
log.Errorf("photo: %s (unstack %s)", res.Err, txt.Quote(baseName))
AbortSaveFailed(c)
return

View File

@@ -63,6 +63,7 @@ func indexAction(ctx *cli.Context) error {
Path: subPath,
Rescan: ctx.Bool("all"),
Convert: conf.Settings().Index.Convert && conf.SidecarWritable(),
Stack: conf.Settings().Index.Stack,
}
indexed := ind.Start(indOpt)

View File

@@ -33,6 +33,7 @@ type IndexSettings struct {
Convert bool `json:"convert" yaml:"convert"`
Rescan bool `json:"rescan" yaml:"rescan"`
Sequences bool `json:"sequences" yaml:"sequences"`
Stack bool `json:"stack" yaml:"stack"`
}
// ImportSettings represents import settings.
@@ -107,6 +108,7 @@ func NewSettings() *Settings {
Rescan: false,
Convert: true,
Sequences: true,
Stack: true,
},
}
}

View File

@@ -28,3 +28,4 @@ index:
convert: true
rescan: false
sequences: true
stack: true

View File

@@ -4,6 +4,7 @@ import (
"errors"
"fmt"
"path"
"strconv"
"strings"
"time"
@@ -30,6 +31,11 @@ func (m Photos) UIDs() []string {
return result
}
// MapKey builds the lookup key used for indexing photos by time and location.
// The key is the Unix timestamp of takenAt encoded in base 36, joined with
// cellId by a slash.
func MapKey(takenAt time.Time, cellId string) string {
	timestamp := strconv.FormatInt(takenAt.Unix(), 36)

	return path.Join(timestamp, cellId)
}
// Photo represents a photo, all its properties, and link to all its images and sidecar files.
type Photo struct {
ID uint `gorm:"primary_key" yaml:"-"`
@@ -48,6 +54,7 @@ type Photo struct {
PhotoName string `gorm:"type:VARBINARY(255);" json:"Name" yaml:"-"`
OriginalName string `gorm:"type:VARBINARY(768);" json:"OriginalName" yaml:"OriginalName,omitempty"`
PhotoFavorite bool `json:"Favorite" yaml:"Favorite,omitempty"`
PhotoSingle bool `json:"Single" yaml:"Single,omitempty"`
PhotoPrivate bool `json:"Private" yaml:"Private,omitempty"`
PhotoScan bool `json:"Scan" yaml:"Scan,omitempty"`
PhotoPanorama bool `json:"Panorama" yaml:"Panorama,omitempty"`
@@ -998,3 +1005,39 @@ func (m *Photo) Links() Links {
func (m *Photo) PrimaryFile() (File, error) {
return PrimaryFile(m.PhotoUID)
}
// MapKey returns the indexing key derived from the photo's TakenAt time and CellID.
func (m *Photo) MapKey() string {
	key := MapKey(m.TakenAt, m.CellID)

	return key
}
// Stack merges the photo with identical ones.
//
// Other photos count as identical when they have the same taken_at, cell_id
// and camera_serial values as this photo and are not flagged as photo_single.
// Their files, keywords, labels and album entries are re-assigned to this
// photo, after which each of them gets DeletedAt set and PhotoQuality set to -1.
// Finally, Optimize is called on this photo.
//
// Returns the photos that were found as identical, plus the first error
// returned by the query, the file re-assignment, the photo update or Optimize.
//
// NOTE(review): this compares cell_id, while query.IdenticalPhotos compares
// photo_lat and photo_lng — confirm both are meant to select the same photos.
func (m *Photo) Stack() (identical Photos, err error) {
// Find all other photos sharing this photo's time, cell and camera serial.
if err := Db().
Where("id <> ?", m.ID).
Where("taken_at = ?", m.TakenAt).
Where("cell_id = ?", m.CellID).
Where("camera_serial = ?", m.CameraSerial).
Where("photo_single = 0").
Find(&identical).Error; err != nil {
return identical, err
}
for _, photo := range identical {
// Move the files of the identical photo to this photo.
if err := UnscopedDb().Model(File{}).Where("photo_id = ?", photo.ID).Updates(File{PhotoID: m.ID, PhotoUID: m.PhotoUID}).Error; err != nil {
return identical, err
}
// Move keywords, labels and album entries as well. The results of these
// three updates are not checked.
// NOTE(review): presumably best-effort because rows may already exist for
// this photo — confirm before adding error handling.
UnscopedDb().Model(PhotoKeyword{}).Where("photo_id = ?", photo.ID).Updates(PhotoKeyword{PhotoID: m.ID})
UnscopedDb().Model(PhotoLabel{}).Where("photo_id = ?", photo.ID).Updates(PhotoLabel{PhotoID: m.ID})
UnscopedDb().Model(PhotoAlbum{}).Where("photo_uid = ?", photo.PhotoUID).Updates(PhotoAlbum{PhotoUID: m.PhotoUID})
// Flag the merged photo as deleted and reset its quality score.
if err := photo.Updates(map[string]interface{}{"DeletedAt": Timestamp(), "PhotoQuality": -1}); err != nil {
return identical, err
}
}
_, err = m.Optimize()
return identical, err
}

View File

@@ -1,7 +1,6 @@
package form
type IndexOptions struct {
Path string `json:"path"`
Convert bool `json:"convert"`
Rescan bool `json:"rescan"`
Path string `json:"path"`
Rescan bool `json:"rescan"`
}

View File

@@ -4,6 +4,8 @@ import (
"math"
"time"
"github.com/photoprism/photoprism/pkg/s2"
"github.com/photoprism/photoprism/pkg/rnd"
)
@@ -115,3 +117,8 @@ func (data Data) ActualHeight() int {
return data.Height
}
// CellID returns the prefixed S2 token for the Lat and Lng coordinates of the data.
func (data Data) CellID() string {
	lat, lng := float64(data.Lat), float64(data.Lng)

	return s2.PrefixedToken(lat, lng)
}

View File

@@ -16,7 +16,7 @@ type Files struct {
mutex sync.RWMutex
}
// NewFiles returns a new Files instance pointer.
// NewFiles returns a new Files instance.
func NewFiles() *Files {
m := &Files{
files: make(query.FileMap),
@@ -42,7 +42,7 @@ func (m *Files) Init() error {
files, err := query.IndexedFiles()
if err != nil {
return fmt.Errorf("%s (query indexed files)", err.Error())
return fmt.Errorf("%s (find indexed files)", err.Error())
} else {
m.files = files
m.count = len(files)

View File

@@ -16,7 +16,7 @@ func TestNewImport(t *testing.T) {
nd := nsfw.New(conf.NSFWModelPath())
convert := NewConvert(conf)
ind := NewIndex(conf, tf, nd, convert, NewFiles())
ind := NewIndex(conf, tf, nd, convert, NewFiles(), NewPhotos())
imp := NewImport(conf, ind, convert)
assert.IsType(t, &Import{}, imp)
@@ -31,7 +31,7 @@ func TestImport_DestinationFilename(t *testing.T) {
nd := nsfw.New(conf.NSFWModelPath())
convert := NewConvert(conf)
ind := NewIndex(conf, tf, nd, convert, NewFiles())
ind := NewIndex(conf, tf, nd, convert, NewFiles(), NewPhotos())
imp := NewImport(conf, ind, convert)
@@ -63,7 +63,7 @@ func TestImport_Start(t *testing.T) {
nd := nsfw.New(conf.NSFWModelPath())
convert := NewConvert(conf)
ind := NewIndex(conf, tf, nd, convert, NewFiles())
ind := NewIndex(conf, tf, nd, convert, NewFiles(), NewPhotos())
imp := NewImport(conf, ind, convert)

View File

@@ -9,6 +9,8 @@ import (
"strings"
"sync"
"github.com/photoprism/photoprism/internal/query"
"github.com/karrick/godirwalk"
"github.com/photoprism/photoprism/internal/classify"
"github.com/photoprism/photoprism/internal/config"
@@ -27,16 +29,18 @@ type Index struct {
nsfwDetector *nsfw.Detector
convert *Convert
files *Files
photos *Photos
}
// NewIndex returns a new indexer and expects its dependencies as arguments.
func NewIndex(conf *config.Config, tensorFlow *classify.TensorFlow, nsfwDetector *nsfw.Detector, convert *Convert, files *Files) *Index {
func NewIndex(conf *config.Config, tensorFlow *classify.TensorFlow, nsfwDetector *nsfw.Detector, convert *Convert, files *Files, photos *Photos) *Index {
i := &Index{
conf: conf,
tensorFlow: tensorFlow,
nsfwDetector: nsfwDetector,
convert: convert,
files: files,
photos: photos,
}
return i
@@ -217,6 +221,12 @@ func (ind *Index) Start(opt IndexOptions) fs.Done {
log.Error(err.Error())
}
if opt.Stack {
if err := ind.StackIdenticalPhotos(); err != nil {
log.Errorf("index: %s", err)
}
}
if filesIndexed > 0 {
if err := entity.UpdatePhotoCounts(); err != nil {
log.Errorf("index: %s", err)
@@ -230,8 +240,29 @@ func (ind *Index) Start(opt IndexOptions) fs.Done {
return done
}
// StackIdenticalPhotos stacks files that belong to the same photo.
//
// It fetches the candidates from query.IdenticalPhotos and calls Stack on each
// of them. Errors returned by Stack are logged and do not abort the run; only
// a failure of the initial query is returned.
//
// The candidate list is a snapshot taken before any merge happens. A photo that
// has been merged into an earlier candidate during this run is therefore
// skipped, as calling Stack on its stale copy could move the files onto a
// photo that is already flagged as deleted.
func (ind *Index) StackIdenticalPhotos() error {
	photos, err := query.IdenticalPhotos()

	if err != nil {
		return err
	}

	// IDs of photos that were merged into another photo during this run.
	stacked := make(map[uint]struct{})

	for _, photo := range photos {
		if _, done := stacked[photo.ID]; done {
			continue
		}

		merged, err := photo.Stack()

		if err != nil {
			log.Errorf("index: %s", err)
			continue
		}

		// Nothing was merged, so there is nothing to report.
		if len(merged) == 0 {
			continue
		}

		for _, dup := range merged {
			stacked[dup.ID] = struct{}{}
		}

		log.Infof("index: merged photo uid %s with %s", photo.PhotoUID, merged.UIDs())

		event.EntitiesUpdated("photos", []entity.Photo{photo})
		event.EntitiesDeleted("photos", merged.UIDs())
	}

	return nil
}
// File indexes a single file and returns the result.
func (ind *Index) File(name string) (result IndexResult) {
func (ind *Index) SingleFile(name string) (result IndexResult) {
file, err := NewMediaFile(name)
if err != nil {
@@ -250,5 +281,5 @@ func (ind *Index) File(name string) (result IndexResult) {
return result
}
return IndexRelated(related, ind, IndexOptionsAll())
return IndexRelated(related, ind, IndexOptionsSingle())
}

View File

@@ -160,22 +160,25 @@ func (ind *Index) MediaFile(m *MediaFile, o IndexOptions, originalName string) (
if !fileExists {
photoQuery = entity.UnscopedDb().First(&photo, "photo_path = ? AND photo_name = ?", filePath, fileBase)
// Try to find existing photo by exact time and location.
if photoQuery.Error != nil && m.MetaData().HasTimeAndPlace() {
metaData = m.MetaData()
photoQuery = entity.UnscopedDb().First(&photo, "photo_lat = ? AND photo_lng = ? AND taken_at = ? AND camera_serial = ?", metaData.Lat, metaData.Lng, metaData.TakenAt, metaData.CameraSerial)
// Add file to existing photo (file stack)?
if o.Stack {
// Try to find existing photo by exact time and location.
if photoQuery.Error != nil && m.MetaData().HasTimeAndPlace() {
metaData = m.MetaData()
photoQuery = entity.UnscopedDb().First(&photo, "photo_lat = ? AND photo_lng = ? AND taken_at = ? AND camera_serial = ?", metaData.Lat, metaData.Lng, metaData.TakenAt, metaData.CameraSerial)
if photoQuery.Error == nil {
fileStacked = true
if photoQuery.Error == nil {
fileStacked = true
}
}
}
// Try to find existing photo by unique image id.
if photoQuery.Error != nil && m.MetaData().HasDocumentID() {
photoQuery = entity.UnscopedDb().First(&photo, "uuid = ?", m.MetaData().DocumentID)
// Try to find existing photo by unique image id.
if photoQuery.Error != nil && m.MetaData().HasDocumentID() {
photoQuery = entity.UnscopedDb().First(&photo, "uuid = ?", m.MetaData().DocumentID)
if photoQuery.Error == nil {
fileStacked = true
if photoQuery.Error == nil {
fileStacked = true
}
}
}
} else {
@@ -210,6 +213,7 @@ func (ind *Index) MediaFile(m *MediaFile, o IndexOptions, originalName string) (
// Try to recover photo metadata from backup if not exists.
if !photoExists {
photo.PhotoQuality = -1
photo.PhotoSingle = !o.Stack
if yamlName := fs.TypeYaml.FindFirst(m.FileName(), []string{Config().SidecarPath(), fs.HiddenPath}, Config().OriginalsPath(), stripSequence); yamlName != "" {
if err := photo.LoadFromYaml(yamlName); err != nil {

View File

@@ -23,7 +23,7 @@ func TestIndex_MediaFile(t *testing.T) {
nd := nsfw.New(conf.NSFWModelPath())
convert := NewConvert(conf)
ind := NewIndex(conf, tf, nd, convert, NewFiles())
ind := NewIndex(conf, tf, nd, convert, NewFiles(), NewPhotos())
indexOpt := IndexOptionsAll()
mediaFile, err := NewMediaFile(conf.ExamplesPath() + "/blue-go-video.mp4")
if err != nil {
@@ -44,7 +44,7 @@ func TestIndex_MediaFile(t *testing.T) {
nd := nsfw.New(conf.NSFWModelPath())
convert := NewConvert(conf)
ind := NewIndex(conf, tf, nd, convert, NewFiles())
ind := NewIndex(conf, tf, nd, convert, NewFiles(), NewPhotos())
indexOpt := IndexOptionsAll()
result := ind.MediaFile(nil, indexOpt, "blue-go-video.mp4")

View File

@@ -4,6 +4,7 @@ type IndexOptions struct {
Path string
Rescan bool
Convert bool
Stack bool
}
func (o *IndexOptions) SkipUnchanged() bool {
@@ -16,6 +17,19 @@ func IndexOptionsAll() IndexOptions {
Path: "/",
Rescan: true,
Convert: true,
Stack: true,
}
return result
}
// IndexOptionsSingle returns new index options for unstacked, single files.
func IndexOptionsSingle() IndexOptions {
result := IndexOptions{
Path: "/",
Rescan: true,
Convert: true,
Stack: false,
}
return result

View File

@@ -54,7 +54,7 @@ func TestIndexRelated(t *testing.T) {
nd := nsfw.New(conf.NSFWModelPath())
convert := NewConvert(conf)
ind := NewIndex(conf, tf, nd, convert, NewFiles())
ind := NewIndex(conf, tf, nd, convert, NewFiles(), NewPhotos())
opt := IndexOptionsAll()
result := IndexRelated(related, ind, opt)

View File

@@ -23,7 +23,7 @@ func TestIndex_Start(t *testing.T) {
nd := nsfw.New(conf.NSFWModelPath())
convert := NewConvert(conf)
ind := NewIndex(conf, tf, nd, convert, NewFiles())
ind := NewIndex(conf, tf, nd, convert, NewFiles(), NewPhotos())
imp := NewImport(conf, ind, convert)
opt := ImportOptionsMove(conf.ImportPath())
@@ -48,8 +48,8 @@ func TestIndex_File(t *testing.T) {
nd := nsfw.New(conf.NSFWModelPath())
convert := NewConvert(conf)
ind := NewIndex(conf, tf, nd, convert, NewFiles())
ind := NewIndex(conf, tf, nd, convert, NewFiles(), NewPhotos())
err := ind.File("xxx")
err := ind.SingleFile("xxx")
assert.Equal(t, IndexFailed, err.Status)
}

View File

@@ -0,0 +1,68 @@
package photoprism
import (
"fmt"
"sync"
"time"
"github.com/photoprism/photoprism/internal/entity"
"github.com/photoprism/photoprism/internal/query"
)
// Photos represents a photo id lookup table, keyed by capture time and S2 cell id.
type Photos struct {
// count is the number of entries, as set by Init.
count int
// photos maps lookup keys (see entity.MapKey) to photo ids.
photos query.PhotoMap
// mutex is locked by Init, Remove and Find while they access the table.
mutex sync.RWMutex
}
// NewPhotos creates a Photos lookup table with an empty, initialized map.
func NewPhotos() *Photos {
	return &Photos{photos: make(query.PhotoMap)}
}
// Init loads the lookup table from the database unless it already has entries.
// If the table is not empty, only the entry count is refreshed.
func (m *Photos) Init() error {
	m.mutex.Lock()
	defer m.mutex.Unlock()

	// Already populated: keep the existing entries.
	if n := len(m.photos); n > 0 {
		m.count = n
		return nil
	}

	indexed, err := query.IndexedPhotos()

	if err != nil {
		return fmt.Errorf("%s (find indexed photos)", err.Error())
	}

	m.photos = indexed
	m.count = len(indexed)

	return nil
}
// Remove deletes the entry for the given time and cell id from the lookup table.
func (m *Photos) Remove(takenAt time.Time, cellId string) {
	k := entity.MapKey(takenAt, cellId)

	m.mutex.Lock()
	delete(m.photos, k)
	m.mutex.Unlock()
}
// Find returns the photo ID for a time and cell id, or 0 if the lookup table
// has no entry for that key.
func (m *Photos) Find(takenAt time.Time, cellId string) uint {
	key := entity.MapKey(takenAt, cellId)

	// A lookup does not modify the table, so a shared read lock is sufficient
	// and lets concurrent lookups proceed in parallel.
	m.mutex.RLock()
	defer m.mutex.RUnlock()

	return m.photos[key]
}

View File

@@ -0,0 +1,13 @@
package photoprism
import (
"testing"
)
// TestPhotos_Init checks that a new lookup table can be initialized without error.
func TestPhotos_Init(t *testing.T) {
	err := NewPhotos().Init()

	if err != nil {
		t.Fatal(err)
	}
}

View File

@@ -32,7 +32,7 @@ func TestResample_Start(t *testing.T) {
nd := nsfw.New(conf.NSFWModelPath())
convert := NewConvert(conf)
ind := NewIndex(conf, tf, nd, convert, NewFiles())
ind := NewIndex(conf, tf, nd, convert, NewFiles(), NewPhotos())
imp := NewImport(conf, ind, convert)
opt := ImportOptionsMove(conf.ImportPath())

View File

@@ -109,3 +109,22 @@ func PhotosCheck(limit int, offset int) (entities entity.Photos, err error) {
return entities, err
}
// IdenticalPhotos returns photos sharing the same exact time, location and camera serial.
//
// The photos table is joined with itself: a photo is returned if at least one
// other photo with a higher id has equal photo_lat, photo_lng, taken_at and
// camera_serial values. Both photos must have photo_single = 0 and must not be
// flagged as deleted. Results are grouped by photos.id, so each photo is
// returned once, and the matching photo with the higher id is not part of the
// result unless it has a match with an even higher id itself.
//
// NOTE(review): Photo.Stack selects its duplicates by cell_id instead of
// photo_lat and photo_lng — confirm that both criteria are meant to agree.
// NOTE(review): photos without coordinates presumably share the same
// photo_lat and photo_lng values and would match each other here if time and
// camera serial are equal — confirm this is intended.
func IdenticalPhotos() (entities entity.Photos, err error) {
err = Db().Table("photos").
Select("photos.*").
Joins(`JOIN photos dup ON photos.id < dup.id
AND photos.photo_lat = dup.photo_lat
AND photos.photo_lng = dup.photo_lng
AND photos.taken_at = dup.taken_at
AND photos.camera_serial = dup.camera_serial`).
Where(`photos.photo_single = 0
AND dup.photo_single = 0
AND photos.deleted_at IS NULL
AND dup.deleted_at IS NULL`).
Group("photos.id").
Find(&entities).Error
return entities, err
}

View File

@@ -0,0 +1,32 @@
package query
import (
"path"
"strconv"
"time"
)
// PhotoMap maps lookup keys, built from a photo's capture time and cell id, to photo ids.
type PhotoMap map[string]uint
// IndexedPhotos returns a map of all photos that are not flagged as deleted.
// Each key is the base 36 Unix timestamp of taken_at joined with the cell id
// by a slash; the value is the photo id.
//
// If several photos share the same time and cell id, only one of their ids is
// kept. On error, an empty (non-nil) map is returned together with the error.
func IndexedPhotos() (result PhotoMap, err error) {
	// Photo holds only the columns required to build the lookup table.
	type Photo struct {
		ID      uint
		TakenAt time.Time
		CellID  string
	}

	var rows []Photo

	if err := UnscopedDb().Raw("SELECT id, taken_at, cell_id FROM photos WHERE deleted_at IS NULL").Scan(&rows).Error; err != nil {
		return make(PhotoMap), err
	}

	// The number of entries is known, so allocate the map once.
	result = make(PhotoMap, len(rows))

	for _, row := range rows {
		key := path.Join(strconv.FormatInt(row.TakenAt.Unix(), 36), row.CellID)
		result[key] = row.ID
	}

	return result, nil
}

View File

@@ -0,0 +1,15 @@
package query
import (
"testing"
)
// TestIndexedPhotos checks that the lookup table can be queried and logs its contents.
func TestIndexedPhotos(t *testing.T) {
	photos, err := IndexedPhotos()

	if err != nil {
		t.Fatal(err)
	}

	t.Logf("INDEXED Photos: %#v", photos)
}

View File

@@ -32,6 +32,7 @@ type PhotoResult struct {
PhotoDay int `json:"Day"`
PhotoCountry string `json:"Country"`
PhotoFavorite bool `json:"Favorite"`
PhotoSingle bool `json:"Single"`
PhotoPrivate bool `json:"Private"`
PhotoIso int `json:"Iso"`
PhotoFocalLength int `json:"FocalLength"`

View File

@@ -78,3 +78,14 @@ func TestPhotosCheck(t *testing.T) {
}
assert.IsType(t, entity.Photos{}, result)
}
// TestIdenticalPhotos checks that the query runs and returns the expected type.
func TestIdenticalPhotos(t *testing.T) {
	found, err := IdenticalPhotos()

	if err != nil {
		t.Fatal(err)
	}

	assert.IsType(t, entity.Photos{}, found)

	// t.Logf("%+v", found)
}

View File

@@ -9,7 +9,7 @@ import (
var onceIndex sync.Once
func initIndex() {
services.Index = photoprism.NewIndex(Config(), Classify(), NsfwDetector(), Convert(), Files())
services.Index = photoprism.NewIndex(Config(), Classify(), NsfwDetector(), Convert(), Files(), Photos())
}
func Index() *photoprism.Index {

View File

@@ -0,0 +1,19 @@
package service
import (
"sync"
"github.com/photoprism/photoprism/internal/photoprism"
)
var oncePhotos sync.Once
// initPhotos creates the shared photo lookup table and stores it in services.
// It is passed to oncePhotos.Do by Photos.
func initPhotos() {
services.Photos = photoprism.NewPhotos()
}
// Photos returns the shared photo lookup table, creating it on first use.
func Photos() *photoprism.Photos {
// sync.Once ensures initPhotos runs at most once.
oncePhotos.Do(initPhotos)
return services.Photos
}

View File

@@ -19,6 +19,7 @@ var services struct {
Classify *classify.TensorFlow
Convert *photoprism.Convert
Files *photoprism.Files
Photos *photoprism.Photos
Import *photoprism.Import
Index *photoprism.Index
Moments *photoprism.Moments