bisync: refactor normalization code, fix deltas - fixes #7270

Refactored the case / unicode normalization logic to be much more efficient,
 and fix the last outstanding issue from #7270. Before this change, we were
 doing lots of for loops and re-normalizing strings we had already normalized
 earlier. Now, we leave the normalizing entirely to March and avoid
 re-transforming later, which seems to make a large difference in terms of
 performance.
This commit is contained in:
nielash
2023-11-09 05:04:33 -05:00
parent 58fd6d7b94
commit 98f539de8f
11 changed files with 242 additions and 153 deletions

View File

@@ -16,6 +16,7 @@ import (
"github.com/rclone/rclone/fs/accounting"
"github.com/rclone/rclone/fs/filter"
"github.com/rclone/rclone/fs/operations"
"golang.org/x/text/unicode/norm"
)
// delta
@@ -240,6 +241,9 @@ func (b *bisyncRun) applyDeltas(ctx context.Context, ds1, ds2 *deltaSet) (change
ctxMove := b.opt.setDryRun(ctx)
// update AliasMap for deleted files, as march does not know about them
b.updateAliases(ctx, ds1, ds2)
// efficient isDir check
// we load the listing just once and store only the dirs
dirs1, dirs1Err := b.listDirsOnly(1)
@@ -270,14 +274,24 @@ func (b *bisyncRun) applyDeltas(ctx context.Context, ds1, ds2 *deltaSet) (change
ctxCheck, filterCheck := filter.AddConfig(ctxNew)
for _, file := range ds1.sort() {
alias := b.aliases.Alias(file)
d1 := ds1.deltas[file]
if d1.is(deltaOther) {
d2 := ds2.deltas[file]
d2, in2 := ds2.deltas[file]
if !in2 && file != alias {
d2 = ds2.deltas[alias]
}
if d2.is(deltaOther) {
if err := filterCheck.AddFile(file); err != nil {
fs.Debugf(nil, "Non-critical error adding file to list of potential conflicts to check: %s", err)
} else {
fs.Debugf(nil, "Added file to list of potential conflicts to check: %s", file)
checkit := func(filename string) {
if err := filterCheck.AddFile(filename); err != nil {
fs.Debugf(nil, "Non-critical error adding file to list of potential conflicts to check: %s", err)
} else {
fs.Debugf(nil, "Added file to list of potential conflicts to check: %s", filename)
}
}
checkit(file)
if file != alias {
checkit(alias)
}
}
}
@@ -287,12 +301,17 @@ func (b *bisyncRun) applyDeltas(ctx context.Context, ds1, ds2 *deltaSet) (change
matches, err := b.checkconflicts(ctxCheck, filterCheck, b.fs1, b.fs2)
for _, file := range ds1.sort() {
alias := b.aliases.Alias(file)
p1 := path1 + file
p2 := path2 + file
p2 := path2 + alias
d1 := ds1.deltas[file]
if d1.is(deltaOther) {
d2, in2 := ds2.deltas[file]
// try looking under alternate name
if !in2 && file != alias {
d2, in2 = ds2.deltas[alias]
}
if !in2 {
b.indent("Path1", p2, "Queue copy to Path2")
copy1to2.Add(file)
@@ -304,15 +323,26 @@ func (b *bisyncRun) applyDeltas(ctx context.Context, ds1, ds2 *deltaSet) (change
b.indent("!WARNING", file, "New or changed in both paths")
//if files are identical, leave them alone instead of renaming
if dirs1.has(file) && dirs2.has(file) {
if (dirs1.has(file) || dirs1.has(alias)) && (dirs2.has(file) || dirs2.has(alias)) {
fs.Debugf(nil, "This is a directory, not a file. Skipping equality check and will not rename: %s", file)
ls1.getPut(file, skippedDirs1)
ls2.getPut(file, skippedDirs2)
} else {
equal := matches.Has(file)
if !equal {
equal = matches.Has(alias)
}
if equal {
fs.Infof(nil, "Files are equal! Skipping: %s", file)
renameSkipped.Add(file)
if ciCheck.FixCase && file != alias {
// the content is equal but filename still needs to be FixCase'd, so copy1to2
// the Path1 version is deemed "correct" in this scenario
fs.Infof(alias, "Files are equal but will copy anyway to fix case to %s", file)
copy1to2.Add(file)
} else {
fs.Infof(nil, "Files are equal! Skipping: %s", file)
renameSkipped.Add(file)
renameSkipped.Add(alias)
}
} else {
fs.Debugf(nil, "Files are NOT equal: %s", file)
b.indent("!Path1", p1+"..path1", "Renaming Path1 copy")
@@ -330,17 +360,17 @@ func (b *bisyncRun) applyDeltas(ctx context.Context, ds1, ds2 *deltaSet) (change
copy1to2.Add(file + "..path1")
b.indent("!Path2", p2+"..path2", "Renaming Path2 copy")
if err = operations.MoveFile(ctxMove, b.fs2, b.fs2, file+"..path2", file); err != nil {
err = fmt.Errorf("path2 rename failed for %s: %w", file, err)
if err = operations.MoveFile(ctxMove, b.fs2, b.fs2, alias+"..path2", alias); err != nil {
err = fmt.Errorf("path2 rename failed for %s: %w", alias, err)
return
}
if b.opt.DryRun {
renameSkipped.Add(file)
renameSkipped.Add(alias)
} else {
renamed2.Add(file)
renamed2.Add(alias)
}
b.indent("!Path2", p1+"..path2", "Queue copy to Path1")
copy2to1.Add(file + "..path2")
copy2to1.Add(alias + "..path2")
}
}
handled.Add(file)
@@ -348,6 +378,15 @@ func (b *bisyncRun) applyDeltas(ctx context.Context, ds1, ds2 *deltaSet) (change
} else {
// Path1 deleted
d2, in2 := ds2.deltas[file]
// try looking under alternate name
fs.Debugf(file, "alias: %s, in2: %v", alias, in2)
if !in2 && file != alias {
fs.Debugf(file, "looking for alias: %s", alias)
d2, in2 = ds2.deltas[alias]
if in2 {
fs.Debugf(file, "detected alias: %s", alias)
}
}
if !in2 {
b.indent("Path2", p2, "Queue delete")
delete2.Add(file)
@@ -359,15 +398,17 @@ func (b *bisyncRun) applyDeltas(ctx context.Context, ds1, ds2 *deltaSet) (change
} else if d2.is(deltaDeleted) {
handled.Add(file)
deletedonboth.Add(file)
deletedonboth.Add(alias)
}
}
}
for _, file := range ds2.sort() {
p1 := path1 + file
alias := b.aliases.Alias(file)
p1 := path1 + alias
d2 := ds2.deltas[file]
if handled.Has(file) {
if handled.Has(file) || handled.Has(alias) {
continue
}
if d2.is(deltaOther) {
@@ -381,17 +422,11 @@ func (b *bisyncRun) applyDeltas(ctx context.Context, ds1, ds2 *deltaSet) (change
}
}
// find alternate names according to normalization settings
altNames1to2 := bilib.Names{}
altNames2to1 := bilib.Names{}
b.findAltNames(ctx, b.fs1, copy2to1, b.newListing1, altNames2to1)
b.findAltNames(ctx, b.fs2, copy1to2, b.newListing2, altNames1to2)
// Do the batch operation
if copy2to1.NotEmpty() {
changes1 = true
b.indent("Path2", "Path1", "Do queued copies to")
results2to1, err = b.fastCopy(ctx, b.fs2, b.fs1, copy2to1, "copy2to1", altNames2to1)
results2to1, err = b.fastCopy(ctx, b.fs2, b.fs1, copy2to1, "copy2to1")
if err != nil {
return
}
@@ -403,7 +438,7 @@ func (b *bisyncRun) applyDeltas(ctx context.Context, ds1, ds2 *deltaSet) (change
if copy1to2.NotEmpty() {
changes2 = true
b.indent("Path1", "Path2", "Do queued copies to")
results1to2, err = b.fastCopy(ctx, b.fs1, b.fs2, copy1to2, "copy1to2", altNames1to2)
results1to2, err = b.fastCopy(ctx, b.fs1, b.fs2, copy1to2, "copy1to2")
if err != nil {
return
}
@@ -458,3 +493,65 @@ func (ds *deltaSet) excessDeletes() bool {
maxDelete, ds.deleted, ds.oldCount, ds.msg, quotePath(bilib.FsPath(ds.fs)))
return true
}
// normally we build the AliasMap from march results,
// however, march does not know about deleted files, so need to manually check them for aliases
func (b *bisyncRun) updateAliases(ctx context.Context, ds1, ds2 *deltaSet) {
ci := fs.GetConfig(ctx)
// skip if not needed
if ci.NoUnicodeNormalization && !ci.IgnoreCaseSync && !b.fs1.Features().CaseInsensitive && !b.fs2.Features().CaseInsensitive {
return
}
if ds1.deleted < 1 && ds2.deleted < 1 {
return
}
fs.Debugf(nil, "Updating AliasMap")
transform := func(s string) string {
if !ci.NoUnicodeNormalization {
s = norm.NFC.String(s)
}
// note: march only checks the dest, but we check both here
if ci.IgnoreCaseSync || b.fs1.Features().CaseInsensitive || b.fs2.Features().CaseInsensitive {
s = strings.ToLower(s)
}
return s
}
delMap1 := map[string]string{} // [transformedname]originalname
delMap2 := map[string]string{} // [transformedname]originalname
fullMap1 := map[string]string{} // [transformedname]originalname
fullMap2 := map[string]string{} // [transformedname]originalname
for _, name := range ls1.list {
fullMap1[transform(name)] = name
}
for _, name := range ls2.list {
fullMap2[transform(name)] = name
}
addDeletes := func(ds *deltaSet, delMap, fullMap map[string]string) {
for _, file := range ds.sort() {
d := ds.deltas[file]
if d.is(deltaDeleted) {
delMap[transform(file)] = file
fullMap[transform(file)] = file
}
}
}
addDeletes(ds1, delMap1, fullMap1)
addDeletes(ds2, delMap2, fullMap2)
addAliases := func(delMap, fullMap map[string]string) {
for transformedname, name := range delMap {
matchedName, found := fullMap[transformedname]
if found && name != matchedName {
fs.Debugf(name, "adding alias %s", matchedName)
b.aliases.Add(name, matchedName)
}
}
}
addAliases(delMap1, fullMap2)
addAliases(delMap2, fullMap1)
}