mirror of
https://github.com/rclone/rclone.git
synced 2025-12-11 22:14:05 +01:00
bisync: refactor normalization code, fix deltas - fixes #7270
Refactored the case / unicode normalization logic to be much more efficient, and fix the last outstanding issue from #7270. Before this change, we were doing lots of for loops and re-normalizing strings we had already normalized earlier. Now, we leave the normalizing entirely to March and avoid re-transforming later, which seems to make a large difference in terms of performance.
This commit is contained in:
@@ -16,6 +16,7 @@ import (
|
||||
"github.com/rclone/rclone/fs/accounting"
|
||||
"github.com/rclone/rclone/fs/filter"
|
||||
"github.com/rclone/rclone/fs/operations"
|
||||
"golang.org/x/text/unicode/norm"
|
||||
)
|
||||
|
||||
// delta
|
||||
@@ -240,6 +241,9 @@ func (b *bisyncRun) applyDeltas(ctx context.Context, ds1, ds2 *deltaSet) (change
|
||||
|
||||
ctxMove := b.opt.setDryRun(ctx)
|
||||
|
||||
// update AliasMap for deleted files, as march does not know about them
|
||||
b.updateAliases(ctx, ds1, ds2)
|
||||
|
||||
// efficient isDir check
|
||||
// we load the listing just once and store only the dirs
|
||||
dirs1, dirs1Err := b.listDirsOnly(1)
|
||||
@@ -270,14 +274,24 @@ func (b *bisyncRun) applyDeltas(ctx context.Context, ds1, ds2 *deltaSet) (change
|
||||
ctxCheck, filterCheck := filter.AddConfig(ctxNew)
|
||||
|
||||
for _, file := range ds1.sort() {
|
||||
alias := b.aliases.Alias(file)
|
||||
d1 := ds1.deltas[file]
|
||||
if d1.is(deltaOther) {
|
||||
d2 := ds2.deltas[file]
|
||||
d2, in2 := ds2.deltas[file]
|
||||
if !in2 && file != alias {
|
||||
d2 = ds2.deltas[alias]
|
||||
}
|
||||
if d2.is(deltaOther) {
|
||||
if err := filterCheck.AddFile(file); err != nil {
|
||||
fs.Debugf(nil, "Non-critical error adding file to list of potential conflicts to check: %s", err)
|
||||
} else {
|
||||
fs.Debugf(nil, "Added file to list of potential conflicts to check: %s", file)
|
||||
checkit := func(filename string) {
|
||||
if err := filterCheck.AddFile(filename); err != nil {
|
||||
fs.Debugf(nil, "Non-critical error adding file to list of potential conflicts to check: %s", err)
|
||||
} else {
|
||||
fs.Debugf(nil, "Added file to list of potential conflicts to check: %s", filename)
|
||||
}
|
||||
}
|
||||
checkit(file)
|
||||
if file != alias {
|
||||
checkit(alias)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -287,12 +301,17 @@ func (b *bisyncRun) applyDeltas(ctx context.Context, ds1, ds2 *deltaSet) (change
|
||||
matches, err := b.checkconflicts(ctxCheck, filterCheck, b.fs1, b.fs2)
|
||||
|
||||
for _, file := range ds1.sort() {
|
||||
alias := b.aliases.Alias(file)
|
||||
p1 := path1 + file
|
||||
p2 := path2 + file
|
||||
p2 := path2 + alias
|
||||
d1 := ds1.deltas[file]
|
||||
|
||||
if d1.is(deltaOther) {
|
||||
d2, in2 := ds2.deltas[file]
|
||||
// try looking under alternate name
|
||||
if !in2 && file != alias {
|
||||
d2, in2 = ds2.deltas[alias]
|
||||
}
|
||||
if !in2 {
|
||||
b.indent("Path1", p2, "Queue copy to Path2")
|
||||
copy1to2.Add(file)
|
||||
@@ -304,15 +323,26 @@ func (b *bisyncRun) applyDeltas(ctx context.Context, ds1, ds2 *deltaSet) (change
|
||||
b.indent("!WARNING", file, "New or changed in both paths")
|
||||
|
||||
//if files are identical, leave them alone instead of renaming
|
||||
if dirs1.has(file) && dirs2.has(file) {
|
||||
if (dirs1.has(file) || dirs1.has(alias)) && (dirs2.has(file) || dirs2.has(alias)) {
|
||||
fs.Debugf(nil, "This is a directory, not a file. Skipping equality check and will not rename: %s", file)
|
||||
ls1.getPut(file, skippedDirs1)
|
||||
ls2.getPut(file, skippedDirs2)
|
||||
} else {
|
||||
equal := matches.Has(file)
|
||||
if !equal {
|
||||
equal = matches.Has(alias)
|
||||
}
|
||||
if equal {
|
||||
fs.Infof(nil, "Files are equal! Skipping: %s", file)
|
||||
renameSkipped.Add(file)
|
||||
if ciCheck.FixCase && file != alias {
|
||||
// the content is equal but filename still needs to be FixCase'd, so copy1to2
|
||||
// the Path1 version is deemed "correct" in this scenario
|
||||
fs.Infof(alias, "Files are equal but will copy anyway to fix case to %s", file)
|
||||
copy1to2.Add(file)
|
||||
} else {
|
||||
fs.Infof(nil, "Files are equal! Skipping: %s", file)
|
||||
renameSkipped.Add(file)
|
||||
renameSkipped.Add(alias)
|
||||
}
|
||||
} else {
|
||||
fs.Debugf(nil, "Files are NOT equal: %s", file)
|
||||
b.indent("!Path1", p1+"..path1", "Renaming Path1 copy")
|
||||
@@ -330,17 +360,17 @@ func (b *bisyncRun) applyDeltas(ctx context.Context, ds1, ds2 *deltaSet) (change
|
||||
copy1to2.Add(file + "..path1")
|
||||
|
||||
b.indent("!Path2", p2+"..path2", "Renaming Path2 copy")
|
||||
if err = operations.MoveFile(ctxMove, b.fs2, b.fs2, file+"..path2", file); err != nil {
|
||||
err = fmt.Errorf("path2 rename failed for %s: %w", file, err)
|
||||
if err = operations.MoveFile(ctxMove, b.fs2, b.fs2, alias+"..path2", alias); err != nil {
|
||||
err = fmt.Errorf("path2 rename failed for %s: %w", alias, err)
|
||||
return
|
||||
}
|
||||
if b.opt.DryRun {
|
||||
renameSkipped.Add(file)
|
||||
renameSkipped.Add(alias)
|
||||
} else {
|
||||
renamed2.Add(file)
|
||||
renamed2.Add(alias)
|
||||
}
|
||||
b.indent("!Path2", p1+"..path2", "Queue copy to Path1")
|
||||
copy2to1.Add(file + "..path2")
|
||||
copy2to1.Add(alias + "..path2")
|
||||
}
|
||||
}
|
||||
handled.Add(file)
|
||||
@@ -348,6 +378,15 @@ func (b *bisyncRun) applyDeltas(ctx context.Context, ds1, ds2 *deltaSet) (change
|
||||
} else {
|
||||
// Path1 deleted
|
||||
d2, in2 := ds2.deltas[file]
|
||||
// try looking under alternate name
|
||||
fs.Debugf(file, "alias: %s, in2: %v", alias, in2)
|
||||
if !in2 && file != alias {
|
||||
fs.Debugf(file, "looking for alias: %s", alias)
|
||||
d2, in2 = ds2.deltas[alias]
|
||||
if in2 {
|
||||
fs.Debugf(file, "detected alias: %s", alias)
|
||||
}
|
||||
}
|
||||
if !in2 {
|
||||
b.indent("Path2", p2, "Queue delete")
|
||||
delete2.Add(file)
|
||||
@@ -359,15 +398,17 @@ func (b *bisyncRun) applyDeltas(ctx context.Context, ds1, ds2 *deltaSet) (change
|
||||
} else if d2.is(deltaDeleted) {
|
||||
handled.Add(file)
|
||||
deletedonboth.Add(file)
|
||||
deletedonboth.Add(alias)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for _, file := range ds2.sort() {
|
||||
p1 := path1 + file
|
||||
alias := b.aliases.Alias(file)
|
||||
p1 := path1 + alias
|
||||
d2 := ds2.deltas[file]
|
||||
|
||||
if handled.Has(file) {
|
||||
if handled.Has(file) || handled.Has(alias) {
|
||||
continue
|
||||
}
|
||||
if d2.is(deltaOther) {
|
||||
@@ -381,17 +422,11 @@ func (b *bisyncRun) applyDeltas(ctx context.Context, ds1, ds2 *deltaSet) (change
|
||||
}
|
||||
}
|
||||
|
||||
// find alternate names according to normalization settings
|
||||
altNames1to2 := bilib.Names{}
|
||||
altNames2to1 := bilib.Names{}
|
||||
b.findAltNames(ctx, b.fs1, copy2to1, b.newListing1, altNames2to1)
|
||||
b.findAltNames(ctx, b.fs2, copy1to2, b.newListing2, altNames1to2)
|
||||
|
||||
// Do the batch operation
|
||||
if copy2to1.NotEmpty() {
|
||||
changes1 = true
|
||||
b.indent("Path2", "Path1", "Do queued copies to")
|
||||
results2to1, err = b.fastCopy(ctx, b.fs2, b.fs1, copy2to1, "copy2to1", altNames2to1)
|
||||
results2to1, err = b.fastCopy(ctx, b.fs2, b.fs1, copy2to1, "copy2to1")
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
@@ -403,7 +438,7 @@ func (b *bisyncRun) applyDeltas(ctx context.Context, ds1, ds2 *deltaSet) (change
|
||||
if copy1to2.NotEmpty() {
|
||||
changes2 = true
|
||||
b.indent("Path1", "Path2", "Do queued copies to")
|
||||
results1to2, err = b.fastCopy(ctx, b.fs1, b.fs2, copy1to2, "copy1to2", altNames1to2)
|
||||
results1to2, err = b.fastCopy(ctx, b.fs1, b.fs2, copy1to2, "copy1to2")
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
@@ -458,3 +493,65 @@ func (ds *deltaSet) excessDeletes() bool {
|
||||
maxDelete, ds.deleted, ds.oldCount, ds.msg, quotePath(bilib.FsPath(ds.fs)))
|
||||
return true
|
||||
}
|
||||
|
||||
// normally we build the AliasMap from march results,
|
||||
// however, march does not know about deleted files, so need to manually check them for aliases
|
||||
func (b *bisyncRun) updateAliases(ctx context.Context, ds1, ds2 *deltaSet) {
|
||||
ci := fs.GetConfig(ctx)
|
||||
// skip if not needed
|
||||
if ci.NoUnicodeNormalization && !ci.IgnoreCaseSync && !b.fs1.Features().CaseInsensitive && !b.fs2.Features().CaseInsensitive {
|
||||
return
|
||||
}
|
||||
if ds1.deleted < 1 && ds2.deleted < 1 {
|
||||
return
|
||||
}
|
||||
|
||||
fs.Debugf(nil, "Updating AliasMap")
|
||||
|
||||
transform := func(s string) string {
|
||||
if !ci.NoUnicodeNormalization {
|
||||
s = norm.NFC.String(s)
|
||||
}
|
||||
// note: march only checks the dest, but we check both here
|
||||
if ci.IgnoreCaseSync || b.fs1.Features().CaseInsensitive || b.fs2.Features().CaseInsensitive {
|
||||
s = strings.ToLower(s)
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
delMap1 := map[string]string{} // [transformedname]originalname
|
||||
delMap2 := map[string]string{} // [transformedname]originalname
|
||||
fullMap1 := map[string]string{} // [transformedname]originalname
|
||||
fullMap2 := map[string]string{} // [transformedname]originalname
|
||||
|
||||
for _, name := range ls1.list {
|
||||
fullMap1[transform(name)] = name
|
||||
}
|
||||
for _, name := range ls2.list {
|
||||
fullMap2[transform(name)] = name
|
||||
}
|
||||
|
||||
addDeletes := func(ds *deltaSet, delMap, fullMap map[string]string) {
|
||||
for _, file := range ds.sort() {
|
||||
d := ds.deltas[file]
|
||||
if d.is(deltaDeleted) {
|
||||
delMap[transform(file)] = file
|
||||
fullMap[transform(file)] = file
|
||||
}
|
||||
}
|
||||
}
|
||||
addDeletes(ds1, delMap1, fullMap1)
|
||||
addDeletes(ds2, delMap2, fullMap2)
|
||||
|
||||
addAliases := func(delMap, fullMap map[string]string) {
|
||||
for transformedname, name := range delMap {
|
||||
matchedName, found := fullMap[transformedname]
|
||||
if found && name != matchedName {
|
||||
fs.Debugf(name, "adding alias %s", matchedName)
|
||||
b.aliases.Add(name, matchedName)
|
||||
}
|
||||
}
|
||||
}
|
||||
addAliases(delMap1, fullMap2)
|
||||
addAliases(delMap2, fullMap1)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user