From c5a3e86df81442988f08f91fe97dd987fe6f8396 Mon Sep 17 00:00:00 2001 From: Nick Craig-Wood Date: Sat, 1 Feb 2025 11:01:05 +0000 Subject: [PATCH] operations: fix partial name collisions for non --inplace copies In this commit: c63f1865f36058e1 operations: copy: generate stable partial suffix We made the partial suffix for non inplace copies stable. This was a hash based off the file fingerprint. However, given a directory of files which have the same fingerprint the partial suffix collides. On some backends (eg the local backend) the fingerprint is just the size and modification time so files with different contents can collide. The effect of collisions was hash failures on copy when using --transfers > 1. These copies invariably retried successfully which probably explains why this bug hasn't been reported. This fixes the problem by adding the file name to the hash. It also makes sure the hash is always represented as 8 hex bytes for consistency. --- fs/operations/copy.go | 9 ++++++--- fs/operations/copy_test.go | 27 +++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 3 deletions(-) diff --git a/fs/operations/copy.go b/fs/operations/copy.go index 2d42fac07..f7871d292 100644 --- a/fs/operations/copy.go +++ b/fs/operations/copy.go @@ -99,10 +99,13 @@ func (c *copy) checkPartial(ctx context.Context) (remoteForCopy string, inplace // Avoid making the leaf name longer if it's already lengthy to avoid // trouble with file name length limits. - // generate a stable random suffix by hashing the fingerprint - hash := crc32.ChecksumIEEE([]byte(fs.Fingerprint(ctx, c.src, true))) + // generate a stable random suffix by hashing the filename and fingerprint + hasher := crc32.New(crc32.IEEETable) + _, _ = hasher.Write([]byte(c.remote)) + _, _ = hasher.Write([]byte(fs.Fingerprint(ctx, c.src, true))) + hash := hasher.Sum32() - suffix := fmt.Sprintf(".%x%s", hash, c.ci.PartialSuffix) + suffix := fmt.Sprintf(".%08x%s", hash, c.ci.PartialSuffix) base := path.Base(remoteForCopy) if len(base) > 100 { remoteForCopy = TruncateString(remoteForCopy, len(remoteForCopy)-len(suffix)) + suffix diff --git a/fs/operations/copy_test.go b/fs/operations/copy_test.go index 1e8f3ae5e..e717f3bef 100644 --- a/fs/operations/copy_test.go +++ b/fs/operations/copy_test.go @@ -15,6 +15,7 @@ import ( "github.com/rclone/rclone/fs" "github.com/rclone/rclone/fs/accounting" "github.com/rclone/rclone/fs/operations" + "github.com/rclone/rclone/fs/sync" "github.com/rclone/rclone/fstest" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -427,6 +428,32 @@ func TestCopyLongFileName(t *testing.T) { r.CheckRemoteItems(t, file2) } +func TestCopyLongFileNameCollision(t *testing.T) { + ctx := context.Background() + ctx, ci := fs.AddConfig(ctx) + r := fstest.NewRun(t) + + if !r.Fremote.Features().PartialUploads { + t.Skip("Partial uploads not supported") + } + + ci.Inplace = false + ci.Transfers = 4 + + // Write a lot of identical files with long names + files := make([]fstest.Item, 10) + namePrefix := strings.Repeat("file1", 30) + for i := range files { + files[i] = r.WriteFile(fmt.Sprintf("%s%02d", namePrefix, i), "file1 contents", t1) + } + r.CheckLocalItems(t, files...) + + err := sync.CopyDir(ctx, r.Fremote, r.Flocal, false) + require.NoError(t, err) + r.CheckLocalItems(t, files...) + r.CheckRemoteItems(t, files...) +} + func TestCopyFileMaxTransfer(t *testing.T) { ctx := context.Background() ctx, ci := fs.AddConfig(ctx)