diff --git a/docs/content/docs.md b/docs/content/docs.md index dd27f9d44..4032a2633 100644 --- a/docs/content/docs.md +++ b/docs/content/docs.md @@ -852,6 +852,32 @@ the binary units, e.g. 1, 2\*\*10, 2\*\*20, 2\*\*30 respectively. See also [--human-readable](#human-readable). +### --assume-listings-sorted + +This flag can be used when the source and destination backends are +guaranteed to return the items in the same sorted order, in which +case it will speed up the sync. + +Not all backends are guaranteed to return sorted entries (e.g. local) +but s3 should, so an s3 to s3 sync could benefit from this flag. + +If rclone finds an out of order directory entry then it will cancel +the sync with the error: + +```console +out of order listing in source (remote:dir) +``` + +In this case you should remove the `--assume-listings-sorted` flag. + +If you are using `--assume-listings-sorted` then rclone will assume +`--no-unicode-normalization` and it will compare file names in a +case-sensitive way. + +Normally sorting directory entries is not a bottleneck, but it can +become so with syncs of millions of items in a single directory as the +sync will not start until the directory listing is complete. + ## Main options ### --backup-dir string diff --git a/fs/config.go b/fs/config.go index ce0a1df9e..d7a004d17 100644 --- a/fs/config.go +++ b/fs/config.go @@ -566,6 +566,12 @@ var ConfigOptionsInfo = Options{{ Default: "", Help: "HTTP proxy URL.", Groups: "Networking", +}, { + Name: "assume_listings_sorted", + Default: false, + Advanced: true, + Help: "If set will not sort listings. 
If listings aren't sorted the sync may go wrong.", + Groups: "Copy", }} // ConfigInfo is filesystem config options @@ -680,6 +686,7 @@ type ConfigInfo struct { MaxConnections int `config:"max_connections"` NameTransform []string `config:"name_transform"` HTTPProxy string `config:"http_proxy"` + AssumeListingsSorted bool `config:"assume_listings_sorted"` } func init() { diff --git a/fs/list/sorter.go b/fs/list/sorter.go index b2674b294..dab0a5dfd 100644 --- a/fs/list/sorter.go +++ b/fs/list/sorter.go @@ -45,6 +45,7 @@ type Sorter struct { keyFn KeyFn // transform an entry into a sort key cutoff int // number of entries above which we start extsort extSort bool // true if we are ext sorting + noSort bool // true if we aren't sorting inputChan chan string // for sending data to the ext sort outputChan <-chan string // for receiving data from the ext sort errChan <-chan error // for getting errors from the ext sort @@ -78,6 +79,7 @@ func NewSorter(ctx context.Context, f NewObjecter, callback fs.ListRCallback, ke keyFn: keyFn, cutoff: ci.ListCutoff, errs: errcount.New(), + noSort: ci.AssumeListingsSorted, }, nil } @@ -172,6 +174,9 @@ func (ls *Sorter) startExtSort() (err error) { // // Safe to call from concurrent go routines func (ls *Sorter) Add(entries fs.DirEntries) error { + if ls.noSort { + return ls.callback(entries) + } ls.mu.Lock() defer ls.mu.Unlock() if ls.extSort { @@ -267,6 +272,9 @@ func (lh *listHelper) Flush() error { // Send the sorted entries to the callback. 
func (ls *Sorter) Send() (err error) { + if ls.noSort { + return nil + } ls.mu.Lock() defer ls.mu.Unlock() diff --git a/fs/list/sorter_test.go b/fs/list/sorter_test.go index b8db3725e..51bbed46e 100644 --- a/fs/list/sorter_test.go +++ b/fs/list/sorter_test.go @@ -46,6 +46,46 @@ func TestSorter(t *testing.T) { assert.Equal(t, fs.DirEntries(nil), ls.entries) } +func TestSorterAssumeSorted(t *testing.T) { + ctx, ci := fs.AddConfig(context.Background()) + ci.AssumeListingsSorted = true + + gotEntry := 0 + wantEntries := fs.DirEntries{ + mockdir.New("c"), + mockobject.Object("C"), + mockdir.New("b"), + mockobject.Object("B"), + mockdir.New("a"), + mockobject.Object("A"), + } + callback := func(entries fs.DirEntries) error { + for _, entry := range entries { + require.Equal(t, wantEntries[gotEntry], entry) + gotEntry++ + } + return nil + } + ls, err := NewSorter(ctx, nil, callback, nil) + require.NoError(t, err) + + // Test Add + require.NoError(t, ls.Add(wantEntries[0:2])) + require.NoError(t, ls.Add(wantEntries[2:6])) + assert.Equal(t, 6, gotEntry) + assert.Equal(t, fs.DirEntries(nil), ls.entries) + + // Test Send + err = ls.Send() + require.NoError(t, err) + assert.Equal(t, 6, gotEntry) + + // Test Cleanup + ls.CleanUp() + assert.Equal(t, 6, gotEntry) + assert.Equal(t, fs.DirEntries(nil), ls.entries) +} + func TestSorterIdentity(t *testing.T) { ctx := context.Background() cmpFn := func(a, b fs.DirEntry) int { diff --git a/fs/march/march.go b/fs/march/march.go index 841ca4cc7..84354777d 100644 --- a/fs/march/march.go +++ b/fs/march/march.go @@ -13,6 +13,7 @@ import ( "github.com/rclone/rclone/fs" "github.com/rclone/rclone/fs/dirtree" "github.com/rclone/rclone/fs/filter" + "github.com/rclone/rclone/fs/fserrors" "github.com/rclone/rclone/fs/list" "github.com/rclone/rclone/fs/walk" "github.com/rclone/rclone/lib/transform" @@ -330,7 +331,8 @@ func (m *March) matchListings(srcChan, dstChan <-chan fs.DirEntry, srcOnly, dstO continue } else if srcName < srcPrevName { // this 
should never happen since we sort the listings - panic("Out of order listing in source") + // however the user may be using the --assume-listings-sorted flag + return fserrors.FatalError(fmt.Errorf("out of order listing in source (%v)", src.Fs())) } } if dst != nil && dstPrev != nil { @@ -340,7 +342,8 @@ func (m *March) matchListings(srcChan, dstChan <-chan fs.DirEntry, srcOnly, dstO continue } else if dstName < dstPrevName { // this should never happen since we sort the listings - panic("Out of order listing in destination") + // however the user may be using the --assume-listings-sorted flag + return fserrors.FatalError(fmt.Errorf("out of order listing in destination (%v)", dst.Fs())) } } switch {