march: Implement --assume-listings-sorted to speed up sync starting
Some checks failed
build / windows (push) Has been cancelled
build / other_os (push) Has been cancelled
build / mac_amd64 (push) Has been cancelled
build / mac_arm64 (push) Has been cancelled
build / linux (push) Has been cancelled
build / go1.24 (push) Has been cancelled
build / linux_386 (push) Has been cancelled
build / lint (push) Has been cancelled
build / android-all (push) Has been cancelled
Build & Push Docker Images / Build Docker Image for linux/386 (push) Has been cancelled
Build & Push Docker Images / Build Docker Image for linux/amd64 (push) Has been cancelled
Build & Push Docker Images / Build Docker Image for linux/arm/v6 (push) Has been cancelled
Build & Push Docker Images / Build Docker Image for linux/arm/v7 (push) Has been cancelled
Build & Push Docker Images / Build Docker Image for linux/arm64 (push) Has been cancelled
Build & Push Docker Images / Merge & Push Final Docker Image (push) Has been cancelled

This can be used when the source and destination backends are
guaranteed to return the items in the same sorted order.

Fixes #5859
This commit is contained in:
Nick Craig-Wood
2025-06-30 16:19:25 +01:00
parent e7f11af1ca
commit d376838f77
5 changed files with 86 additions and 2 deletions

View File

@@ -852,6 +852,32 @@ the binary units, e.g. 1, 2\*\*10, 2\*\*20, 2\*\*30 respectively.
See also [--human-readable](#human-readable).
### --assume-listings-sorted
This flag can be used when the source and destination backends are
guaranteed to return the items in the same sorted order, in which
case it will speed up the sync.
Not all backends are guaranteed to return sorted entries (e.g. local),
but s3 should, so an s3 to s3 sync could benefit from this flag.
If rclone finds an out-of-order directory entry then it will cancel
the sync with the error:
```console
out of order listing in source (remote:dir)
```
In this case you should remove the `--assume-listings-sorted` flag.
If you are using `--assume-listings-sorted` then rclone will assume
`--no-unicode-normalization` and it will compare file names in a
case-sensitive way.
Normally sorting directory entries is not a bottleneck, but it can
become so with syncs of millions of items in a single directory as the
sync will not start until the directory listing is complete.
## Main options
### --backup-dir string

View File

@@ -566,6 +566,12 @@ var ConfigOptionsInfo = Options{{
Default: "", Default: "",
Help: "HTTP proxy URL.", Help: "HTTP proxy URL.",
Groups: "Networking", Groups: "Networking",
}, {
Name: "assume_listings_sorted",
Default: false,
Advanced: true,
Help: "If set will not sort listings. If listings aren't sorted the sync may go wrong.",
Groups: "Copy",
}} }}
// ConfigInfo is filesystem config options // ConfigInfo is filesystem config options
@@ -680,6 +686,7 @@ type ConfigInfo struct {
MaxConnections int `config:"max_connections"` MaxConnections int `config:"max_connections"`
NameTransform []string `config:"name_transform"` NameTransform []string `config:"name_transform"`
HTTPProxy string `config:"http_proxy"` HTTPProxy string `config:"http_proxy"`
AssumeListingsSorted bool `config:"assume_listings_sorted"`
} }
func init() { func init() {

View File

@@ -45,6 +45,7 @@ type Sorter struct {
keyFn KeyFn // transform an entry into a sort key keyFn KeyFn // transform an entry into a sort key
cutoff int // number of entries above which we start extsort cutoff int // number of entries above which we start extsort
extSort bool // true if we are ext sorting extSort bool // true if we are ext sorting
noSort bool // true if we aren't sorting
inputChan chan string // for sending data to the ext sort inputChan chan string // for sending data to the ext sort
outputChan <-chan string // for receiving data from the ext sort outputChan <-chan string // for receiving data from the ext sort
errChan <-chan error // for getting errors from the ext sort errChan <-chan error // for getting errors from the ext sort
@@ -78,6 +79,7 @@ func NewSorter(ctx context.Context, f NewObjecter, callback fs.ListRCallback, ke
keyFn: keyFn, keyFn: keyFn,
cutoff: ci.ListCutoff, cutoff: ci.ListCutoff,
errs: errcount.New(), errs: errcount.New(),
noSort: ci.AssumeListingsSorted,
}, nil }, nil
} }
@@ -172,6 +174,9 @@ func (ls *Sorter) startExtSort() (err error) {
// //
// Safe to call from concurrent go routines // Safe to call from concurrent go routines
func (ls *Sorter) Add(entries fs.DirEntries) error { func (ls *Sorter) Add(entries fs.DirEntries) error {
if ls.noSort {
return ls.callback(entries)
}
ls.mu.Lock() ls.mu.Lock()
defer ls.mu.Unlock() defer ls.mu.Unlock()
if ls.extSort { if ls.extSort {
@@ -267,6 +272,9 @@ func (lh *listHelper) Flush() error {
// Send the sorted entries to the callback. // Send the sorted entries to the callback.
func (ls *Sorter) Send() (err error) { func (ls *Sorter) Send() (err error) {
if ls.noSort {
return nil
}
ls.mu.Lock() ls.mu.Lock()
defer ls.mu.Unlock() defer ls.mu.Unlock()

View File

@@ -46,6 +46,46 @@ func TestSorter(t *testing.T) {
assert.Equal(t, fs.DirEntries(nil), ls.entries) assert.Equal(t, fs.DirEntries(nil), ls.entries)
} }
// TestSorterAssumeSorted exercises a Sorter created with
// ci.AssumeListingsSorted set. It checks that the callback receives the
// entries in exactly the order they were passed to Add, that all of them
// have arrived by the time Add returns, and that Send and CleanUp do not
// deliver anything further.
func TestSorterAssumeSorted(t *testing.T) {
	ctx, ci := fs.AddConfig(context.Background())
	ci.AssumeListingsSorted = true

	// The callback must see the entries back in exactly this order.
	expected := fs.DirEntries{
		mockdir.New("c"),
		mockobject.Object("C"),
		mockdir.New("b"),
		mockobject.Object("B"),
		mockdir.New("a"),
		mockobject.Object("A"),
	}

	// seen counts how many entries the callback has received so far and
	// doubles as the index of the next expected entry.
	seen := 0
	checkOrder := func(batch fs.DirEntries) error {
		for i := range batch {
			require.Equal(t, expected[seen], batch[i])
			seen++
		}
		return nil
	}

	sorter, err := NewSorter(ctx, nil, checkOrder, nil)
	require.NoError(t, err)

	// Add: every entry should have reached the callback already and
	// nothing should have been buffered in the sorter.
	err = sorter.Add(expected[:2])
	require.NoError(t, err)
	err = sorter.Add(expected[2:])
	require.NoError(t, err)
	assert.Equal(t, len(expected), seen)
	assert.Equal(t, fs.DirEntries(nil), sorter.entries)

	// Send: must succeed without delivering any more entries.
	require.NoError(t, sorter.Send())
	assert.Equal(t, len(expected), seen)

	// CleanUp: must leave the count and the (empty) buffer untouched.
	sorter.CleanUp()
	assert.Equal(t, len(expected), seen)
	assert.Equal(t, fs.DirEntries(nil), sorter.entries)
}
func TestSorterIdentity(t *testing.T) { func TestSorterIdentity(t *testing.T) {
ctx := context.Background() ctx := context.Background()
cmpFn := func(a, b fs.DirEntry) int { cmpFn := func(a, b fs.DirEntry) int {

View File

@@ -13,6 +13,7 @@ import (
"github.com/rclone/rclone/fs" "github.com/rclone/rclone/fs"
"github.com/rclone/rclone/fs/dirtree" "github.com/rclone/rclone/fs/dirtree"
"github.com/rclone/rclone/fs/filter" "github.com/rclone/rclone/fs/filter"
"github.com/rclone/rclone/fs/fserrors"
"github.com/rclone/rclone/fs/list" "github.com/rclone/rclone/fs/list"
"github.com/rclone/rclone/fs/walk" "github.com/rclone/rclone/fs/walk"
"github.com/rclone/rclone/lib/transform" "github.com/rclone/rclone/lib/transform"
@@ -330,7 +331,8 @@ func (m *March) matchListings(srcChan, dstChan <-chan fs.DirEntry, srcOnly, dstO
continue continue
} else if srcName < srcPrevName { } else if srcName < srcPrevName {
// this should never happen since we sort the listings // this should never happen since we sort the listings
panic("Out of order listing in source") // however the user may be using the --assume-listings-sorted flag
return fserrors.FatalError(fmt.Errorf("out of order listing in source (%v)", src.Fs()))
} }
} }
if dst != nil && dstPrev != nil { if dst != nil && dstPrev != nil {
@@ -340,7 +342,8 @@ func (m *March) matchListings(srcChan, dstChan <-chan fs.DirEntry, srcOnly, dstO
continue continue
} else if dstName < dstPrevName { } else if dstName < dstPrevName {
// this should never happen since we sort the listings // this should never happen since we sort the listings
panic("Out of order listing in destination") // however the user may be using the --assume-listings-sorted flag
return fserrors.FatalError(fmt.Errorf("out of order listing in destination (%v)", dst.Fs()))
} }
} }
switch { switch {