march: added flag to allow Unicode filenames to remain unique

If two of your filenames differ only by near-identical Unicode
characters (for example, different Unicode encodings of the same
accented letter), rclone will normalize the names, making them
identical. This flag gives you the ability to keep them unique.
Using it might cause unintended side effects, such as duplicated
files whose names contain certain Unicode characters, when
downloading them from certain cloud providers to a macOS filesystem.

Fixes #4228
This commit is contained in:
Ben Zenker
2020-05-14 19:27:59 -04:00
committed by Nick Craig-Wood
parent 4006345cfb
commit 899c8e0697
6 changed files with 119 additions and 72 deletions

View File

@@ -19,6 +19,7 @@ import (
"github.com/rclone/rclone/fstest/mockobject"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"golang.org/x/text/unicode/norm"
)
// Some times used in the tests
@@ -313,6 +314,8 @@ func TestMatchListings(t *testing.T) {
b = mockobject.Object("b")
c = mockobject.Object("c")
d = mockobject.Object("d")
uE1 = mockobject.Object("é") // one of the unicode E characters
uE2 = mockobject.Object("é") // a different unicode E character
dirA = mockdir.New("A")
dirb = mockdir.New("b")
)
@@ -419,6 +422,28 @@ func TestMatchListings(t *testing.T) {
},
transforms: []matchTransformFn{strings.ToLower},
},
{
what: "Unicode near-duplicate that becomes duplicate with normalization",
input: fs.DirEntries{
uE1, uE1,
uE2, uE2,
},
matches: []matchPair{
{uE1, uE1},
},
transforms: []matchTransformFn{norm.NFC.String},
},
{
what: "Unicode near-duplicate with no normalization",
input: fs.DirEntries{
uE1, uE1,
uE2, uE2,
},
matches: []matchPair{
{uE1, uE1},
{uE2, uE2},
},
},
{
what: "File and directory are not duplicates - srcOnly",
input: fs.DirEntries{