mirror of
https://github.com/rclone/rclone.git
synced 2025-12-11 22:14:05 +01:00
filter: add {{ regexp }} syntax to pattern matches - fixes #4074
There has been a desire from more advanced rclone users to have regexp
filtering as well as the glob filtering.
This patch adds regexp filtering using this syntax `{{ regexp }}`
which is currently a syntax error, so it is backwards compatible.
This means regexps can be used everywhere globs can be used, and that
they can also be mixed with globs in the same pattern, e.g. `*.{{jpe?g}}`
This commit is contained in:
@@ -503,6 +503,31 @@ func TestNewFilterMatchesIgnoreCase(t *testing.T) {
|
||||
assert.False(t, f.InActive())
|
||||
}
|
||||
|
||||
func TestNewFilterMatchesRegexp(t *testing.T) {
|
||||
f, err := NewFilter(nil)
|
||||
require.NoError(t, err)
|
||||
add := func(s string) {
|
||||
err := f.AddRule(s)
|
||||
require.NoError(t, err)
|
||||
}
|
||||
add(`+ /{{file\d+\.png}}`)
|
||||
add(`+ *.{{(?i)jpg}}`)
|
||||
add(`- *`)
|
||||
testInclude(t, f, []includeTest{
|
||||
{"file2.png", 100, 0, true},
|
||||
{"sub/file2.png", 100, 0, false},
|
||||
{"file123.png", 100, 0, true},
|
||||
{"File123.png", 100, 0, false},
|
||||
{"something.jpg", 100, 0, true},
|
||||
{"deep/path/something.JPG", 100, 0, true},
|
||||
{"something.gif", 100, 0, false},
|
||||
})
|
||||
testDirInclude(t, f, []includeDirTest{
|
||||
{"anything at all", true},
|
||||
})
|
||||
assert.False(t, f.InActive())
|
||||
}
|
||||
|
||||
func TestFilterAddDirRuleOrFileRule(t *testing.T) {
|
||||
for _, test := range []struct {
|
||||
included bool
|
||||
|
||||
@@ -19,7 +19,7 @@ func GlobToRegexp(glob string, ignoreCase bool) (*regexp.Regexp, error) {
|
||||
}
|
||||
if strings.HasPrefix(glob, "/") {
|
||||
glob = glob[1:]
|
||||
_, _ = re.WriteRune('^')
|
||||
_ = re.WriteByte('^')
|
||||
} else {
|
||||
_, _ = re.WriteString("(^|/)")
|
||||
}
|
||||
@@ -38,15 +38,45 @@ func GlobToRegexp(glob string, ignoreCase bool) (*regexp.Regexp, error) {
|
||||
consecutiveStars = 0
|
||||
return nil
|
||||
}
|
||||
overwriteLastChar := func(c byte) {
|
||||
buf := re.Bytes()
|
||||
buf[len(buf)-1] = c
|
||||
}
|
||||
inBraces := false
|
||||
inBrackets := 0
|
||||
slashed := false
|
||||
inRegexp := false // inside {{ ... }}
|
||||
inRegexpEnd := false // have received }} waiting for more
|
||||
var next, last rune
|
||||
for _, c := range glob {
|
||||
next, last = c, next
|
||||
if slashed {
|
||||
_, _ = re.WriteRune(c)
|
||||
slashed = false
|
||||
continue
|
||||
}
|
||||
if inRegexpEnd {
|
||||
if c == '}' {
|
||||
// Regexp is ending with }} choose longest segment
|
||||
// Replace final ) with }
|
||||
overwriteLastChar('}')
|
||||
_ = re.WriteByte(')')
|
||||
continue
|
||||
} else {
|
||||
inRegexpEnd = false
|
||||
}
|
||||
}
|
||||
if inRegexp {
|
||||
if c == '}' && last == '}' {
|
||||
inRegexp = false
|
||||
inRegexpEnd = true
|
||||
// Replace final } with )
|
||||
overwriteLastChar(')')
|
||||
} else {
|
||||
_, _ = re.WriteRune(c)
|
||||
}
|
||||
continue
|
||||
}
|
||||
if c != '*' {
|
||||
err := insertStars()
|
||||
if err != nil {
|
||||
@@ -78,24 +108,30 @@ func GlobToRegexp(glob string, ignoreCase bool) (*regexp.Regexp, error) {
|
||||
return nil, fmt.Errorf("mismatched ']' in glob %q", glob)
|
||||
case '{':
|
||||
if inBraces {
|
||||
return nil, fmt.Errorf("can't nest '{' '}' in glob %q", glob)
|
||||
if last == '{' {
|
||||
inRegexp = true
|
||||
inBraces = false
|
||||
} else {
|
||||
return nil, fmt.Errorf("can't nest '{' '}' in glob %q", glob)
|
||||
}
|
||||
} else {
|
||||
inBraces = true
|
||||
_ = re.WriteByte('(')
|
||||
}
|
||||
inBraces = true
|
||||
_, _ = re.WriteRune('(')
|
||||
case '}':
|
||||
if !inBraces {
|
||||
return nil, fmt.Errorf("mismatched '{' and '}' in glob %q", glob)
|
||||
}
|
||||
_, _ = re.WriteRune(')')
|
||||
_ = re.WriteByte(')')
|
||||
inBraces = false
|
||||
case ',':
|
||||
if inBraces {
|
||||
_, _ = re.WriteRune('|')
|
||||
_ = re.WriteByte('|')
|
||||
} else {
|
||||
_, _ = re.WriteRune(c)
|
||||
}
|
||||
case '.', '+', '(', ')', '|', '^', '$': // regexp meta characters not dealt with above
|
||||
_, _ = re.WriteRune('\\')
|
||||
_ = re.WriteByte('\\')
|
||||
_, _ = re.WriteRune(c)
|
||||
default:
|
||||
_, _ = re.WriteRune(c)
|
||||
@@ -111,7 +147,10 @@ func GlobToRegexp(glob string, ignoreCase bool) (*regexp.Regexp, error) {
|
||||
if inBraces {
|
||||
return nil, fmt.Errorf("mismatched '{' and '}' in glob %q", glob)
|
||||
}
|
||||
_, _ = re.WriteRune('$')
|
||||
if inRegexp {
|
||||
return nil, fmt.Errorf("mismatched '{{' and '}}' in glob %q", glob)
|
||||
}
|
||||
_ = re.WriteByte('$')
|
||||
result, err := regexp.Compile(re.String())
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("bad glob pattern %q (regexp %q): %w", glob, re.String(), err)
|
||||
@@ -120,8 +159,10 @@ func GlobToRegexp(glob string, ignoreCase bool) (*regexp.Regexp, error) {
|
||||
}
|
||||
|
||||
var (
|
||||
// Can't deal with / or ** in {}
|
||||
tooHardRe = regexp.MustCompile(`{[^{}]*(\*\*|/)[^{}]*}`)
|
||||
// Can't deal with
|
||||
// / or ** in {}
|
||||
// {{ regexp }}
|
||||
tooHardRe = regexp.MustCompile(`({[^{}]*(\*\*|/)[^{}]*})|\{\{|\}\}`)
|
||||
|
||||
// Squash all /
|
||||
squashSlash = regexp.MustCompile(`/{2,}`)
|
||||
|
||||
@@ -32,7 +32,7 @@ func TestGlobToRegexp(t *testing.T) {
|
||||
{`***`, `(^|/)`, `too many stars`},
|
||||
{`ab]c`, `(^|/)`, `mismatched ']'`},
|
||||
{`ab[c`, `(^|/)`, `mismatched '[' and ']'`},
|
||||
{`ab{{cd`, `(^|/)`, `can't nest`},
|
||||
{`ab{x{cd`, `(^|/)`, `can't nest`},
|
||||
{`ab{}}cd`, `(^|/)`, `mismatched '{' and '}'`},
|
||||
{`ab}c`, `(^|/)`, `mismatched '{' and '}'`},
|
||||
{`ab{c`, `(^|/)`, `mismatched '{' and '}'`},
|
||||
@@ -40,16 +40,24 @@ func TestGlobToRegexp(t *testing.T) {
|
||||
{`[a--b]`, `(^|/)`, `bad glob pattern`},
|
||||
{`a\*b`, `(^|/)a\*b$`, ``},
|
||||
{`a\\b`, `(^|/)a\\b$`, ``},
|
||||
{`a{{.*}}b`, `(^|/)a(.*)b$`, ``},
|
||||
{`a{{.*}`, `(^|/)a(.*)b$`, `mismatched '{{' and '}}'`},
|
||||
{`{{regexp}}`, `(^|/)(regexp)$`, ``},
|
||||
{`\{{{regexp}}`, `(^|/)\{(regexp)$`, ``},
|
||||
{`/{{regexp}}`, `^(regexp)$`, ``},
|
||||
{`/{{\d{8}}}`, `^(\d{8})$`, ``},
|
||||
{`/{{\}}}`, `^(\})$`, ``},
|
||||
{`{{(?i)regexp}}`, `(^|/)((?i)regexp)$`, ``},
|
||||
} {
|
||||
for _, ignoreCase := range []bool{false, true} {
|
||||
gotRe, err := GlobToRegexp(test.in, ignoreCase)
|
||||
if test.error == "" {
|
||||
require.NoError(t, err, test.in)
|
||||
prefix := ""
|
||||
if ignoreCase {
|
||||
prefix = "(?i)"
|
||||
}
|
||||
got := gotRe.String()
|
||||
require.NoError(t, err, test.in)
|
||||
assert.Equal(t, prefix+test.want, got, test.in)
|
||||
} else {
|
||||
require.Error(t, err, test.in)
|
||||
@@ -84,6 +92,7 @@ func TestGlobToDirGlobs(t *testing.T) {
|
||||
{`/a/{jpg,png,gif}/*.{jpg,png,gif}`, []string{"/a/{jpg,png,gif}/", "/a/", "/"}},
|
||||
{`a/{a,a*b,a**c}/d/`, []string{"/**"}},
|
||||
{`/a/{a,a*b,a/c,d}/d/`, []string{"/**"}},
|
||||
{`/a/{{.*}}/d/`, []string{"/**"}},
|
||||
{`**`, []string{"**/"}},
|
||||
{`a**`, []string{"a**/"}},
|
||||
{`a**b`, []string{"a**/"}},
|
||||
|
||||
Reference in New Issue
Block a user