filter: add {{ regexp }} syntax to pattern matches - fixes #4074

There has been a desire from more advanced rclone users to have regexp
filtering as well as the glob filtering.

This patch adds regexp filtering using the syntax `{{ regexp }}`,
which is currently a syntax error, so the change is backwards compatible.

This means regexps can be used everywhere globs can be used, and that
they can also be mixed with globs in the same pattern, e.g. `*.{{jpe?g}}`.
This commit is contained in:
Nick Craig-Wood
2021-10-09 12:56:23 +01:00
parent 74898bac3b
commit 268b808bf8
4 changed files with 166 additions and 13 deletions

View File

@@ -503,6 +503,31 @@ func TestNewFilterMatchesIgnoreCase(t *testing.T) {
assert.False(t, f.InActive())
}
// TestNewFilterMatchesRegexp checks that filter rules containing
// {{ regexp }} segments are accepted by AddRule and are applied when
// deciding whether files and directories are included.
func TestNewFilterMatchesRegexp(t *testing.T) {
	f, err := NewFilter(nil)
	require.NoError(t, err)

	// Rules are added in order; every one of them must be accepted.
	rules := []string{
		`+ /{{file\d+\.png}}`,
		`+ *.{{(?i)jpg}}`,
		`- *`,
	}
	for _, rule := range rules {
		require.NoError(t, f.AddRule(rule))
	}

	fileCases := []includeTest{
		{"file2.png", 100, 0, true},
		{"sub/file2.png", 100, 0, false},
		{"file123.png", 100, 0, true},
		{"File123.png", 100, 0, false},
		{"something.jpg", 100, 0, true},
		{"deep/path/something.JPG", 100, 0, true},
		{"something.gif", 100, 0, false},
	}
	testInclude(t, f, fileCases)

	dirCases := []includeDirTest{
		{"anything at all", true},
	}
	testDirInclude(t, f, dirCases)

	assert.False(t, f.InActive())
}
func TestFilterAddDirRuleOrFileRule(t *testing.T) {
for _, test := range []struct {
included bool

View File

@@ -19,7 +19,7 @@ func GlobToRegexp(glob string, ignoreCase bool) (*regexp.Regexp, error) {
}
if strings.HasPrefix(glob, "/") {
glob = glob[1:]
_, _ = re.WriteRune('^')
_ = re.WriteByte('^')
} else {
_, _ = re.WriteString("(^|/)")
}
@@ -38,15 +38,45 @@ func GlobToRegexp(glob string, ignoreCase bool) (*regexp.Regexp, error) {
consecutiveStars = 0
return nil
}
overwriteLastChar := func(c byte) {
buf := re.Bytes()
buf[len(buf)-1] = c
}
inBraces := false
inBrackets := 0
slashed := false
inRegexp := false // inside {{ ... }}
inRegexpEnd := false // have received }} waiting for more
var next, last rune
for _, c := range glob {
next, last = c, next
if slashed {
_, _ = re.WriteRune(c)
slashed = false
continue
}
if inRegexpEnd {
if c == '}' {
// Regexp is ending with }} choose longest segment
// Replace final ) with }
overwriteLastChar('}')
_ = re.WriteByte(')')
continue
} else {
inRegexpEnd = false
}
}
if inRegexp {
if c == '}' && last == '}' {
inRegexp = false
inRegexpEnd = true
// Replace final } with )
overwriteLastChar(')')
} else {
_, _ = re.WriteRune(c)
}
continue
}
if c != '*' {
err := insertStars()
if err != nil {
@@ -78,24 +108,30 @@ func GlobToRegexp(glob string, ignoreCase bool) (*regexp.Regexp, error) {
return nil, fmt.Errorf("mismatched ']' in glob %q", glob)
case '{':
if inBraces {
return nil, fmt.Errorf("can't nest '{' '}' in glob %q", glob)
if last == '{' {
inRegexp = true
inBraces = false
} else {
return nil, fmt.Errorf("can't nest '{' '}' in glob %q", glob)
}
} else {
inBraces = true
_ = re.WriteByte('(')
}
inBraces = true
_, _ = re.WriteRune('(')
case '}':
if !inBraces {
return nil, fmt.Errorf("mismatched '{' and '}' in glob %q", glob)
}
_, _ = re.WriteRune(')')
_ = re.WriteByte(')')
inBraces = false
case ',':
if inBraces {
_, _ = re.WriteRune('|')
_ = re.WriteByte('|')
} else {
_, _ = re.WriteRune(c)
}
case '.', '+', '(', ')', '|', '^', '$': // regexp meta characters not dealt with above
_, _ = re.WriteRune('\\')
_ = re.WriteByte('\\')
_, _ = re.WriteRune(c)
default:
_, _ = re.WriteRune(c)
@@ -111,7 +147,10 @@ func GlobToRegexp(glob string, ignoreCase bool) (*regexp.Regexp, error) {
if inBraces {
return nil, fmt.Errorf("mismatched '{' and '}' in glob %q", glob)
}
_, _ = re.WriteRune('$')
if inRegexp {
return nil, fmt.Errorf("mismatched '{{' and '}}' in glob %q", glob)
}
_ = re.WriteByte('$')
result, err := regexp.Compile(re.String())
if err != nil {
return nil, fmt.Errorf("bad glob pattern %q (regexp %q): %w", glob, re.String(), err)
@@ -120,8 +159,10 @@ func GlobToRegexp(glob string, ignoreCase bool) (*regexp.Regexp, error) {
}
var (
// Can't deal with / or ** in {}
tooHardRe = regexp.MustCompile(`{[^{}]*(\*\*|/)[^{}]*}`)
// Can't deal with
// / or ** in {}
// {{ regexp }}
tooHardRe = regexp.MustCompile(`({[^{}]*(\*\*|/)[^{}]*})|\{\{|\}\}`)
// Squash all /
squashSlash = regexp.MustCompile(`/{2,}`)

View File

@@ -32,7 +32,7 @@ func TestGlobToRegexp(t *testing.T) {
{`***`, `(^|/)`, `too many stars`},
{`ab]c`, `(^|/)`, `mismatched ']'`},
{`ab[c`, `(^|/)`, `mismatched '[' and ']'`},
{`ab{{cd`, `(^|/)`, `can't nest`},
{`ab{x{cd`, `(^|/)`, `can't nest`},
{`ab{}}cd`, `(^|/)`, `mismatched '{' and '}'`},
{`ab}c`, `(^|/)`, `mismatched '{' and '}'`},
{`ab{c`, `(^|/)`, `mismatched '{' and '}'`},
@@ -40,16 +40,24 @@ func TestGlobToRegexp(t *testing.T) {
{`[a--b]`, `(^|/)`, `bad glob pattern`},
{`a\*b`, `(^|/)a\*b$`, ``},
{`a\\b`, `(^|/)a\\b$`, ``},
{`a{{.*}}b`, `(^|/)a(.*)b$`, ``},
{`a{{.*}`, `(^|/)a(.*)b$`, `mismatched '{{' and '}}'`},
{`{{regexp}}`, `(^|/)(regexp)$`, ``},
{`\{{{regexp}}`, `(^|/)\{(regexp)$`, ``},
{`/{{regexp}}`, `^(regexp)$`, ``},
{`/{{\d{8}}}`, `^(\d{8})$`, ``},
{`/{{\}}}`, `^(\})$`, ``},
{`{{(?i)regexp}}`, `(^|/)((?i)regexp)$`, ``},
} {
for _, ignoreCase := range []bool{false, true} {
gotRe, err := GlobToRegexp(test.in, ignoreCase)
if test.error == "" {
require.NoError(t, err, test.in)
prefix := ""
if ignoreCase {
prefix = "(?i)"
}
got := gotRe.String()
require.NoError(t, err, test.in)
assert.Equal(t, prefix+test.want, got, test.in)
} else {
require.Error(t, err, test.in)
@@ -84,6 +92,7 @@ func TestGlobToDirGlobs(t *testing.T) {
{`/a/{jpg,png,gif}/*.{jpg,png,gif}`, []string{"/a/{jpg,png,gif}/", "/a/", "/"}},
{`a/{a,a*b,a**c}/d/`, []string{"/**"}},
{`/a/{a,a*b,a/c,d}/d/`, []string{"/**"}},
{`/a/{{.*}}/d/`, []string{"/**"}},
{`**`, []string{"**/"}},
{`a**`, []string{"a**/"}},
{`a**b`, []string{"a**/"}},