copyurl: reworked code, added concurrency and tests

- Added Tests
- Fixed file name handling
- Added concurrent downloads
- Limited downloads to --transfers
- Fixes #8127
This commit is contained in:
dougal
2025-09-10 18:24:40 +01:00
committed by Nick Craig-Wood
parent 8c5af2f51c
commit cce399515f
2 changed files with 254 additions and 95 deletions

View File

@@ -13,7 +13,9 @@ import (
"github.com/rclone/rclone/fs"
"github.com/rclone/rclone/fs/config/flags"
"github.com/rclone/rclone/fs/operations"
"github.com/rclone/rclone/lib/errcount"
"github.com/spf13/cobra"
"golang.org/x/sync/errgroup"
)
var (
@@ -33,7 +35,7 @@ func init() {
flags.BoolVarP(cmdFlags, &printFilename, "print-filename", "p", printFilename, "Print the resulting name from --auto-filename", "")
flags.BoolVarP(cmdFlags, &noClobber, "no-clobber", "", noClobber, "Prevent overwriting file with same name", "")
flags.BoolVarP(cmdFlags, &stdout, "stdout", "", stdout, "Write the output to stdout rather than a file", "")
// BUG fix: the default value must be the flag's own variable (urls), not
// stdout — the original passed stdout here (copy/paste error), so setting
// --stdout would silently change the default of --urls.
flags.BoolVarP(cmdFlags, &urls, "urls", "", urls, "Use a CSV file of links to process multiple URLs", "")
}
var commandDefinition = &cobra.Command{
@@ -57,9 +59,16 @@ destination if there is one with the same name.
Setting |--stdout| or making the output file name |-|
will cause the output to be written to standard output.
Setting |--urls| allows you to input a .csv file of urls in format:
URL,FILENAME. If a filename is missing one will be autogenerated for the file,
and it will otherwise continue as normal.
Setting |--urls| allows you to input a CSV file of URLs in format: URL,
FILENAME. If |--urls| is in use then replace the URL in the arguments with the
file containing the URLs, e.g.:
|||sh
rclone copyurl --urls myurls.csv remote:dir
|||
Missing filenames will be autogenerated equivalent to using |--auto-filename|.
Note that |--stdout| and |--print-filename| are incompatible with |--urls|.
This will do |--transfers| copies in parallel. Note that if |--auto-filename|
is desired for all URLs then a file with only URLs and no filename can be used.
### Troubleshooting
@@ -75,102 +84,95 @@ If you can't get |rclone copyurl| to work then here are some things you can try:
"groups": "Important",
},
RunE: func(command *cobra.Command, args []string) (err error) {
//Check they gave all the args correctly
cmd.CheckArgs(1, 2, command, args)
//Create list to store the list of URLS
var urlList [][]string
if urls {
//Read the .csv file provided if --urls is enabled
File, error := os.Open(args[0])
if error != nil {
return errors.New("failed to open .csv file")
cmd.Run(true, true, command, func() error {
if !urls {
return run(args)
}
reader := csv.NewReader(File)
return runURLS(args)
})
return nil
},
}
var copyURL = operations.CopyURL // for testing
// runURLS processes a .csv file of urls and filenames
func runURLS(args []string) (err error) {
if stdout {
return errors.New("can't use --stdout with --urls")
}
if printFilename {
return errors.New("can't use --print-filename with --urls")
}
dstFs := cmd.NewFsDir(args[1:])
f, err := os.Open(args[0])
if err != nil {
return fmt.Errorf("failed to open .csv file: %w", err)
}
defer fs.CheckClose(f, &err)
reader := csv.NewReader(f)
reader.FieldsPerRecord = -1
data, error := reader.ReadAll()
if error != nil {
return errors.New("failed reading .csv file")
}
//Save the list of urls to the urlList variable
urlList = data
} else {
//If its the non multi-url command, save only that, without attempting to read a file
urlList = make([][]string, 1)
urlList[0] = args
urlList, err := reader.ReadAll()
if err != nil {
return fmt.Errorf("failed reading .csv file: %w", err)
}
//Save per-file variables
dstFileNames := make([]string, len(urlList))
fsdsts := make([]fs.Fs, len(urlList))
autonames := make([]bool, len(urlList))
ec := errcount.New()
g, gCtx := errgroup.WithContext(context.Background())
ci := fs.GetConfig(gCtx)
g.SetLimit(ci.Transfers)
//Save the original input folder, prevents it being overwritten when adding onto the end
root := args[1]
for _, urlEntry := range urlList {
if len(urlEntry) == 0 {
continue
}
g.Go(func() error {
url := urlEntry[0]
var filename string
if len(urlEntry) > 1 {
filename = urlEntry[1]
}
_, err := copyURL(gCtx, dstFs, filename, url, filename == "", headerFilename, noClobber)
if err != nil {
fs.Errorf(filename, "failed to copy URL %q: %v", url, err)
ec.Add(err)
}
return nil
})
}
ec.Add(g.Wait())
return ec.Err("not all URLs copied successfully")
}
// run runs the command for a single URL
func run(args []string) error {
var err error
var dstFileName string
var fsdst fs.Fs
if !stdout {
if len(args) < 2 {
return errors.New("need 2 arguments if not using --stdout")
}
if args[1] == "-" {
stdout = true
} else if autoFilename {
fsdst = cmd.NewFsDir(args[1:])
} else {
//Go over each of the URLs need transferring
for i := range urlList {
//Save autoFilenames
if autoFilename {
autonames[i] = true
} else if urls {
//If we are using multiple URLs, and they dont provide a name for one, set it to autogenerate one for it.
//This is because itd suck having it cancel midway through if you forgot to add the name of one.
autonames[i] = (len(urlList[i]) == 1 || urlList[i][1] == "")
fsdst, dstFileName = cmd.NewFsDstFile(args[1:])
}
}
if autonames[i] {
//If it is autonaming, generate the name.
//Set args[1] to the root, resetting it to the main directory. (Clears filenames from it)
args[1] = root
fsdsts[i] = cmd.NewFsDir(args[1:])
} else {
//If using multiple URLs, set it to the root and the name.
//Because again, args[1] gets appended to each round, so it has to be reset.
if len(urlList) > 1 {
args[1] = root + urlList[i][1]
}
//Otherwise your clear to go, as in the regular format,
//args[1] already contains the destination and filename for a single file
fsdsts[i], dstFileNames[i] = cmd.NewFsDstFile(args[1:])
}
}
}
}
cmd.Run(true, true, command, func() error {
//Go over each URL, to upload them
for i, url := range urlList {
//Genereate a new fs.Object for the file
var dst fs.Object
if stdout {
err = operations.CopyURLToWriter(context.Background(), url[0], os.Stdout)
err = operations.CopyURLToWriter(context.Background(), args[0], os.Stdout)
} else {
//CopyURLs, with each individual link's information
dst, err = operations.CopyURL(context.Background(), fsdsts[i], dstFileNames[i], url[0], autonames[i], headerFilename, noClobber)
dst, err = copyURL(context.Background(), fsdst, dstFileName, args[0], autoFilename, headerFilename, noClobber)
if printFilename && err == nil && dst != nil {
fmt.Println(dst.Remote())
}
}
if err != nil {
println(err)
return err
}
}
return nil
})
return nil
},
}

157
cmd/copyurl/copyurl_test.go Normal file
View File

@@ -0,0 +1,157 @@
package copyurl
import (
"context"
"errors"
"os"
"path/filepath"
"sync"
"sync/atomic"
"testing"
_ "github.com/rclone/rclone/backend/local"
"github.com/rclone/rclone/fs"
"github.com/rclone/rclone/fs/operations"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func resetGlobals() {
autoFilename = false
headerFilename = false
printFilename = false
stdout = false
noClobber = false
urls = false
copyURL = operations.CopyURL
}
// TestRun_RequiresTwoArgsWhenNotStdout checks that run rejects a call with
// only a URL argument when --stdout is not in use.
func TestRun_RequiresTwoArgsWhenNotStdout(t *testing.T) {
	t.Cleanup(resetGlobals)
	resetGlobals()

	err := run([]string{"https://example.com/foo"})
	// ErrorContains asserts both non-nil error and message in one call.
	require.ErrorContains(t, err, "need 2 arguments if not using --stdout")
}
// TestRun_CallsCopyURL_WithExplicitFilename_Success stubs out copyURL and
// verifies that run forwards the URL, the explicit file name, and the flag
// defaults, calling it exactly once.
func TestRun_CallsCopyURL_WithExplicitFilename_Success(t *testing.T) {
	t.Cleanup(resetGlobals)
	resetGlobals()

	dstPath := filepath.Join(t.TempDir(), "out.txt")

	var called int32
	copyURL = func(_ctx context.Context, _dst fs.Fs, dstFileName, url string, auto, header, noclobber bool) (fs.Object, error) {
		atomic.AddInt32(&called, 1)
		assert.Equal(t, "https://example.com/file", url)
		assert.Equal(t, "out.txt", dstFileName)
		assert.False(t, auto)
		assert.False(t, header)
		assert.False(t, noclobber)
		return nil, nil
	}

	require.NoError(t, run([]string{"https://example.com/file", dstPath}))
	assert.Equal(t, int32(1), atomic.LoadInt32(&called))
}
// TestRun_CallsCopyURL_WithAutoFilename_AndPropagatesError checks that
// --auto-filename yields an empty destination name with auto=true, and that
// a copy error is propagated out of run.
func TestRun_CallsCopyURL_WithAutoFilename_AndPropagatesError(t *testing.T) {
	t.Cleanup(resetGlobals)
	resetGlobals()

	tmp := t.TempDir()
	autoFilename = true
	want := errors.New("boom")

	var called int32
	copyURL = func(_ctx context.Context, _dst fs.Fs, dstFileName, url string, auto, header, noclobber bool) (fs.Object, error) {
		atomic.AddInt32(&called, 1)
		assert.Equal(t, "", dstFileName) // auto filename -> empty
		assert.True(t, auto)
		return nil, want
	}

	err := run([]string{"https://example.com/auto/name", tmp})
	// ErrorIs (rather than Equal) keeps this valid if run ever wraps the error.
	require.ErrorIs(t, err, want)
	assert.Equal(t, int32(1), atomic.LoadInt32(&called))
}
// TestRunURLS_ErrorsWithStdoutAndWithPrintFilename verifies that the flags
// incompatible with --urls are rejected before any file is opened.
func TestRunURLS_ErrorsWithStdoutAndWithPrintFilename(t *testing.T) {
	t.Cleanup(resetGlobals)

	resetGlobals()
	stdout = true
	err := runURLS([]string{"dummy.csv", "destDir"})
	require.ErrorContains(t, err, "can't use --stdout with --urls")

	resetGlobals()
	printFilename = true
	err = runURLS([]string{"dummy.csv", "destDir"})
	require.ErrorContains(t, err, "can't use --print-filename with --urls")
}
// TestRunURLS_ProcessesCSV_ParallelCalls_AndAggregatesError feeds runURLS a
// three-line CSV (named entry, auto-named entry, failing entry) through a
// stubbed copyURL and checks every entry is attempted and the failure is
// aggregated into the returned error.
func TestRunURLS_ProcessesCSV_ParallelCalls_AndAggregatesError(t *testing.T) {
	t.Cleanup(resetGlobals)
	resetGlobals()

	// Write the CSV fixture.
	csvPath := filepath.Join(t.TempDir(), "urls.csv")
	lines := "https://example.com/a,aaa.txt\n" + // success
		"https://example.com/b\n" + // auto filename
		"https://example.com/c,ccc.txt\n" // error
	require.NoError(t, os.WriteFile(csvPath, []byte(lines), 0o600))

	// destination dir (local backend)
	dest := t.TempDir()

	// Stub copyURL: succeed for /a and /b, fail for /c, recording each call.
	var calls int32
	var mu sync.Mutex
	var seen []string
	copyURL = func(_ctx context.Context, _dst fs.Fs, dstFileName, url string, auto, header, noclobber bool) (fs.Object, error) {
		atomic.AddInt32(&calls, 1)
		mu.Lock()
		seen = append(seen, url+"|"+dstFileName)
		mu.Unlock()
		switch url {
		case "https://example.com/a":
			require.Equal(t, "aaa.txt", dstFileName)
			return nil, nil
		case "https://example.com/b":
			require.Equal(t, "", dstFileName) // auto-name path
			return nil, nil
		case "https://example.com/c":
			return nil, errors.New("network down")
		default:
			return nil, nil
		}
	}

	err := runURLS([]string{csvPath, dest})
	require.Error(t, err)
	assert.Contains(t, err.Error(), "not all URLs copied successfully")

	// 3 lines => 3 calls
	assert.Equal(t, int32(3), atomic.LoadInt32(&calls))
	// sanity: all expected URLs were seen
	assert.ElementsMatch(t, []string{
		"https://example.com/a|aaa.txt",
		"https://example.com/b|",
		"https://example.com/c|ccc.txt",
	}, seen)
}