mirror of
https://github.com/rclone/rclone.git
synced 2025-12-12 06:24:14 +01:00
copyurl: reworked code, added concurrency and tests
- Added Tests - Fixed file name handling - Added concurrent downloads - Limited downloads to --transfers - Fixes #8127
This commit is contained in:
@@ -13,7 +13,9 @@ import (
|
||||
"github.com/rclone/rclone/fs"
|
||||
"github.com/rclone/rclone/fs/config/flags"
|
||||
"github.com/rclone/rclone/fs/operations"
|
||||
"github.com/rclone/rclone/lib/errcount"
|
||||
"github.com/spf13/cobra"
|
||||
"golang.org/x/sync/errgroup"
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -33,7 +35,7 @@ func init() {
|
||||
flags.BoolVarP(cmdFlags, &printFilename, "print-filename", "p", printFilename, "Print the resulting name from --auto-filename", "")
|
||||
flags.BoolVarP(cmdFlags, &noClobber, "no-clobber", "", noClobber, "Prevent overwriting file with same name", "")
|
||||
flags.BoolVarP(cmdFlags, &stdout, "stdout", "", stdout, "Write the output to stdout rather than a file", "")
|
||||
flags.BoolVarP(cmdFlags, &urls, "urls", "", stdout, "Use a .csv file of links to process multiple urls", "")
|
||||
flags.BoolVarP(cmdFlags, &urls, "urls", "", stdout, "Use a CSV file of links to process multiple URLs", "")
|
||||
}
|
||||
|
||||
var commandDefinition = &cobra.Command{
|
||||
@@ -57,9 +59,16 @@ destination if there is one with the same name.
|
||||
Setting |--stdout| or making the output file name |-|
|
||||
will cause the output to be written to standard output.
|
||||
|
||||
Setting |--urls| allows you to input a .csv file of urls in format:
|
||||
URL,FILENAME. If a filename is missing one will be autogenerated for the file,
|
||||
and it will otherwise continue as normal.
|
||||
Setting |--urls| allows you to input a CSV file of URLs in format: URL,
|
||||
FILENAME. If |--urls| is in use then replace the URL in the arguments with the
|
||||
file containing the URLs, e.g.:
|
||||
|||sh
|
||||
rclone copyurl --urls myurls.csv remote:dir
|
||||
|||
|
||||
Missing filenames will be autogenerated equivalent to using |--auto-filename|.
|
||||
Note that |--stdout| and |--print-filename| are incompatible with |--urls|.
|
||||
This will do |--transfers| copies in parallel. Note that if |--auto-filename|
|
||||
is desired for all URLs then a file with only URLs and no filename can be used.
|
||||
|
||||
### Troubleshooting
|
||||
|
||||
@@ -75,102 +84,95 @@ If you can't get |rclone copyurl| to work then here are some things you can try:
|
||||
"groups": "Important",
|
||||
},
|
||||
RunE: func(command *cobra.Command, args []string) (err error) {
|
||||
|
||||
//Check they gave all the args correctly
|
||||
cmd.CheckArgs(1, 2, command, args)
|
||||
|
||||
//Create list to store the list of URLS
|
||||
var urlList [][]string
|
||||
|
||||
if urls {
|
||||
//Read the .csv file provided if --urls is enabled
|
||||
File, error := os.Open(args[0])
|
||||
if error != nil {
|
||||
return errors.New("failed to open .csv file")
|
||||
cmd.Run(true, true, command, func() error {
|
||||
if !urls {
|
||||
return run(args)
|
||||
}
|
||||
reader := csv.NewReader(File)
|
||||
return runURLS(args)
|
||||
})
|
||||
return nil
|
||||
},
|
||||
}
|
||||
|
||||
var copyURL = operations.CopyURL // for testing
|
||||
|
||||
// runURLS processes a .csv file of urls and filenames
|
||||
func runURLS(args []string) (err error) {
|
||||
if stdout {
|
||||
return errors.New("can't use --stdout with --urls")
|
||||
}
|
||||
if printFilename {
|
||||
return errors.New("can't use --print-filename with --urls")
|
||||
}
|
||||
dstFs := cmd.NewFsDir(args[1:])
|
||||
|
||||
f, err := os.Open(args[0])
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to open .csv file: %w", err)
|
||||
}
|
||||
defer fs.CheckClose(f, &err)
|
||||
reader := csv.NewReader(f)
|
||||
reader.FieldsPerRecord = -1
|
||||
data, error := reader.ReadAll()
|
||||
if error != nil {
|
||||
return errors.New("failed reading .csv file")
|
||||
}
|
||||
//Save the list of urls to the urlList variable
|
||||
urlList = data
|
||||
|
||||
} else {
|
||||
//If its the non multi-url command, save only that, without attempting to read a file
|
||||
urlList = make([][]string, 1)
|
||||
urlList[0] = args
|
||||
urlList, err := reader.ReadAll()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading .csv file: %w", err)
|
||||
}
|
||||
|
||||
//Save per-file variables
|
||||
dstFileNames := make([]string, len(urlList))
|
||||
fsdsts := make([]fs.Fs, len(urlList))
|
||||
autonames := make([]bool, len(urlList))
|
||||
ec := errcount.New()
|
||||
g, gCtx := errgroup.WithContext(context.Background())
|
||||
ci := fs.GetConfig(gCtx)
|
||||
g.SetLimit(ci.Transfers)
|
||||
|
||||
//Save the original input folder, prevents it being overwritten when adding onto the end
|
||||
root := args[1]
|
||||
for _, urlEntry := range urlList {
|
||||
if len(urlEntry) == 0 {
|
||||
continue
|
||||
}
|
||||
g.Go(func() error {
|
||||
url := urlEntry[0]
|
||||
var filename string
|
||||
if len(urlEntry) > 1 {
|
||||
filename = urlEntry[1]
|
||||
}
|
||||
_, err := copyURL(gCtx, dstFs, filename, url, filename == "", headerFilename, noClobber)
|
||||
if err != nil {
|
||||
fs.Errorf(filename, "failed to copy URL %q: %v", url, err)
|
||||
ec.Add(err)
|
||||
}
|
||||
return nil
|
||||
})
|
||||
}
|
||||
ec.Add(g.Wait())
|
||||
return ec.Err("not all URLs copied successfully")
|
||||
}
|
||||
|
||||
// run runs the command for a single URL
|
||||
func run(args []string) error {
|
||||
var err error
|
||||
var dstFileName string
|
||||
var fsdst fs.Fs
|
||||
if !stdout {
|
||||
if len(args) < 2 {
|
||||
return errors.New("need 2 arguments if not using --stdout")
|
||||
}
|
||||
if args[1] == "-" {
|
||||
stdout = true
|
||||
} else if autoFilename {
|
||||
fsdst = cmd.NewFsDir(args[1:])
|
||||
} else {
|
||||
|
||||
//Go over each of the URLs need transferring
|
||||
for i := range urlList {
|
||||
//Save autoFilenames
|
||||
if autoFilename {
|
||||
autonames[i] = true
|
||||
} else if urls {
|
||||
|
||||
//If we are using multiple URLs, and they dont provide a name for one, set it to autogenerate one for it.
|
||||
//This is because itd suck having it cancel midway through if you forgot to add the name of one.
|
||||
|
||||
autonames[i] = (len(urlList[i]) == 1 || urlList[i][1] == "")
|
||||
fsdst, dstFileName = cmd.NewFsDstFile(args[1:])
|
||||
}
|
||||
}
|
||||
|
||||
if autonames[i] {
|
||||
//If it is autonaming, generate the name.
|
||||
//Set args[1] to the root, resetting it to the main directory. (Clears filenames from it)
|
||||
args[1] = root
|
||||
fsdsts[i] = cmd.NewFsDir(args[1:])
|
||||
} else {
|
||||
//If using multiple URLs, set it to the root and the name.
|
||||
//Because again, args[1] gets appended to each round, so it has to be reset.
|
||||
if len(urlList) > 1 {
|
||||
args[1] = root + urlList[i][1]
|
||||
}
|
||||
//Otherwise your clear to go, as in the regular format,
|
||||
//args[1] already contains the destination and filename for a single file
|
||||
fsdsts[i], dstFileNames[i] = cmd.NewFsDstFile(args[1:])
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
cmd.Run(true, true, command, func() error {
|
||||
//Go over each URL, to upload them
|
||||
for i, url := range urlList {
|
||||
//Genereate a new fs.Object for the file
|
||||
var dst fs.Object
|
||||
if stdout {
|
||||
err = operations.CopyURLToWriter(context.Background(), url[0], os.Stdout)
|
||||
err = operations.CopyURLToWriter(context.Background(), args[0], os.Stdout)
|
||||
} else {
|
||||
//CopyURLs, with each individual link's information
|
||||
dst, err = operations.CopyURL(context.Background(), fsdsts[i], dstFileNames[i], url[0], autonames[i], headerFilename, noClobber)
|
||||
dst, err = copyURL(context.Background(), fsdst, dstFileName, args[0], autoFilename, headerFilename, noClobber)
|
||||
if printFilename && err == nil && dst != nil {
|
||||
fmt.Println(dst.Remote())
|
||||
}
|
||||
}
|
||||
if err != nil {
|
||||
println(err)
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
})
|
||||
return nil
|
||||
},
|
||||
}
|
||||
|
||||
157
cmd/copyurl/copyurl_test.go
Normal file
157
cmd/copyurl/copyurl_test.go
Normal file
@@ -0,0 +1,157 @@
|
||||
package copyurl
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
|
||||
_ "github.com/rclone/rclone/backend/local"
|
||||
"github.com/rclone/rclone/fs"
|
||||
"github.com/rclone/rclone/fs/operations"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func resetGlobals() {
|
||||
autoFilename = false
|
||||
headerFilename = false
|
||||
printFilename = false
|
||||
stdout = false
|
||||
noClobber = false
|
||||
urls = false
|
||||
copyURL = operations.CopyURL
|
||||
}
|
||||
|
||||
func TestRun_RequiresTwoArgsWhenNotStdout(t *testing.T) {
|
||||
t.Cleanup(resetGlobals)
|
||||
resetGlobals()
|
||||
|
||||
err := run([]string{"https://example.com/foo"})
|
||||
require.Error(t, err)
|
||||
assert.Contains(t, err.Error(), "need 2 arguments if not using --stdout")
|
||||
}
|
||||
|
||||
func TestRun_CallsCopyURL_WithExplicitFilename_Success(t *testing.T) {
|
||||
t.Cleanup(resetGlobals)
|
||||
resetGlobals()
|
||||
|
||||
tmp := t.TempDir()
|
||||
dstPath := filepath.Join(tmp, "out.txt")
|
||||
|
||||
var called int32
|
||||
|
||||
copyURL = func(_ctx context.Context, _dst fs.Fs, dstFileName, url string, auto, header, noclobber bool) (fs.Object, error) {
|
||||
atomic.AddInt32(&called, 1)
|
||||
assert.Equal(t, "https://example.com/file", url)
|
||||
assert.Equal(t, "out.txt", dstFileName)
|
||||
assert.False(t, auto)
|
||||
assert.False(t, header)
|
||||
assert.False(t, noclobber)
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
err := run([]string{"https://example.com/file", dstPath})
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, int32(1), atomic.LoadInt32(&called))
|
||||
}
|
||||
|
||||
func TestRun_CallsCopyURL_WithAutoFilename_AndPropagatesError(t *testing.T) {
|
||||
t.Cleanup(resetGlobals)
|
||||
resetGlobals()
|
||||
|
||||
tmp := t.TempDir()
|
||||
autoFilename = true
|
||||
|
||||
want := errors.New("boom")
|
||||
var called int32
|
||||
|
||||
copyURL = func(_ctx context.Context, _dst fs.Fs, dstFileName, url string, auto, header, noclobber bool) (fs.Object, error) {
|
||||
atomic.AddInt32(&called, 1)
|
||||
assert.Equal(t, "", dstFileName) // auto filename -> empty
|
||||
assert.True(t, auto)
|
||||
return nil, want
|
||||
}
|
||||
|
||||
err := run([]string{"https://example.com/auto/name", tmp})
|
||||
require.Error(t, err)
|
||||
assert.Equal(t, want, err)
|
||||
assert.Equal(t, int32(1), atomic.LoadInt32(&called))
|
||||
}
|
||||
|
||||
func TestRunURLS_ErrorsWithStdoutAndWithPrintFilename(t *testing.T) {
|
||||
t.Cleanup(resetGlobals)
|
||||
resetGlobals()
|
||||
|
||||
stdout = true
|
||||
err := runURLS([]string{"dummy.csv", "destDir"})
|
||||
require.Error(t, err)
|
||||
assert.Contains(t, err.Error(), "can't use --stdout with --urls")
|
||||
|
||||
resetGlobals()
|
||||
printFilename = true
|
||||
err = runURLS([]string{"dummy.csv", "destDir"})
|
||||
require.Error(t, err)
|
||||
assert.Contains(t, err.Error(), "can't use --print-filename with --urls")
|
||||
}
|
||||
|
||||
func TestRunURLS_ProcessesCSV_ParallelCalls_AndAggregatesError(t *testing.T) {
|
||||
t.Cleanup(resetGlobals)
|
||||
resetGlobals()
|
||||
|
||||
tmp := t.TempDir()
|
||||
csvPath := filepath.Join(tmp, "urls.csv")
|
||||
csvContent := []byte(
|
||||
"https://example.com/a,aaa.txt\n" + // success
|
||||
"https://example.com/b\n" + // auto filename
|
||||
"https://example.com/c,ccc.txt\n") // error
|
||||
require.NoError(t, os.WriteFile(csvPath, csvContent, 0o600))
|
||||
|
||||
// destination dir (local backend)
|
||||
dest := t.TempDir()
|
||||
|
||||
// mock copyURL: succeed for /a and /b, fail for /c
|
||||
|
||||
var calls int32
|
||||
var mu sync.Mutex
|
||||
var seen []string
|
||||
|
||||
copyURL = func(_ctx context.Context, _dst fs.Fs, dstFileName, url string, auto, header, noclobber bool) (fs.Object, error) {
|
||||
atomic.AddInt32(&calls, 1)
|
||||
mu.Lock()
|
||||
seen = append(seen, url+"|"+dstFileName)
|
||||
mu.Unlock()
|
||||
|
||||
switch {
|
||||
case url == "https://example.com/a":
|
||||
require.Equal(t, "aaa.txt", dstFileName)
|
||||
return nil, nil
|
||||
case url == "https://example.com/b":
|
||||
require.Equal(t, "", dstFileName) // auto-name path
|
||||
return nil, nil
|
||||
case url == "https://example.com/c":
|
||||
return nil, errors.New("network down")
|
||||
default:
|
||||
return nil, nil
|
||||
}
|
||||
}
|
||||
|
||||
err := runURLS([]string{csvPath, dest})
|
||||
require.Error(t, err)
|
||||
assert.Contains(t, err.Error(), "not all URLs copied successfully")
|
||||
// 3 lines => 3 calls
|
||||
assert.Equal(t, int32(3), atomic.LoadInt32(&calls))
|
||||
|
||||
// sanity: all expected URLs were seen
|
||||
assert.ElementsMatch(t,
|
||||
[]string{
|
||||
"https://example.com/a|aaa.txt",
|
||||
"https://example.com/b|",
|
||||
"https://example.com/c|ccc.txt",
|
||||
},
|
||||
seen,
|
||||
)
|
||||
}
|
||||
Reference in New Issue
Block a user