mirror of https://github.com/rclone/rclone.git
Move all backends into backend directory
backend/b2/upload.go (Normal file, 425 lines)
@@ -0,0 +1,425 @@
// Upload large files for b2
//
// Docs - https://www.backblaze.com/b2/docs/large_files.html

package b2

import (
	"bytes"
	"crypto/sha1"
	"encoding/hex"
	"fmt"
	"hash"
	"io"
	"strings"
	"sync"

	"github.com/ncw/rclone/backend/b2/api"
	"github.com/ncw/rclone/fs"
	"github.com/ncw/rclone/rest"
	"github.com/pkg/errors"
)

type hashAppendingReader struct {
	h         hash.Hash
	in        io.Reader
	hexSum    string
	hexReader io.Reader
}

// Read returns all bytes from the original reader, then the hex sum
// of what was read so far, then EOF.
func (har *hashAppendingReader) Read(b []byte) (int, error) {
	if har.hexReader == nil {
		n, err := har.in.Read(b)
		if err == io.EOF {
			har.in = nil // allow GC
			err = nil    // allow reading hexSum before EOF

			har.hexSum = hex.EncodeToString(har.h.Sum(nil))
			har.hexReader = strings.NewReader(har.hexSum)
		}
		return n, err
	}
	return har.hexReader.Read(b)
}

// AdditionalLength returns how many bytes the appended hex sum will take up.
func (har *hashAppendingReader) AdditionalLength() int {
	return hex.EncodedLen(har.h.Size())
}

// HexSum returns the hash sum as hex. It's only available after the original
// reader has EOF'd. It's an empty string before that.
func (har *hashAppendingReader) HexSum() string {
	return har.hexSum
}

// newHashAppendingReader takes a Reader and a Hash and will append the hex sum
// after the original reader reaches EOF. The increased size depends on the
// given hash, which may be queried through AdditionalLength().
func newHashAppendingReader(in io.Reader, h hash.Hash) *hashAppendingReader {
	withHash := io.TeeReader(in, h)
	return &hashAppendingReader{h: h, in: withHash}
}
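
// Illustrative sketch (not part of the upload path): draining a
// hashAppendingReader yields the payload followed by its hex SHA1, which is
// exactly the shape the "hex_digits_at_end" upload mode below consumes.
//
//	har := newHashAppendingReader(strings.NewReader("payload"), sha1.New())
//	data, _ := ioutil.ReadAll(har) // "payload" + 40 hex digits
//	sum := har.HexSum()            // the same 40 digits, set once EOF is hit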

// largeUpload is used to control the upload of large files which need chunking
type largeUpload struct {
	f        *Fs                             // parent Fs
	o        *Object                         // object being uploaded
	in       io.Reader                       // read the data from here
	id       string                          // ID of the file being uploaded
	size     int64                           // total size
	parts    int64                           // calculated number of parts, if known
	sha1s    []string                        // slice of SHA1s for each part
	uploadMu sync.Mutex                      // lock for upload variable
	uploads  []*api.GetUploadPartURLResponse // result of get upload URL calls
}

// newLargeUpload starts an upload of object o from in with metadata in src
func (f *Fs) newLargeUpload(o *Object, in io.Reader, src fs.ObjectInfo) (up *largeUpload, err error) {
	remote := o.remote
	size := src.Size()
	parts := int64(0)
	sha1SliceSize := int64(maxParts)
	if size == -1 {
		fs.Debugf(o, "Streaming upload with --b2-chunk-size %s allows uploads of up to %s and will fail only when that limit is reached.", fs.SizeSuffix(chunkSize), fs.SizeSuffix(maxParts*chunkSize))
	} else {
		parts = size / int64(chunkSize)
		if size%int64(chunkSize) != 0 {
			parts++
		}
		if parts > maxParts {
			return nil, errors.Errorf("%q too big (%d bytes) makes too many parts %d > %d - increase --b2-chunk-size", remote, size, parts, maxParts)
		}
		sha1SliceSize = parts
	}

	modTime := src.ModTime()
	opts := rest.Opts{
		Method: "POST",
		Path:   "/b2_start_large_file",
	}
	bucketID, err := f.getBucketID()
	if err != nil {
		return nil, err
	}
	var request = api.StartLargeFileRequest{
		BucketID:    bucketID,
		Name:        o.fs.root + remote,
		ContentType: fs.MimeType(src),
		Info: map[string]string{
			timeKey: timeString(modTime),
		},
	}
	// Set the SHA1 if known
	if calculatedSha1, err := src.Hash(fs.HashSHA1); err == nil && calculatedSha1 != "" {
		request.Info[sha1Key] = calculatedSha1
	}
	var response api.StartLargeFileResponse
	err = f.pacer.Call(func() (bool, error) {
		resp, err := f.srv.CallJSON(&opts, &request, &response)
		return f.shouldRetry(resp, err)
	})
	if err != nil {
		return nil, err
	}
	up = &largeUpload{
		f:     f,
		o:     o,
		in:    in,
		id:    response.ID,
		size:  size,
		parts: parts,
		sha1s: make([]string, sha1SliceSize),
	}
	return up, nil
}
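
// Worked example of the part maths above (numbers hypothetical; the real
// chunkSize comes from --b2-chunk-size): a 1 GiB upload with a 96 MiB chunk
// size gives 1024/96 = 10 full parts plus a 64 MiB remainder, so parts = 11
// and sha1s is allocated with 11 slots.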

// getUploadURL returns the upload info with the UploadURL and the AuthorizationToken
//
// This should be returned with returnUploadURL when finished
func (up *largeUpload) getUploadURL() (upload *api.GetUploadPartURLResponse, err error) {
	up.uploadMu.Lock()
	defer up.uploadMu.Unlock()
	if len(up.uploads) == 0 {
		opts := rest.Opts{
			Method: "POST",
			Path:   "/b2_get_upload_part_url",
		}
		var request = api.GetUploadPartURLRequest{
			ID: up.id,
		}
		err := up.f.pacer.Call(func() (bool, error) {
			resp, err := up.f.srv.CallJSON(&opts, &request, &upload)
			return up.f.shouldRetry(resp, err)
		})
		if err != nil {
			return nil, errors.Wrap(err, "failed to get upload URL")
		}
	} else {
		upload, up.uploads = up.uploads[0], up.uploads[1:]
	}
	return upload, nil
}

// returnUploadURL returns the UploadURL to the cache
func (up *largeUpload) returnUploadURL(upload *api.GetUploadPartURLResponse) {
	if upload == nil {
		return
	}
	up.uploadMu.Lock()
	up.uploads = append(up.uploads, upload)
	up.uploadMu.Unlock()
}

// clearUploadURL clears the current UploadURL and the AuthorizationToken
func (up *largeUpload) clearUploadURL() {
	up.uploadMu.Lock()
	up.uploads = nil
	up.uploadMu.Unlock()
}
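
// Sketch of the checkout/return cycle these three methods implement: each
// worker takes a URL, uploads one part with it, then returns it for reuse,
// or hands back nil after a retryable failure so a fresh URL gets fetched.
//
//	upload, err := up.getUploadURL()
//	// ... POST the part to upload.UploadURL with upload.AuthorizationToken ...
//	up.returnUploadURL(upload) // upload is set to nil first on retryable errors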

// Transfer a chunk
func (up *largeUpload) transferChunk(part int64, body []byte) error {
	err := up.f.pacer.Call(func() (bool, error) {
		fs.Debugf(up.o, "Sending chunk %d length %d", part, len(body))

		// Get upload URL
		upload, err := up.getUploadURL()
		if err != nil {
			return false, err
		}

		in := newHashAppendingReader(bytes.NewReader(body), sha1.New())
		size := int64(len(body)) + int64(in.AdditionalLength())

		// Authorization
		//
		// An upload authorization token, from b2_get_upload_part_url.
		//
		// X-Bz-Part-Number
		//
		// A number from 1 to 10000. The parts uploaded for one file
		// must have contiguous numbers, starting with 1.
		//
		// Content-Length
		//
		// The number of bytes in the file being uploaded. Note that
		// this header is required; you cannot leave it out and just
		// use chunked encoding. The minimum size of every part but
		// the last one is 100MB.
		//
		// X-Bz-Content-Sha1
		//
		// The SHA1 checksum of this part of the file. B2 will
		// check this when the part is uploaded, to make sure that the
		// data arrived correctly. The same SHA1 checksum must be
		// passed to b2_finish_large_file.
		opts := rest.Opts{
			Method:  "POST",
			RootURL: upload.UploadURL,
			Body:    fs.AccountPart(up.o, in),
			ExtraHeaders: map[string]string{
				"Authorization":    upload.AuthorizationToken,
				"X-Bz-Part-Number": fmt.Sprintf("%d", part),
				sha1Header:         "hex_digits_at_end",
			},
			ContentLength: &size,
		}

		var response api.UploadPartResponse

		resp, err := up.f.srv.CallJSON(&opts, nil, &response)
		retry, err := up.f.shouldRetry(resp, err)
		// On retryable error clear PartUploadURL
		if retry {
			fs.Debugf(up.o, "Clearing part upload URL because of error: %v", err)
			upload = nil
		}
		up.returnUploadURL(upload)
		up.sha1s[part-1] = in.HexSum()
		return retry, err
	})
	if err != nil {
		fs.Debugf(up.o, "Error sending chunk %d: %v", part, err)
	} else {
		fs.Debugf(up.o, "Done sending chunk %d", part)
	}
	return err
}

// finish closes off the large upload
func (up *largeUpload) finish() error {
	fs.Debugf(up.o, "Finishing large file upload with %d parts", up.parts)
	opts := rest.Opts{
		Method: "POST",
		Path:   "/b2_finish_large_file",
	}
	var request = api.FinishLargeFileRequest{
		ID:    up.id,
		SHA1s: up.sha1s,
	}
	var response api.FileInfo
	err := up.f.pacer.Call(func() (bool, error) {
		resp, err := up.f.srv.CallJSON(&opts, &request, &response)
		return up.f.shouldRetry(resp, err)
	})
	if err != nil {
		return err
	}
	return up.o.decodeMetaDataFileInfo(&response)
}
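
// For reference, api.FinishLargeFileRequest is assumed to marshal to the wire
// shape in the B2 docs linked at the top of this file:
//
//	{
//	    "fileId": "...",
//	    "partSha1Array": ["<sha1 of part 1>", "<sha1 of part 2>", ...]
//	}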

// cancel aborts the large upload
func (up *largeUpload) cancel() error {
	opts := rest.Opts{
		Method: "POST",
		Path:   "/b2_cancel_large_file",
	}
	var request = api.CancelLargeFileRequest{
		ID: up.id,
	}
	var response api.CancelLargeFileResponse
	err := up.f.pacer.Call(func() (bool, error) {
		resp, err := up.f.srv.CallJSON(&opts, &request, &response)
		return up.f.shouldRetry(resp, err)
	})
	return err
}

func (up *largeUpload) managedTransferChunk(wg *sync.WaitGroup, errs chan error, part int64, buf []byte) {
	wg.Add(1)
	go func(part int64, buf []byte) {
		defer wg.Done()
		defer up.f.putUploadBlock(buf)
		err := up.transferChunk(part, buf)
		if err != nil {
			select {
			case errs <- err:
			default:
			}
		}
	}(part, buf)
}
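
// The non-blocking send in managedTransferChunk pairs with errs being created
// as make(chan error, 1): the first failure is kept, later ones are dropped,
// and no goroutine ever blocks while reporting. The pattern in isolation:
//
//	errs := make(chan error, 1)
//	select {
//	case errs <- err: // first error wins
//	default: // already full, drop this one
//	}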

func (up *largeUpload) finishOrCancelOnError(err error, errs chan error) error {
	if err == nil {
		select {
		case err = <-errs:
		default:
		}
	}
	if err != nil {
		fs.Debugf(up.o, "Cancelling large file upload due to error: %v", err)
		cancelErr := up.cancel()
		if cancelErr != nil {
			fs.Errorf(up.o, "Failed to cancel large file upload: %v", cancelErr)
		}
		return err
	}
	return up.finish()
}

// Stream uploads the chunks from the input, starting with a required initial
// chunk. Assumes the file size is unknown and will upload until the input
// reaches EOF.
func (up *largeUpload) Stream(initialUploadBlock []byte) (err error) {
	fs.Debugf(up.o, "Starting streaming of large file (id %q)", up.id)
	errs := make(chan error, 1)
	hasMoreParts := true
	var wg sync.WaitGroup
	fs.AccountByPart(up.o) // Cancel whole file accounting before reading

	// Transfer initial chunk
	up.size = int64(len(initialUploadBlock))
	up.managedTransferChunk(&wg, errs, 1, initialUploadBlock)

outer:
	for part := int64(2); hasMoreParts; part++ {
		// Check any errors
		select {
		case err = <-errs:
			break outer
		default:
		}

		// Get a block of memory
		buf := up.f.getUploadBlock()

		// Read the chunk. Assign to the named return err (rather than
		// shadowing it with :=) so read failures survive the break below.
		var n int
		n, err = io.ReadFull(up.in, buf)
		if err == io.ErrUnexpectedEOF {
			fs.Debugf(up.o, "Read less than a full chunk, making this the last one.")
			buf = buf[:n]
			hasMoreParts = false
			err = nil
		} else if err == io.EOF {
			fs.Debugf(up.o, "Could not read any more bytes, previous chunk was the last.")
			up.f.putUploadBlock(buf)
			hasMoreParts = false
			err = nil
			break outer
		} else if err != nil {
			// other kinds of errors indicate failure
			up.f.putUploadBlock(buf)
			break outer
		}

		// Keep stats up to date
		up.parts = part
		up.size += int64(n)
		if part > maxParts {
			err = errors.Errorf("%q too big (%d bytes so far) makes too many parts %d > %d - increase --b2-chunk-size", up.o, up.size, up.parts, maxParts)
			break outer
		}

		// Transfer the chunk
		up.managedTransferChunk(&wg, errs, part, buf)
	}
	wg.Wait()
	up.sha1s = up.sha1s[:up.parts]

	return up.finishOrCancelOnError(err, errs)
}
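
// Hypothetical caller sketch (the real call site lives elsewhere in the b2
// backend): read one chunk ahead to learn whether the input is larger than a
// single chunk, then hand that buffered block to Stream so no bytes are lost.
//
//	buf := f.getUploadBlock()
//	n, _ := io.ReadFull(in, buf)
//	up, _ := f.newLargeUpload(o, in, src)
//	err := up.Stream(buf[:n])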

// Upload uploads the chunks from the input
func (up *largeUpload) Upload() error {
	fs.Debugf(up.o, "Starting upload of large file in %d chunks (id %q)", up.parts, up.id)
	remaining := up.size
	errs := make(chan error, 1)
	var wg sync.WaitGroup
	var err error
	fs.AccountByPart(up.o) // Cancel whole file accounting before reading
outer:
	for part := int64(1); part <= up.parts; part++ {
		// Check any errors
		select {
		case err = <-errs:
			break outer
		default:
		}

		reqSize := remaining
		if reqSize >= int64(chunkSize) {
			reqSize = int64(chunkSize)
		}

		// Get a block of memory
		buf := up.f.getUploadBlock()[:reqSize]

		// Read the chunk
		_, err = io.ReadFull(up.in, buf)
		if err != nil {
			up.f.putUploadBlock(buf)
			break outer
		}

		// Transfer the chunk
		up.managedTransferChunk(&wg, errs, part, buf)
		remaining -= reqSize
	}
	wg.Wait()

	return up.finishOrCancelOnError(err, errs)
}