new backend: hidrive - fixes #1069

This commit is contained in:
Ovidiu Victor Tatar
2022-07-07 19:58:22 +02:00
committed by Nick Craig-Wood
parent 502226bfc8
commit b4d847cadd
17 changed files with 3450 additions and 0 deletions

View File

@@ -0,0 +1,81 @@
package api
import (
"encoding/json"
"net/url"
"path"
"strings"
"time"
)
// Some presets for different amounts of information that can be requested for fields;
// it is recommended to only request the information that is actually needed.
var (
HiDriveObjectNoMetadataFields = []string{"name", "type"}
HiDriveObjectWithMetadataFields = append(HiDriveObjectNoMetadataFields, "id", "size", "mtime", "chash")
HiDriveObjectWithDirectoryMetadataFields = append(HiDriveObjectWithMetadataFields, "nmembers")
DirectoryContentFields = []string{"nmembers"}
)
// QueryParameters represents the parameters passed to an API-call.
type QueryParameters struct {
url.Values
}
// NewQueryParameters initializes an instance of QueryParameters and
// returns a pointer to it.
func NewQueryParameters() *QueryParameters {
return &QueryParameters{url.Values{}}
}
// SetFileInDirectory sets the appropriate parameters
// to specify a path to a file in a directory.
// This is used by requests that work with paths for files that do not exist yet.
// (For example when creating a file).
// Most requests use the format produced by SetPath(...).
func (p *QueryParameters) SetFileInDirectory(filePath string) {
directory, file := path.Split(path.Clean(filePath))
p.Set("dir", path.Clean(directory))
p.Set("name", file)
// NOTE: It would be possible to switch to pid-based requests
// by modifying this function.
}
// SetPath sets the appropriate parameters to access the given path.
func (p *QueryParameters) SetPath(objectPath string) {
p.Set("path", path.Clean(objectPath))
// NOTE: It would be possible to switch to pid-based requests
// by modifying this function.
}
// SetTime sets the key to the time-value. It replaces any existing values.
func (p *QueryParameters) SetTime(key string, value time.Time) error {
valueAPI := Time(value)
valueBytes, err := json.Marshal(&valueAPI)
if err != nil {
return err
}
p.Set(key, string(valueBytes))
return nil
}
// AddList adds the given values as a list
// with each value separated by the separator.
// It appends to any existing values associated with key.
func (p *QueryParameters) AddList(key string, separator string, values ...string) {
original := p.Get(key)
p.Set(key, strings.Join(values, separator))
if original != "" {
p.Set(key, original+separator+p.Get(key))
}
}
// AddFields sets the appropriate parameter to access the given fields.
// The given fields will be appended to any other existing fields.
func (p *QueryParameters) AddFields(prefix string, fields ...string) {
modifiedFields := make([]string, len(fields))
for i, field := range fields {
modifiedFields[i] = prefix + field
}
p.AddList("fields", ",", modifiedFields...)
}

View File

@@ -0,0 +1,135 @@
// Package api has type definitions and code related to API-calls for the HiDrive-API.
package api
import (
"encoding/json"
"fmt"
"net/url"
"strconv"
"time"
)
// Time represents date and time information for the API.
type Time time.Time
// MarshalJSON turns Time into JSON (in Unix-time/UTC).
func (t *Time) MarshalJSON() ([]byte, error) {
secs := time.Time(*t).Unix()
return []byte(strconv.FormatInt(secs, 10)), nil
}
// UnmarshalJSON turns JSON into Time.
func (t *Time) UnmarshalJSON(data []byte) error {
secs, err := strconv.ParseInt(string(data), 10, 64)
if err != nil {
return err
}
*t = Time(time.Unix(secs, 0))
return nil
}
// Error is returned from the API when things go wrong.
type Error struct {
Code json.Number `json:"code"`
ContextInfo json.RawMessage
Message string `json:"msg"`
}
// Error returns a string for the error and satisfies the error interface.
func (e *Error) Error() string {
out := fmt.Sprintf("Error %q", e.Code.String())
if e.Message != "" {
out += ": " + e.Message
}
if e.ContextInfo != nil {
out += fmt.Sprintf(" (%+v)", e.ContextInfo)
}
return out
}
// Check Error satisfies the error interface.
var _ error = (*Error)(nil)
// possible types for HiDriveObject
const (
HiDriveObjectTypeDirectory = "dir"
HiDriveObjectTypeFile = "file"
HiDriveObjectTypeSymlink = "symlink"
)
// HiDriveObject describes a folder, a symlink or a file.
// Depending on the type and content, not all fields are present.
type HiDriveObject struct {
Type string `json:"type"`
ID string `json:"id"`
ParentID string `json:"parent_id"`
Name string `json:"name"`
Path string `json:"path"`
Size int64 `json:"size"`
MemberCount int64 `json:"nmembers"`
ModifiedAt Time `json:"mtime"`
ChangedAt Time `json:"ctime"`
MetaHash string `json:"mhash"`
MetaOnlyHash string `json:"mohash"`
NameHash string `json:"nhash"`
ContentHash string `json:"chash"`
IsTeamfolder bool `json:"teamfolder"`
Readable bool `json:"readable"`
Writable bool `json:"writable"`
Shareable bool `json:"shareable"`
MIMEType string `json:"mime_type"`
}
// ModTime returns the modification time of the HiDriveObject.
func (i *HiDriveObject) ModTime() time.Time {
t := time.Time(i.ModifiedAt)
if t.IsZero() {
t = time.Time(i.ChangedAt)
}
return t
}
// UnmarshalJSON turns JSON into HiDriveObject and
// introduces specific default-values where necessary.
func (i *HiDriveObject) UnmarshalJSON(data []byte) error {
type objectAlias HiDriveObject
defaultObject := objectAlias{
Size: -1,
MemberCount: -1,
}
err := json.Unmarshal(data, &defaultObject)
if err != nil {
return err
}
name, err := url.PathUnescape(defaultObject.Name)
if err == nil {
defaultObject.Name = name
}
*i = HiDriveObject(defaultObject)
return nil
}
// DirectoryContent describes the content of a directory.
type DirectoryContent struct {
TotalCount int64 `json:"nmembers"`
Entries []HiDriveObject `json:"members"`
}
// UnmarshalJSON turns JSON into DirectoryContent and
// introduces specific default-values where necessary.
func (d *DirectoryContent) UnmarshalJSON(data []byte) error {
type directoryContentAlias DirectoryContent
defaultDirectoryContent := directoryContentAlias{
TotalCount: -1,
}
err := json.Unmarshal(data, &defaultDirectoryContent)
if err != nil {
return err
}
*d = DirectoryContent(defaultDirectoryContent)
return nil
}

888
backend/hidrive/helpers.go Normal file
View File

@@ -0,0 +1,888 @@
package hidrive
// This file is for helper-functions which may provide more general and
// specialized functionality than the generic interfaces.
// There are two sections:
// 1. methods bound to Fs
// 2. other functions independent from Fs used throughout the package
// NOTE: Functions accessing paths expect any relative paths
// to be resolved prior to execution with resolvePath(...).
import (
"bytes"
"context"
"errors"
"io"
"net/http"
"path"
"strconv"
"sync"
"time"
"github.com/rclone/rclone/backend/hidrive/api"
"github.com/rclone/rclone/fs"
"github.com/rclone/rclone/fs/accounting"
"github.com/rclone/rclone/fs/fserrors"
"github.com/rclone/rclone/lib/ranges"
"github.com/rclone/rclone/lib/readers"
"github.com/rclone/rclone/lib/rest"
"golang.org/x/sync/errgroup"
"golang.org/x/sync/semaphore"
)
const (
// MaximumUploadBytes represents the maximum amount of bytes
// a single upload-operation will support.
MaximumUploadBytes = 2147483647 // = 2GiB - 1
// iterationChunkSize represents the chunk size used to iterate directory contents.
iterationChunkSize = 5000
)
var (
// retryErrorCodes is a slice of error codes that we will always retry.
retryErrorCodes = []int{
429, // Too Many Requests
500, // Internal Server Error
502, // Bad Gateway
503, // Service Unavailable
504, // Gateway Timeout
509, // Bandwidth Limit Exceeded
}
// ErrorFileExists is returned when a query tries to create a file
// that already exists.
ErrorFileExists = errors.New("destination file already exists")
)
// MemberType represents the possible types of entries a directory can contain.
type MemberType string
// possible values for MemberType
const (
AllMembers MemberType = "all"
NoMembers MemberType = "none"
DirectoryMembers MemberType = api.HiDriveObjectTypeDirectory
FileMembers MemberType = api.HiDriveObjectTypeFile
SymlinkMembers MemberType = api.HiDriveObjectTypeSymlink
)
// SortByField represents possible fields to sort entries of a directory by.
type SortByField string
// possible values for SortByField
const (
descendingSort string = "-"
SortByName SortByField = "name"
SortByModTime SortByField = "mtime"
SortByObjectType SortByField = "type"
SortBySize SortByField = "size"
SortByNameDescending SortByField = SortByField(descendingSort) + SortByName
SortByModTimeDescending SortByField = SortByField(descendingSort) + SortByModTime
SortByObjectTypeDescending SortByField = SortByField(descendingSort) + SortByObjectType
SortBySizeDescending SortByField = SortByField(descendingSort) + SortBySize
)
var (
// Unsorted disables sorting and can therefore not be combined with other values.
Unsorted = []SortByField{"none"}
// DefaultSorted does not specify how to sort and
// therefore implies the default sort order.
DefaultSorted = []SortByField{}
)
// CopyOrMoveOperationType represents the possible types of copy- and move-operations.
type CopyOrMoveOperationType int
// possible values for CopyOrMoveOperationType
const (
MoveOriginal CopyOrMoveOperationType = iota
CopyOriginal
CopyOriginalPreserveModTime
)
// OnExistAction represents possible actions the API should take,
// when a request tries to create a path that already exists.
type OnExistAction string
// possible values for OnExistAction
const (
// IgnoreOnExist instructs the API not to execute
// the request in case of a conflict, but to return an error.
IgnoreOnExist OnExistAction = "ignore"
// AutoNameOnExist instructs the API to automatically rename
// any conflicting request-objects.
AutoNameOnExist OnExistAction = "autoname"
// OverwriteOnExist instructs the API to overwrite any conflicting files.
// This can only be used, if the request operates on files directly.
// (For example when moving/copying a file.)
// For most requests this action will simply be ignored.
OverwriteOnExist OnExistAction = "overwrite"
)
// shouldRetry returns a boolean as to whether this resp and err deserve to be retried.
// It tries to expire/invalidate the token, if necessary.
// It returns the err as a convenience.
func (f *Fs) shouldRetry(ctx context.Context, resp *http.Response, err error) (bool, error) {
if fserrors.ContextError(ctx, &err) {
return false, err
}
if resp != nil && (resp.StatusCode == 401 || isHTTPError(err, 401)) && len(resp.Header["Www-Authenticate"]) > 0 {
fs.Debugf(f, "Token might be invalid: %v", err)
if f.tokenRenewer != nil {
iErr := f.tokenRenewer.Expire()
if iErr == nil {
return true, err
}
}
}
return fserrors.ShouldRetry(err) || fserrors.ShouldRetryHTTP(resp, retryErrorCodes), err
}
// resolvePath resolves the given (relative) path and
// returns a path suitable for API-calls.
// This will consider the root-path of the fs and any needed prefixes.
//
// Any relative paths passed to functions that access these paths should
// be resolved with this first!
func (f *Fs) resolvePath(objectPath string) string {
resolved := path.Join(f.opt.RootPrefix, f.root, f.opt.Enc.FromStandardPath(objectPath))
return resolved
}
// iterateOverDirectory calls the given function callback
// on each item found in a given directory.
//
// If callback ever returns true then this exits early with found = true.
func (f *Fs) iterateOverDirectory(ctx context.Context, directory string, searchOnly MemberType, callback func(*api.HiDriveObject) bool, fields []string, sortBy []SortByField) (found bool, err error) {
parameters := api.NewQueryParameters()
parameters.SetPath(directory)
parameters.AddFields("members.", fields...)
parameters.AddFields("", api.DirectoryContentFields...)
parameters.Set("members", string(searchOnly))
for _, v := range sortBy {
// The explicit conversion is necessary for each element.
parameters.AddList("sort", ",", string(v))
}
opts := rest.Opts{
Method: "GET",
Path: "/dir",
Parameters: parameters.Values,
}
iterateContent := func(result *api.DirectoryContent, err error) (bool, error) {
if err != nil {
return false, err
}
for _, item := range result.Entries {
item.Name = f.opt.Enc.ToStandardName(item.Name)
if callback(&item) {
return true, nil
}
}
return false, nil
}
return f.paginateDirectoryAccess(ctx, &opts, iterationChunkSize, 0, iterateContent)
}
// paginateDirectoryAccess executes requests specified via ctx and opts
// which should produce api.DirectoryContent.
// This will paginate the requests using limit starting at the given offset.
//
// The given function callback is called on each api.DirectoryContent found
// along with any errors that occurred.
// If callback ever returns true then this exits early with found = true.
// If callback ever returns an error then this exits early with that error.
func (f *Fs) paginateDirectoryAccess(ctx context.Context, opts *rest.Opts, limit int64, offset int64, callback func(*api.DirectoryContent, error) (bool, error)) (found bool, err error) {
for {
opts.Parameters.Set("limit", strconv.FormatInt(offset, 10)+","+strconv.FormatInt(limit, 10))
var result api.DirectoryContent
var resp *http.Response
err = f.pacer.Call(func() (bool, error) {
resp, err = f.srv.CallJSON(ctx, opts, nil, &result)
return f.shouldRetry(ctx, resp, err)
})
found, err = callback(&result, err)
if found || err != nil {
return found, err
}
offset += int64(len(result.Entries))
if offset >= result.TotalCount || limit > int64(len(result.Entries)) {
break
}
}
return false, nil
}
// fetchMetadataForPath reads the metadata from the path.
func (f *Fs) fetchMetadataForPath(ctx context.Context, path string, fields []string) (*api.HiDriveObject, error) {
parameters := api.NewQueryParameters()
parameters.SetPath(path)
parameters.AddFields("", fields...)
opts := rest.Opts{
Method: "GET",
Path: "/meta",
Parameters: parameters.Values,
}
var result api.HiDriveObject
var resp *http.Response
var err error
err = f.pacer.Call(func() (bool, error) {
resp, err = f.srv.CallJSON(ctx, &opts, nil, &result)
return f.shouldRetry(ctx, resp, err)
})
if err != nil {
return nil, err
}
return &result, nil
}
// copyOrMove copies or moves a directory or file
// from the source-path to the destination-path.
//
// The operation will only be successful
// if the parent-directory of the destination-path exists.
//
// NOTE: Use the explicit methods instead of directly invoking this method.
// (Those are: copyDirectory, moveDirectory, copyFile, moveFile.)
func (f *Fs) copyOrMove(ctx context.Context, isDirectory bool, operationType CopyOrMoveOperationType, source string, destination string, onExist OnExistAction) (*api.HiDriveObject, error) {
parameters := api.NewQueryParameters()
parameters.Set("src", source)
parameters.Set("dst", destination)
if onExist == AutoNameOnExist ||
(onExist == OverwriteOnExist && !isDirectory) {
parameters.Set("on_exist", string(onExist))
}
endpoint := "/"
if isDirectory {
endpoint += "dir"
} else {
endpoint += "file"
}
switch operationType {
case MoveOriginal:
endpoint += "/move"
case CopyOriginalPreserveModTime:
parameters.Set("preserve_mtime", strconv.FormatBool(true))
fallthrough
case CopyOriginal:
endpoint += "/copy"
}
opts := rest.Opts{
Method: "POST",
Path: endpoint,
Parameters: parameters.Values,
}
var result api.HiDriveObject
var resp *http.Response
var err error
err = f.pacer.Call(func() (bool, error) {
resp, err = f.srv.CallJSON(ctx, &opts, nil, &result)
return f.shouldRetry(ctx, resp, err)
})
if err != nil {
return nil, err
}
return &result, nil
}
// copyDirectory moves the directory at the source-path to the destination-path and
// returns the resulting api-object if successful.
//
// The operation will only be successful
// if the parent-directory of the destination-path exists.
func (f *Fs) copyDirectory(ctx context.Context, source string, destination string, onExist OnExistAction) (*api.HiDriveObject, error) {
return f.copyOrMove(ctx, true, CopyOriginalPreserveModTime, source, destination, onExist)
}
// moveDirectory moves the directory at the source-path to the destination-path and
// returns the resulting api-object if successful.
//
// The operation will only be successful
// if the parent-directory of the destination-path exists.
func (f *Fs) moveDirectory(ctx context.Context, source string, destination string, onExist OnExistAction) (*api.HiDriveObject, error) {
return f.copyOrMove(ctx, true, MoveOriginal, source, destination, onExist)
}
// copyFile copies the file at the source-path to the destination-path and
// returns the resulting api-object if successful.
//
// The operation will only be successful
// if the parent-directory of the destination-path exists.
//
// NOTE: This operation will expand sparse areas in the content of the source-file
// to blocks of 0-bytes in the destination-file.
func (f *Fs) copyFile(ctx context.Context, source string, destination string, onExist OnExistAction) (*api.HiDriveObject, error) {
return f.copyOrMove(ctx, false, CopyOriginalPreserveModTime, source, destination, onExist)
}
// moveFile moves the file at the source-path to the destination-path and
// returns the resulting api-object if successful.
//
// The operation will only be successful
// if the parent-directory of the destination-path exists.
//
// NOTE: This operation may expand sparse areas in the content of the source-file
// to blocks of 0-bytes in the destination-file.
func (f *Fs) moveFile(ctx context.Context, source string, destination string, onExist OnExistAction) (*api.HiDriveObject, error) {
return f.copyOrMove(ctx, false, MoveOriginal, source, destination, onExist)
}
// createDirectory creates the directory at the given path and
// returns the resulting api-object if successful.
//
// The directory will only be created if its parent-directory exists.
// This returns fs.ErrorDirNotFound if the parent-directory is not found.
// This returns fs.ErrorDirExists if the directory already exists.
func (f *Fs) createDirectory(ctx context.Context, directory string, onExist OnExistAction) (*api.HiDriveObject, error) {
parameters := api.NewQueryParameters()
parameters.SetPath(directory)
if onExist == AutoNameOnExist {
parameters.Set("on_exist", string(onExist))
}
opts := rest.Opts{
Method: "POST",
Path: "/dir",
Parameters: parameters.Values,
}
var result api.HiDriveObject
var resp *http.Response
var err error
err = f.pacer.Call(func() (bool, error) {
resp, err = f.srv.CallJSON(ctx, &opts, nil, &result)
return f.shouldRetry(ctx, resp, err)
})
switch {
case err == nil:
return &result, nil
case isHTTPError(err, 404):
return nil, fs.ErrorDirNotFound
case isHTTPError(err, 409):
return nil, fs.ErrorDirExists
}
return nil, err
}
// createDirectories creates the directory at the given path
// along with any missing parent directories and
// returns the resulting api-object (of the created directory) if successful.
//
// This returns fs.ErrorDirExists if the directory already exists.
//
// If an error occurs while the parent directories are being created,
// any directories already created will NOT be deleted again.
func (f *Fs) createDirectories(ctx context.Context, directory string, onExist OnExistAction) (*api.HiDriveObject, error) {
result, err := f.createDirectory(ctx, directory, onExist)
if err == nil {
return result, nil
}
if err != fs.ErrorDirNotFound {
return nil, err
}
parentDirectory := path.Dir(directory)
_, err = f.createDirectories(ctx, parentDirectory, onExist)
if err != nil && err != fs.ErrorDirExists {
return nil, err
}
// NOTE: Ignoring fs.ErrorDirExists does no harm,
// since it does not mean the child directory cannot be created.
return f.createDirectory(ctx, directory, onExist)
}
// deleteDirectory deletes the directory at the given path.
//
// If recursive is false, the directory will only be deleted if it is empty.
// If recursive is true, the directory will be deleted regardless of its content.
// This returns fs.ErrorDirNotFound if the directory is not found.
// This returns fs.ErrorDirectoryNotEmpty if the directory is not empty and
// recursive is false.
func (f *Fs) deleteDirectory(ctx context.Context, directory string, recursive bool) error {
parameters := api.NewQueryParameters()
parameters.SetPath(directory)
parameters.Set("recursive", strconv.FormatBool(recursive))
opts := rest.Opts{
Method: "DELETE",
Path: "/dir",
Parameters: parameters.Values,
NoResponse: true,
}
var resp *http.Response
var err error
err = f.pacer.Call(func() (bool, error) {
resp, err = f.srv.Call(ctx, &opts)
return f.shouldRetry(ctx, resp, err)
})
switch {
case isHTTPError(err, 404):
return fs.ErrorDirNotFound
case isHTTPError(err, 409):
return fs.ErrorDirectoryNotEmpty
}
return err
}
// deleteObject deletes the object/file at the given path.
//
// This returns fs.ErrorObjectNotFound if the object is not found.
func (f *Fs) deleteObject(ctx context.Context, path string) error {
parameters := api.NewQueryParameters()
parameters.SetPath(path)
opts := rest.Opts{
Method: "DELETE",
Path: "/file",
Parameters: parameters.Values,
NoResponse: true,
}
var resp *http.Response
var err error
err = f.pacer.Call(func() (bool, error) {
resp, err = f.srv.Call(ctx, &opts)
return f.shouldRetry(ctx, resp, err)
})
if isHTTPError(err, 404) {
return fs.ErrorObjectNotFound
}
return err
}
// createFile creates a file at the given path
// with the content of the io.ReadSeeker.
// This guarantees that existing files will not be overwritten.
// The maximum size of the content is limited by MaximumUploadBytes.
// The io.ReadSeeker should be resettable by seeking to its start.
// If modTime is not the zero time instant,
// it will be set as the file's modification time after the operation.
//
// This returns fs.ErrorDirNotFound
// if the parent directory of the file is not found.
// This returns ErrorFileExists if a file already exists at the specified path.
func (f *Fs) createFile(ctx context.Context, path string, content io.ReadSeeker, modTime time.Time, onExist OnExistAction) (*api.HiDriveObject, error) {
parameters := api.NewQueryParameters()
parameters.SetFileInDirectory(path)
if onExist == AutoNameOnExist {
parameters.Set("on_exist", string(onExist))
}
var err error
if !modTime.IsZero() {
err = parameters.SetTime("mtime", modTime)
if err != nil {
return nil, err
}
}
opts := rest.Opts{
Method: "POST",
Path: "/file",
Body: content,
ContentType: "application/octet-stream",
Parameters: parameters.Values,
}
var result api.HiDriveObject
var resp *http.Response
err = f.pacer.Call(func() (bool, error) {
// Reset the reading index (in case this is a retry).
if _, err = content.Seek(0, io.SeekStart); err != nil {
return false, err
}
resp, err = f.srv.CallJSON(ctx, &opts, nil, &result)
return f.shouldRetry(ctx, resp, err)
})
switch {
case err == nil:
return &result, nil
case isHTTPError(err, 404):
return nil, fs.ErrorDirNotFound
case isHTTPError(err, 409):
return nil, ErrorFileExists
}
return nil, err
}
// overwriteFile updates the content of the file at the given path
// with the content of the io.ReadSeeker.
// If the file does not exist it will be created.
// The maximum size of the content is limited by MaximumUploadBytes.
// The io.ReadSeeker should be resettable by seeking to its start.
// If modTime is not the zero time instant,
// it will be set as the file's modification time after the operation.
//
// This returns fs.ErrorDirNotFound
// if the parent directory of the file is not found.
func (f *Fs) overwriteFile(ctx context.Context, path string, content io.ReadSeeker, modTime time.Time) (*api.HiDriveObject, error) {
parameters := api.NewQueryParameters()
parameters.SetFileInDirectory(path)
var err error
if !modTime.IsZero() {
err = parameters.SetTime("mtime", modTime)
if err != nil {
return nil, err
}
}
opts := rest.Opts{
Method: "PUT",
Path: "/file",
Body: content,
ContentType: "application/octet-stream",
Parameters: parameters.Values,
}
var result api.HiDriveObject
var resp *http.Response
err = f.pacer.Call(func() (bool, error) {
// Reset the reading index (in case this is a retry).
if _, err = content.Seek(0, io.SeekStart); err != nil {
return false, err
}
resp, err = f.srv.CallJSON(ctx, &opts, nil, &result)
return f.shouldRetry(ctx, resp, err)
})
switch {
case err == nil:
return &result, nil
case isHTTPError(err, 404):
return nil, fs.ErrorDirNotFound
}
return nil, err
}
// uploadFileChunked updates the content of the existing file at the given path
// with the content of the io.Reader.
// Returns the position of the last successfully written byte, stopping before the first failed write.
// If nothing was written this will be 0.
// Returns the resulting api-object if successful.
//
// Replaces the file contents by uploading multiple chunks of the given size in parallel.
// Therefore this can and be used to upload files of any size efficiently.
// The number of parallel transfers is limited by transferLimit which should larger than 0.
// If modTime is not the zero time instant,
// it will be set as the file's modification time after the operation.
//
// NOTE: This method uses updateFileChunked and may create sparse files,
// if the upload of a chunk fails unexpectedly.
// See note about sparse files in patchFile.
// If any of the uploads fail, the process will be aborted and
// the first error that occurred will be returned.
// This is not an atomic operation,
// therefore if the upload fails the file may be partially modified.
//
// This returns fs.ErrorObjectNotFound if the object is not found.
func (f *Fs) uploadFileChunked(ctx context.Context, path string, content io.Reader, modTime time.Time, chunkSize int, transferLimit int64) (okSize uint64, info *api.HiDriveObject, err error) {
okSize, err = f.updateFileChunked(ctx, path, content, 0, chunkSize, transferLimit)
if err == nil {
info, err = f.resizeFile(ctx, path, okSize, modTime)
}
return okSize, info, err
}
// updateFileChunked updates the content of the existing file at the given path
// starting at the given offset.
// Returns the position of the last successfully written byte, stopping before the first failed write.
// If nothing was written this will be 0.
//
// Replaces the file contents starting from the given byte offset
// with the content of the io.Reader.
// If the offset is beyond the file end, the file is extended up to the offset.
//
// The upload is done multiple chunks of the given size in parallel.
// Therefore this can and be used to upload files of any size efficiently.
// The number of parallel transfers is limited by transferLimit which should larger than 0.
//
// NOTE: Because it is inefficient to set the modification time with every chunk,
// setting it to a specific value must be done in a separate request
// after this operation finishes.
//
// NOTE: This method uses patchFile and may create sparse files,
// especially if the upload of a chunk fails unexpectedly.
// See note about sparse files in patchFile.
// If any of the uploads fail, the process will be aborted and
// the first error that occurred will be returned.
// This is not an atomic operation,
// therefore if the upload fails the file may be partially modified.
//
// This returns fs.ErrorObjectNotFound if the object is not found.
func (f *Fs) updateFileChunked(ctx context.Context, path string, content io.Reader, offset uint64, chunkSize int, transferLimit int64) (okSize uint64, err error) {
var (
okChunksMu sync.Mutex // protects the variables below
okChunks []ranges.Range
)
g, gCtx := errgroup.WithContext(ctx)
transferSemaphore := semaphore.NewWeighted(transferLimit)
var readErr error
startMoreTransfers := true
zeroTime := time.Time{}
for chunk := uint64(0); startMoreTransfers; chunk++ {
// Acquire semaphore to limit number of transfers in parallel.
readErr = transferSemaphore.Acquire(gCtx, 1)
if readErr != nil {
break
}
// Read a chunk of data.
chunkReader, bytesRead, readErr := readerForChunk(content, chunkSize)
if bytesRead < chunkSize {
startMoreTransfers = false
}
if readErr != nil || bytesRead <= 0 {
break
}
// Transfer the chunk.
chunkOffset := uint64(chunkSize)*chunk + offset
g.Go(func() error {
// After this upload is done,
// signal that another transfer can be started.
defer transferSemaphore.Release(1)
uploadErr := f.patchFile(gCtx, path, cachedReader(chunkReader), chunkOffset, zeroTime)
if uploadErr == nil {
// Remember successfully written chunks.
okChunksMu.Lock()
okChunks = append(okChunks, ranges.Range{Pos: int64(chunkOffset), Size: int64(bytesRead)})
okChunksMu.Unlock()
fs.Debugf(f, "Done uploading chunk of size %v at offset %v.", bytesRead, chunkOffset)
} else {
fs.Infof(f, "Error while uploading chunk at offset %v. Error is %v.", chunkOffset, uploadErr)
}
return uploadErr
})
}
if readErr != nil {
// Log the error in case it is later ignored because of an upload-error.
fs.Infof(f, "Error while reading/preparing to upload a chunk. Error is %v.", readErr)
}
err = g.Wait()
// Compute the first continuous range of the file content,
// which does not contain any failed chunks.
// Do not forget to add the file content up to the starting offset,
// which is presumed to be already correct.
rs := ranges.Ranges{}
rs.Insert(ranges.Range{Pos: 0, Size: int64(offset)})
for _, chunkRange := range okChunks {
rs.Insert(chunkRange)
}
if len(rs) > 0 && rs[0].Pos == 0 {
okSize = uint64(rs[0].Size)
}
if err != nil {
return okSize, err
}
if readErr != nil {
return okSize, readErr
}
return okSize, nil
}
// patchFile updates the content of the existing file at the given path
// starting at the given offset.
//
// Replaces the file contents starting from the given byte offset
// with the content of the io.ReadSeeker.
// If the offset is beyond the file end, the file is extended up to the offset.
// The maximum size of the update is limited by MaximumUploadBytes.
// The io.ReadSeeker should be resettable by seeking to its start.
// If modTime is not the zero time instant,
// it will be set as the file's modification time after the operation.
//
// NOTE: By extending the file up to the offset this may create sparse files,
// which allocate less space on the file system than their apparent size indicates,
// since holes between data chunks are "real" holes
// and not regions made up of consecutive 0-bytes.
// Subsequent operations (such as copying data)
// usually expand the holes into regions of 0-bytes.
//
// This returns fs.ErrorObjectNotFound if the object is not found.
func (f *Fs) patchFile(ctx context.Context, path string, content io.ReadSeeker, offset uint64, modTime time.Time) error {
parameters := api.NewQueryParameters()
parameters.SetPath(path)
parameters.Set("offset", strconv.FormatUint(offset, 10))
if !modTime.IsZero() {
err := parameters.SetTime("mtime", modTime)
if err != nil {
return err
}
}
opts := rest.Opts{
Method: "PATCH",
Path: "/file",
Body: content,
ContentType: "application/octet-stream",
Parameters: parameters.Values,
NoResponse: true,
}
var resp *http.Response
var err error
err = f.pacer.Call(func() (bool, error) {
// Reset the reading index (in case this is a retry).
_, err = content.Seek(0, io.SeekStart)
if err != nil {
return false, err
}
resp, err = f.srv.Call(ctx, &opts)
if isHTTPError(err, 423) {
return true, err
}
return f.shouldRetry(ctx, resp, err)
})
if isHTTPError(err, 404) {
return fs.ErrorObjectNotFound
}
return err
}
// resizeFile updates the existing file at the given path to be of the given size
// and returns the resulting api-object if successful.
//
// If the given size is smaller than the current filesize,
// the file is cut/truncated at that position.
// If the given size is larger, the file is extended up to that position.
// If modTime is not the zero time instant,
// it will be set as the file's modification time after the operation.
//
// NOTE: By extending the file this may create sparse files,
// which allocate less space on the file system than their apparent size indicates,
// since holes between data chunks are "real" holes
// and not regions made up of consecutive 0-bytes.
// Subsequent operations (such as copying data)
// usually expand the holes into regions of 0-bytes.
//
// This returns fs.ErrorObjectNotFound if the object is not found.
func (f *Fs) resizeFile(ctx context.Context, path string, size uint64, modTime time.Time) (*api.HiDriveObject, error) {
parameters := api.NewQueryParameters()
parameters.SetPath(path)
parameters.Set("size", strconv.FormatUint(size, 10))
if !modTime.IsZero() {
err := parameters.SetTime("mtime", modTime)
if err != nil {
return nil, err
}
}
opts := rest.Opts{
Method: "POST",
Path: "/file/truncate",
Parameters: parameters.Values,
}
var result api.HiDriveObject
var resp *http.Response
var err error
err = f.pacer.Call(func() (bool, error) {
resp, err = f.srv.CallJSON(ctx, &opts, nil, &result)
return f.shouldRetry(ctx, resp, err)
})
switch {
case err == nil:
return &result, nil
case isHTTPError(err, 404):
return nil, fs.ErrorObjectNotFound
}
return nil, err
}
// ------------------------------------------------------------
// isHTTPError compares the numerical status code
// of an api.Error to the given HTTP status.
//
// If the given error is not an api.Error or
// a numerical status code could not be determined, this returns false.
// Otherwise this returns whether the status code of the error is equal to the given status.
func isHTTPError(err error, status int64) bool {
if apiErr, ok := err.(*api.Error); ok {
errStatus, decodeErr := apiErr.Code.Int64()
if decodeErr == nil && errStatus == status {
return true
}
}
return false
}
// createHiDriveScopes creates oauth-scopes
// from the given user-role and access-permissions.
//
// If the arguments are empty, they will not be included in the result.
func createHiDriveScopes(role string, access string) []string {
switch {
case role != "" && access != "":
return []string{access + "," + role}
case role != "":
return []string{role}
case access != "":
return []string{access}
}
return []string{}
}
// cachedReader returns a version of the reader that caches its contents and
// can therefore be reset using Seek.
func cachedReader(reader io.Reader) io.ReadSeeker {
bytesReader, ok := reader.(*bytes.Reader)
if ok {
return bytesReader
}
repeatableReader, ok := reader.(*readers.RepeatableReader)
if ok {
return repeatableReader
}
return readers.NewRepeatableReader(reader)
}
// readerForChunk reads a chunk of bytes from reader (after handling any accounting).
// Returns a new io.Reader (chunkReader) for that chunk
// and the number of bytes that have been read from reader.
func readerForChunk(reader io.Reader, length int) (chunkReader io.Reader, bytesRead int, err error) {
// Unwrap any accounting from the input if present.
reader, wrap := accounting.UnWrap(reader)
// Read a chunk of data.
buffer := make([]byte, length)
bytesRead, err = io.ReadFull(reader, buffer)
if err == io.EOF || err == io.ErrUnexpectedEOF {
err = nil
}
if err != nil {
return nil, bytesRead, err
}
// Truncate unused capacity.
buffer = buffer[:bytesRead]
// Use wrap to put any accounting back for chunkReader.
return wrap(bytes.NewReader(buffer)), bytesRead, nil
}

1002
backend/hidrive/hidrive.go Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,45 @@
// Test HiDrive filesystem interface
package hidrive
import (
"testing"
"github.com/rclone/rclone/fs"
"github.com/rclone/rclone/fstest/fstests"
)
// TestIntegration runs integration tests against the remote.
func TestIntegration(t *testing.T) {
name := "TestHiDrive"
fstests.Run(t, &fstests.Opt{
RemoteName: name + ":",
NilObject: (*Object)(nil),
ChunkedUpload: fstests.ChunkedUploadConfig{
MinChunkSize: 1,
MaxChunkSize: MaximumUploadBytes,
CeilChunkSize: nil,
NeedMultipleChunks: false,
},
})
}
// Change the configured UploadChunkSize.
// Will only be called while no transfer is in progress.
func (f *Fs) SetUploadChunkSize(chunksize fs.SizeSuffix) (fs.SizeSuffix, error) {
var old fs.SizeSuffix
old, f.opt.UploadChunkSize = f.opt.UploadChunkSize, chunksize
return old, nil
}
// Change the configured UploadCutoff.
// Will only be called while no transfer is in progress.
func (f *Fs) SetUploadCutoff(cutoff fs.SizeSuffix) (fs.SizeSuffix, error) {
var old fs.SizeSuffix
old, f.opt.UploadCutoff = f.opt.UploadCutoff, cutoff
return old, nil
}
var (
_ fstests.SetUploadChunkSizer = (*Fs)(nil)
_ fstests.SetUploadCutoffer = (*Fs)(nil)
)

View File

@@ -0,0 +1,410 @@
// Package hidrivehash implements the HiDrive hashing algorithm which combines SHA-1 hashes hierarchically to a single top-level hash.
//
// Note: This implementation does not grant access to any partial hashes generated.
//
// See: https://developer.hidrive.com/wp-content/uploads/2021/07/HiDrive_Synchronization-v3.3-rev28.pdf
// (link to newest version: https://static.hidrive.com/dev/0001)
package hidrivehash
import (
"bytes"
"crypto/sha1"
"encoding"
"encoding/binary"
"errors"
"fmt"
"hash"
"io"
"github.com/rclone/rclone/backend/hidrive/hidrivehash/internal"
)
const (
// BlockSize of the checksum in bytes.
BlockSize = 4096
// Size of the checksum in bytes.
Size = sha1.Size
// sumsPerLevel is the number of checksums
sumsPerLevel = 256
)
var (
// zeroSum is a special hash consisting of 20 null-bytes.
// This will be the hash of any empty file (or ones containing only null-bytes).
zeroSum = [Size]byte{}
// ErrorInvalidEncoding is returned when a hash should be decoded from a binary form that is invalid.
ErrorInvalidEncoding = errors.New("encoded binary form is invalid for this hash")
// ErrorHashFull is returned when a hash reached its capacity and cannot accept any more input.
ErrorHashFull = errors.New("hash reached its capacity")
)
// writeByBlock writes len(p) bytes from p to the io.Writer in blocks of size blockSize.
// It returns the number of bytes written from p (0 <= n <= len(p))
// and any error encountered that caused the write to stop early.
//
// A pointer bytesInBlock to a counter needs to be supplied,
// that is used to keep track how many bytes have been written to the writer already.
// A pointer onlyNullBytesInBlock to a boolean needs to be supplied,
// that is used to keep track whether the block so far only consists of null-bytes.
// The callback onBlockWritten is called whenever a full block has been written to the writer
// and is given as input the number of bytes that still need to be written.
func writeByBlock(p []byte, writer io.Writer, blockSize uint32, bytesInBlock *uint32, onlyNullBytesInBlock *bool, onBlockWritten func(remaining int) error) (n int, err error) {
total := len(p)
nullBytes := make([]byte, blockSize)
for len(p) > 0 {
toWrite := int(blockSize - *bytesInBlock)
if toWrite > len(p) {
toWrite = len(p)
}
c, err := writer.Write(p[:toWrite])
*bytesInBlock += uint32(c)
*onlyNullBytesInBlock = *onlyNullBytesInBlock && bytes.Equal(nullBytes[:toWrite], p[:toWrite])
// Discard data written through a reslice
p = p[c:]
if err != nil {
return total - len(p), err
}
if *bytesInBlock == blockSize {
err = onBlockWritten(len(p))
if err != nil {
return total - len(p), err
}
*bytesInBlock = 0
*onlyNullBytesInBlock = true
}
}
return total, nil
}
// level is a hash.Hash that is used to aggregate the checksums produced by the level hierarchically beneath it.
// It is used to represent any level-n hash, except for level-0.
type level struct {
checksum [Size]byte // aggregated checksum of this level
sumCount uint32 // number of sums contained in this level so far
bytesInHasher uint32 // number of bytes written into hasher so far
onlyNullBytesInHasher bool // whether the hasher only contains null-bytes so far
hasher hash.Hash
}
// NewLevel returns a new hash.Hash computing any level-n hash, except level-0.
func NewLevel() hash.Hash {
l := &level{}
l.Reset()
return l
}
// Add takes a position-embedded SHA-1 checksum and adds it to the level.
func (l *level) Add(sha1sum []byte) {
var tmp uint
var carry bool
for i := Size - 1; i >= 0; i-- {
tmp = uint(sha1sum[i]) + uint(l.checksum[i])
if carry {
tmp++
}
carry = tmp > 255
l.checksum[i] = byte(tmp)
}
}
// IsFull returns whether the number of checksums added to this level reached its capacity.
func (l *level) IsFull() bool {
return l.sumCount >= sumsPerLevel
}
// Write (via the embedded io.Writer interface) adds more data to the running hash.
// Contrary to the specification from hash.Hash, this DOES return an error,
// specifically ErrorHashFull if and only if IsFull() returns true.
func (l *level) Write(p []byte) (n int, err error) {
if l.IsFull() {
return 0, ErrorHashFull
}
onBlockWritten := func(remaining int) error {
if !l.onlyNullBytesInHasher {
c, err := l.hasher.Write([]byte{byte(l.sumCount)})
l.bytesInHasher += uint32(c)
if err != nil {
return err
}
l.Add(l.hasher.Sum(nil))
}
l.sumCount++
l.hasher.Reset()
if remaining > 0 && l.IsFull() {
return ErrorHashFull
}
return nil
}
return writeByBlock(p, l.hasher, uint32(l.BlockSize()), &l.bytesInHasher, &l.onlyNullBytesInHasher, onBlockWritten)
}
// Sum appends the current hash to b and returns the resulting slice.
// It does not change the underlying hash state.
func (l *level) Sum(b []byte) []byte {
return append(b, l.checksum[:]...)
}
// Reset resets the Hash to its initial state.
func (l *level) Reset() {
l.checksum = zeroSum // clear the current checksum
l.sumCount = 0
l.bytesInHasher = 0
l.onlyNullBytesInHasher = true
l.hasher = sha1.New()
}
// Size returns the number of bytes Sum will return.
func (l *level) Size() int {
return Size
}
// BlockSize returns the hash's underlying block size.
// The Write method must be able to accept any amount
// of data, but it may operate more efficiently if all writes
// are a multiple of the block size.
func (l *level) BlockSize() int {
return Size
}
// MarshalBinary encodes the hash into a binary form and returns the result.
func (l *level) MarshalBinary() ([]byte, error) {
b := make([]byte, Size+4+4+1)
copy(b, l.checksum[:])
binary.BigEndian.PutUint32(b[Size:], l.sumCount)
binary.BigEndian.PutUint32(b[Size+4:], l.bytesInHasher)
if l.onlyNullBytesInHasher {
b[Size+4+4] = 1
}
encodedHasher, err := l.hasher.(encoding.BinaryMarshaler).MarshalBinary()
if err != nil {
return nil, err
}
b = append(b, encodedHasher...)
return b, nil
}
// UnmarshalBinary decodes the binary form generated by MarshalBinary.
// The hash will replace its internal state accordingly.
func (l *level) UnmarshalBinary(b []byte) error {
if len(b) < Size+4+4+1 {
return ErrorInvalidEncoding
}
copy(l.checksum[:], b)
l.sumCount = binary.BigEndian.Uint32(b[Size:])
l.bytesInHasher = binary.BigEndian.Uint32(b[Size+4:])
switch b[Size+4+4] {
case 0:
l.onlyNullBytesInHasher = false
case 1:
l.onlyNullBytesInHasher = true
default:
return ErrorInvalidEncoding
}
err := l.hasher.(encoding.BinaryUnmarshaler).UnmarshalBinary(b[Size+4+4+1:])
return err
}
// hidriveHash is the hash computing the actual checksum used by HiDrive by combining multiple level-hashes.
type hidriveHash struct {
levels []*level // collection of level-hashes, one for each level starting at level-1
lastSumWritten [Size]byte // the last checksum written to any of the levels
bytesInBlock uint32 // bytes written into blockHash so far
onlyNullBytesInBlock bool // whether the hasher only contains null-bytes so far
blockHash hash.Hash
}
// New returns a new hash.Hash computing the HiDrive checksum.
func New() hash.Hash {
h := &hidriveHash{}
h.Reset()
return h
}
// aggregateToLevel writes the checksum to the level at the given index
// and if necessary propagates any changes to levels above.
func (h *hidriveHash) aggregateToLevel(index int, sum []byte) {
for i := index; ; i++ {
if i >= len(h.levels) {
h.levels = append(h.levels, NewLevel().(*level))
}
_, err := h.levels[i].Write(sum)
copy(h.lastSumWritten[:], sum)
if err != nil {
panic(fmt.Errorf("level-hash should not have produced an error: %w", err))
}
if !h.levels[i].IsFull() {
break
}
sum = h.levels[i].Sum(nil)
h.levels[i].Reset()
}
}
// Write (via the embedded io.Writer interface) adds more data to the running hash.
// It never returns an error.
func (h *hidriveHash) Write(p []byte) (n int, err error) {
onBlockWritten := func(remaining int) error {
var sum []byte
if h.onlyNullBytesInBlock {
sum = zeroSum[:]
} else {
sum = h.blockHash.Sum(nil)
}
h.blockHash.Reset()
h.aggregateToLevel(0, sum)
return nil
}
return writeByBlock(p, h.blockHash, uint32(BlockSize), &h.bytesInBlock, &h.onlyNullBytesInBlock, onBlockWritten)
}
// Sum appends the current hash to b and returns the resulting slice.
// It does not change the underlying hash state.
func (h *hidriveHash) Sum(b []byte) []byte {
// Save internal state.
state, err := h.MarshalBinary()
if err != nil {
panic(fmt.Errorf("saving the internal state should not have produced an error: %w", err))
}
if h.bytesInBlock > 0 {
// Fill remainder of block with null-bytes.
filler := make([]byte, h.BlockSize()-int(h.bytesInBlock))
_, err = h.Write(filler)
if err != nil {
panic(fmt.Errorf("filling with null-bytes should not have an error: %w", err))
}
}
checksum := zeroSum
for i := 0; i < len(h.levels); i++ {
level := h.levels[i]
if i < len(h.levels)-1 {
// Aggregate non-empty non-final levels.
if level.sumCount >= 1 {
h.aggregateToLevel(i+1, level.Sum(nil))
level.Reset()
}
} else {
// Determine sum of final level.
if level.sumCount > 1 {
copy(checksum[:], level.Sum(nil))
} else {
// This is needed, otherwise there is no way to return
// the non-position-embedded checksum.
checksum = h.lastSumWritten
}
}
}
// Restore internal state.
err = h.UnmarshalBinary(state)
if err != nil {
panic(fmt.Errorf("restoring the internal state should not have produced an error: %w", err))
}
return append(b, checksum[:]...)
}
// Reset resets the Hash to its initial state.
func (h *hidriveHash) Reset() {
h.levels = nil
h.lastSumWritten = zeroSum // clear the last written checksum
h.bytesInBlock = 0
h.onlyNullBytesInBlock = true
h.blockHash = sha1.New()
}
// Size returns the number of bytes Sum will return.
func (h *hidriveHash) Size() int {
return Size
}
// BlockSize returns the hash's underlying block size.
// The Write method must be able to accept any amount
// of data, but it may operate more efficiently if all writes
// are a multiple of the block size.
func (h *hidriveHash) BlockSize() int {
return BlockSize
}
// MarshalBinary encodes the hash into a binary form and returns the result.
func (h *hidriveHash) MarshalBinary() ([]byte, error) {
b := make([]byte, Size+4+1+8)
copy(b, h.lastSumWritten[:])
binary.BigEndian.PutUint32(b[Size:], h.bytesInBlock)
if h.onlyNullBytesInBlock {
b[Size+4] = 1
}
binary.BigEndian.PutUint64(b[Size+4+1:], uint64(len(h.levels)))
for _, level := range h.levels {
encodedLevel, err := level.MarshalBinary()
if err != nil {
return nil, err
}
encodedLength := make([]byte, 8)
binary.BigEndian.PutUint64(encodedLength, uint64(len(encodedLevel)))
b = append(b, encodedLength...)
b = append(b, encodedLevel...)
}
encodedBlockHash, err := h.blockHash.(encoding.BinaryMarshaler).MarshalBinary()
if err != nil {
return nil, err
}
b = append(b, encodedBlockHash...)
return b, nil
}
// UnmarshalBinary decodes the binary form generated by MarshalBinary.
// The hash will replace its internal state accordingly.
func (h *hidriveHash) UnmarshalBinary(b []byte) error {
if len(b) < Size+4+1+8 {
return ErrorInvalidEncoding
}
copy(h.lastSumWritten[:], b)
h.bytesInBlock = binary.BigEndian.Uint32(b[Size:])
switch b[Size+4] {
case 0:
h.onlyNullBytesInBlock = false
case 1:
h.onlyNullBytesInBlock = true
default:
return ErrorInvalidEncoding
}
amount := binary.BigEndian.Uint64(b[Size+4+1:])
h.levels = make([]*level, int(amount))
offset := Size + 4 + 1 + 8
for i := range h.levels {
length := int(binary.BigEndian.Uint64(b[offset:]))
offset += 8
h.levels[i] = NewLevel().(*level)
err := h.levels[i].UnmarshalBinary(b[offset : offset+length])
if err != nil {
return err
}
offset += length
}
err := h.blockHash.(encoding.BinaryUnmarshaler).UnmarshalBinary(b[offset:])
return err
}
// Sum returns the HiDrive checksum of the data.
func Sum(data []byte) [Size]byte {
h := New().(*hidriveHash)
_, _ = h.Write(data)
var result [Size]byte
copy(result[:], h.Sum(nil))
return result
}
// Check the interfaces are satisfied.
var (
_ hash.Hash = (*level)(nil)
_ encoding.BinaryMarshaler = (*level)(nil)
_ encoding.BinaryUnmarshaler = (*level)(nil)
_ internal.LevelHash = (*level)(nil)
_ hash.Hash = (*hidriveHash)(nil)
_ encoding.BinaryMarshaler = (*hidriveHash)(nil)
_ encoding.BinaryUnmarshaler = (*hidriveHash)(nil)
)

View File

@@ -0,0 +1,395 @@
package hidrivehash_test
import (
"crypto/sha1"
"encoding"
"encoding/hex"
"fmt"
"io"
"testing"
"github.com/rclone/rclone/backend/hidrive/hidrivehash"
"github.com/rclone/rclone/backend/hidrive/hidrivehash/internal"
"github.com/stretchr/testify/assert"
)
// helper functions to set up test-tables
func sha1ArrayAsSlice(sum [sha1.Size]byte) []byte {
return sum[:]
}
func mustDecode(hexstring string) []byte {
result, err := hex.DecodeString(hexstring)
if err != nil {
panic(err)
}
return result
}
// ------------------------------------------------------------
var testTableLevelPositionEmbedded = []struct {
ins [][]byte
outs [][]byte
name string
}{
{
[][]byte{
sha1ArrayAsSlice([20]byte{245, 202, 195, 223, 121, 198, 189, 112, 138, 202, 222, 2, 146, 156, 127, 16, 208, 233, 98, 88}),
sha1ArrayAsSlice([20]byte{78, 188, 156, 219, 173, 54, 81, 55, 47, 220, 222, 207, 201, 21, 57, 252, 255, 239, 251, 186}),
},
[][]byte{
sha1ArrayAsSlice([20]byte{245, 202, 195, 223, 121, 198, 189, 112, 138, 202, 222, 2, 146, 156, 127, 16, 208, 233, 98, 88}),
sha1ArrayAsSlice([20]byte{68, 135, 96, 187, 38, 253, 14, 167, 186, 167, 188, 210, 91, 177, 185, 13, 208, 217, 94, 18}),
},
"documentation-v3.2rev27-example L0 (position-embedded)",
},
{
[][]byte{
sha1ArrayAsSlice([20]byte{68, 254, 92, 166, 52, 37, 104, 180, 22, 123, 249, 144, 182, 78, 64, 74, 57, 117, 225, 195}),
sha1ArrayAsSlice([20]byte{75, 211, 153, 190, 125, 179, 67, 49, 60, 149, 98, 246, 142, 20, 11, 254, 159, 162, 129, 237}),
sha1ArrayAsSlice([20]byte{150, 2, 9, 153, 97, 153, 189, 104, 147, 14, 77, 203, 244, 243, 25, 212, 67, 48, 111, 107}),
},
[][]byte{
sha1ArrayAsSlice([20]byte{68, 254, 92, 166, 52, 37, 104, 180, 22, 123, 249, 144, 182, 78, 64, 74, 57, 117, 225, 195}),
sha1ArrayAsSlice([20]byte{144, 209, 246, 100, 177, 216, 171, 229, 83, 17, 92, 135, 68, 98, 76, 72, 217, 24, 99, 176}),
sha1ArrayAsSlice([20]byte{38, 211, 255, 254, 19, 114, 105, 77, 230, 31, 170, 83, 57, 85, 102, 29, 28, 72, 211, 27}),
},
"documentation-example L0 (position-embedded)",
},
{
[][]byte{
sha1ArrayAsSlice([20]byte{173, 123, 132, 245, 176, 172, 43, 183, 121, 40, 66, 252, 101, 249, 188, 193, 160, 189, 2, 116}),
sha1ArrayAsSlice([20]byte{40, 34, 8, 238, 37, 5, 237, 184, 79, 105, 10, 167, 171, 254, 13, 229, 132, 112, 254, 8}),
sha1ArrayAsSlice([20]byte{39, 112, 26, 86, 190, 35, 100, 101, 28, 131, 122, 191, 254, 144, 239, 107, 253, 124, 104, 203}),
},
[][]byte{
sha1ArrayAsSlice([20]byte{173, 123, 132, 245, 176, 172, 43, 183, 121, 40, 66, 252, 101, 249, 188, 193, 160, 189, 2, 116}),
sha1ArrayAsSlice([20]byte{213, 157, 141, 227, 213, 178, 25, 111, 200, 145, 77, 164, 17, 247, 202, 167, 37, 46, 0, 124}),
sha1ArrayAsSlice([20]byte{253, 13, 168, 58, 147, 213, 125, 212, 229, 20, 200, 100, 16, 136, 186, 19, 34, 170, 105, 71}),
},
"documentation-example L1 (position-embedded)",
},
}
var testTableLevel = []struct {
ins [][]byte
outs [][]byte
name string
}{
{
[][]byte{
mustDecode("09f077820a8a41f34a639f2172f1133b1eafe4e6"),
mustDecode("09f077820a8a41f34a639f2172f1133b1eafe4e6"),
mustDecode("09f077820a8a41f34a639f2172f1133b1eafe4e6"),
},
[][]byte{
mustDecode("44fe5ca6342568b4167bf990b64e404a3975e1c3"),
mustDecode("90d1f664b1d8abe553115c8744624c48d91863b0"),
mustDecode("26d3fffe1372694de61faa533955661d1c48d31b"),
},
"documentation-example L0",
},
{
[][]byte{
mustDecode("75a9f88fb219ef1dd31adf41c93e2efaac8d0245"),
mustDecode("daedc425199501b1e86b5eaba5649cbde205e6ae"),
mustDecode("286ac5283f99c4e0f11683900a3e39661c375dd6"),
},
[][]byte{
mustDecode("ad7b84f5b0ac2bb7792842fc65f9bcc1a0bd0274"),
mustDecode("d59d8de3d5b2196fc8914da411f7caa7252e007c"),
mustDecode("fd0da83a93d57dd4e514c8641088ba1322aa6947"),
},
"documentation-example L1",
},
{
[][]byte{
mustDecode("0000000000000000000000000000000000000000"),
mustDecode("0000000000000000000000000000000000000000"),
mustDecode("75a9f88fb219ef1dd31adf41c93e2efaac8d0245"),
mustDecode("0000000000000000000000000000000000000000"),
mustDecode("daedc425199501b1e86b5eaba5649cbde205e6ae"),
mustDecode("0000000000000000000000000000000000000000"),
mustDecode("0000000000000000000000000000000000000000"),
mustDecode("0000000000000000000000000000000000000000"),
mustDecode("286ac5283f99c4e0f11683900a3e39661c375dd6"),
mustDecode("0000000000000000000000000000000000000000"),
},
[][]byte{
mustDecode("0000000000000000000000000000000000000000"),
mustDecode("0000000000000000000000000000000000000000"),
mustDecode("a197464ec19f2b2b2bc6b21f6c939c7e57772843"),
mustDecode("a197464ec19f2b2b2bc6b21f6c939c7e57772843"),
mustDecode("b04769357aa4eb4b52cd5bec6935bc8f977fa3a1"),
mustDecode("b04769357aa4eb4b52cd5bec6935bc8f977fa3a1"),
mustDecode("b04769357aa4eb4b52cd5bec6935bc8f977fa3a1"),
mustDecode("b04769357aa4eb4b52cd5bec6935bc8f977fa3a1"),
mustDecode("8f56351897b4e1d100646fa122c924347721b2f5"),
mustDecode("8f56351897b4e1d100646fa122c924347721b2f5"),
},
"mixed-with-empties",
},
}
var testTable = []struct {
data []byte
// pattern describes how to use data to construct the hash-input.
// For every entry n at even indices this repeats the data n times.
// For every entry m at odd indices this repeats a null-byte m times.
// The input-data is constructed by concatinating the results in order.
pattern []int64
out []byte
name string
}{
{
[]byte("#ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz\n"),
[]int64{64},
mustDecode("09f077820a8a41f34a639f2172f1133b1eafe4e6"),
"documentation-example L0",
},
{
[]byte("#ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz\n"),
[]int64{64 * 256},
mustDecode("75a9f88fb219ef1dd31adf41c93e2efaac8d0245"),
"documentation-example L1",
},
{
[]byte("#ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz\n"),
[]int64{64 * 256, 0, 64 * 128, 4096 * 128, 64*2 + 32},
mustDecode("fd0da83a93d57dd4e514c8641088ba1322aa6947"),
"documentation-example L2",
},
{
[]byte("hello rclone\n"),
[]int64{316},
mustDecode("72370f9c18a2c20b31d71f3f4cee7a3cd2703737"),
"not-block-aligned",
},
{
[]byte("hello rclone\n"),
[]int64{13, 4096 * 3, 4},
mustDecode("a6990b81791f0d2db750b38f046df321c975aa60"),
"not-block-aligned-with-null-bytes",
},
{
[]byte{},
[]int64{},
mustDecode("0000000000000000000000000000000000000000"),
"empty",
},
{
[]byte{},
[]int64{0, 4096 * 256 * 256},
mustDecode("0000000000000000000000000000000000000000"),
"null-bytes",
},
}
// ------------------------------------------------------------
func TestLevelAdd(t *testing.T) {
for _, test := range testTableLevelPositionEmbedded {
l := hidrivehash.NewLevel().(internal.LevelHash)
t.Run(test.name, func(t *testing.T) {
for i := range test.ins {
l.Add(test.ins[i])
assert.Equal(t, test.outs[i], l.Sum(nil))
}
})
}
}
func TestLevelWrite(t *testing.T) {
for _, test := range testTableLevel {
l := hidrivehash.NewLevel()
t.Run(test.name, func(t *testing.T) {
for i := range test.ins {
l.Write(test.ins[i])
assert.Equal(t, test.outs[i], l.Sum(nil))
}
})
}
}
func TestLevelIsFull(t *testing.T) {
content := [hidrivehash.Size]byte{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19}
l := hidrivehash.NewLevel()
for i := 0; i < 256; i++ {
assert.False(t, l.(internal.LevelHash).IsFull())
written, err := l.Write(content[:])
assert.Equal(t, len(content), written)
if !assert.NoError(t, err) {
t.FailNow()
}
}
assert.True(t, l.(internal.LevelHash).IsFull())
written, err := l.Write(content[:])
assert.True(t, l.(internal.LevelHash).IsFull())
assert.Equal(t, 0, written)
assert.ErrorIs(t, err, hidrivehash.ErrorHashFull)
}
func TestLevelReset(t *testing.T) {
l := hidrivehash.NewLevel()
zeroHash := l.Sum(nil)
_, err := l.Write([]byte{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19})
if assert.NoError(t, err) {
assert.NotEqual(t, zeroHash, l.Sum(nil))
l.Reset()
assert.Equal(t, zeroHash, l.Sum(nil))
}
}
func TestLevelSize(t *testing.T) {
l := hidrivehash.NewLevel()
assert.Equal(t, 20, l.Size())
}
func TestLevelBlockSize(t *testing.T) {
l := hidrivehash.NewLevel()
assert.Equal(t, 20, l.BlockSize())
}
func TestLevelBinaryMarshaler(t *testing.T) {
content := []byte{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19}
l := hidrivehash.NewLevel().(internal.LevelHash)
l.Write(content[:10])
encoded, err := l.MarshalBinary()
if assert.NoError(t, err) {
d := hidrivehash.NewLevel().(internal.LevelHash)
err = d.UnmarshalBinary(encoded)
if assert.NoError(t, err) {
assert.Equal(t, l.Sum(nil), d.Sum(nil))
l.Write(content[10:])
d.Write(content[10:])
assert.Equal(t, l.Sum(nil), d.Sum(nil))
}
}
}
func TestLevelInvalidEncoding(t *testing.T) {
l := hidrivehash.NewLevel().(internal.LevelHash)
err := l.UnmarshalBinary([]byte{})
assert.ErrorIs(t, err, hidrivehash.ErrorInvalidEncoding)
}
// ------------------------------------------------------------
type infiniteReader struct {
source []byte
offset int
}
func (m *infiniteReader) Read(b []byte) (int, error) {
count := copy(b, m.source[m.offset:])
m.offset += count
m.offset %= len(m.source)
return count, nil
}
func writeInChunks(writer io.Writer, chunkSize int64, data []byte, pattern []int64) error {
readers := make([]io.Reader, len(pattern))
nullBytes := [4096]byte{}
for i, n := range pattern {
if i%2 == 0 {
readers[i] = io.LimitReader(&infiniteReader{data, 0}, n*int64(len(data)))
} else {
readers[i] = io.LimitReader(&infiniteReader{nullBytes[:], 0}, n)
}
}
reader := io.MultiReader(readers...)
for {
_, err := io.CopyN(writer, reader, chunkSize)
if err != nil {
if err == io.EOF {
err = nil
}
return err
}
}
}
func TestWrite(t *testing.T) {
for _, test := range testTable {
t.Run(test.name, func(t *testing.T) {
h := hidrivehash.New()
err := writeInChunks(h, int64(h.BlockSize()), test.data, test.pattern)
if assert.NoError(t, err) {
normalSum := h.Sum(nil)
assert.Equal(t, test.out, normalSum)
// Test if different block-sizes produce differing results.
for _, blockSize := range []int64{397, 512, 4091, 8192, 10000} {
t.Run(fmt.Sprintf("block-size %v", blockSize), func(t *testing.T) {
h := hidrivehash.New()
err := writeInChunks(h, blockSize, test.data, test.pattern)
if assert.NoError(t, err) {
assert.Equal(t, normalSum, h.Sum(nil))
}
})
}
}
})
}
}
func TestReset(t *testing.T) {
h := hidrivehash.New()
zeroHash := h.Sum(nil)
_, err := h.Write([]byte{1})
if assert.NoError(t, err) {
assert.NotEqual(t, zeroHash, h.Sum(nil))
h.Reset()
assert.Equal(t, zeroHash, h.Sum(nil))
}
}
func TestSize(t *testing.T) {
h := hidrivehash.New()
assert.Equal(t, 20, h.Size())
}
func TestBlockSize(t *testing.T) {
h := hidrivehash.New()
assert.Equal(t, 4096, h.BlockSize())
}
func TestBinaryMarshaler(t *testing.T) {
for _, test := range testTable {
h := hidrivehash.New()
d := hidrivehash.New()
half := len(test.pattern) / 2
t.Run(test.name, func(t *testing.T) {
err := writeInChunks(h, int64(h.BlockSize()), test.data, test.pattern[:half])
assert.NoError(t, err)
encoded, err := h.(encoding.BinaryMarshaler).MarshalBinary()
if assert.NoError(t, err) {
err = d.(encoding.BinaryUnmarshaler).UnmarshalBinary(encoded)
if assert.NoError(t, err) {
assert.Equal(t, h.Sum(nil), d.Sum(nil))
err = writeInChunks(h, int64(h.BlockSize()), test.data, test.pattern[half:])
assert.NoError(t, err)
err = writeInChunks(d, int64(d.BlockSize()), test.data, test.pattern[half:])
assert.NoError(t, err)
assert.Equal(t, h.Sum(nil), d.Sum(nil))
}
}
})
}
}
func TestInvalidEncoding(t *testing.T) {
h := hidrivehash.New()
err := h.(encoding.BinaryUnmarshaler).UnmarshalBinary([]byte{})
assert.ErrorIs(t, err, hidrivehash.ErrorInvalidEncoding)
}
func TestSum(t *testing.T) {
assert.Equal(t, [hidrivehash.Size]byte{}, hidrivehash.Sum([]byte{}))
content := []byte{1}
h := hidrivehash.New()
h.Write(content)
sum := hidrivehash.Sum(content)
assert.Equal(t, h.Sum(nil), sum[:])
}

View File

@@ -0,0 +1,17 @@
package internal
import (
"encoding"
"hash"
)
// LevelHash is an internal interface for level-hashes.
type LevelHash interface {
encoding.BinaryMarshaler
encoding.BinaryUnmarshaler
hash.Hash
// Add takes a position-embedded checksum and adds it to the level.
Add(sum []byte)
// IsFull returns whether the number of checksums added to this level reached its capacity.
IsFull() bool
}