mirror of
https://github.com/rclone/rclone.git
synced 2025-12-11 22:14:05 +01:00
cmd: add hashSUM file support (#5352)
Currently rclone check supports matching two file trees by sizes and hashes. This change adds support for SUM files produced by GNU utilities like sha1sum. Fixes #1005 Note: checksum by default checks, hashsum by default prints sums. New flag is named "--checkfile" but carries hash name. Summary of introduced command forms: ``` rclone check sums.sha1 remote:path --checkfile sha1 rclone checksum sha1 sums.sha1 remote:path rclone hashsum sha1 remote:path --checkfile sums.sha1 rclone sha1sum remote:path --checkfile sums.sha1 rclone md5sum remote:path --checkfile sums.md5 ```
This commit is contained in:
@@ -1,16 +1,19 @@
|
||||
package operations
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"regexp"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
"github.com/rclone/rclone/fs"
|
||||
"github.com/rclone/rclone/fs/accounting"
|
||||
"github.com/rclone/rclone/fs/filter"
|
||||
"github.com/rclone/rclone/fs/fserrors"
|
||||
"github.com/rclone/rclone/fs/hash"
|
||||
"github.com/rclone/rclone/fs/march"
|
||||
@@ -56,15 +59,15 @@ type checkMarch struct {
|
||||
|
||||
// report outputs the fileName to out if required and to the combined log
|
||||
func (c *checkMarch) report(o fs.DirEntry, out io.Writer, sigil rune) {
|
||||
c.reportFilename(o.String(), out, sigil)
|
||||
}
|
||||
|
||||
func (c *checkMarch) reportFilename(filename string, out io.Writer, sigil rune) {
|
||||
if out != nil {
|
||||
c.ioMu.Lock()
|
||||
_, _ = fmt.Fprintf(out, "%v\n", o)
|
||||
c.ioMu.Unlock()
|
||||
syncFprintf(out, "%s\n", filename)
|
||||
}
|
||||
if c.opt.Combined != nil {
|
||||
c.ioMu.Lock()
|
||||
_, _ = fmt.Fprintf(c.opt.Combined, "%c %v\n", sigil, o)
|
||||
c.ioMu.Unlock()
|
||||
syncFprintf(c.opt.Combined, "%c %s\n", sigil, filename)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -224,11 +227,19 @@ func CheckFn(ctx context.Context, opt *CheckOpt) error {
|
||||
err := m.Run(ctx)
|
||||
c.wg.Wait() // wait for background go-routines
|
||||
|
||||
return c.reportResults(ctx, err)
|
||||
}
|
||||
|
||||
func (c *checkMarch) reportResults(ctx context.Context, err error) error {
|
||||
if c.dstFilesMissing > 0 {
|
||||
fs.Logf(c.opt.Fdst, "%d files missing", c.dstFilesMissing)
|
||||
}
|
||||
if c.srcFilesMissing > 0 {
|
||||
fs.Logf(c.opt.Fsrc, "%d files missing", c.srcFilesMissing)
|
||||
entity := "files"
|
||||
if c.opt.Fsrc == nil {
|
||||
entity = "hashes"
|
||||
}
|
||||
fs.Logf(c.opt.Fsrc, "%d %s missing", c.srcFilesMissing, entity)
|
||||
}
|
||||
|
||||
fs.Logf(c.opt.Fdst, "%d differences found", accounting.Stats(ctx).GetErrors())
|
||||
@@ -358,3 +369,210 @@ func CheckDownload(ctx context.Context, opt *CheckOpt) error {
|
||||
}
|
||||
return CheckFn(ctx, &optCopy)
|
||||
}
|
||||
|
||||
// CheckSum checks filesystem hashes against a SUM file
|
||||
func CheckSum(ctx context.Context, fsrc, fsum fs.Fs, sumFile string, hashType hash.Type, opt *CheckOpt, download bool) error {
|
||||
var options CheckOpt
|
||||
if opt != nil {
|
||||
options = *opt
|
||||
} else {
|
||||
// default options for hashsum -c
|
||||
options.Combined = os.Stdout
|
||||
}
|
||||
// CheckSum treats Fsrc and Fdst specially:
|
||||
options.Fsrc = nil // no file system here, corresponds to the sum list
|
||||
options.Fdst = fsrc // denotes the file system to check
|
||||
opt = &options // override supplied argument
|
||||
|
||||
if !download && (hashType == hash.None || !opt.Fdst.Hashes().Contains(hashType)) {
|
||||
return errors.Errorf("%s: hash type is not supported by file system: %s", hashType, opt.Fdst)
|
||||
}
|
||||
|
||||
if sumFile == "" {
|
||||
return errors.Errorf("not a sum file: %s", fsum)
|
||||
}
|
||||
sumObj, err := fsum.NewObject(ctx, sumFile)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "cannot open sum file")
|
||||
}
|
||||
hashes, err := ParseSumFile(ctx, sumObj)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "failed to parse sum file")
|
||||
}
|
||||
|
||||
ci := fs.GetConfig(ctx)
|
||||
c := &checkMarch{
|
||||
tokens: make(chan struct{}, ci.Checkers),
|
||||
opt: *opt,
|
||||
}
|
||||
lastErr := ListFn(ctx, opt.Fdst, func(obj fs.Object) {
|
||||
c.checkSum(ctx, obj, download, hashes, hashType)
|
||||
})
|
||||
c.wg.Wait() // wait for background go-routines
|
||||
|
||||
// make census of unhandled sums
|
||||
fi := filter.GetConfig(ctx)
|
||||
for filename, hash := range hashes {
|
||||
if hash == "" { // the sum has been successfully consumed
|
||||
continue
|
||||
}
|
||||
if !fi.IncludeRemote(filename) { // the file was filtered out
|
||||
continue
|
||||
}
|
||||
// filesystem missed the file, sum wasn't consumed
|
||||
err := errors.Errorf("File not in %v", opt.Fdst)
|
||||
fs.Errorf(filename, "%v", err)
|
||||
_ = fs.CountError(err)
|
||||
if lastErr == nil {
|
||||
lastErr = err
|
||||
}
|
||||
atomic.AddInt32(&c.dstFilesMissing, 1)
|
||||
c.reportFilename(filename, opt.MissingOnDst, '+')
|
||||
}
|
||||
|
||||
return c.reportResults(ctx, lastErr)
|
||||
}
|
||||
|
||||
// checkSum checks single object against golden hashes
|
||||
func (c *checkMarch) checkSum(ctx context.Context, obj fs.Object, download bool, hashes HashSums, hashType hash.Type) {
|
||||
remote := obj.Remote()
|
||||
c.ioMu.Lock()
|
||||
sumHash, sumFound := hashes[remote]
|
||||
hashes[remote] = "" // mark sum as consumed
|
||||
c.ioMu.Unlock()
|
||||
|
||||
if !sumFound && c.opt.OneWay {
|
||||
return
|
||||
}
|
||||
|
||||
var err error
|
||||
tr := accounting.Stats(ctx).NewCheckingTransfer(obj)
|
||||
defer tr.Done(ctx, err)
|
||||
|
||||
if !sumFound {
|
||||
err = errors.New("sum not found")
|
||||
_ = fs.CountError(err)
|
||||
fs.Errorf(obj, "%v", err)
|
||||
atomic.AddInt32(&c.differences, 1)
|
||||
atomic.AddInt32(&c.srcFilesMissing, 1)
|
||||
c.report(obj, c.opt.MissingOnSrc, '-')
|
||||
return
|
||||
}
|
||||
|
||||
if !download {
|
||||
var objHash string
|
||||
objHash, err = obj.Hash(ctx, hashType)
|
||||
c.matchSum(ctx, sumHash, objHash, obj, err, hashType)
|
||||
return
|
||||
}
|
||||
|
||||
c.wg.Add(1)
|
||||
c.tokens <- struct{}{} // put a token to limit concurrency
|
||||
go func() {
|
||||
var (
|
||||
objHash string
|
||||
err error
|
||||
in io.ReadCloser
|
||||
)
|
||||
defer func() {
|
||||
c.matchSum(ctx, sumHash, objHash, obj, err, hashType)
|
||||
<-c.tokens // get the token back to free up a slot
|
||||
c.wg.Done()
|
||||
}()
|
||||
if in, err = obj.Open(ctx); err != nil {
|
||||
return
|
||||
}
|
||||
tr := accounting.Stats(ctx).NewTransfer(obj)
|
||||
in = tr.Account(ctx, in).WithBuffer() // account and buffer the transfer
|
||||
defer func() {
|
||||
tr.Done(ctx, nil) // will close the stream
|
||||
}()
|
||||
hashVals, err2 := hash.StreamTypes(in, hash.NewHashSet(hashType))
|
||||
if err2 != nil {
|
||||
err = err2 // pass to matchSum
|
||||
return
|
||||
}
|
||||
objHash = hashVals[hashType]
|
||||
}()
|
||||
}
|
||||
|
||||
// matchSum sums up the results of hashsum matching for an object
|
||||
func (c *checkMarch) matchSum(ctx context.Context, sumHash, objHash string, obj fs.Object, err error, hashType hash.Type) {
|
||||
switch {
|
||||
case err != nil:
|
||||
_ = fs.CountError(err)
|
||||
fs.Errorf(obj, "Failed to calculate hash: %v", err)
|
||||
c.report(obj, c.opt.Error, '!')
|
||||
case sumHash == "":
|
||||
err = errors.New("duplicate file")
|
||||
_ = fs.CountError(err)
|
||||
fs.Errorf(obj, "%v", err)
|
||||
c.report(obj, c.opt.Error, '!')
|
||||
case objHash == "":
|
||||
fs.Debugf(nil, "%v = %s (sum)", hashType, sumHash)
|
||||
fs.Debugf(obj, "%v - could not check hash (%v)", hashType, c.opt.Fdst)
|
||||
atomic.AddInt32(&c.noHashes, 1)
|
||||
atomic.AddInt32(&c.matches, 1)
|
||||
c.report(obj, c.opt.Match, '=')
|
||||
case objHash == sumHash:
|
||||
fs.Debugf(obj, "%v = %s OK", hashType, sumHash)
|
||||
atomic.AddInt32(&c.matches, 1)
|
||||
c.report(obj, c.opt.Match, '=')
|
||||
default:
|
||||
err = errors.New("files differ")
|
||||
_ = fs.CountError(err)
|
||||
fs.Debugf(nil, "%v = %s (sum)", hashType, sumHash)
|
||||
fs.Debugf(obj, "%v = %s (%v)", hashType, objHash, c.opt.Fdst)
|
||||
fs.Errorf(obj, "%v", err)
|
||||
atomic.AddInt32(&c.differences, 1)
|
||||
c.report(obj, c.opt.Differ, '*')
|
||||
}
|
||||
}
|
||||
|
||||
// HashSums represents a parsed SUM file
|
||||
type HashSums map[string]string
|
||||
|
||||
// ParseSumFile parses a hash SUM file and returns hashes as a map
|
||||
func ParseSumFile(ctx context.Context, sumFile fs.Object) (HashSums, error) {
|
||||
rd, err := sumFile.Open(ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
parser := bufio.NewReader(rd)
|
||||
|
||||
const maxWarn = 4
|
||||
numWarn := 0
|
||||
|
||||
re := regexp.MustCompile(`^([^ ]+) [ *](.+)$`)
|
||||
hashes := HashSums{}
|
||||
for lineNo := 0; true; lineNo++ {
|
||||
lineBytes, _, err := parser.ReadLine()
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
line := string(lineBytes)
|
||||
if line == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
if fields := re.FindStringSubmatch(line); fields != nil {
|
||||
hashes[fields[2]] = fields[1]
|
||||
continue
|
||||
}
|
||||
|
||||
numWarn++
|
||||
if numWarn < maxWarn {
|
||||
fs.Logf(sumFile, "improperly formatted checksum line %d", lineNo)
|
||||
} else if numWarn == maxWarn {
|
||||
fs.Logf(sumFile, "more warnings suppressed...")
|
||||
}
|
||||
}
|
||||
|
||||
if err = rd.Close(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return hashes, nil
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user