Use a pool of md5servers, since each one is good for 16 tasks; use channels for the md5/sha256 hashers for better reuse

This commit is contained in:
DataHoarder 2021-12-07 14:50:34 +01:00
parent b2bd300ba2
commit b951d638c0

View file

@ -7,6 +7,7 @@ import (
"fmt"
"github.com/minio/md5-simd"
"github.com/minio/sha256-simd"
"hash"
"io"
"math"
"os"
@ -22,33 +23,25 @@ type HashFileResult struct {
md5 string
}
func HashFile(results chan<- HashFileResult, jobs chan string, path string, md5server *md5simd.Server, taskCount *int64) {
defer atomic.AddInt64(taskCount, -1)
func HashFile(results chan<- HashFileResult, md5hasher *md5simd.Hasher, sha256hasher *hash.Hash, path string) {
fh, err := os.Open(path)
if err != nil {
results <- HashFileResult{
error: err,
path: path,
}
<-jobs
return
}
defer fh.Close()
sha256sum := sha256.New()
md5sum := (*md5server).NewHash()
defer md5sum.Close()
io.Copy(io.MultiWriter(sha256sum, md5sum), fh)
io.Copy(io.MultiWriter(*sha256hasher, *md5hasher), fh)
results <- HashFileResult{
error: nil,
path: path,
sha256: hex.EncodeToString(sha256sum.Sum(nil)),
md5: hex.EncodeToString(md5sum.Sum(nil)),
sha256: hex.EncodeToString((*sha256hasher).Sum(nil)),
md5: hex.EncodeToString((*md5hasher).Sum(nil)),
}
<-jobs
}
func PrintHashFileResult(result *HashFileResult) {
@ -69,13 +62,24 @@ func main() {
scanner := bufio.NewScanner(os.Stdin)
defer os.Stdin.Close()
md5server := md5simd.NewServer()
defer md5server.Close()
var md5servers []md5simd.Server
md5hashers := make(chan md5simd.Hasher, *taskLimit)
sha256hashers := make(chan hash.Hash, *taskLimit)
for j := 0; j < *taskLimit; j++ {
serverIndex := j / 16
if (serverIndex + 1) > len(md5servers) {
md5servers = append(md5servers, md5simd.NewServer())
}
hasher := md5servers[serverIndex].NewHash()
md5hashers <- hasher
sha256hashers <- sha256.New()
}
resultChannel := make(chan HashFileResult)
jobs := make(chan string, *taskLimit)
atomic.AddInt64(&taskCount, 1)
go func() {
defer atomic.AddInt64(&taskCount, -1)
@ -84,9 +88,17 @@ func main() {
path := strings.TrimRight(text, "\n\r")
jobs <- path
atomic.AddInt64(&taskCount, 1)
go HashFile(resultChannel, jobs, path, &md5server, &taskCount)
go func() {
defer atomic.AddInt64(&taskCount, -1)
md5hasher := <-md5hashers
sha256hasher := <-sha256hashers
md5hasher.Reset()
sha256hasher.Reset()
HashFile(resultChannel, &md5hasher, &sha256hasher, path)
md5hashers <- md5hasher
sha256hashers <- sha256hasher
}()
}
}()
@ -102,4 +114,15 @@ func main() {
for result := range resultChannel {
PrintHashFileResult(&result)
}
close(md5hashers)
close(sha256hashers)
for md5hasher := range md5hashers {
md5hasher.Close()
}
for _, md5server := range md5servers {
md5server.Close()
}
}