2021-12-07 12:38:54 +00:00
package main
import (
"bufio"
"encoding/hex"
"flag"
"fmt"
"github.com/minio/md5-simd"
"github.com/minio/sha256-simd"
2021-12-07 13:50:34 +00:00
"hash"
2021-12-07 12:38:54 +00:00
"io"
"math"
"os"
"runtime"
"strings"
"sync/atomic"
)
// HashFileResult is the outcome of hashing a single file.
type HashFileResult struct {
	// error is the failure reported for the file, or nil on success.
	error error
	// path is the file path the result refers to; sha256 and md5 hold the
	// hex-encoded digests and are left empty when error is set.
	path, sha256, md5 string
}
2021-12-07 13:50:34 +00:00
func HashFile ( results chan <- HashFileResult , md5hasher * md5simd . Hasher , sha256hasher * hash . Hash , path string ) {
2021-12-07 12:38:54 +00:00
fh , err := os . Open ( path )
if err != nil {
results <- HashFileResult {
error : err ,
path : path ,
}
return
}
defer fh . Close ( )
2021-12-07 13:50:34 +00:00
io . Copy ( io . MultiWriter ( * sha256hasher , * md5hasher ) , fh )
2021-12-07 12:38:54 +00:00
results <- HashFileResult {
error : nil ,
path : path ,
2021-12-07 13:50:34 +00:00
sha256 : hex . EncodeToString ( ( * sha256hasher ) . Sum ( nil ) ) ,
md5 : hex . EncodeToString ( ( * md5hasher ) . Sum ( nil ) ) ,
2021-12-07 12:38:54 +00:00
}
}
// PrintHashFileResult reports a single result. A failed result is written to
// standard error as "<path> error:  <err>"; a successful one is written to
// standard output as "<sha256> <md5> <path>".
func PrintHashFileResult(result *HashFileResult) {
	if err := result.error; err != nil {
		fmt.Fprintln(os.Stderr, result.path, "error: ", err)
		return
	}
	fmt.Println(result.sha256, result.md5, result.path)
}
func main ( ) {
taskLimit := flag . Int ( "tasklimit" , int ( math . Ceil ( float64 ( runtime . NumCPU ( ) ) * 1.5 ) ) , "Maximum number of concurrent hashing tasks. Change to avoid fdlimit issues. Defaults to number of CPU cores * 1.5" )
flag . Parse ( )
var taskCount int64
scanner := bufio . NewScanner ( os . Stdin )
defer os . Stdin . Close ( )
2021-12-07 13:50:34 +00:00
var md5servers [ ] md5simd . Server
md5hashers := make ( chan md5simd . Hasher , * taskLimit )
sha256hashers := make ( chan hash . Hash , * taskLimit )
2021-12-07 12:38:54 +00:00
2021-12-07 13:50:34 +00:00
for j := 0 ; j < * taskLimit ; j ++ {
serverIndex := j / 16
2021-12-07 12:38:54 +00:00
2021-12-07 13:50:34 +00:00
if ( serverIndex + 1 ) > len ( md5servers ) {
md5servers = append ( md5servers , md5simd . NewServer ( ) )
}
hasher := md5servers [ serverIndex ] . NewHash ( )
md5hashers <- hasher
sha256hashers <- sha256 . New ( )
}
resultChannel := make ( chan HashFileResult )
2021-12-07 12:38:54 +00:00
atomic . AddInt64 ( & taskCount , 1 )
go func ( ) {
defer atomic . AddInt64 ( & taskCount , - 1 )
for scanner . Scan ( ) {
text := scanner . Text ( )
path := strings . TrimRight ( text , "\n\r" )
atomic . AddInt64 ( & taskCount , 1 )
2021-12-07 13:50:34 +00:00
go func ( ) {
defer atomic . AddInt64 ( & taskCount , - 1 )
md5hasher := <- md5hashers
sha256hasher := <- sha256hashers
md5hasher . Reset ( )
sha256hasher . Reset ( )
HashFile ( resultChannel , & md5hasher , & sha256hasher , path )
md5hashers <- md5hasher
sha256hashers <- sha256hasher
} ( )
2021-12-07 12:38:54 +00:00
}
} ( )
//Already print before finishing, use atomic ints instead of a WaitGroup
for atomic . LoadInt64 ( & taskCount ) > 0 {
result := <- resultChannel
PrintHashFileResult ( & result )
}
close ( resultChannel )
for result := range resultChannel {
PrintHashFileResult ( & result )
}
2021-12-07 13:50:34 +00:00
close ( md5hashers )
close ( sha256hashers )
for md5hasher := range md5hashers {
md5hasher . Close ( )
}
for _ , md5server := range md5servers {
md5server . Close ( )
}
2021-12-07 12:38:54 +00:00
}