From 432590f93006e12aa959074df168c815fd518704 Mon Sep 17 00:00:00 2001
From: WeebDataHoarder <57538841+WeebDataHoarder@users.noreply.github.com>
Date: Mon, 15 Apr 2024 03:05:21 +0200
Subject: [PATCH] Move argon2 / float packages to their own folders, cleanup
 vm Run

---
 argon2.go              |  58 ------------
 argon2/argon2.go       |  44 +++++++++
 asm/round.go           |  11 +--
 bytecode.go            | 205 ----------------------------------
 cache.go               |   5 +-
 config.go              |  28 +++---
 register.go            |  23 ++++-
 softfloat/const.go     |  37 ++++++++
 softfloat/funcs.go     |  35 +++++++
 softfloat/softfloat.go |  27 ++++++
 vm.go                  | 176 ++++++++++++++++------------------
 vm_bytecode.go         | 207 +++++++++++++++++++++++++++++++++++++++++
 vm_instruction.go      |  16 ++--
 13 files changed, 480 insertions(+), 392 deletions(-)
 delete mode 100644 argon2.go
 create mode 100644 argon2/argon2.go
 delete mode 100644 bytecode.go
 create mode 100644 softfloat/const.go
 create mode 100644 softfloat/funcs.go
 create mode 100644 softfloat/softfloat.go
 create mode 100644 vm_bytecode.go

diff --git a/argon2.go b/argon2.go
deleted file mode 100644
index 863320a..0000000
--- a/argon2.go
+++ /dev/null
@@ -1,58 +0,0 @@
-package randomx
-
-import "golang.org/x/crypto/blake2b"
-
-import (
-	_ "golang.org/x/crypto/argon2"
-	_ "unsafe"
-)
-
-// see reference configuration.h
-// Cache size in KiB. Must be a power of 2.
-const RANDOMX_ARGON_MEMORY = 262144
-
-// Number of Argon2d iterations for Cache initialization.
-const RANDOMX_ARGON_ITERATIONS = 3
-
-// Number of parallel lanes for Cache initialization.
-const RANDOMX_ARGON_LANES = 1
-
-// Argon2d salt
-const RANDOMX_ARGON_SALT = "RandomX\x03"
-const ArgonSaltSize uint32 = 8 //sizeof("" RANDOMX_ARGON_SALT) - 1
-
-const ArgonBlockSize uint32 = 1024
-
-type argonBlock [128]uint64
-
-const syncPoints = 4
-
-//go:linkname argon2_initHash golang.org/x/crypto/argon2.initHash
-func argon2_initHash(password, salt, key, data []byte, time, memory, threads, keyLen uint32, mode int) [blake2b.Size + 8]byte
-
-//go:linkname argon2_initBlocks golang.org/x/crypto/argon2.initBlocks
-func argon2_initBlocks(h0 *[blake2b.Size + 8]byte, memory, threads uint32) []argonBlock
-
-//go:linkname argon2_processBlocks golang.org/x/crypto/argon2.processBlocks
-func argon2_processBlocks(B []argonBlock, time, memory, threads uint32, mode int)
-
-// argon2_buildBlocks From golang.org/x/crypto/argon2.deriveKey without last deriveKey call
-func argon2_buildBlocks(password, salt, secret, data []byte, time, memory uint32, threads uint8, keyLen uint32) []argonBlock {
-	if time < 1 {
-		panic("argon2: number of rounds too small")
-	}
-	if threads < 1 {
-		panic("argon2: parallelism degree too low")
-	}
-	const mode = 0 /* argon2d */
-	h0 := argon2_initHash(password, salt, secret, data, time, memory, uint32(threads), keyLen, mode)
-
-	memory = memory / (syncPoints * uint32(threads)) * (syncPoints * uint32(threads))
-	if memory < 2*syncPoints*uint32(threads) {
-		memory = 2 * syncPoints * uint32(threads)
-	}
-	B := argon2_initBlocks(&h0, memory, uint32(threads))
-	argon2_processBlocks(B, time, memory, uint32(threads), mode)
-
-	return B
-}
diff --git a/argon2/argon2.go b/argon2/argon2.go
new file mode 100644
index 0000000..bd962e4
--- /dev/null
+++ b/argon2/argon2.go
@@ -0,0 +1,44 @@
+package argon2
+
+import "golang.org/x/crypto/blake2b"
+
+import (
+	_ "golang.org/x/crypto/argon2"
+	_ "unsafe"
+)
+
+const BlockSize uint32 = 1024
+
+type Block [BlockSize / 8]uint64
+
+const syncPoints = 4
+
+//go:linkname initHash golang.org/x/crypto/argon2.initHash
+func initHash(password, salt, key, data []byte, time, memory, threads, keyLen uint32, mode int) [blake2b.Size + 8]byte
+
+//go:linkname initBlocks golang.org/x/crypto/argon2.initBlocks
+func initBlocks(h0 *[blake2b.Size + 8]byte, memory, threads uint32) []Block
+
+//go:linkname processBlocks golang.org/x/crypto/argon2.processBlocks
+func processBlocks(B []Block, time, memory, threads uint32, mode int)
+
+// BuildBlocks is golang.org/x/crypto/argon2.deriveKey without the final deriveKey call
+func BuildBlocks(password, salt, secret, data []byte, time, memory uint32, threads uint8, keyLen uint32) []Block {
+	if time < 1 {
+		panic("argon2: number of rounds too small")
+	}
+	if threads < 1 {
+		panic("argon2: parallelism degree too low")
+	}
+	const mode = 0 /* argon2d */
+	h0 := initHash(password, salt, secret, data, time, memory, uint32(threads), keyLen, mode)
+
+	memory = memory / (syncPoints * uint32(threads)) * (syncPoints * uint32(threads))
+	if memory < 2*syncPoints*uint32(threads) {
+		memory = 2 * syncPoints * uint32(threads)
+	}
+	B := initBlocks(&h0, memory, uint32(threads))
+	processBlocks(B, time, memory, uint32(threads), mode)
+
+	return B
+}
diff --git a/asm/round.go b/asm/round.go
index 659fc76..cc9acbc 100644
--- a/asm/round.go
+++ b/asm/round.go
@@ -1,14 +1,7 @@
 package asm
 
-type RoundingMode uint8
+import "git.gammaspectra.live/P2Pool/go-randomx/v2/softfloat"
 
-const (
-	RoundingModeToNearest = RoundingMode(iota)
-	RoundingModeToNegative
-	RoundingModeToPositive
-	RoundingModeToZero
-)
-
-func SetRoundingMode(mode RoundingMode) {
+func SetRoundingMode(mode softfloat.RoundingMode) {
 	setRoundingMode(uint8(mode))
 }
diff --git a/bytecode.go b/bytecode.go
deleted file mode 100644
index 4878e07..0000000
--- a/bytecode.go
+++ /dev/null
@@ -1,205 +0,0 @@
-package randomx
-
-import (
-	"git.gammaspectra.live/P2Pool/go-randomx/v2/asm"
-	"math"
-	"math/bits"
-)
-
-type ByteCodeInstruction struct {
-	Dst, Src byte
-	ImmB     uint8
-	Opcode   ByteCodeInstructionOp
-	MemMask  uint32
-	Imm      uint64
-	/*
-		union {
-			int_reg_t* idst;
-			rx_vec_f128* fdst;
-		};
-		union {
-			int_reg_t* isrc;
-			rx_vec_f128* fsrc;
-		};
-		union {
-			uint64_t imm;
-			int64_t simm;
-		};
-		InstructionType type;
-		union {
-			int16_t target;
-			uint16_t shift;
-		};
-		uint32_t memMask;
-	*/
-
-}
-
-func (i ByteCodeInstruction) jumpTarget() int {
-	return int(int16((uint16(i.ImmB) << 8) | uint16(i.Dst)))
-}
-
-func (i ByteCodeInstruction) getScratchpadAddress(ptr uint64) uint32 {
-	return uint32(ptr+i.Imm) & i.MemMask
-}
-
-func (i ByteCodeInstruction) getScratchpadZeroAddress() uint32 {
-	return uint32(i.Imm) & i.MemMask
-}
-
-type ByteCode [RANDOMX_PROGRAM_SIZE]ByteCodeInstruction
-
-func (c *ByteCode) Execute(f RegisterFile, pad *ScratchPad, eMask [2]uint64) RegisterFile {
-	for pc := 0; pc < RANDOMX_PROGRAM_SIZE; pc++ {
-		i := &c[pc]
-		switch i.Opcode {
-		case VM_IADD_RS:
-			f.r[i.Dst] += (f.r[i.Src] << i.ImmB) + i.Imm
-		case VM_IADD_M:
-			f.r[i.Dst] += pad.Load64(i.getScratchpadAddress(f.r[i.Src]))
-		case VM_IADD_MZ:
-			f.r[i.Dst] += pad.Load64(uint32(i.Imm))
-		case VM_ISUB_R:
-			f.r[i.Dst] -= f.r[i.Src]
-		case VM_ISUB_I:
-			f.r[i.Dst] -= i.Imm
-		case VM_ISUB_M:
-			f.r[i.Dst] -= pad.Load64(i.getScratchpadAddress(f.r[i.Src]))
-		case VM_ISUB_MZ:
-			f.r[i.Dst] -= pad.Load64(uint32(i.Imm))
-		case VM_IMUL_R:
-			f.r[i.Dst] *= f.r[i.Src]
-		case VM_IMUL_I:
-			// also handles imul_rcp
-			f.r[i.Dst] *= i.Imm
-		case VM_IMUL_M:
-			f.r[i.Dst] *= pad.Load64(i.getScratchpadAddress(f.r[i.Src]))
-		case VM_IMUL_MZ:
-			f.r[i.Dst] *= pad.Load64(uint32(i.Imm))
-		case VM_IMULH_R:
-			f.r[i.Dst], _ = bits.Mul64(f.r[i.Dst], f.r[i.Src])
-		case VM_IMULH_M:
-			f.r[i.Dst], _ = bits.Mul64(f.r[i.Dst], pad.Load64(i.getScratchpadAddress(f.r[i.Src])))
-		case VM_IMULH_MZ:
-			f.r[i.Dst], _ = bits.Mul64(f.r[i.Dst], pad.Load64(uint32(i.Imm)))
-		case VM_ISMULH_R:
-			f.r[i.Dst] = smulh(int64(f.r[i.Dst]), int64(f.r[i.Src]))
-		case VM_ISMULH_M:
-			f.r[i.Dst] = smulh(int64(f.r[i.Dst]), int64(pad.Load64(i.getScratchpadAddress(f.r[i.Src]))))
-		case VM_ISMULH_MZ:
-			f.r[i.Dst] = smulh(int64(f.r[i.Dst]), int64(pad.Load64(uint32(i.Imm))))
-		case VM_INEG_R:
-			//f.r[i.Dst] = (^(f.r[i.Dst])) + 1 // 2's complement negative
-			f.r[i.Dst] = -f.r[i.Dst]
-		case VM_IXOR_R:
-			f.r[i.Dst] ^= f.r[i.Src]
-		case VM_IXOR_I:
-			f.r[i.Dst] ^= i.Imm
-		case VM_IXOR_M:
-			f.r[i.Dst] ^= pad.Load64(i.getScratchpadAddress(f.r[i.Src]))
-		case VM_IXOR_MZ:
-			f.r[i.Dst] ^= pad.Load64(uint32(i.Imm))
-		case VM_IROR_R:
-			f.r[i.Dst] = bits.RotateLeft64(f.r[i.Dst], 0-int(f.r[i.Src]&63))
-		case VM_IROR_I:
-			//todo: can merge into VM_IROL_I
-			f.r[i.Dst] = bits.RotateLeft64(f.r[i.Dst], 0-int(i.Imm&63))
-		case VM_IROL_R:
-			f.r[i.Dst] = bits.RotateLeft64(f.r[i.Dst], int(f.r[i.Src]&63))
-		case VM_IROL_I:
-			f.r[i.Dst] = bits.RotateLeft64(f.r[i.Dst], int(i.Imm&63))
-		case VM_ISWAP_R:
-			f.r[i.Dst], f.r[i.Src] = f.r[i.Src], f.r[i.Dst]
-		case VM_FSWAP_RF:
-			f.f[i.Dst][HIGH], f.f[i.Dst][LOW] = f.f[i.Dst][LOW], f.f[i.Dst][HIGH]
-		case VM_FSWAP_RE:
-			f.e[i.Dst][HIGH], f.e[i.Dst][LOW] = f.e[i.Dst][LOW], f.e[i.Dst][HIGH]
-		case VM_FADD_R:
-			f.f[i.Dst][LOW] += f.a[i.Src][LOW]
-			f.f[i.Dst][HIGH] += f.a[i.Src][HIGH]
-		case VM_FADD_M:
-			lo, hi := pad.Load32F(i.getScratchpadAddress(f.r[i.Src]))
-			f.f[i.Dst][LOW] += lo
-			f.f[i.Dst][HIGH] += hi
-		case VM_FSUB_R:
-			f.f[i.Dst][LOW] -= f.a[i.Src][LOW]
-			f.f[i.Dst][HIGH] -= f.a[i.Src][HIGH]
-		case VM_FSUB_M:
-			lo, hi := pad.Load32F(i.getScratchpadAddress(f.r[i.Src]))
-			f.f[i.Dst][LOW] -= lo
-			f.f[i.Dst][HIGH] -= hi
-		case VM_FSCAL_R:
-			// no dependent on rounding modes
-			f.f[i.Dst][LOW] = math.Float64frombits(math.Float64bits(f.f[i.Dst][LOW]) ^ 0x80F0000000000000)
-			f.f[i.Dst][HIGH] = math.Float64frombits(math.Float64bits(f.f[i.Dst][HIGH]) ^ 0x80F0000000000000)
-		case VM_FMUL_R:
-			f.e[i.Dst][LOW] *= f.a[i.Src][LOW]
-			f.e[i.Dst][HIGH] *= f.a[i.Src][HIGH]
-		case VM_FDIV_M:
-			lo, hi := pad.Load32F(i.getScratchpadAddress(f.r[i.Src]))
-			f.e[i.Dst][LOW] /= MaskRegisterExponentMantissa(lo, eMask[LOW])
-			f.e[i.Dst][HIGH] /= MaskRegisterExponentMantissa(hi, eMask[HIGH])
-		case VM_FSQRT_R:
-			f.e[i.Dst][LOW] = math.Sqrt(f.e[i.Dst][LOW])
-			f.e[i.Dst][HIGH] = math.Sqrt(f.e[i.Dst][HIGH])
-		case VM_CBRANCH:
-			f.r[i.Src] += i.Imm
-			if (f.r[i.Src] & uint64(i.MemMask)) == 0 {
-				pc = i.jumpTarget()
-			}
-		case VM_CFROUND:
-			tmp := (bits.RotateLeft64(f.r[i.Src], 0-int(i.Imm))) % 4 // rotate right
-			asm.SetRoundingMode(asm.RoundingMode(tmp))
-		case VM_ISTORE:
-			pad.Store64(i.getScratchpadAddress(f.r[i.Dst]), f.r[i.Src])
-		case VM_NOP: // we do nothing
-		}
-	}
-	return f
-}
-
-type ByteCodeInstructionOp int
-
-const (
-	VM_NOP = ByteCodeInstructionOp(iota)
-	VM_IADD_RS
-	VM_IADD_M
-	VM_IADD_MZ
-	VM_ISUB_R
-	VM_ISUB_I
-	VM_ISUB_M
-	VM_ISUB_MZ
-	VM_IMUL_R
-	VM_IMUL_I
-	VM_IMUL_M
-	VM_IMUL_MZ
-	VM_IMULH_R
-	VM_IMULH_M
-	VM_IMULH_MZ
-	VM_ISMULH_R
-	VM_ISMULH_M
-	VM_ISMULH_MZ
-	VM_INEG_R
-	VM_IXOR_R
-	VM_IXOR_I
-	VM_IXOR_M
-	VM_IXOR_MZ
-	VM_IROR_R
-	VM_IROR_I
-	VM_IROL_R
-	VM_IROL_I
-	VM_ISWAP_R
-	VM_FSWAP_RF
-	VM_FSWAP_RE
-	VM_FADD_R
-	VM_FADD_M
-	VM_FSUB_R
-	VM_FSUB_M
-	VM_FSCAL_R
-	VM_FMUL_R
-	VM_FDIV_M
-	VM_FSQRT_R
-	VM_CBRANCH
-	VM_CFROUND
-	VM_ISTORE
-)
diff --git a/cache.go b/cache.go
index bdf3eb8..8d88ee2 100644
--- a/cache.go
+++ b/cache.go
@@ -1,6 +1,7 @@
 package randomx
 
 import (
+	"git.gammaspectra.live/P2Pool/go-randomx/v2/argon2"
 	"git.gammaspectra.live/P2Pool/go-randomx/v2/keys"
 	"runtime"
 	"slices"
@@ -66,9 +67,9 @@ func (cache *Randomx_Cache) Init(key []byte) {
 
 	kkey := slices.Clone(key)
 
-	argonBlocks := argon2_buildBlocks(kkey, []byte(RANDOMX_ARGON_SALT), []byte{}, []byte{}, RANDOMX_ARGON_ITERATIONS, RANDOMX_ARGON_MEMORY, RANDOMX_ARGON_LANES, 0)
+	argonBlocks := argon2.BuildBlocks(kkey, []byte(RANDOMX_ARGON_SALT), []byte{}, []byte{}, RANDOMX_ARGON_ITERATIONS, RANDOMX_ARGON_MEMORY, RANDOMX_ARGON_LANES, 0)
 
-	memoryBlocks := unsafe.Slice((*MemoryBlock)(unsafe.Pointer(unsafe.SliceData(argonBlocks))), int(unsafe.Sizeof(argonBlock{}))/int(unsafe.Sizeof(MemoryBlock{}))*len(argonBlocks))
+	memoryBlocks := unsafe.Slice((*MemoryBlock)(unsafe.Pointer(unsafe.SliceData(argonBlocks))), int(unsafe.Sizeof(argon2.Block{}))/int(unsafe.Sizeof(MemoryBlock{}))*len(argonBlocks))
 
 	cache.Blocks = memoryBlocks
 
diff --git a/config.go b/config.go
index f53241b..7fe7c20 100644
--- a/config.go
+++ b/config.go
@@ -29,6 +29,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 package randomx
 
+import "git.gammaspectra.live/P2Pool/go-randomx/v2/argon2"
+
+// see reference configuration.h
+// Cache size in KiB. Must be a power of 2.
+const RANDOMX_ARGON_MEMORY = 262144
+
+// Number of Argon2d iterations for Cache initialization.
+const RANDOMX_ARGON_ITERATIONS = 3
+
+// Number of parallel lanes for Cache initialization.
+const RANDOMX_ARGON_LANES = 1
+
+// Argon2d salt
+const RANDOMX_ARGON_SALT = "RandomX\x03"
+const ArgonSaltSize uint32 = 8 //sizeof("" RANDOMX_ARGON_SALT) - 1
+
 // Number of random Cache accesses per Dataset item. Minimum is 2.
 const RANDOMX_CACHE_ACCESSES = 8
 
@@ -74,7 +90,7 @@ const ScratchpadSize uint32 = RANDOMX_SCRATCHPAD_L3
 
 const CacheLineAlignMask = (RANDOMX_DATASET_BASE_SIZE - 1) & (^(CacheLineSize - 1))
 
-const CacheSize uint64 = RANDOMX_ARGON_MEMORY * uint64(ArgonBlockSize)
+const CacheSize uint64 = RANDOMX_ARGON_MEMORY * uint64(argon2.BlockSize)
 
 const ScratchpadL1 = RANDOMX_SCRATCHPAD_L1 / 8
 const ScratchpadL2 = RANDOMX_SCRATCHPAD_L2 / 8
@@ -90,16 +106,6 @@ const CONDITIONOFFSET = RANDOMX_JUMP_OFFSET
 const CONDITIONMASK = (1 << RANDOMX_JUMP_BITS) - 1
 const STOREL3CONDITION = 14
 
-const mantissaSize = 52
-const exponentSize = 11
-const mantissaMask = (uint64(1) << mantissaSize) - 1
-const exponentMask = (uint64(1) << exponentSize) - 1
-const exponentBias = 1023
-const dynamicExponentBits = 4
-const staticExponentBits = 4
-const constExponentBits uint64 = 0x300
-const dynamicMantissaMask = (uint64(1) << (mantissaSize + dynamicExponentBits)) - 1
-
 const RANDOMX_FLAG_DEFAULT = uint64(0)
 const RANDOMX_FLAG_JIT = uint64(1 << iota)
 
diff --git a/register.go b/register.go
index e7ef1b1..fe9f85b 100644
--- a/register.go
+++ b/register.go
@@ -1,5 +1,10 @@
 package randomx
 
+import (
+	"git.gammaspectra.live/P2Pool/go-randomx/v2/asm"
+	"git.gammaspectra.live/P2Pool/go-randomx/v2/softfloat"
+)
+
 const RegistersCount = 8
 const RegistersCountFloat = 4
 
@@ -9,10 +14,20 @@ const HIGH = 1
 type RegisterLine [RegistersCount]uint64
 
 type RegisterFile struct {
-	r RegisterLine
-	f [RegistersCountFloat][2]float64
-	e [RegistersCountFloat][2]float64
-	a [RegistersCountFloat][2]float64
+	R RegisterLine
+	F [RegistersCountFloat][2]float64
+	E [RegistersCountFloat][2]float64
+	A [RegistersCountFloat][2]float64
+
+	FPRC softfloat.RoundingMode
+}
+
+func (f *RegisterFile) SetRoundingMode(mode softfloat.RoundingMode) {
+	if f.FPRC == mode {
+		return
+	}
+	f.FPRC = mode
+	asm.SetRoundingMode(mode)
 }
 
 type MemoryRegisters struct {
diff --git a/softfloat/const.go b/softfloat/const.go
new file mode 100644
index 0000000..fe15bbf
--- /dev/null
+++ b/softfloat/const.go
@@ -0,0 +1,37 @@
+package softfloat
+
+const (
+	mantbits64 uint = 52
+	expbits64  uint = 11
+	bias64          = -1<<(expbits64-1) + 1
+
+	nan64 uint64 = (1<<expbits64-1)<<mantbits64 + 1<<(mantbits64-1) // quiet NaN, 0 payload
+	inf64 uint64 = (1<<expbits64 - 1) << mantbits64
+	neg64 uint64 = 1 << (expbits64 + mantbits64)
+)
+
+// masks and exponent constants for float register manipulation,
+// moved here from config.go
+const (
+	mantissaMask = (uint64(1) << mantbits64) - 1
+	exponentMask = (uint64(1) << expbits64) - 1
+	exponentBias = 1023
+
+	dynamicExponentBits = 4
+	staticExponentBits  = 4
+	constExponentBits   uint64 = 0x300
+	dynamicMantissaMask = (uint64(1) << (mantbits64 + dynamicExponentBits)) - 1
+)
+
+const mask22bit = (uint64(1) << 22) - 1
+
+// RoundingMode values match the hardware FPU rounding control field,
+// moved here from package asm
+type RoundingMode uint8
+
+const (
+	RoundingModeToNearest = RoundingMode(iota)
+	RoundingModeToNegative
+	RoundingModeToPositive
+	RoundingModeToZero
+)
diff --git a/softfloat/funcs.go b/softfloat/funcs.go
new file mode 100644
--- /dev/null
+++ b/softfloat/funcs.go
@@ -0,0 +1,35 @@
+package softfloat
+
+import "math"
+
+func MaskRegisterExponentMantissa(f float64, mode uint64) float64 {
+	return math.Float64frombits((math.Float64bits(f) & dynamicMantissaMask) | mode)
+}
+
+func ScaleNegate(f float64) float64 {
+	return math.Float64frombits(math.Float64bits(f) ^ 0x80F0000000000000)
+}
+
+func SmallPositiveFloatBits(entropy uint64) float64 {
+	exponent := entropy >> 59 //0..31
+	mantissa := entropy & mantissaMask
+	exponent += exponentBias
+	exponent &= exponentMask
+	exponent = exponent << mantbits64
+	return math.Float64frombits(exponent | mantissa)
+}
+
+func StaticExponent(entropy uint64) uint64 {
+	exponent := constExponentBits
+	exponent |= (entropy >> (64 - staticExponentBits)) << dynamicExponentBits
+	exponent <<= mantbits64
+	return exponent
+}
+
+func EMask(entropy uint64) uint64 {
+	return (entropy & mask22bit) | StaticExponent(entropy)
+}
+
+func Xor(a, b float64) float64 {
+	return math.Float64frombits(math.Float64bits(a) ^ math.Float64bits(b))
+}
diff --git a/softfloat/softfloat.go b/softfloat/softfloat.go
new file mode 100644
index 0000000..4424c82
--- /dev/null
+++ b/softfloat/softfloat.go
@@ -0,0 +1,27 @@
+package softfloat
+
+import (
+	_ "runtime"
+	_ "unsafe"
+)
+
+//go:linkname funpack64 runtime.funpack64
+func funpack64(f uint64) (sign, mant uint64, exp int, inf, nan bool)
+
+//go:linkname fpack64 runtime.fpack64
+func fpack64(sign, mant uint64, exp int, trunc uint64) uint64
+
+//go:linkname fadd64 runtime.fadd64
+func fadd64(f, g uint64) uint64
+
+//go:linkname fsub64 runtime.fsub64
+func fsub64(f, g uint64) uint64
+
+//go:linkname fneg64 runtime.fneg64
+func fneg64(f uint64) uint64
+
+//go:linkname fmul64 runtime.fmul64
+func fmul64(f, g uint64) uint64
+
+//go:linkname fdiv64 runtime.fdiv64
+func fdiv64(f, g uint64) uint64
diff --git a/vm.go b/vm.go
index 6a5145c..a51eb3c 100644
--- a/vm.go
+++ b/vm.go
@@ -31,9 +31,10 @@ package randomx
 
 import (
 	"git.gammaspectra.live/P2Pool/go-randomx/v2/aes"
-	"git.gammaspectra.live/P2Pool/go-randomx/v2/asm"
+	"git.gammaspectra.live/P2Pool/go-randomx/v2/softfloat"
 	"math"
 	"runtime"
+	"unsafe"
 )
 import "encoding/binary"
 import "golang.org/x/crypto/blake2b"
@@ -45,17 +46,10 @@ type REG struct {
 
 type VM struct {
 	StateStart [64]byte
-	buffer     [RANDOMX_PROGRAM_SIZE*8 + 16*8]byte // first 128 bytes are entropy below rest are program bytes
-	Prog       []byte
 	ScratchPad ScratchPad
 	ByteCode   ByteCode
 
-	// program configuration see program.hpp
-
-	entropy [16]uint64
-
-	reg RegisterFile // the register file
 	mem           MemoryRegisters
 	config        Config // configuration
 	datasetOffset uint64
 
@@ -66,48 +60,47 @@
 }
 
-func MaskRegisterExponentMantissa(f float64, mode uint64) float64 {
-	return math.Float64frombits((math.Float64bits(f) & dynamicMantissaMask) | mode)
-}
-
 type Config struct {
 	eMask   [2]uint64
 	readReg [4]uint64
 }
 
 // Run calculate hash based on input
-func (vm *VM) Run(inputHash [64]byte) {
+// Warning: underlying calls will run asm.SetRoundingMode directly
+// It is the caller's responsibility to set and restore the mode to softfloat.RoundingModeToNearest between full executions
+// Additionally, runtime.LockOSThread and defer runtime.UnlockOSThread are recommended to prevent other goroutines sharing these changes
+func (vm *VM) Run(inputHash [64]byte, roundingMode softfloat.RoundingMode) (reg RegisterFile) {
 
-	aes.FillAes4Rx4(inputHash, vm.buffer[:])
+	reg.FPRC = roundingMode
 
-	for i := range vm.entropy {
-		vm.entropy[i] = binary.LittleEndian.Uint64(vm.buffer[i*8:])
-	}
+	// buffer: first 128 bytes are entropy, the rest are program bytes
+	var buffer [16*8 + RANDOMX_PROGRAM_SIZE*8]byte
+	aes.FillAes4Rx4(inputHash, buffer[:])
 
-	vm.Prog = vm.buffer[len(vm.entropy)*8:]
+	entropy := (*[16]uint64)(unsafe.Pointer(&buffer))
 
-	clear(vm.reg.r[:])
+	prog := buffer[len(entropy)*8:]
 
 	// do more initialization before we run
 
-	for i := range vm.entropy[:8] {
-		vm.reg.a[i/2][i%2] = math.Float64frombits(getSmallPositiveFloatBits(vm.entropy[i]))
+	for i := range entropy[:8] {
+		reg.A[i/2][i%2] = softfloat.SmallPositiveFloatBits(entropy[i])
 	}
 
-	vm.mem.ma = vm.entropy[8] & CacheLineAlignMask
-	vm.mem.mx = vm.entropy[10]
+	vm.mem.ma = entropy[8] & CacheLineAlignMask
+	vm.mem.mx = entropy[10]
 
-	addressRegisters := vm.entropy[12]
+	addressRegisters := entropy[12]
 	for i := range vm.config.readReg {
 		vm.config.readReg[i] = uint64(i*2) + (addressRegisters & 1)
 		addressRegisters >>= 1
 	}
 
-	vm.datasetOffset = (vm.entropy[13] % (DATASETEXTRAITEMS + 1)) * CacheLineSize
-	vm.config.eMask[LOW] = getFloatMask(vm.entropy[14])
-	vm.config.eMask[HIGH] = getFloatMask(vm.entropy[15])
+	vm.datasetOffset = (entropy[13] % (DATASETEXTRAITEMS + 1)) * CacheLineSize
+	vm.config.eMask[LOW] = softfloat.EMask(entropy[14])
+	vm.config.eMask[HIGH] = softfloat.EMask(entropy[15])
 
-	vm.CompileToBytecode()
+	vm.ByteCode = CompileProgramToByteCode(prog)
 
 	spAddr0 := vm.mem.mx
 	spAddr1 := vm.mem.ma
@@ -115,50 +108,52 @@
 	var rlCache RegisterLine
 
 	for ic := 0; ic < RANDOMX_PROGRAM_ITERATIONS; ic++ {
-		spMix := vm.reg.r[vm.config.readReg[0]] ^ vm.reg.r[vm.config.readReg[1]]
+		spMix := reg.R[vm.config.readReg[0]] ^ reg.R[vm.config.readReg[1]]
 
 		spAddr0 ^= spMix
 		spAddr0 &= ScratchpadL3Mask64
 		spAddr1 ^= spMix >> 32
 		spAddr1 &= ScratchpadL3Mask64
 
+		//TODO: optimize these loads!
 		for i := uint64(0); i < RegistersCount; i++ {
-			vm.reg.r[i] ^= vm.ScratchPad.Load64(uint32(spAddr0 + 8*i))
+			reg.R[i] ^= vm.ScratchPad.Load64(uint32(spAddr0 + 8*i))
 		}
 
 		for i := uint64(0); i < RegistersCountFloat; i++ {
-			vm.reg.f[i] = vm.ScratchPad.Load32FA(uint32(spAddr1 + 8*i))
+			reg.F[i] = vm.ScratchPad.Load32FA(uint32(spAddr1 + 8*i))
 		}
 
 		for i := uint64(0); i < RegistersCountFloat; i++ {
-			vm.reg.e[i] = vm.ScratchPad.Load32FA(uint32(spAddr1 + 8*(i+RegistersCountFloat)))
+			reg.E[i] = vm.ScratchPad.Load32FA(uint32(spAddr1 + 8*(i+RegistersCountFloat)))
 
-			vm.reg.e[i][LOW] = MaskRegisterExponentMantissa(vm.reg.e[i][LOW], vm.config.eMask[LOW])
-			vm.reg.e[i][HIGH] = MaskRegisterExponentMantissa(vm.reg.e[i][HIGH], vm.config.eMask[HIGH])
+			reg.E[i][LOW] = softfloat.MaskRegisterExponentMantissa(reg.E[i][LOW], vm.config.eMask[LOW])
+			reg.E[i][HIGH] = softfloat.MaskRegisterExponentMantissa(reg.E[i][HIGH], vm.config.eMask[HIGH])
 		}
 
-		vm.reg = vm.ByteCode.Execute(vm.reg, &vm.ScratchPad, vm.config.eMask)
+		// Run the actual bytecode
+		vm.ByteCode.Execute(&reg, &vm.ScratchPad, vm.config.eMask)
 
-		vm.mem.mx ^= vm.reg.r[vm.config.readReg[2]] ^ vm.reg.r[vm.config.readReg[3]]
+		vm.mem.mx ^= reg.R[vm.config.readReg[2]] ^ reg.R[vm.config.readReg[3]]
 		vm.mem.mx &= CacheLineAlignMask
 
 		vm.Dataset.PrefetchDataset(vm.datasetOffset + vm.mem.mx)
 		// execute diffuser superscalar program to get dataset 64 bytes
-		vm.Dataset.ReadDataset(vm.datasetOffset+vm.mem.ma, &vm.reg.r, &rlCache)
+		vm.Dataset.ReadDataset(vm.datasetOffset+vm.mem.ma, &reg.R, &rlCache)
 
 		// swap the elements
 		vm.mem.mx, vm.mem.ma = vm.mem.ma, vm.mem.mx
 
 		for i := uint64(0); i < RegistersCount; i++ {
-			vm.ScratchPad.Store64(uint32(spAddr1+8*i), vm.reg.r[i])
+			vm.ScratchPad.Store64(uint32(spAddr1+8*i), reg.R[i])
 		}
 
 		for i := uint64(0); i < RegistersCountFloat; i++ {
-			vm.reg.f[i][LOW] = math.Float64frombits(math.Float64bits(vm.reg.f[i][LOW]) ^ math.Float64bits(vm.reg.e[i][LOW]))
-			vm.reg.f[i][HIGH] = math.Float64frombits(math.Float64bits(vm.reg.f[i][HIGH]) ^ math.Float64bits(vm.reg.e[i][HIGH]))
+			reg.F[i][LOW] = softfloat.Xor(reg.F[i][LOW], reg.E[i][LOW])
+			reg.F[i][HIGH] = softfloat.Xor(reg.F[i][HIGH], reg.E[i][HIGH])
 
-			vm.ScratchPad.Store64(uint32(spAddr0+16*i), math.Float64bits(vm.reg.f[i][LOW]))
-			vm.ScratchPad.Store64(uint32(spAddr0+16*i+8), math.Float64bits(vm.reg.f[i][HIGH]))
+			vm.ScratchPad.Store64(uint32(spAddr0+16*i), math.Float64bits(reg.F[i][LOW]))
+			vm.ScratchPad.Store64(uint32(spAddr0+16*i+8), math.Float64bits(reg.F[i][HIGH]))
 		}
 
 		spAddr0 = 0
@@ -166,56 +161,52 @@
 	}
 
+	return reg
+
 }
 
 func (vm *VM) InitScratchpad(seed *[64]byte) {
 	vm.ScratchPad.Init(seed)
 }
 
-func (vm *VM) CalculateHash(input []byte, output *[32]byte) {
+func (vm *VM) RunLoops(tempHash [64]byte) RegisterFile {
+	var buf [8]byte
+
+	hash512, _ := blake2b.New512(nil)
 
 	// Lock thread due to rounding mode flags
 	runtime.LockOSThread()
 	defer runtime.UnlockOSThread()
 
-	//restore rounding mode to golang expected one
-	defer asm.SetRoundingMode(asm.RoundingModeToNearest)
-
-	// reset rounding mode if new hash being calculated
-	asm.SetRoundingMode(asm.RoundingModeToNearest)
-
-	tempHash := blake2b.Sum512(input)
-
-	vm.InitScratchpad(&tempHash)
-
-	hash512, _ := blake2b.New512(nil)
+	roundingMode := softfloat.RoundingModeToNearest
 
 	for chain := 0; chain < RANDOMX_PROGRAM_COUNT-1; chain++ {
-		vm.Run(tempHash)
+		reg := vm.Run(tempHash, roundingMode)
+		roundingMode = reg.FPRC
 
 		hash512.Reset()
-		for i := range vm.reg.r {
-			binary.LittleEndian.PutUint64(buf[:], vm.reg.r[i])
+		for i := range reg.R {
+			binary.LittleEndian.PutUint64(buf[:], reg.R[i])
 			hash512.Write(buf[:])
 		}
-		for i := range vm.reg.f {
-			binary.LittleEndian.PutUint64(buf[:], math.Float64bits(vm.reg.f[i][LOW]))
+		for i := range reg.F {
+			binary.LittleEndian.PutUint64(buf[:], math.Float64bits(reg.F[i][LOW]))
 			hash512.Write(buf[:])
-			binary.LittleEndian.PutUint64(buf[:], math.Float64bits(vm.reg.f[i][HIGH]))
+			binary.LittleEndian.PutUint64(buf[:], math.Float64bits(reg.F[i][HIGH]))
 			hash512.Write(buf[:])
 		}
-		for i := range vm.reg.e {
-			binary.LittleEndian.PutUint64(buf[:], math.Float64bits(vm.reg.e[i][LOW]))
+		for i := range reg.E {
+			binary.LittleEndian.PutUint64(buf[:], math.Float64bits(reg.E[i][LOW]))
 			hash512.Write(buf[:])
-			binary.LittleEndian.PutUint64(buf[:], math.Float64bits(vm.reg.e[i][HIGH]))
+			binary.LittleEndian.PutUint64(buf[:], math.Float64bits(reg.E[i][HIGH]))
 			hash512.Write(buf[:])
 		}
-		for i := range vm.reg.a {
-			binary.LittleEndian.PutUint64(buf[:], math.Float64bits(vm.reg.a[i][LOW]))
+		for i := range reg.A {
+			binary.LittleEndian.PutUint64(buf[:], math.Float64bits(reg.A[i][LOW]))
 			hash512.Write(buf[:])
-			binary.LittleEndian.PutUint64(buf[:], math.Float64bits(vm.reg.a[i][HIGH]))
+			binary.LittleEndian.PutUint64(buf[:], math.Float64bits(reg.A[i][HIGH]))
 			hash512.Write(buf[:])
 		}
 
@@ -223,7 +214,22 @@ func (vm *VM) CalculateHash(input []byte, output *[32]byte) {
 	}
 
 	// final loop executes here
-	vm.Run(tempHash)
+	reg := vm.Run(tempHash, roundingMode)
+	roundingMode = reg.FPRC
+
+	reg.SetRoundingMode(softfloat.RoundingModeToNearest)
+
+	return reg
+}
+
+func (vm *VM) CalculateHash(input []byte, output *[32]byte) {
+	var buf [8]byte
+
+	tempHash := blake2b.Sum512(input)
+
+	vm.InitScratchpad(&tempHash)
+
+	reg := vm.RunLoops(tempHash)
 
 	// now hash the scratch pad and place into register a
 	aes.HashAes1Rx4(vm.ScratchPad[:], &tempHash)
@@ -232,22 +238,22 @@
 	hash256.Reset()
 
-	for i := range vm.reg.r {
-		binary.LittleEndian.PutUint64(buf[:], vm.reg.r[i])
+	for i := range reg.R {
+		binary.LittleEndian.PutUint64(buf[:], reg.R[i])
 		hash256.Write(buf[:])
 	}
 
-	for i := range vm.reg.f {
-		binary.LittleEndian.PutUint64(buf[:], math.Float64bits(vm.reg.f[i][LOW]))
+	for i := range reg.F {
+		binary.LittleEndian.PutUint64(buf[:], math.Float64bits(reg.F[i][LOW]))
 		hash256.Write(buf[:])
-		binary.LittleEndian.PutUint64(buf[:], math.Float64bits(vm.reg.f[i][HIGH]))
+		binary.LittleEndian.PutUint64(buf[:], math.Float64bits(reg.F[i][HIGH]))
 		hash256.Write(buf[:])
 	}
 
-	for i := range vm.reg.e {
-		binary.LittleEndian.PutUint64(buf[:], math.Float64bits(vm.reg.e[i][LOW]))
+	for i := range reg.E {
+		binary.LittleEndian.PutUint64(buf[:], math.Float64bits(reg.E[i][LOW]))
 		hash256.Write(buf[:])
-		binary.LittleEndian.PutUint64(buf[:], math.Float64bits(vm.reg.e[i][HIGH]))
+		binary.LittleEndian.PutUint64(buf[:], math.Float64bits(reg.E[i][HIGH]))
 		hash256.Write(buf[:])
 	}
 
@@ -256,25 +262,3 @@
 	hash256.Sum(output[:0])
 
 }
-
-const mask22bit = (uint64(1) << 22) - 1
-
-func getSmallPositiveFloatBits(entropy uint64) uint64 {
-	exponent := entropy >> 59 //0..31
-	mantissa := entropy & mantissaMask
-	exponent += exponentBias
-	exponent &= exponentMask
-	exponent = exponent << mantissaSize
-	return exponent | mantissa
-}
-
-func getStaticExponent(entropy uint64) uint64 {
-	exponent := constExponentBits
-	exponent |= (entropy >> (64 - staticExponentBits)) << dynamicExponentBits
-	exponent <<= mantissaSize
-	return exponent
-}
-
-func getFloatMask(entropy uint64) uint64 {
-	return (entropy & mask22bit) | getStaticExponent(entropy)
-}
diff --git a/vm_bytecode.go b/vm_bytecode.go
new file mode 100644
index 0000000..9c582fd
--- /dev/null
+++ b/vm_bytecode.go
@@ -0,0 +1,207 @@
+package randomx
+
+import (
+	"git.gammaspectra.live/P2Pool/go-randomx/v2/softfloat"
+	"math"
+	"math/bits"
+)
+
+type ByteCodeInstruction struct {
+	Dst, Src byte
+	ImmB     uint8
+	Opcode   ByteCodeInstructionOp
+	MemMask  uint32
+	Imm      uint64
+	/*
+		union {
+			int_reg_t* idst;
+			rx_vec_f128* fdst;
+		};
+		union {
+			int_reg_t* isrc;
+			rx_vec_f128* fsrc;
+		};
+		union {
+			uint64_t imm;
+			int64_t simm;
+		};
+		InstructionType type;
+		union {
+			int16_t target;
+			uint16_t shift;
+		};
+		uint32_t memMask;
+	*/
+
+}
+
+func (i ByteCodeInstruction) jumpTarget() int {
+	return int(int16((uint16(i.ImmB) << 8) | uint16(i.Dst)))
+}
+
+func (i ByteCodeInstruction) getScratchpadAddress(ptr uint64) uint32 {
+	return uint32(ptr+i.Imm) & i.MemMask
+}
+
+func (i ByteCodeInstruction) getScratchpadZeroAddress() uint32 {
+	return uint32(i.Imm) & i.MemMask
+}
+
+type ByteCode [RANDOMX_PROGRAM_SIZE]ByteCodeInstruction
+
+// Execute runs a RandomX program with the given register file and scratchpad
+// Warning: this will call asm.SetRoundingMode directly
+// It is the caller's responsibility to set and restore the mode to softfloat.RoundingModeToNearest between full executions
+// Additionally, runtime.LockOSThread and defer runtime.UnlockOSThread are recommended to prevent other goroutines sharing these changes
+func (c *ByteCode) Execute(f *RegisterFile, pad *ScratchPad, eMask [2]uint64) {
+	for pc := 0; pc < RANDOMX_PROGRAM_SIZE; pc++ {
+		i := &c[pc]
+		switch i.Opcode {
+		case VM_NOP: // we do nothing
+		case VM_IADD_RS:
+			f.R[i.Dst] += (f.R[i.Src] << i.ImmB) + i.Imm
+		case VM_IADD_M:
+			f.R[i.Dst] += pad.Load64(i.getScratchpadAddress(f.R[i.Src]))
+		case VM_IADD_MZ:
+			f.R[i.Dst] += pad.Load64(uint32(i.Imm))
+		case VM_ISUB_R:
+			f.R[i.Dst] -= f.R[i.Src]
+		case VM_ISUB_I:
+			f.R[i.Dst] -= i.Imm
+		case VM_ISUB_M:
+			f.R[i.Dst] -= pad.Load64(i.getScratchpadAddress(f.R[i.Src]))
+		case VM_ISUB_MZ:
+			f.R[i.Dst] -= pad.Load64(uint32(i.Imm))
+		case VM_IMUL_R:
+			f.R[i.Dst] *= f.R[i.Src]
+		case VM_IMUL_I:
+			// also handles imul_rcp
+			f.R[i.Dst] *= i.Imm
+		case VM_IMUL_M:
+			f.R[i.Dst] *= pad.Load64(i.getScratchpadAddress(f.R[i.Src]))
+		case VM_IMUL_MZ:
+			f.R[i.Dst] *= pad.Load64(uint32(i.Imm))
+		case VM_IMULH_R:
+			f.R[i.Dst], _ = bits.Mul64(f.R[i.Dst], f.R[i.Src])
+		case VM_IMULH_M:
+			f.R[i.Dst], _ = bits.Mul64(f.R[i.Dst], pad.Load64(i.getScratchpadAddress(f.R[i.Src])))
+		case VM_IMULH_MZ:
+			f.R[i.Dst], _ = bits.Mul64(f.R[i.Dst], pad.Load64(uint32(i.Imm)))
+		case VM_ISMULH_R:
+			f.R[i.Dst] = smulh(int64(f.R[i.Dst]), int64(f.R[i.Src]))
+		case VM_ISMULH_M:
+			f.R[i.Dst] = smulh(int64(f.R[i.Dst]), int64(pad.Load64(i.getScratchpadAddress(f.R[i.Src]))))
+		case VM_ISMULH_MZ:
+			f.R[i.Dst] = smulh(int64(f.R[i.Dst]), int64(pad.Load64(uint32(i.Imm))))
+		case VM_INEG_R:
+			f.R[i.Dst] = -f.R[i.Dst]
+		case VM_IXOR_R:
+			f.R[i.Dst] ^= f.R[i.Src]
+		case VM_IXOR_I:
+			f.R[i.Dst] ^= i.Imm
+		case VM_IXOR_M:
+			f.R[i.Dst] ^= pad.Load64(i.getScratchpadAddress(f.R[i.Src]))
+		case VM_IXOR_MZ:
+			f.R[i.Dst] ^= pad.Load64(uint32(i.Imm))
+		case VM_IROR_R:
+			f.R[i.Dst] = bits.RotateLeft64(f.R[i.Dst], 0-int(f.R[i.Src]&63))
+		case VM_IROR_I:
+			//todo: can merge into VM_IROL_I
+			f.R[i.Dst] = bits.RotateLeft64(f.R[i.Dst], 0-int(i.Imm&63))
+		case VM_IROL_R:
+			f.R[i.Dst] = bits.RotateLeft64(f.R[i.Dst], int(f.R[i.Src]&63))
+		case VM_IROL_I:
+			f.R[i.Dst] = bits.RotateLeft64(f.R[i.Dst], int(i.Imm&63))
+		case VM_ISWAP_R:
+			f.R[i.Dst], f.R[i.Src] = f.R[i.Src], f.R[i.Dst]
+		case VM_FSWAP_RF:
+			f.F[i.Dst][HIGH], f.F[i.Dst][LOW] = f.F[i.Dst][LOW], f.F[i.Dst][HIGH]
+		case VM_FSWAP_RE:
+			f.E[i.Dst][HIGH], f.E[i.Dst][LOW] = f.E[i.Dst][LOW], f.E[i.Dst][HIGH]
+		case VM_FADD_R:
+			f.F[i.Dst][LOW] += f.A[i.Src][LOW]
+			f.F[i.Dst][HIGH] += f.A[i.Src][HIGH]
+		case VM_FADD_M:
+			lo, hi := pad.Load32F(i.getScratchpadAddress(f.R[i.Src]))
+			f.F[i.Dst][LOW] += lo
+			f.F[i.Dst][HIGH] += hi
+		case VM_FSUB_R:
+			f.F[i.Dst][LOW] -= f.A[i.Src][LOW]
+			f.F[i.Dst][HIGH] -= f.A[i.Src][HIGH]
+		case VM_FSUB_M:
+			lo, hi := pad.Load32F(i.getScratchpadAddress(f.R[i.Src]))
+			f.F[i.Dst][LOW] -= lo
+			f.F[i.Dst][HIGH] -= hi
+		case VM_FSCAL_R:
+			// not dependent on rounding modes
+			f.F[i.Dst][LOW] = softfloat.ScaleNegate(f.F[i.Dst][LOW])
+			f.F[i.Dst][HIGH] = softfloat.ScaleNegate(f.F[i.Dst][HIGH])
+		case VM_FMUL_R:
+			f.E[i.Dst][LOW] *= f.A[i.Src][LOW]
+			f.E[i.Dst][HIGH] *= f.A[i.Src][HIGH]
+		case VM_FDIV_M:
+			lo, hi := pad.Load32F(i.getScratchpadAddress(f.R[i.Src]))
+			f.E[i.Dst][LOW] /= softfloat.MaskRegisterExponentMantissa(lo, eMask[LOW])
+			f.E[i.Dst][HIGH] /= softfloat.MaskRegisterExponentMantissa(hi, eMask[HIGH])
+		case VM_FSQRT_R:
+			f.E[i.Dst][LOW] = math.Sqrt(f.E[i.Dst][LOW])
+			f.E[i.Dst][HIGH] = math.Sqrt(f.E[i.Dst][HIGH])
+		case VM_CBRANCH:
+			f.R[i.Src] += i.Imm
+			if (f.R[i.Src] & uint64(i.MemMask)) == 0 {
+				pc = i.jumpTarget()
+			}
+		case VM_CFROUND:
+			tmp := (bits.RotateLeft64(f.R[i.Src], 0-int(i.Imm))) % 4 // rotate right
+			f.SetRoundingMode(softfloat.RoundingMode(tmp))
+		case VM_ISTORE:
+			pad.Store64(i.getScratchpadAddress(f.R[i.Dst]), f.R[i.Src])
+		}
+	}
+}
+
+type ByteCodeInstructionOp int
+
+const (
+	VM_NOP = ByteCodeInstructionOp(iota)
+	VM_IADD_RS
+	VM_IADD_M
+	VM_IADD_MZ
+	VM_ISUB_R
+	VM_ISUB_I
+	VM_ISUB_M
+	VM_ISUB_MZ
+	VM_IMUL_R
+	VM_IMUL_I
+	VM_IMUL_M
+	VM_IMUL_MZ
+	VM_IMULH_R
+	VM_IMULH_M
+	VM_IMULH_MZ
+	VM_ISMULH_R
+	VM_ISMULH_M
+	VM_ISMULH_MZ
+	VM_INEG_R
+	VM_IXOR_R
+	VM_IXOR_I
+	VM_IXOR_M
+	VM_IXOR_MZ
+	VM_IROR_R
+	VM_IROR_I
+	VM_IROL_R
+	VM_IROL_I
+	VM_ISWAP_R
+	VM_FSWAP_RF
+	VM_FSWAP_RE
+	VM_FADD_R
+	VM_FADD_M
+	VM_FSUB_R
+	VM_FSUB_M
+	VM_FSCAL_R
+	VM_FMUL_R
+	VM_FDIV_M
+	VM_FSQRT_R
+	VM_CBRANCH
+	VM_CFROUND
+	VM_ISTORE
+)
diff --git a/vm_instruction.go b/vm_instruction.go
index 8de191a..f7d1315 100644
--- a/vm_instruction.go
+++ b/vm_instruction.go
@@ -37,8 +37,8 @@ import "encoding/binary"
 
 //reference https://github.com/tevador/RandomX/blob/master/doc/specs.md#51-instruction-encoding
 
-// since go does not have union, use byte array
-type VM_Instruction []byte // it is hardcode 8 bytes
+// VM_Instruction: since Go does not have unions, use a byte array
+type VM_Instruction [8]byte // it is hardcoded to 8 bytes
 
 func (ins VM_Instruction) IMM() uint32 {
 	return binary.LittleEndian.Uint32(ins[4:])
@@ -56,9 +56,9 @@ func (ins VM_Instruction) Opcode() byte {
 	return ins[0]
 }
 
-// CompileToBytecode this will interpret single vm instruction
+// CompileProgramToByteCode interprets the program's VM instructions into executable opcodes
 // reference https://github.com/tevador/RandomX/blob/master/doc/specs.md#52-integer-instructions
-func (vm *VM) CompileToBytecode() {
+func CompileProgramToByteCode(prog []byte) (bc ByteCode) {
 
 	var registerUsage [RegistersCount]int
 	for i := range registerUsage {
@@ -66,8 +66,8 @@
 
 	for i := 0; i < RANDOMX_PROGRAM_SIZE; i++ {
-		instr := VM_Instruction(vm.Prog[i*8:])
-		ibc := &vm.ByteCode[i]
+		instr := VM_Instruction(prog[i*8:])
+		ibc := &bc[i]
 		opcode := instr.Opcode()
 
 		dst := instr.Dst() % RegistersCount // bit shift optimization
 
@@ -317,7 +317,7 @@
 			//conditionmask := CONDITIONMASK << shift
 			ibc.Imm = signExtend2sCompl(instr.IMM()) | (uint64(1) << shift)
 			if CONDITIONOFFSET > 0 || shift > 0 {
-				ibc.Imm &= (^(uint64(1) << (shift - 1)))
+				ibc.Imm &= ^(uint64(1) << (shift - 1))
 			}
 			ibc.MemMask = CONDITIONMASK << shift
 
@@ -349,6 +349,8 @@
 		}
 	}
 
+	return bc
+
 }
 
 type ScratchPad [ScratchpadSize]byte
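
--
Usage sketch (not part of the patch): after this change, Run no longer restores
the FPU state itself; CalculateHash (via RunLoops) pins the OS thread and
resets the rounding mode internally, and a caller driving Run directly has to
do the same, per the new doc comments. This sketch assumes an
already-initialized *randomx.VM (cache/dataset setup is outside this patch):

	package main

	import (
		"runtime"

		randomx "git.gammaspectra.live/P2Pool/go-randomx/v2"
		"git.gammaspectra.live/P2Pool/go-randomx/v2/softfloat"
	)

	func runOnce(vm *randomx.VM, seed [64]byte) randomx.RegisterFile {
		// Run mutates the OS thread's FPU rounding mode, so pin the
		// goroutine to keep the change from leaking to other goroutines.
		runtime.LockOSThread()
		defer runtime.UnlockOSThread()

		vm.InitScratchpad(&seed)
		reg := vm.Run(seed, softfloat.RoundingModeToNearest)

		// Restore the rounding mode Go itself expects before unlocking.
		reg.SetRoundingMode(softfloat.RoundingModeToNearest)
		return reg
	}

For plain hashing, the public entry point already does all of this internally:

	var output [32]byte
	vm.CalculateHash(input, &output)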
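
A similar sketch of the new argon2 package API as cache.Init uses it: the
Argon2d blocks are built once, then reinterpreted in place as the cache's own
block type. MemoryBlock below is a hypothetical stand-in for the repo's real
type, which this patch does not show; only the flat uint64 layout matters:

	package main

	import (
		"unsafe"

		"git.gammaspectra.live/P2Pool/go-randomx/v2/argon2"
	)

	// hypothetical stand-in; any flat uint64 array layout works the same way
	type MemoryBlock [64]uint64

	func buildCacheBlocks(key []byte) []MemoryBlock {
		// RandomX Argon2d parameters from config.go: 262144 KiB memory,
		// 3 iterations, 1 lane, salt "RandomX\x03", no output key.
		blocks := argon2.BuildBlocks(key, []byte("RandomX\x03"), []byte{}, []byte{}, 3, 262144, 1, 0)

		// Reinterpret []argon2.Block as []MemoryBlock without copying,
		// scaling the length by the ratio of the two block sizes.
		return unsafe.Slice((*MemoryBlock)(unsafe.Pointer(unsafe.SliceData(blocks))),
			int(unsafe.Sizeof(argon2.Block{}))/int(unsafe.Sizeof(MemoryBlock{}))*len(blocks))
	}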