Move argon2 / float packages to their own folders, cleanup vm Run
All checks were successful (continuous-integration/drone/push: build is passing)

DataHoarder 2024-04-15 03:05:21 +02:00
parent 5b9b3c3565
commit 432590f930
Signed by: DataHoarder
SSH key fingerprint: SHA256:OLTRf6Fl87G52SiR7sWLGNzlJt4WOX+tfI2yxo0z7xk
13 changed files with 480 additions and 392 deletions

@ -1,58 +0,0 @@
package randomx
import "golang.org/x/crypto/blake2b"
import (
_ "golang.org/x/crypto/argon2"
_ "unsafe"
)
// see reference configuration.h
// Cache size in KiB. Must be a power of 2.
const RANDOMX_ARGON_MEMORY = 262144
// Number of Argon2d iterations for Cache initialization.
const RANDOMX_ARGON_ITERATIONS = 3
// Number of parallel lanes for Cache initialization.
const RANDOMX_ARGON_LANES = 1
// Argon2d salt
const RANDOMX_ARGON_SALT = "RandomX\x03"
const ArgonSaltSize uint32 = 8 //sizeof("" RANDOMX_ARGON_SALT) - 1
const ArgonBlockSize uint32 = 1024
type argonBlock [128]uint64
const syncPoints = 4
//go:linkname argon2_initHash golang.org/x/crypto/argon2.initHash
func argon2_initHash(password, salt, key, data []byte, time, memory, threads, keyLen uint32, mode int) [blake2b.Size + 8]byte
//go:linkname argon2_initBlocks golang.org/x/crypto/argon2.initBlocks
func argon2_initBlocks(h0 *[blake2b.Size + 8]byte, memory, threads uint32) []argonBlock
//go:linkname argon2_processBlocks golang.org/x/crypto/argon2.processBlocks
func argon2_processBlocks(B []argonBlock, time, memory, threads uint32, mode int)
// argon2_buildBlocks is adapted from golang.org/x/crypto/argon2.deriveKey, without the final deriveKey call
func argon2_buildBlocks(password, salt, secret, data []byte, time, memory uint32, threads uint8, keyLen uint32) []argonBlock {
if time < 1 {
panic("argon2: number of rounds too small")
}
if threads < 1 {
panic("argon2: parallelism degree too low")
}
const mode = 0 /* argon2d */
h0 := argon2_initHash(password, salt, secret, data, time, memory, uint32(threads), keyLen, mode)
memory = memory / (syncPoints * uint32(threads)) * (syncPoints * uint32(threads))
if memory < 2*syncPoints*uint32(threads) {
memory = 2 * syncPoints * uint32(threads)
}
B := argon2_initBlocks(&h0, memory, uint32(threads))
argon2_processBlocks(B, time, memory, uint32(threads), mode)
return B
}

argon2/argon2.go (new file, 44 lines)

@ -0,0 +1,44 @@
package argon2
import "golang.org/x/crypto/blake2b"
import (
_ "golang.org/x/crypto/argon2"
_ "unsafe"
)
const BlockSize uint32 = 1024
type Block [BlockSize / 8]uint64
const syncPoints = 4
//go:linkname initHash golang.org/x/crypto/argon2.initHash
func initHash(password, salt, key, data []byte, time, memory, threads, keyLen uint32, mode int) [blake2b.Size + 8]byte
//go:linkname initBlocks golang.org/x/crypto/argon2.initBlocks
func initBlocks(h0 *[blake2b.Size + 8]byte, memory, threads uint32) []Block
//go:linkname processBlocks golang.org/x/crypto/argon2.processBlocks
func processBlocks(B []Block, time, memory, threads uint32, mode int)
// BuildBlocks is adapted from golang.org/x/crypto/argon2.deriveKey, without the final deriveKey call
func BuildBlocks(password, salt, secret, data []byte, time, memory uint32, threads uint8, keyLen uint32) []Block {
if time < 1 {
panic("argon2: number of rounds too small")
}
if threads < 1 {
panic("argon2: parallelism degree too low")
}
const mode = 0 /* argon2d */
h0 := initHash(password, salt, secret, data, time, memory, uint32(threads), keyLen, mode)
memory = memory / (syncPoints * uint32(threads)) * (syncPoints * uint32(threads))
if memory < 2*syncPoints*uint32(threads) {
memory = 2 * syncPoints * uint32(threads)
}
B := initBlocks(&h0, memory, uint32(threads))
processBlocks(B, time, memory, uint32(threads), mode)
return B
}
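For illustration, a minimal sketch of calling the relocated package directly. The key is an arbitrary example value; the parameters (3 iterations, 262144 KiB, 1 lane, salt "RandomX\x03") are the RandomX cache configuration from config.go below, so this allocates about 256 MiB:

package main

import (
	"fmt"

	"git.gammaspectra.live/P2Pool/go-randomx/v2/argon2"
)

func main() {
	// Argon2d fill with the RandomX cache parameters; keyLen is 0
	// because only the filled blocks are wanted, not a derived key.
	blocks := argon2.BuildBlocks([]byte("example key"), []byte("RandomX\x03"),
		[]byte{}, []byte{}, 3, 262144, 1, 0)
	// 262144 KiB of memory / 1 KiB per block = 262144 blocks of 128 words.
	fmt.Println(len(blocks), len(blocks[0]))
}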

@ -1,14 +1,7 @@
package asm
type RoundingMode uint8
import "git.gammaspectra.live/P2Pool/go-randomx/v2/softfloat"
const (
RoundingModeToNearest = RoundingMode(iota)
RoundingModeToNegative
RoundingModeToPositive
RoundingModeToZero
)
func SetRoundingMode(mode RoundingMode) {
func SetRoundingMode(mode softfloat.RoundingMode) {
setRoundingMode(uint8(mode))
}

@ -1,205 +0,0 @@
package randomx
import (
"git.gammaspectra.live/P2Pool/go-randomx/v2/asm"
"math"
"math/bits"
)
type ByteCodeInstruction struct {
Dst, Src byte
ImmB uint8
Opcode ByteCodeInstructionOp
MemMask uint32
Imm uint64
/*
union {
int_reg_t* idst;
rx_vec_f128* fdst;
};
union {
int_reg_t* isrc;
rx_vec_f128* fsrc;
};
union {
uint64_t imm;
int64_t simm;
};
InstructionType type;
union {
int16_t target;
uint16_t shift;
};
uint32_t memMask;
*/
}
func (i ByteCodeInstruction) jumpTarget() int {
return int(int16((uint16(i.ImmB) << 8) | uint16(i.Dst)))
}
func (i ByteCodeInstruction) getScratchpadAddress(ptr uint64) uint32 {
return uint32(ptr+i.Imm) & i.MemMask
}
func (i ByteCodeInstruction) getScratchpadZeroAddress() uint32 {
return uint32(i.Imm) & i.MemMask
}
type ByteCode [RANDOMX_PROGRAM_SIZE]ByteCodeInstruction
func (c *ByteCode) Execute(f RegisterFile, pad *ScratchPad, eMask [2]uint64) RegisterFile {
for pc := 0; pc < RANDOMX_PROGRAM_SIZE; pc++ {
i := &c[pc]
switch i.Opcode {
case VM_IADD_RS:
f.r[i.Dst] += (f.r[i.Src] << i.ImmB) + i.Imm
case VM_IADD_M:
f.r[i.Dst] += pad.Load64(i.getScratchpadAddress(f.r[i.Src]))
case VM_IADD_MZ:
f.r[i.Dst] += pad.Load64(uint32(i.Imm))
case VM_ISUB_R:
f.r[i.Dst] -= f.r[i.Src]
case VM_ISUB_I:
f.r[i.Dst] -= i.Imm
case VM_ISUB_M:
f.r[i.Dst] -= pad.Load64(i.getScratchpadAddress(f.r[i.Src]))
case VM_ISUB_MZ:
f.r[i.Dst] -= pad.Load64(uint32(i.Imm))
case VM_IMUL_R:
f.r[i.Dst] *= f.r[i.Src]
case VM_IMUL_I:
// also handles imul_rcp
f.r[i.Dst] *= i.Imm
case VM_IMUL_M:
f.r[i.Dst] *= pad.Load64(i.getScratchpadAddress(f.r[i.Src]))
case VM_IMUL_MZ:
f.r[i.Dst] *= pad.Load64(uint32(i.Imm))
case VM_IMULH_R:
f.r[i.Dst], _ = bits.Mul64(f.r[i.Dst], f.r[i.Src])
case VM_IMULH_M:
f.r[i.Dst], _ = bits.Mul64(f.r[i.Dst], pad.Load64(i.getScratchpadAddress(f.r[i.Src])))
case VM_IMULH_MZ:
f.r[i.Dst], _ = bits.Mul64(f.r[i.Dst], pad.Load64(uint32(i.Imm)))
case VM_ISMULH_R:
f.r[i.Dst] = smulh(int64(f.r[i.Dst]), int64(f.r[i.Src]))
case VM_ISMULH_M:
f.r[i.Dst] = smulh(int64(f.r[i.Dst]), int64(pad.Load64(i.getScratchpadAddress(f.r[i.Src]))))
case VM_ISMULH_MZ:
f.r[i.Dst] = smulh(int64(f.r[i.Dst]), int64(pad.Load64(uint32(i.Imm))))
case VM_INEG_R:
//f.r[i.Dst] = (^(f.r[i.Dst])) + 1 // 2's complement negative
f.r[i.Dst] = -f.r[i.Dst]
case VM_IXOR_R:
f.r[i.Dst] ^= f.r[i.Src]
case VM_IXOR_I:
f.r[i.Dst] ^= i.Imm
case VM_IXOR_M:
f.r[i.Dst] ^= pad.Load64(i.getScratchpadAddress(f.r[i.Src]))
case VM_IXOR_MZ:
f.r[i.Dst] ^= pad.Load64(uint32(i.Imm))
case VM_IROR_R:
f.r[i.Dst] = bits.RotateLeft64(f.r[i.Dst], 0-int(f.r[i.Src]&63))
case VM_IROR_I:
//todo: can merge into VM_IROL_I
f.r[i.Dst] = bits.RotateLeft64(f.r[i.Dst], 0-int(i.Imm&63))
case VM_IROL_R:
f.r[i.Dst] = bits.RotateLeft64(f.r[i.Dst], int(f.r[i.Src]&63))
case VM_IROL_I:
f.r[i.Dst] = bits.RotateLeft64(f.r[i.Dst], int(i.Imm&63))
case VM_ISWAP_R:
f.r[i.Dst], f.r[i.Src] = f.r[i.Src], f.r[i.Dst]
case VM_FSWAP_RF:
f.f[i.Dst][HIGH], f.f[i.Dst][LOW] = f.f[i.Dst][LOW], f.f[i.Dst][HIGH]
case VM_FSWAP_RE:
f.e[i.Dst][HIGH], f.e[i.Dst][LOW] = f.e[i.Dst][LOW], f.e[i.Dst][HIGH]
case VM_FADD_R:
f.f[i.Dst][LOW] += f.a[i.Src][LOW]
f.f[i.Dst][HIGH] += f.a[i.Src][HIGH]
case VM_FADD_M:
lo, hi := pad.Load32F(i.getScratchpadAddress(f.r[i.Src]))
f.f[i.Dst][LOW] += lo
f.f[i.Dst][HIGH] += hi
case VM_FSUB_R:
f.f[i.Dst][LOW] -= f.a[i.Src][LOW]
f.f[i.Dst][HIGH] -= f.a[i.Src][HIGH]
case VM_FSUB_M:
lo, hi := pad.Load32F(i.getScratchpadAddress(f.r[i.Src]))
f.f[i.Dst][LOW] -= lo
f.f[i.Dst][HIGH] -= hi
case VM_FSCAL_R:
// not dependent on rounding mode
f.f[i.Dst][LOW] = math.Float64frombits(math.Float64bits(f.f[i.Dst][LOW]) ^ 0x80F0000000000000)
f.f[i.Dst][HIGH] = math.Float64frombits(math.Float64bits(f.f[i.Dst][HIGH]) ^ 0x80F0000000000000)
case VM_FMUL_R:
f.e[i.Dst][LOW] *= f.a[i.Src][LOW]
f.e[i.Dst][HIGH] *= f.a[i.Src][HIGH]
case VM_FDIV_M:
lo, hi := pad.Load32F(i.getScratchpadAddress(f.r[i.Src]))
f.e[i.Dst][LOW] /= MaskRegisterExponentMantissa(lo, eMask[LOW])
f.e[i.Dst][HIGH] /= MaskRegisterExponentMantissa(hi, eMask[HIGH])
case VM_FSQRT_R:
f.e[i.Dst][LOW] = math.Sqrt(f.e[i.Dst][LOW])
f.e[i.Dst][HIGH] = math.Sqrt(f.e[i.Dst][HIGH])
case VM_CBRANCH:
f.r[i.Src] += i.Imm
if (f.r[i.Src] & uint64(i.MemMask)) == 0 {
pc = i.jumpTarget()
}
case VM_CFROUND:
tmp := (bits.RotateLeft64(f.r[i.Src], 0-int(i.Imm))) % 4 // rotate right
asm.SetRoundingMode(asm.RoundingMode(tmp))
case VM_ISTORE:
pad.Store64(i.getScratchpadAddress(f.r[i.Dst]), f.r[i.Src])
case VM_NOP: // we do nothing
}
}
return f
}
type ByteCodeInstructionOp int
const (
VM_NOP = ByteCodeInstructionOp(iota)
VM_IADD_RS
VM_IADD_M
VM_IADD_MZ
VM_ISUB_R
VM_ISUB_I
VM_ISUB_M
VM_ISUB_MZ
VM_IMUL_R
VM_IMUL_I
VM_IMUL_M
VM_IMUL_MZ
VM_IMULH_R
VM_IMULH_M
VM_IMULH_MZ
VM_ISMULH_R
VM_ISMULH_M
VM_ISMULH_MZ
VM_INEG_R
VM_IXOR_R
VM_IXOR_I
VM_IXOR_M
VM_IXOR_MZ
VM_IROR_R
VM_IROR_I
VM_IROL_R
VM_IROL_I
VM_ISWAP_R
VM_FSWAP_RF
VM_FSWAP_RE
VM_FADD_R
VM_FADD_M
VM_FSUB_R
VM_FSUB_M
VM_FSCAL_R
VM_FMUL_R
VM_FDIV_M
VM_FSQRT_R
VM_CBRANCH
VM_CFROUND
VM_ISTORE
)

@ -1,6 +1,7 @@
package randomx
import (
"git.gammaspectra.live/P2Pool/go-randomx/v2/argon2"
"git.gammaspectra.live/P2Pool/go-randomx/v2/keys"
"runtime"
"slices"
@ -66,9 +67,9 @@ func (cache *Randomx_Cache) Init(key []byte) {
kkey := slices.Clone(key)
argonBlocks := argon2_buildBlocks(kkey, []byte(RANDOMX_ARGON_SALT), []byte{}, []byte{}, RANDOMX_ARGON_ITERATIONS, RANDOMX_ARGON_MEMORY, RANDOMX_ARGON_LANES, 0)
argonBlocks := argon2.BuildBlocks(kkey, []byte(RANDOMX_ARGON_SALT), []byte{}, []byte{}, RANDOMX_ARGON_ITERATIONS, RANDOMX_ARGON_MEMORY, RANDOMX_ARGON_LANES, 0)
memoryBlocks := unsafe.Slice((*MemoryBlock)(unsafe.Pointer(unsafe.SliceData(argonBlocks))), int(unsafe.Sizeof(argonBlock{}))/int(unsafe.Sizeof(MemoryBlock{}))*len(argonBlocks))
memoryBlocks := unsafe.Slice((*MemoryBlock)(unsafe.Pointer(unsafe.SliceData(argonBlocks))), int(unsafe.Sizeof(argon2.Block{}))/int(unsafe.Sizeof(MemoryBlock{}))*len(argonBlocks))
cache.Blocks = memoryBlocks
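The rewritten Init reinterprets the Argon2 output in place instead of copying it. A sketch of the idiom (reinterpretBlocks is a hypothetical helper name; it assumes unsafe.Sizeof(argon2.Block{}) is an exact multiple of unsafe.Sizeof(MemoryBlock{})):

// reinterpretBlocks views a []argon2.Block as a []MemoryBlock without
// copying, by re-slicing the same backing memory at a different type.
func reinterpretBlocks(in []argon2.Block) []MemoryBlock {
	ratio := int(unsafe.Sizeof(argon2.Block{})) / int(unsafe.Sizeof(MemoryBlock{}))
	return unsafe.Slice(
		(*MemoryBlock)(unsafe.Pointer(unsafe.SliceData(in))),
		ratio*len(in))
}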

@ -29,6 +29,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package randomx
import "git.gammaspectra.live/P2Pool/go-randomx/v2/argon2"
// see reference configuration.h
// Cache size in KiB. Must be a power of 2.
const RANDOMX_ARGON_MEMORY = 262144
// Number of Argon2d iterations for Cache initialization.
const RANDOMX_ARGON_ITERATIONS = 3
// Number of parallel lanes for Cache initialization.
const RANDOMX_ARGON_LANES = 1
// Argon2d salt
const RANDOMX_ARGON_SALT = "RandomX\x03"
const ArgonSaltSize uint32 = 8 //sizeof("" RANDOMX_ARGON_SALT) - 1
// Number of random Cache accesses per Dataset item. Minimum is 2.
const RANDOMX_CACHE_ACCESSES = 8
@ -74,7 +90,7 @@ const ScratchpadSize uint32 = RANDOMX_SCRATCHPAD_L3
const CacheLineAlignMask = (RANDOMX_DATASET_BASE_SIZE - 1) & (^(CacheLineSize - 1))
const CacheSize uint64 = RANDOMX_ARGON_MEMORY * uint64(ArgonBlockSize)
const CacheSize uint64 = RANDOMX_ARGON_MEMORY * uint64(argon2.BlockSize)
const ScratchpadL1 = RANDOMX_SCRATCHPAD_L1 / 8
const ScratchpadL2 = RANDOMX_SCRATCHPAD_L2 / 8
@ -90,16 +106,6 @@ const CONDITIONOFFSET = RANDOMX_JUMP_OFFSET
const CONDITIONMASK = (1 << RANDOMX_JUMP_BITS) - 1
const STOREL3CONDITION = 14
const mantissaSize = 52
const exponentSize = 11
const mantissaMask = (uint64(1) << mantissaSize) - 1
const exponentMask = (uint64(1) << exponentSize) - 1
const exponentBias = 1023
const dynamicExponentBits = 4
const staticExponentBits = 4
const constExponentBits uint64 = 0x300
const dynamicMantissaMask = (uint64(1) << (mantissaSize + dynamicExponentBits)) - 1
const RANDOMX_FLAG_DEFAULT = uint64(0)
const RANDOMX_FLAG_JIT = uint64(1 << iota)

@ -1,5 +1,10 @@
package randomx
import (
"git.gammaspectra.live/P2Pool/go-randomx/v2/asm"
"git.gammaspectra.live/P2Pool/go-randomx/v2/softfloat"
)
const RegistersCount = 8
const RegistersCountFloat = 4
@ -9,10 +14,20 @@ const HIGH = 1
type RegisterLine [RegistersCount]uint64
type RegisterFile struct {
r RegisterLine
f [RegistersCountFloat][2]float64
e [RegistersCountFloat][2]float64
a [RegistersCountFloat][2]float64
R RegisterLine
F [RegistersCountFloat][2]float64
E [RegistersCountFloat][2]float64
A [RegistersCountFloat][2]float64
FPRC softfloat.RoundingMode
}
func (f *RegisterFile) SetRoundingMode(mode softfloat.RoundingMode) {
if f.FPRC == mode {
return
}
f.FPRC = mode
asm.SetRoundingMode(mode)
}
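SetRoundingMode caches the current mode in FPRC, so only genuine mode changes reach the hardware through asm. A hypothetical caller-side sketch:

rf := &RegisterFile{} // zero value of FPRC is RoundingModeToNearest
rf.SetRoundingMode(softfloat.RoundingModeToZero)    // mode differs: calls asm.SetRoundingMode
rf.SetRoundingMode(softfloat.RoundingModeToZero)    // cached: no-op
rf.SetRoundingMode(softfloat.RoundingModeToNearest) // restores the hardware mode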
type MemoryRegisters struct {

softfloat/const.go (new file, 37 lines)

@ -0,0 +1,37 @@
package softfloat
const (
mantbits64 uint = 52
expbits64 uint = 11
bias64 = -1<<(expbits64-1) + 1
nan64 uint64 = (1<<expbits64-1)<<mantbits64 + 1<<(mantbits64-1) // quiet NaN, 0 payload
inf64 uint64 = (1<<expbits64 - 1) << mantbits64
neg64 uint64 = 1 << (expbits64 + mantbits64)
)
const mantissaMask = (uint64(1) << mantbits64) - 1
const exponentMask = (uint64(1) << expbits64) - 1
const exponentBias = 1023
const dynamicExponentBits = 4
const staticExponentBits = 4
const constExponentBits uint64 = 0x300
const dynamicMantissaMask = (uint64(1) << (mantbits64 + dynamicExponentBits)) - 1
const mask22bit = (uint64(1) << 22) - 1
type RoundingMode uint8
const (
// RoundingModeToNearest IEEE 754 roundTiesToEven
RoundingModeToNearest = RoundingMode(iota)
// RoundingModeToNegative IEEE 754 roundTowardNegative
RoundingModeToNegative
// RoundingModeToPositive IEEE 754 roundTowardPositive
RoundingModeToPositive
// RoundingModeToZero IEEE 754 roundTowardZero
RoundingModeToZero
)
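These constants mirror the runtime's binary64 layout. A hypothetical in-package test sketch checking two of the packed values against the standard library:

package softfloat

import (
	"math"
	"testing"
)

func TestConstLayout(t *testing.T) {
	// inf64 is the exponent field saturated with a zero mantissa: +Inf.
	if inf64 != math.Float64bits(math.Inf(1)) {
		t.Error("inf64 does not match +Inf")
	}
	// neg64 is the sign bit alone, i.e. the bit pattern of -0.0.
	if neg64 != math.Float64bits(math.Copysign(0, -1)) {
		t.Error("neg64 does not match the sign bit")
	}
}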

softfloat/funcs.go (new file, 35 lines)

@ -0,0 +1,35 @@
package softfloat
import "math"
func MaskRegisterExponentMantissa(f float64, mode uint64) float64 {
return math.Float64frombits((math.Float64bits(f) & dynamicMantissaMask) | mode)
}
func ScaleNegate(f float64) float64 {
return math.Float64frombits(math.Float64bits(f) ^ 0x80F0000000000000)
}
func SmallPositiveFloatBits(entropy uint64) float64 {
exponent := entropy >> 59 //0..31
mantissa := entropy & mantissaMask
exponent += exponentBias
exponent &= exponentMask
exponent = exponent << mantbits64
return math.Float64frombits(exponent | mantissa)
}
func StaticExponent(entropy uint64) uint64 {
exponent := constExponentBits
exponent |= (entropy >> (64 - staticExponentBits)) << dynamicExponentBits
exponent <<= mantbits64
return exponent
}
func EMask(entropy uint64) uint64 {
return (entropy & mask22bit) | StaticExponent(entropy)
}
func Xor(a, b float64) float64 {
return math.Float64frombits(math.Float64bits(a) ^ math.Float64bits(b))
}
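For context, a sketch of how vm.Run (below) consumes these helpers; the names here are illustrative locals, not the VM's actual fields:

var entropy [16]uint64 // filled from the AES generator output in Run
var a [4][2]float64
// The first eight entropy words seed the A register group...
for i, e := range entropy[:8] {
	a[i/2][i%2] = softfloat.SmallPositiveFloatBits(e)
}
// ...and words 14 and 15 build the per-program E-group masks.
eMask := [2]uint64{softfloat.EMask(entropy[14]), softfloat.EMask(entropy[15])}
_, _ = a, eMask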

softfloat/softfloat.go (new file, 27 lines)

@ -0,0 +1,27 @@
package softfloat
import (
_ "runtime"
_ "unsafe"
)
//go:linkname funpack64 runtime.funpack64
func funpack64(f uint64) (sign, mant uint64, exp int, inf, nan bool)
//go:linkname fpack64 runtime.fpack64
func fpack64(sign, mant uint64, exp int, trunc uint64) uint64
//go:linkname fadd64 runtime.fadd64
func fadd64(f, g uint64) uint64
//go:linkname fsub64 runtime.fsub64
func fsub64(f, g uint64) uint64
//go:linkname fneg64 runtime.fneg64
func fneg64(f uint64) uint64
//go:linkname fmul64 runtime.fmul64
func fmul64(f, g uint64) uint64
//go:linkname fdiv64 runtime.fdiv64
func fdiv64(f, g uint64) uint64
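These linknamed helpers take and return raw IEEE-754 bit patterns rather than float64 values. A hypothetical in-package round-trip:

sum := math.Float64frombits(fadd64(math.Float64bits(1.5), math.Float64bits(2.25)))
fmt.Println(sum) // prints 3.75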

vm.go (176 changed lines)

@ -31,9 +31,10 @@ package randomx
import (
"git.gammaspectra.live/P2Pool/go-randomx/v2/aes"
"git.gammaspectra.live/P2Pool/go-randomx/v2/asm"
"git.gammaspectra.live/P2Pool/go-randomx/v2/softfloat"
"math"
"runtime"
"unsafe"
)
import "encoding/binary"
import "golang.org/x/crypto/blake2b"
@ -45,17 +46,10 @@ type REG struct {
type VM struct {
StateStart [64]byte
buffer [RANDOMX_PROGRAM_SIZE*8 + 16*8]byte // first 128 bytes are entropy below rest are program bytes
Prog []byte
ScratchPad ScratchPad
ByteCode ByteCode
// program configuration see program.hpp
entropy [16]uint64
reg RegisterFile // the register file
mem MemoryRegisters
config Config // configuration
datasetOffset uint64
@ -66,48 +60,47 @@ type VM struct {
}
func MaskRegisterExponentMantissa(f float64, mode uint64) float64 {
return math.Float64frombits((math.Float64bits(f) & dynamicMantissaMask) | mode)
}
type Config struct {
eMask [2]uint64
readReg [4]uint64
}
// Run calculates the hash based on the input
func (vm *VM) Run(inputHash [64]byte) {
// Warning: Underlying callers will run asm.SetRoundingMode directly
// It is the caller's responsibility to set and restore the mode to softfloat.RoundingModeToNearest between full executions
// Additionally, runtime.LockOSThread and defer runtime.UnlockOSThread are recommended to prevent other goroutines from sharing these changes
func (vm *VM) Run(inputHash [64]byte, roundingMode softfloat.RoundingMode) (reg RegisterFile) {
aes.FillAes4Rx4(inputHash, vm.buffer[:])
reg.FPRC = roundingMode
for i := range vm.entropy {
vm.entropy[i] = binary.LittleEndian.Uint64(vm.buffer[i*8:])
}
// buffer: the first 128 bytes are entropy, the rest below are program bytes
var buffer [16*8 + RANDOMX_PROGRAM_SIZE*8]byte
aes.FillAes4Rx4(inputHash, buffer[:])
vm.Prog = vm.buffer[len(vm.entropy)*8:]
entropy := (*[16]uint64)(unsafe.Pointer(&buffer))
clear(vm.reg.r[:])
prog := buffer[len(entropy)*8:]
// do more initialization before we run
for i := range vm.entropy[:8] {
vm.reg.a[i/2][i%2] = math.Float64frombits(getSmallPositiveFloatBits(vm.entropy[i]))
for i := range entropy[:8] {
reg.A[i/2][i%2] = softfloat.SmallPositiveFloatBits(entropy[i])
}
vm.mem.ma = vm.entropy[8] & CacheLineAlignMask
vm.mem.mx = vm.entropy[10]
vm.mem.ma = entropy[8] & CacheLineAlignMask
vm.mem.mx = entropy[10]
addressRegisters := vm.entropy[12]
addressRegisters := entropy[12]
for i := range vm.config.readReg {
vm.config.readReg[i] = uint64(i*2) + (addressRegisters & 1)
addressRegisters >>= 1
}
vm.datasetOffset = (vm.entropy[13] % (DATASETEXTRAITEMS + 1)) * CacheLineSize
vm.config.eMask[LOW] = getFloatMask(vm.entropy[14])
vm.config.eMask[HIGH] = getFloatMask(vm.entropy[15])
vm.datasetOffset = (entropy[13] % (DATASETEXTRAITEMS + 1)) * CacheLineSize
vm.config.eMask[LOW] = softfloat.EMask(entropy[14])
vm.config.eMask[HIGH] = softfloat.EMask(entropy[15])
vm.CompileToBytecode()
vm.ByteCode = CompileProgramToByteCode(prog)
spAddr0 := vm.mem.mx
spAddr1 := vm.mem.ma
@ -115,50 +108,52 @@ func (vm *VM) Run(inputHash [64]byte) {
var rlCache RegisterLine
for ic := 0; ic < RANDOMX_PROGRAM_ITERATIONS; ic++ {
spMix := vm.reg.r[vm.config.readReg[0]] ^ vm.reg.r[vm.config.readReg[1]]
spMix := reg.R[vm.config.readReg[0]] ^ reg.R[vm.config.readReg[1]]
spAddr0 ^= spMix
spAddr0 &= ScratchpadL3Mask64
spAddr1 ^= spMix >> 32
spAddr1 &= ScratchpadL3Mask64
//TODO: optimize these loads!
for i := uint64(0); i < RegistersCount; i++ {
vm.reg.r[i] ^= vm.ScratchPad.Load64(uint32(spAddr0 + 8*i))
reg.R[i] ^= vm.ScratchPad.Load64(uint32(spAddr0 + 8*i))
}
for i := uint64(0); i < RegistersCountFloat; i++ {
vm.reg.f[i] = vm.ScratchPad.Load32FA(uint32(spAddr1 + 8*i))
reg.F[i] = vm.ScratchPad.Load32FA(uint32(spAddr1 + 8*i))
}
for i := uint64(0); i < RegistersCountFloat; i++ {
vm.reg.e[i] = vm.ScratchPad.Load32FA(uint32(spAddr1 + 8*(i+RegistersCountFloat)))
reg.E[i] = vm.ScratchPad.Load32FA(uint32(spAddr1 + 8*(i+RegistersCountFloat)))
vm.reg.e[i][LOW] = MaskRegisterExponentMantissa(vm.reg.e[i][LOW], vm.config.eMask[LOW])
vm.reg.e[i][HIGH] = MaskRegisterExponentMantissa(vm.reg.e[i][HIGH], vm.config.eMask[HIGH])
reg.E[i][LOW] = softfloat.MaskRegisterExponentMantissa(reg.E[i][LOW], vm.config.eMask[LOW])
reg.E[i][HIGH] = softfloat.MaskRegisterExponentMantissa(reg.E[i][HIGH], vm.config.eMask[HIGH])
}
vm.reg = vm.ByteCode.Execute(vm.reg, &vm.ScratchPad, vm.config.eMask)
// Run the actual bytecode
vm.ByteCode.Execute(&reg, &vm.ScratchPad, vm.config.eMask)
vm.mem.mx ^= vm.reg.r[vm.config.readReg[2]] ^ vm.reg.r[vm.config.readReg[3]]
vm.mem.mx ^= reg.R[vm.config.readReg[2]] ^ reg.R[vm.config.readReg[3]]
vm.mem.mx &= CacheLineAlignMask
vm.Dataset.PrefetchDataset(vm.datasetOffset + vm.mem.mx)
// execute diffuser superscalar program to get 64 dataset bytes
vm.Dataset.ReadDataset(vm.datasetOffset+vm.mem.ma, &vm.reg.r, &rlCache)
vm.Dataset.ReadDataset(vm.datasetOffset+vm.mem.ma, &reg.R, &rlCache)
// swap the elements
vm.mem.mx, vm.mem.ma = vm.mem.ma, vm.mem.mx
for i := uint64(0); i < RegistersCount; i++ {
vm.ScratchPad.Store64(uint32(spAddr1+8*i), vm.reg.r[i])
vm.ScratchPad.Store64(uint32(spAddr1+8*i), reg.R[i])
}
for i := uint64(0); i < RegistersCountFloat; i++ {
vm.reg.f[i][LOW] = math.Float64frombits(math.Float64bits(vm.reg.f[i][LOW]) ^ math.Float64bits(vm.reg.e[i][LOW]))
vm.reg.f[i][HIGH] = math.Float64frombits(math.Float64bits(vm.reg.f[i][HIGH]) ^ math.Float64bits(vm.reg.e[i][HIGH]))
reg.F[i][LOW] = softfloat.Xor(reg.F[i][LOW], reg.E[i][LOW])
reg.F[i][HIGH] = softfloat.Xor(reg.F[i][HIGH], reg.E[i][HIGH])
vm.ScratchPad.Store64(uint32(spAddr0+16*i), math.Float64bits(vm.reg.f[i][LOW]))
vm.ScratchPad.Store64(uint32(spAddr0+16*i+8), math.Float64bits(vm.reg.f[i][HIGH]))
vm.ScratchPad.Store64(uint32(spAddr0+16*i), math.Float64bits(reg.F[i][LOW]))
vm.ScratchPad.Store64(uint32(spAddr0+16*i+8), math.Float64bits(reg.F[i][HIGH]))
}
spAddr0 = 0
@ -166,56 +161,52 @@ func (vm *VM) Run(inputHash [64]byte) {
}
return reg
}
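A sketch of the caller contract from the doc comment above (inputHash and nextHash are placeholders; RunLoops below implements the real chaining):

runtime.LockOSThread()
defer runtime.UnlockOSThread()
// Go itself expects round-to-nearest; restore it before unlocking.
defer asm.SetRoundingMode(softfloat.RoundingModeToNearest)

reg := vm.Run(inputHash, softfloat.RoundingModeToNearest)
// Carry the resulting mode into the next chained execution.
reg = vm.Run(nextHash, reg.FPRC)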
func (vm *VM) InitScratchpad(seed *[64]byte) {
vm.ScratchPad.Init(seed)
}
func (vm *VM) CalculateHash(input []byte, output *[32]byte) {
func (vm *VM) RunLoops(tempHash [64]byte) RegisterFile {
var buf [8]byte
hash512, _ := blake2b.New512(nil)
// Lock thread due to rounding mode flags
runtime.LockOSThread()
defer runtime.UnlockOSThread()
//restore rounding mode to golang expected one
defer asm.SetRoundingMode(asm.RoundingModeToNearest)
// reset rounding mode if new hash being calculated
asm.SetRoundingMode(asm.RoundingModeToNearest)
tempHash := blake2b.Sum512(input)
vm.InitScratchpad(&tempHash)
hash512, _ := blake2b.New512(nil)
roundingMode := softfloat.RoundingModeToNearest
for chain := 0; chain < RANDOMX_PROGRAM_COUNT-1; chain++ {
vm.Run(tempHash)
reg := vm.Run(tempHash, roundingMode)
roundingMode = reg.FPRC
hash512.Reset()
for i := range vm.reg.r {
binary.LittleEndian.PutUint64(buf[:], vm.reg.r[i])
for i := range reg.R {
binary.LittleEndian.PutUint64(buf[:], reg.R[i])
hash512.Write(buf[:])
}
for i := range vm.reg.f {
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(vm.reg.f[i][LOW]))
for i := range reg.F {
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(reg.F[i][LOW]))
hash512.Write(buf[:])
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(vm.reg.f[i][HIGH]))
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(reg.F[i][HIGH]))
hash512.Write(buf[:])
}
for i := range vm.reg.e {
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(vm.reg.e[i][LOW]))
for i := range reg.E {
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(reg.E[i][LOW]))
hash512.Write(buf[:])
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(vm.reg.e[i][HIGH]))
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(reg.E[i][HIGH]))
hash512.Write(buf[:])
}
for i := range vm.reg.a {
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(vm.reg.a[i][LOW]))
for i := range reg.A {
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(reg.A[i][LOW]))
hash512.Write(buf[:])
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(vm.reg.a[i][HIGH]))
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(reg.A[i][HIGH]))
hash512.Write(buf[:])
}
@ -223,7 +214,22 @@ func (vm *VM) CalculateHash(input []byte, output *[32]byte) {
}
// final loop executes here
vm.Run(tempHash)
reg := vm.Run(tempHash, roundingMode)
roundingMode = reg.FPRC
reg.SetRoundingMode(softfloat.RoundingModeToNearest)
return reg
}
func (vm *VM) CalculateHash(input []byte, output *[32]byte) {
var buf [8]byte
tempHash := blake2b.Sum512(input)
vm.InitScratchpad(&tempHash)
reg := vm.RunLoops(tempHash)
// now hash the scratch pad and place into register a
aes.HashAes1Rx4(vm.ScratchPad[:], &tempHash)
@ -232,22 +238,22 @@ func (vm *VM) CalculateHash(input []byte, output *[32]byte) {
hash256.Reset()
for i := range vm.reg.r {
binary.LittleEndian.PutUint64(buf[:], vm.reg.r[i])
for i := range reg.R {
binary.LittleEndian.PutUint64(buf[:], reg.R[i])
hash256.Write(buf[:])
}
for i := range vm.reg.f {
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(vm.reg.f[i][LOW]))
for i := range reg.F {
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(reg.F[i][LOW]))
hash256.Write(buf[:])
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(vm.reg.f[i][HIGH]))
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(reg.F[i][HIGH]))
hash256.Write(buf[:])
}
for i := range vm.reg.e {
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(vm.reg.e[i][LOW]))
for i := range reg.E {
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(reg.E[i][LOW]))
hash256.Write(buf[:])
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(vm.reg.e[i][HIGH]))
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(reg.E[i][HIGH]))
hash256.Write(buf[:])
}
@ -256,25 +262,3 @@ func (vm *VM) CalculateHash(input []byte, output *[32]byte) {
hash256.Sum(output[:0])
}
const mask22bit = (uint64(1) << 22) - 1
func getSmallPositiveFloatBits(entropy uint64) uint64 {
exponent := entropy >> 59 //0..31
mantissa := entropy & mantissaMask
exponent += exponentBias
exponent &= exponentMask
exponent = exponent << mantissaSize
return exponent | mantissa
}
func getStaticExponent(entropy uint64) uint64 {
exponent := constExponentBits
exponent |= (entropy >> (64 - staticExponentBits)) << dynamicExponentBits
exponent <<= mantissaSize
return exponent
}
func getFloatMask(entropy uint64) uint64 {
return (entropy & mask22bit) | getStaticExponent(entropy)
}

vm_bytecode.go (new file, 207 lines)

@ -0,0 +1,207 @@
package randomx
import (
"git.gammaspectra.live/P2Pool/go-randomx/v2/softfloat"
"math"
"math/bits"
)
type ByteCodeInstruction struct {
Dst, Src byte
ImmB uint8
Opcode ByteCodeInstructionOp
MemMask uint32
Imm uint64
/*
union {
int_reg_t* idst;
rx_vec_f128* fdst;
};
union {
int_reg_t* isrc;
rx_vec_f128* fsrc;
};
union {
uint64_t imm;
int64_t simm;
};
InstructionType type;
union {
int16_t target;
uint16_t shift;
};
uint32_t memMask;
*/
}
func (i ByteCodeInstruction) jumpTarget() int {
return int(int16((uint16(i.ImmB) << 8) | uint16(i.Dst)))
}
func (i ByteCodeInstruction) getScratchpadAddress(ptr uint64) uint32 {
return uint32(ptr+i.Imm) & i.MemMask
}
func (i ByteCodeInstruction) getScratchpadZeroAddress() uint32 {
return uint32(i.Imm) & i.MemMask
}
type ByteCode [RANDOMX_PROGRAM_SIZE]ByteCodeInstruction
// Execute runs a RandomX program with the given register file and scratchpad
// Warning: This will call asm.SetRoundingMode directly
// It is the caller's responsibility to set and restore the mode to softfloat.RoundingModeToNearest between full executions
// Additionally, runtime.LockOSThread and defer runtime.UnlockOSThread are recommended to prevent other goroutines from sharing these changes
func (c *ByteCode) Execute(f *RegisterFile, pad *ScratchPad, eMask [2]uint64) {
for pc := 0; pc < RANDOMX_PROGRAM_SIZE; pc++ {
i := &c[pc]
switch i.Opcode {
case VM_NOP: // we do nothing
case VM_IADD_RS:
f.R[i.Dst] += (f.R[i.Src] << i.ImmB) + i.Imm
case VM_IADD_M:
f.R[i.Dst] += pad.Load64(i.getScratchpadAddress(f.R[i.Src]))
case VM_IADD_MZ:
f.R[i.Dst] += pad.Load64(uint32(i.Imm))
case VM_ISUB_R:
f.R[i.Dst] -= f.R[i.Src]
case VM_ISUB_I:
f.R[i.Dst] -= i.Imm
case VM_ISUB_M:
f.R[i.Dst] -= pad.Load64(i.getScratchpadAddress(f.R[i.Src]))
case VM_ISUB_MZ:
f.R[i.Dst] -= pad.Load64(uint32(i.Imm))
case VM_IMUL_R:
f.R[i.Dst] *= f.R[i.Src]
case VM_IMUL_I:
// also handles imul_rcp
f.R[i.Dst] *= i.Imm
case VM_IMUL_M:
f.R[i.Dst] *= pad.Load64(i.getScratchpadAddress(f.R[i.Src]))
case VM_IMUL_MZ:
f.R[i.Dst] *= pad.Load64(uint32(i.Imm))
case VM_IMULH_R:
f.R[i.Dst], _ = bits.Mul64(f.R[i.Dst], f.R[i.Src])
case VM_IMULH_M:
f.R[i.Dst], _ = bits.Mul64(f.R[i.Dst], pad.Load64(i.getScratchpadAddress(f.R[i.Src])))
case VM_IMULH_MZ:
f.R[i.Dst], _ = bits.Mul64(f.R[i.Dst], pad.Load64(uint32(i.Imm)))
case VM_ISMULH_R:
f.R[i.Dst] = smulh(int64(f.R[i.Dst]), int64(f.R[i.Src]))
case VM_ISMULH_M:
f.R[i.Dst] = smulh(int64(f.R[i.Dst]), int64(pad.Load64(i.getScratchpadAddress(f.R[i.Src]))))
case VM_ISMULH_MZ:
f.R[i.Dst] = smulh(int64(f.R[i.Dst]), int64(pad.Load64(uint32(i.Imm))))
case VM_INEG_R:
f.R[i.Dst] = -f.R[i.Dst]
case VM_IXOR_R:
f.R[i.Dst] ^= f.R[i.Src]
case VM_IXOR_I:
f.R[i.Dst] ^= i.Imm
case VM_IXOR_M:
f.R[i.Dst] ^= pad.Load64(i.getScratchpadAddress(f.R[i.Src]))
case VM_IXOR_MZ:
f.R[i.Dst] ^= pad.Load64(uint32(i.Imm))
case VM_IROR_R:
f.R[i.Dst] = bits.RotateLeft64(f.R[i.Dst], 0-int(f.R[i.Src]&63))
case VM_IROR_I:
//todo: can merge into VM_IROL_I
f.R[i.Dst] = bits.RotateLeft64(f.R[i.Dst], 0-int(i.Imm&63))
case VM_IROL_R:
f.R[i.Dst] = bits.RotateLeft64(f.R[i.Dst], int(f.R[i.Src]&63))
case VM_IROL_I:
f.R[i.Dst] = bits.RotateLeft64(f.R[i.Dst], int(i.Imm&63))
case VM_ISWAP_R:
f.R[i.Dst], f.R[i.Src] = f.R[i.Src], f.R[i.Dst]
case VM_FSWAP_RF:
f.F[i.Dst][HIGH], f.F[i.Dst][LOW] = f.F[i.Dst][LOW], f.F[i.Dst][HIGH]
case VM_FSWAP_RE:
f.E[i.Dst][HIGH], f.E[i.Dst][LOW] = f.E[i.Dst][LOW], f.E[i.Dst][HIGH]
case VM_FADD_R:
f.F[i.Dst][LOW] += f.A[i.Src][LOW]
f.F[i.Dst][HIGH] += f.A[i.Src][HIGH]
case VM_FADD_M:
lo, hi := pad.Load32F(i.getScratchpadAddress(f.R[i.Src]))
f.F[i.Dst][LOW] += lo
f.F[i.Dst][HIGH] += hi
case VM_FSUB_R:
f.F[i.Dst][LOW] -= f.A[i.Src][LOW]
f.F[i.Dst][HIGH] -= f.A[i.Src][HIGH]
case VM_FSUB_M:
lo, hi := pad.Load32F(i.getScratchpadAddress(f.R[i.Src]))
f.F[i.Dst][LOW] -= lo
f.F[i.Dst][HIGH] -= hi
case VM_FSCAL_R:
// not dependent on rounding mode
f.F[i.Dst][LOW] = softfloat.ScaleNegate(f.F[i.Dst][LOW])
f.F[i.Dst][HIGH] = softfloat.ScaleNegate(f.F[i.Dst][HIGH])
case VM_FMUL_R:
f.E[i.Dst][LOW] *= f.A[i.Src][LOW]
f.E[i.Dst][HIGH] *= f.A[i.Src][HIGH]
case VM_FDIV_M:
lo, hi := pad.Load32F(i.getScratchpadAddress(f.R[i.Src]))
f.E[i.Dst][LOW] /= softfloat.MaskRegisterExponentMantissa(lo, eMask[LOW])
f.E[i.Dst][HIGH] /= softfloat.MaskRegisterExponentMantissa(hi, eMask[HIGH])
case VM_FSQRT_R:
f.E[i.Dst][LOW] = math.Sqrt(f.E[i.Dst][LOW])
f.E[i.Dst][HIGH] = math.Sqrt(f.E[i.Dst][HIGH])
case VM_CBRANCH:
f.R[i.Src] += i.Imm
if (f.R[i.Src] & uint64(i.MemMask)) == 0 {
pc = i.jumpTarget()
}
case VM_CFROUND:
tmp := (bits.RotateLeft64(f.R[i.Src], 0-int(i.Imm))) % 4 // rotate right
f.SetRoundingMode(softfloat.RoundingMode(tmp))
case VM_ISTORE:
pad.Store64(i.getScratchpadAddress(f.R[i.Dst]), f.R[i.Src])
}
}
}
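Note that math/bits has no RotateRight64: a negative count to bits.RotateLeft64 rotates right, which VM_IROR_R, VM_IROR_I and VM_CFROUND all rely on. A quick illustration:

x := uint64(0x8000000000000001)
fmt.Printf("%#x\n", bits.RotateLeft64(x, -1)) // 0xc000000000000000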
type ByteCodeInstructionOp int
const (
VM_NOP = ByteCodeInstructionOp(iota)
VM_IADD_RS
VM_IADD_M
VM_IADD_MZ
VM_ISUB_R
VM_ISUB_I
VM_ISUB_M
VM_ISUB_MZ
VM_IMUL_R
VM_IMUL_I
VM_IMUL_M
VM_IMUL_MZ
VM_IMULH_R
VM_IMULH_M
VM_IMULH_MZ
VM_ISMULH_R
VM_ISMULH_M
VM_ISMULH_MZ
VM_INEG_R
VM_IXOR_R
VM_IXOR_I
VM_IXOR_M
VM_IXOR_MZ
VM_IROR_R
VM_IROR_I
VM_IROL_R
VM_IROL_I
VM_ISWAP_R
VM_FSWAP_RF
VM_FSWAP_RE
VM_FADD_R
VM_FADD_M
VM_FSUB_R
VM_FSUB_M
VM_FSCAL_R
VM_FMUL_R
VM_FDIV_M
VM_FSQRT_R
VM_CBRANCH
VM_CFROUND
VM_ISTORE
)

@ -37,8 +37,8 @@ import "encoding/binary"
//reference https://github.com/tevador/RandomX/blob/master/doc/specs.md#51-instruction-encoding
// since go does not have union, use byte array
type VM_Instruction []byte // it is hardcode 8 bytes
// VM_Instruction uses a byte array since Go does not have unions
type VM_Instruction [8]byte // it is hardcoded to 8 bytes
func (ins VM_Instruction) IMM() uint32 {
return binary.LittleEndian.Uint32(ins[4:])
@ -56,9 +56,9 @@ func (ins VM_Instruction) Opcode() byte {
return ins[0]
}
// CompileToBytecode this will interpret single vm instruction
// CompileProgramToByteCode interprets each vm instruction into executable opcodes
// reference https://github.com/tevador/RandomX/blob/master/doc/specs.md#52-integer-instructions
func (vm *VM) CompileToBytecode() {
func CompileProgramToByteCode(prog []byte) (bc ByteCode) {
var registerUsage [RegistersCount]int
for i := range registerUsage {
@ -66,8 +66,8 @@ func (vm *VM) CompileToBytecode() {
}
for i := 0; i < RANDOMX_PROGRAM_SIZE; i++ {
instr := VM_Instruction(vm.Prog[i*8:])
ibc := &vm.ByteCode[i]
instr := VM_Instruction(prog[i*8:])
ibc := &bc[i]
opcode := instr.Opcode()
dst := instr.Dst() % RegistersCount // bit shift optimization
@ -317,7 +317,7 @@ func (vm *VM) CompileToBytecode() {
//conditionmask := CONDITIONMASK << shift
ibc.Imm = signExtend2sCompl(instr.IMM()) | (uint64(1) << shift)
if CONDITIONOFFSET > 0 || shift > 0 {
ibc.Imm &= (^(uint64(1) << (shift - 1)))
ibc.Imm &= ^(uint64(1) << (shift - 1))
}
ibc.MemMask = CONDITIONMASK << shift
@ -349,6 +349,8 @@ func (vm *VM) CompileToBytecode() {
}
}
return bc
}
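A minimal sketch of the new compile-then-execute flow, mirroring VM.Run above (prog, reg and vm are assumed to be in scope):

// Compile the program bytes (8 per instruction) into bytecode once,
// then run it against the register file and scratchpad.
bc := CompileProgramToByteCode(prog)
bc.Execute(&reg, &vm.ScratchPad, vm.config.eMask)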
type ScratchPad [ScratchpadSize]byte