Compare commits

...

2 commits

Author       SHA1        Message                                                             Date
DataHoarder  432590f930  Move argon2 / float packages to their own folders, cleanup vm Run  2024-04-15 04:14:15 +02:00
DataHoarder  5b9b3c3565  Use direct register and scratchpad under bytecode execution        2024-04-15 02:22:04 +02:00
13 changed files with 599 additions and 537 deletions


@@ -1,58 +0,0 @@
package randomx
import "golang.org/x/crypto/blake2b"
import (
_ "golang.org/x/crypto/argon2"
_ "unsafe"
)
// see reference configuration.h
// Cache size in KiB. Must be a power of 2.
const RANDOMX_ARGON_MEMORY = 262144
// Number of Argon2d iterations for Cache initialization.
const RANDOMX_ARGON_ITERATIONS = 3
// Number of parallel lanes for Cache initialization.
const RANDOMX_ARGON_LANES = 1
// Argon2d salt
const RANDOMX_ARGON_SALT = "RandomX\x03"
const ArgonSaltSize uint32 = 8 //sizeof("" RANDOMX_ARGON_SALT) - 1
const ArgonBlockSize uint32 = 1024
type argonBlock [128]uint64
const syncPoints = 4
//go:linkname argon2_initHash golang.org/x/crypto/argon2.initHash
func argon2_initHash(password, salt, key, data []byte, time, memory, threads, keyLen uint32, mode int) [blake2b.Size + 8]byte
//go:linkname argon2_initBlocks golang.org/x/crypto/argon2.initBlocks
func argon2_initBlocks(h0 *[blake2b.Size + 8]byte, memory, threads uint32) []argonBlock
//go:linkname argon2_processBlocks golang.org/x/crypto/argon2.processBlocks
func argon2_processBlocks(B []argonBlock, time, memory, threads uint32, mode int)
// argon2_buildBlocks is taken from golang.org/x/crypto/argon2.deriveKey, without the final deriveKey call
func argon2_buildBlocks(password, salt, secret, data []byte, time, memory uint32, threads uint8, keyLen uint32) []argonBlock {
if time < 1 {
panic("argon2: number of rounds too small")
}
if threads < 1 {
panic("argon2: parallelism degree too low")
}
const mode = 0 /* argon2d */
h0 := argon2_initHash(password, salt, secret, data, time, memory, uint32(threads), keyLen, mode)
memory = memory / (syncPoints * uint32(threads)) * (syncPoints * uint32(threads))
if memory < 2*syncPoints*uint32(threads) {
memory = 2 * syncPoints * uint32(threads)
}
B := argon2_initBlocks(&h0, memory, uint32(threads))
argon2_processBlocks(B, time, memory, uint32(threads), mode)
return B
}

argon2/argon2.go (new file, 44 lines)

@@ -0,0 +1,44 @@
package argon2
import "golang.org/x/crypto/blake2b"
import (
_ "golang.org/x/crypto/argon2"
_ "unsafe"
)
const BlockSize uint32 = 1024
type Block [BlockSize / 8]uint64
const syncPoints = 4
//go:linkname initHash golang.org/x/crypto/argon2.initHash
func initHash(password, salt, key, data []byte, time, memory, threads, keyLen uint32, mode int) [blake2b.Size + 8]byte
//go:linkname initBlocks golang.org/x/crypto/argon2.initBlocks
func initBlocks(h0 *[blake2b.Size + 8]byte, memory, threads uint32) []Block
//go:linkname processBlocks golang.org/x/crypto/argon2.processBlocks
func processBlocks(B []Block, time, memory, threads uint32, mode int)
// BuildBlocks is taken from golang.org/x/crypto/argon2.deriveKey, without the final deriveKey call
func BuildBlocks(password, salt, secret, data []byte, time, memory uint32, threads uint8, keyLen uint32) []Block {
if time < 1 {
panic("argon2: number of rounds too small")
}
if threads < 1 {
panic("argon2: parallelism degree too low")
}
const mode = 0 /* argon2d */
h0 := initHash(password, salt, secret, data, time, memory, uint32(threads), keyLen, mode)
memory = memory / (syncPoints * uint32(threads)) * (syncPoints * uint32(threads))
if memory < 2*syncPoints*uint32(threads) {
memory = 2 * syncPoints * uint32(threads)
}
B := initBlocks(&h0, memory, uint32(threads))
processBlocks(B, time, memory, uint32(threads), mode)
return B
}
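BuildBlocks keeps Argon2d's block-filling phase but skips the final extraction step, which is exactly what RandomX cache initialization needs. A minimal usage sketch, assuming the module import path shown above and the reference parameters from config.go (the key value is hypothetical; the reference memory setting allocates 256 MiB):

package main

import "git.gammaspectra.live/P2Pool/go-randomx/v2/argon2"

func main() {
    // Reference configuration: 262144 KiB memory, 3 iterations, 1 lane,
    // salt "RandomX\x03". Yields 262144 blocks of 1 KiB each.
    key := []byte("test key 000") // hypothetical cache key
    blocks := argon2.BuildBlocks(key, []byte("RandomX\x03"), []byte{}, []byte{}, 3, 262144, 1, 0)
    _ = blocks
}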


@@ -1,14 +1,7 @@
package asm
type RoundingMode uint8
import "git.gammaspectra.live/P2Pool/go-randomx/v2/softfloat"
const (
RoundingModeToNearest = RoundingMode(iota)
RoundingModeToNegative
RoundingModeToPositive
RoundingModeToZero
)
func SetRoundingMode(mode RoundingMode) {
func SetRoundingMode(mode softfloat.RoundingMode) {
setRoundingMode(uint8(mode))
}


@@ -1,190 +0,0 @@
package randomx
import (
"encoding/binary"
"git.gammaspectra.live/P2Pool/go-randomx/v2/asm"
"math"
"math/bits"
)
type ByteCodeInstruction struct {
dst, src byte
idst, isrc *uint64
fdst, fsrc *[2]float64
imm uint64
simm int64
Opcode ByteCodeInstructionOp
target int16
shift uint8
memMask uint32
/*
union {
int_reg_t* idst;
rx_vec_f128* fdst;
};
union {
int_reg_t* isrc;
rx_vec_f128* fsrc;
};
union {
uint64_t imm;
int64_t simm;
};
InstructionType type;
union {
int16_t target;
uint16_t shift;
};
uint32_t memMask;
*/
}
func (i ByteCodeInstruction) getScratchpadSrcAddress() uint64 {
return (*i.isrc + i.imm) & uint64(i.memMask)
}
func (i ByteCodeInstruction) getScratchpadZeroAddress() uint64 {
return i.imm & uint64(i.memMask)
}
func (i ByteCodeInstruction) getScratchpadDestAddress() uint64 {
return (*i.idst + i.imm) & uint64(i.memMask)
}
type ByteCode [RANDOMX_PROGRAM_SIZE]ByteCodeInstruction
func (c *ByteCode) Interpret(vm *VM) {
for pc := 0; pc < RANDOMX_PROGRAM_SIZE; pc++ {
ibc := c[pc]
switch ibc.Opcode {
case VM_IADD_RS:
*ibc.idst += (*ibc.isrc << ibc.shift) + ibc.imm
case VM_IADD_M:
*ibc.idst += vm.Load64(ibc.getScratchpadSrcAddress())
case VM_IADD_MZ:
*ibc.idst += vm.Load64(ibc.getScratchpadZeroAddress())
case VM_ISUB_R:
*ibc.idst -= *ibc.isrc
case VM_ISUB_M:
*ibc.idst -= vm.Load64(ibc.getScratchpadSrcAddress())
case VM_ISUB_MZ:
*ibc.idst -= vm.Load64(ibc.getScratchpadZeroAddress())
case VM_IMUL_R:
// also handles imul_rcp
*ibc.idst *= *ibc.isrc
case VM_IMUL_M:
*ibc.idst *= vm.Load64(ibc.getScratchpadSrcAddress())
case VM_IMUL_MZ:
*ibc.idst *= vm.Load64(ibc.getScratchpadZeroAddress())
case VM_IMULH_R:
*ibc.idst, _ = bits.Mul64(*ibc.idst, *ibc.isrc)
case VM_IMULH_M:
*ibc.idst, _ = bits.Mul64(*ibc.idst, vm.Load64(ibc.getScratchpadSrcAddress()))
case VM_IMULH_MZ:
*ibc.idst, _ = bits.Mul64(*ibc.idst, vm.Load64(ibc.getScratchpadZeroAddress()))
case VM_ISMULH_R:
*ibc.idst = smulh(int64(*ibc.idst), int64(*ibc.isrc))
case VM_ISMULH_M:
*ibc.idst = smulh(int64(*ibc.idst), int64(vm.Load64(ibc.getScratchpadSrcAddress())))
case VM_ISMULH_MZ:
*ibc.idst = smulh(int64(*ibc.idst), int64(vm.Load64(ibc.getScratchpadZeroAddress())))
case VM_INEG_R:
*ibc.idst = (^(*ibc.idst)) + 1 // 2's complement negative
case VM_IXOR_R:
*ibc.idst ^= *ibc.isrc
case VM_IXOR_M:
*ibc.idst ^= vm.Load64(ibc.getScratchpadSrcAddress())
case VM_IXOR_MZ:
*ibc.idst ^= vm.Load64(ibc.getScratchpadZeroAddress())
case VM_IROR_R:
*ibc.idst = bits.RotateLeft64(*ibc.idst, 0-int(*ibc.isrc&63))
case VM_IROL_R:
*ibc.idst = bits.RotateLeft64(*ibc.idst, int(*ibc.isrc&63))
case VM_ISWAP_R:
*ibc.idst, *ibc.isrc = *ibc.isrc, *ibc.idst
case VM_FSWAP_R:
ibc.fdst[HIGH], ibc.fdst[LOW] = ibc.fdst[LOW], ibc.fdst[HIGH]
case VM_FADD_R:
ibc.fdst[LOW] += ibc.fsrc[LOW]
ibc.fdst[HIGH] += ibc.fsrc[HIGH]
case VM_FADD_M:
lo, hi := vm.Load32F(ibc.getScratchpadSrcAddress())
ibc.fdst[LOW] += lo
ibc.fdst[HIGH] += hi
case VM_FSUB_R:
ibc.fdst[LOW] -= ibc.fsrc[LOW]
ibc.fdst[HIGH] -= ibc.fsrc[HIGH]
case VM_FSUB_M:
lo, hi := vm.Load32F(ibc.getScratchpadSrcAddress())
ibc.fdst[LOW] -= lo
ibc.fdst[HIGH] -= hi
case VM_FSCAL_R:
// not dependent on rounding mode
ibc.fdst[LOW] = math.Float64frombits(math.Float64bits(ibc.fdst[LOW]) ^ 0x80F0000000000000)
ibc.fdst[HIGH] = math.Float64frombits(math.Float64bits(ibc.fdst[HIGH]) ^ 0x80F0000000000000)
case VM_FMUL_R:
ibc.fdst[LOW] *= ibc.fsrc[LOW]
ibc.fdst[HIGH] *= ibc.fsrc[HIGH]
case VM_FDIV_M:
lo, hi := vm.Load32F(ibc.getScratchpadSrcAddress())
ibc.fdst[LOW] /= MaskRegisterExponentMantissa(lo, vm.config.eMask[LOW])
ibc.fdst[HIGH] /= MaskRegisterExponentMantissa(hi, vm.config.eMask[HIGH])
case VM_FSQRT_R:
ibc.fdst[LOW] = math.Sqrt(ibc.fdst[LOW])
ibc.fdst[HIGH] = math.Sqrt(ibc.fdst[HIGH])
case VM_CBRANCH:
*ibc.isrc += ibc.imm
if (*ibc.isrc & uint64(ibc.memMask)) == 0 {
pc = int(ibc.target)
}
case VM_CFROUND:
tmp := (bits.RotateLeft64(*ibc.isrc, 0-int(ibc.imm))) % 4 // rotate right
asm.SetRoundingMode(asm.RoundingMode(tmp))
case VM_ISTORE:
binary.LittleEndian.PutUint64(vm.ScratchPad[(*ibc.idst+ibc.imm)&uint64(ibc.memMask):], *ibc.isrc)
case VM_NOP: // we do nothing
}
}
}
type ByteCodeInstructionOp int
const (
VM_NOP = ByteCodeInstructionOp(iota)
VM_IADD_RS
VM_IADD_M
VM_IADD_MZ
VM_ISUB_R
VM_ISUB_M
VM_ISUB_MZ
VM_IMUL_R
VM_IMUL_M
VM_IMUL_MZ
VM_IMULH_R
VM_IMULH_M
VM_IMULH_MZ
VM_ISMULH_R
VM_ISMULH_M
VM_ISMULH_MZ
VM_IMUL_RCP
VM_INEG_R
VM_IXOR_R
VM_IXOR_M
VM_IXOR_MZ
VM_IROR_R
VM_IROL_R
VM_ISWAP_R
VM_FSWAP_R
VM_FADD_R
VM_FADD_M
VM_FSUB_R
VM_FSUB_M
VM_FSCAL_R
VM_FMUL_R
VM_FDIV_M
VM_FSQRT_R
VM_CBRANCH
VM_CFROUND
VM_ISTORE
)


@@ -1,6 +1,7 @@
package randomx
import (
"git.gammaspectra.live/P2Pool/go-randomx/v2/argon2"
"git.gammaspectra.live/P2Pool/go-randomx/v2/keys"
"runtime"
"slices"
@@ -66,9 +67,9 @@ func (cache *Randomx_Cache) Init(key []byte) {
kkey := slices.Clone(key)
argonBlocks := argon2_buildBlocks(kkey, []byte(RANDOMX_ARGON_SALT), []byte{}, []byte{}, RANDOMX_ARGON_ITERATIONS, RANDOMX_ARGON_MEMORY, RANDOMX_ARGON_LANES, 0)
argonBlocks := argon2.BuildBlocks(kkey, []byte(RANDOMX_ARGON_SALT), []byte{}, []byte{}, RANDOMX_ARGON_ITERATIONS, RANDOMX_ARGON_MEMORY, RANDOMX_ARGON_LANES, 0)
memoryBlocks := unsafe.Slice((*MemoryBlock)(unsafe.Pointer(unsafe.SliceData(argonBlocks))), int(unsafe.Sizeof(argonBlock{}))/int(unsafe.Sizeof(MemoryBlock{}))*len(argonBlocks))
memoryBlocks := unsafe.Slice((*MemoryBlock)(unsafe.Pointer(unsafe.SliceData(argonBlocks))), int(unsafe.Sizeof(argon2.Block{}))/int(unsafe.Sizeof(MemoryBlock{}))*len(argonBlocks))
cache.Blocks = memoryBlocks
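The unsafe.Slice call above reinterprets the 1 KiB Argon2 blocks as cache-line-sized MemoryBlock values without copying. A standalone sketch of the same pattern, assuming MemoryBlock is a 64-byte [8]uint64 (so each Block maps to exactly 16 of them):

package main

import (
    "fmt"
    "unsafe"
)

type Block [128]uint64     // 1 KiB, as in package argon2
type MemoryBlock [8]uint64 // assumed cache-line-sized layout

func main() {
    blocks := make([]Block, 4)
    ratio := int(unsafe.Sizeof(Block{})) / int(unsafe.Sizeof(MemoryBlock{})) // 16
    mem := unsafe.Slice((*MemoryBlock)(unsafe.Pointer(unsafe.SliceData(blocks))), ratio*len(blocks))
    fmt.Println(len(mem)) // 64, same backing memory as blocks
}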


@@ -29,6 +29,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package randomx
import "git.gammaspectra.live/P2Pool/go-randomx/v2/argon2"
// see reference configuration.h
// Cache size in KiB. Must be a power of 2.
const RANDOMX_ARGON_MEMORY = 262144
// Number of Argon2d iterations for Cache initialization.
const RANDOMX_ARGON_ITERATIONS = 3
// Number of parallel lanes for Cache initialization.
const RANDOMX_ARGON_LANES = 1
// Argon2d salt
const RANDOMX_ARGON_SALT = "RandomX\x03"
const ArgonSaltSize uint32 = 8 //sizeof("" RANDOMX_ARGON_SALT) - 1
// Number of random Cache accesses per Dataset item. Minimum is 2.
const RANDOMX_CACHE_ACCESSES = 8
@@ -74,7 +90,7 @@ const ScratchpadSize uint32 = RANDOMX_SCRATCHPAD_L3
const CacheLineAlignMask = (RANDOMX_DATASET_BASE_SIZE - 1) & (^(CacheLineSize - 1))
const CacheSize uint64 = RANDOMX_ARGON_MEMORY * uint64(ArgonBlockSize)
const CacheSize uint64 = RANDOMX_ARGON_MEMORY * uint64(argon2.BlockSize)
const ScratchpadL1 = RANDOMX_SCRATCHPAD_L1 / 8
const ScratchpadL2 = RANDOMX_SCRATCHPAD_L2 / 8
@@ -87,19 +103,9 @@ const ScratchpadL3Mask = (ScratchpadL3 - 1) * 8
const ScratchpadL3Mask64 = (ScratchpadL3/8 - 1) * 64
const CONDITIONOFFSET = RANDOMX_JUMP_OFFSET
const CONDITIONMASK = ((1 << RANDOMX_JUMP_BITS) - 1)
const CONDITIONMASK = (1 << RANDOMX_JUMP_BITS) - 1
const STOREL3CONDITION = 14
const mantissaSize = 52
const exponentSize = 11
const mantissaMask = (uint64(1) << mantissaSize) - 1
const exponentMask = (uint64(1) << exponentSize) - 1
const exponentBias = 1023
const dynamicExponentBits = 4
const staticExponentBits = 4
const constExponentBits uint64 = 0x300
const dynamicMantissaMask = (uint64(1) << (mantissaSize + dynamicExponentBits)) - 1
const RANDOMX_FLAG_DEFAULT = uint64(0)
const RANDOMX_FLAG_JIT = uint64(1 << iota)


@@ -1,15 +1,33 @@
package randomx
import (
"git.gammaspectra.live/P2Pool/go-randomx/v2/asm"
"git.gammaspectra.live/P2Pool/go-randomx/v2/softfloat"
)
const RegistersCount = 8
const RegistersCountFloat = 4
const LOW = 0
const HIGH = 1
type RegisterLine [RegistersCount]uint64
type RegisterFile struct {
r RegisterLine
f [RegistersCountFloat][2]float64
e [RegistersCountFloat][2]float64
a [RegistersCountFloat][2]float64
R RegisterLine
F [RegistersCountFloat][2]float64
E [RegistersCountFloat][2]float64
A [RegistersCountFloat][2]float64
FPRC softfloat.RoundingMode
}
func (f *RegisterFile) SetRoundingMode(mode softfloat.RoundingMode) {
if f.FPRC == mode {
return
}
f.FPRC = mode
asm.SetRoundingMode(mode)
}
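FPRC caches the last rounding mode set, so repeated calls with an unchanged mode skip the comparatively expensive hardware update. An illustrative sketch (exampleRounding is a hypothetical helper, written as if inside package randomx):

func exampleRounding(rf *RegisterFile) {
    rf.SetRoundingMode(softfloat.RoundingModeToZero)    // differs from FPRC, hits asm.SetRoundingMode
    rf.SetRoundingMode(softfloat.RoundingModeToZero)    // matches FPRC, returns early
    rf.SetRoundingMode(softfloat.RoundingModeToNearest) // differs again, hits asm.SetRoundingMode
}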
type MemoryRegisters struct {

softfloat/const.go (new file, 37 lines)

@@ -0,0 +1,37 @@
package softfloat
const (
mantbits64 uint = 52
expbits64 uint = 11
bias64 = -1<<(expbits64-1) + 1
nan64 uint64 = (1<<expbits64-1)<<mantbits64 + 1<<(mantbits64-1) // quiet NaN, 0 payload
inf64 uint64 = (1<<expbits64 - 1) << mantbits64
neg64 uint64 = 1 << (expbits64 + mantbits64)
)
const mantissaMask = (uint64(1) << mantbits64) - 1
const exponentMask = (uint64(1) << expbits64) - 1
const exponentBias = 1023
const dynamicExponentBits = 4
const staticExponentBits = 4
const constExponentBits uint64 = 0x300
const dynamicMantissaMask = (uint64(1) << (mantbits64 + dynamicExponentBits)) - 1
const mask22bit = (uint64(1) << 22) - 1
type RoundingMode uint8
const (
// RoundingModeToNearest IEEE 754 roundTiesToEven
RoundingModeToNearest = RoundingMode(iota)
// RoundingModeToNegative IEEE 754 roundTowardNegative
RoundingModeToNegative
// RoundingModeToPositive IEEE 754 roundTowardPositive
RoundingModeToPositive
// RoundingModeToZero IEEE 754 roundTowardZero
RoundingModeToZero
)
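For reference, the bit-level constants above work out to the familiar IEEE-754 double patterns; a quick standalone check, assuming only the standard library:

package main

import (
    "fmt"
    "math"
)

func main() {
    const inf64 = uint64(0x7FF0000000000000) // (1<<expbits64 - 1) << mantbits64
    const nan64 = uint64(0x7FF8000000000000) // inf64 plus the quiet bit 1<<(mantbits64-1)
    const neg64 = uint64(0x8000000000000000) // sign bit, 1 << (expbits64 + mantbits64)
    fmt.Println(math.IsInf(math.Float64frombits(inf64), 1)) // true
    fmt.Println(math.IsNaN(math.Float64frombits(nan64)))    // true
    fmt.Println(math.Signbit(math.Float64frombits(neg64)))  // true
}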

softfloat/funcs.go (new file, 35 lines)

@@ -0,0 +1,35 @@
package softfloat
import "math"
func MaskRegisterExponentMantissa(f float64, mode uint64) float64 {
return math.Float64frombits((math.Float64bits(f) & dynamicMantissaMask) | mode)
}
func ScaleNegate(f float64) float64 {
return math.Float64frombits(math.Float64bits(f) ^ 0x80F0000000000000)
}
func SmallPositiveFloatBits(entropy uint64) float64 {
exponent := entropy >> 59 //0..31
mantissa := entropy & mantissaMask
exponent += exponentBias
exponent &= exponentMask
exponent = exponent << mantbits64
return math.Float64frombits(exponent | mantissa)
}
func StaticExponent(entropy uint64) uint64 {
exponent := constExponentBits
exponent |= (entropy >> (64 - staticExponentBits)) << dynamicExponentBits
exponent <<= mantbits64
return exponent
}
func EMask(entropy uint64) uint64 {
return (entropy & mask22bit) | StaticExponent(entropy)
}
func Xor(a, b float64) float64 {
return math.Float64frombits(math.Float64bits(a) ^ math.Float64bits(b))
}
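SmallPositiveFloatBits takes the top 5 entropy bits as an unbiased exponent 0..31 and the low 52 bits as the mantissa, so the result always falls in [1, 2^32). A worked standalone example:

package main

import (
    "fmt"
    "math"
)

func main() {
    entropy := uint64(0xF800000000000000)              // top 5 bits = 31, mantissa bits = 0
    exponent := (entropy>>59 + 1023) & ((1 << 11) - 1) // biased exponent 1054
    bits := exponent<<52 | (entropy & ((1 << 52) - 1))
    fmt.Println(math.Float64frombits(bits)) // 2.147483648e+09, i.e. 2^31
}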

softfloat/softfloat.go (new file, 27 lines)

@@ -0,0 +1,27 @@
package softfloat
import (
_ "runtime"
_ "unsafe"
)
//go:linkname funpack64 runtime.funpack64
func funpack64(f uint64) (sign, mant uint64, exp int, inf, nan bool)
//go:linkname fpack64 runtime.fpack64
func fpack64(sign, mant uint64, exp int, trunc uint64) uint64
//go:linkname fadd64 runtime.fadd64
func fadd64(f, g uint64) uint64
//go:linkname fsub64 runtime.fsub64
func fsub64(f, g uint64) uint64
//go:linkname fneg64 runtime.fneg64
func fneg64(f uint64) uint64
//go:linkname fmul64 runtime.fmul64
func fmul64(f, g uint64) uint64
//go:linkname fdiv64 runtime.fdiv64
func fdiv64(f, g uint64) uint64
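These declarations borrow the Go runtime's software floating-point routines via //go:linkname: the blank unsafe import licenses the directive, and bodyless declarations like these typically also need an empty .s file in the package to satisfy the compiler. A hedged sketch of wrapping one of them (softAdd is an illustrative name, not part of this package; assumes math is imported):

// Inside package softfloat: fadd64 operates on raw IEEE-754 bit patterns,
// so float64 values are converted at the boundary.
func softAdd(a, b float64) float64 {
    return math.Float64frombits(fadd64(math.Float64bits(a), math.Float64bits(b)))
}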

vm.go (188 lines changed)

@@ -31,9 +31,10 @@ package randomx
import (
"git.gammaspectra.live/P2Pool/go-randomx/v2/aes"
"git.gammaspectra.live/P2Pool/go-randomx/v2/asm"
"git.gammaspectra.live/P2Pool/go-randomx/v2/softfloat"
"math"
"runtime"
"unsafe"
)
import "encoding/binary"
import "golang.org/x/crypto/blake2b"
@@ -45,17 +46,10 @@ type REG struct {
type VM struct {
StateStart [64]byte
buffer [RANDOMX_PROGRAM_SIZE*8 + 16*8]byte // first 128 bytes are entropy, the rest are program bytes
Prog []byte
ScratchPad [ScratchpadSize]byte
ScratchPad ScratchPad
ByteCode ByteCode
// program configuration see program.hpp
entropy [16]uint64
reg RegisterFile // the register file
mem MemoryRegisters
config Config // configuration
datasetOffset uint64
@@ -66,51 +60,47 @@
}
func MaskRegisterExponentMantissa(f float64, mode uint64) float64 {
return math.Float64frombits((math.Float64bits(f) & dynamicMantissaMask) | mode)
}
type Config struct {
eMask [2]uint64
readReg [4]uint64
}
const LOW = 0
const HIGH = 1
// Run calculates a hash based on the input
// Warning: Underlying calls will invoke asm.SetRoundingMode directly
// It is the caller's responsibility to set and restore the mode to softfloat.RoundingModeToNearest between full executions
// Additionally, runtime.LockOSThread and defer runtime.UnlockOSThread are recommended to prevent other goroutines from sharing these changes
func (vm *VM) Run(inputHash [64]byte, roundingMode softfloat.RoundingMode) (reg RegisterFile) {
// calculate hash based on input
func (vm *VM) Run(input_hash [64]byte) {
reg.FPRC = roundingMode
aes.FillAes4Rx4(input_hash, vm.buffer[:])
// buffer: first 128 bytes are entropy, the rest are program bytes
var buffer [16*8 + RANDOMX_PROGRAM_SIZE*8]byte
aes.FillAes4Rx4(inputHash, buffer[:])
for i := range vm.entropy {
vm.entropy[i] = binary.LittleEndian.Uint64(vm.buffer[i*8:])
}
entropy := (*[16]uint64)(unsafe.Pointer(&buffer))
vm.Prog = vm.buffer[len(vm.entropy)*8:]
clear(vm.reg.r[:])
prog := buffer[len(entropy)*8:]
// do more initialization before we run
for i := range vm.entropy[:8] {
vm.reg.a[i/2][i%2] = math.Float64frombits(getSmallPositiveFloatBits(vm.entropy[i]))
for i := range entropy[:8] {
reg.A[i/2][i%2] = softfloat.SmallPositiveFloatBits(entropy[i])
}
vm.mem.ma = vm.entropy[8] & CacheLineAlignMask
vm.mem.mx = vm.entropy[10]
vm.mem.ma = entropy[8] & CacheLineAlignMask
vm.mem.mx = entropy[10]
addressRegisters := vm.entropy[12]
addressRegisters := entropy[12]
for i := range vm.config.readReg {
vm.config.readReg[i] = uint64(i*2) + (addressRegisters & 1)
addressRegisters >>= 1
}
vm.datasetOffset = (vm.entropy[13] % (DATASETEXTRAITEMS + 1)) * CacheLineSize
vm.config.eMask[LOW] = getFloatMask(vm.entropy[14])
vm.config.eMask[HIGH] = getFloatMask(vm.entropy[15])
vm.datasetOffset = (entropy[13] % (DATASETEXTRAITEMS + 1)) * CacheLineSize
vm.config.eMask[LOW] = softfloat.EMask(entropy[14])
vm.config.eMask[HIGH] = softfloat.EMask(entropy[15])
vm.CompileToBytecode()
vm.ByteCode = CompileProgramToByteCode(prog)
spAddr0 := vm.mem.mx
spAddr1 := vm.mem.ma
@@ -118,51 +108,52 @@ func (vm *VM) Run(input_hash [64]byte) {
var rlCache RegisterLine
for ic := 0; ic < RANDOMX_PROGRAM_ITERATIONS; ic++ {
spMix := vm.reg.r[vm.config.readReg[0]] ^ vm.reg.r[vm.config.readReg[1]]
spMix := reg.R[vm.config.readReg[0]] ^ reg.R[vm.config.readReg[1]]
spAddr0 ^= spMix
spAddr0 &= ScratchpadL3Mask64
spAddr1 ^= spMix >> 32
spAddr1 &= ScratchpadL3Mask64
//TODO: optimize these loads!
for i := uint64(0); i < RegistersCount; i++ {
vm.reg.r[i] ^= vm.Load64(spAddr0 + 8*i)
reg.R[i] ^= vm.ScratchPad.Load64(uint32(spAddr0 + 8*i))
}
for i := uint64(0); i < RegistersCountFloat; i++ {
vm.reg.f[i] = vm.Load32FA(spAddr1 + 8*i)
reg.F[i] = vm.ScratchPad.Load32FA(uint32(spAddr1 + 8*i))
}
for i := uint64(0); i < RegistersCountFloat; i++ {
vm.reg.e[i] = vm.Load32FA(spAddr1 + 8*(i+RegistersCountFloat))
reg.E[i] = vm.ScratchPad.Load32FA(uint32(spAddr1 + 8*(i+RegistersCountFloat)))
vm.reg.e[i][LOW] = MaskRegisterExponentMantissa(vm.reg.e[i][LOW], vm.config.eMask[LOW])
vm.reg.e[i][HIGH] = MaskRegisterExponentMantissa(vm.reg.e[i][HIGH], vm.config.eMask[HIGH])
reg.E[i][LOW] = softfloat.MaskRegisterExponentMantissa(reg.E[i][LOW], vm.config.eMask[LOW])
reg.E[i][HIGH] = softfloat.MaskRegisterExponentMantissa(reg.E[i][HIGH], vm.config.eMask[HIGH])
}
// todo: pass register file directly!
vm.ByteCode.Interpret(vm)
// Run the actual bytecode
vm.ByteCode.Execute(&reg, &vm.ScratchPad, vm.config.eMask)
vm.mem.mx ^= vm.reg.r[vm.config.readReg[2]] ^ vm.reg.r[vm.config.readReg[3]]
vm.mem.mx ^= reg.R[vm.config.readReg[2]] ^ reg.R[vm.config.readReg[3]]
vm.mem.mx &= CacheLineAlignMask
vm.Dataset.PrefetchDataset(vm.datasetOffset + vm.mem.mx)
// execute diffuser superscalar program to get dataset 64 bytes
vm.Dataset.ReadDataset(vm.datasetOffset+vm.mem.ma, &vm.reg.r, &rlCache)
vm.Dataset.ReadDataset(vm.datasetOffset+vm.mem.ma, &reg.R, &rlCache)
// swap the elements
vm.mem.mx, vm.mem.ma = vm.mem.ma, vm.mem.mx
for i := uint64(0); i < RegistersCount; i++ {
binary.LittleEndian.PutUint64(vm.ScratchPad[spAddr1+8*i:], vm.reg.r[i])
vm.ScratchPad.Store64(uint32(spAddr1+8*i), reg.R[i])
}
for i := uint64(0); i < RegistersCountFloat; i++ {
vm.reg.f[i][LOW] = math.Float64frombits(math.Float64bits(vm.reg.f[i][LOW]) ^ math.Float64bits(vm.reg.e[i][LOW]))
vm.reg.f[i][HIGH] = math.Float64frombits(math.Float64bits(vm.reg.f[i][HIGH]) ^ math.Float64bits(vm.reg.e[i][HIGH]))
reg.F[i][LOW] = softfloat.Xor(reg.F[i][LOW], reg.E[i][LOW])
reg.F[i][HIGH] = softfloat.Xor(reg.F[i][HIGH], reg.E[i][HIGH])
binary.LittleEndian.PutUint64(vm.ScratchPad[spAddr0+16*i:], math.Float64bits(vm.reg.f[i][LOW]))
binary.LittleEndian.PutUint64(vm.ScratchPad[spAddr0+16*i+8:], math.Float64bits(vm.reg.f[i][HIGH]))
vm.ScratchPad.Store64(uint32(spAddr0+16*i), math.Float64bits(reg.F[i][LOW]))
vm.ScratchPad.Store64(uint32(spAddr0+16*i+8), math.Float64bits(reg.F[i][HIGH]))
}
spAddr0 = 0
@@ -170,58 +161,52 @@ func (vm *VM) Run(input_hash [64]byte) {
}
return reg
}
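The doc comment above shifts the rounding-mode bookkeeping to the caller; a minimal sketch of the expected calling pattern (runOnce is a hypothetical helper inside package randomx, mirroring the locking and restore code removed from CalculateHash below):

func runOnce(vm *VM, seed [64]byte) RegisterFile {
    runtime.LockOSThread() // rounding mode is per-thread state
    defer runtime.UnlockOSThread()
    // restore the rounding mode Go itself expects before returning
    defer asm.SetRoundingMode(softfloat.RoundingModeToNearest)
    return vm.Run(seed, softfloat.RoundingModeToNearest)
}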
func (vm *VM) InitScratchpad(seed *[64]byte) {
// calculate and fill scratchpad
clear(vm.ScratchPad[:])
aes.FillAes1Rx4(seed, vm.ScratchPad[:])
vm.ScratchPad.Init(seed)
}
func (vm *VM) CalculateHash(input []byte, output *[32]byte) {
func (vm *VM) RunLoops(tempHash [64]byte) RegisterFile {
var buf [8]byte
hash512, _ := blake2b.New512(nil)
// Lock thread due to rounding mode flags
runtime.LockOSThread()
defer runtime.UnlockOSThread()
//restore rounding mode to golang expected one
defer asm.SetRoundingMode(asm.RoundingModeToNearest)
// reset rounding mode if new hash being calculated
asm.SetRoundingMode(asm.RoundingModeToNearest)
tempHash := blake2b.Sum512(input)
vm.InitScratchpad(&tempHash)
hash512, _ := blake2b.New512(nil)
roundingMode := softfloat.RoundingModeToNearest
for chain := 0; chain < RANDOMX_PROGRAM_COUNT-1; chain++ {
vm.Run(tempHash)
reg := vm.Run(tempHash, roundingMode)
roundingMode = reg.FPRC
hash512.Reset()
for i := range vm.reg.r {
binary.LittleEndian.PutUint64(buf[:], vm.reg.r[i])
for i := range reg.R {
binary.LittleEndian.PutUint64(buf[:], reg.R[i])
hash512.Write(buf[:])
}
for i := range vm.reg.f {
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(vm.reg.f[i][LOW]))
for i := range reg.F {
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(reg.F[i][LOW]))
hash512.Write(buf[:])
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(vm.reg.f[i][HIGH]))
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(reg.F[i][HIGH]))
hash512.Write(buf[:])
}
for i := range vm.reg.e {
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(vm.reg.e[i][LOW]))
for i := range reg.E {
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(reg.E[i][LOW]))
hash512.Write(buf[:])
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(vm.reg.e[i][HIGH]))
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(reg.E[i][HIGH]))
hash512.Write(buf[:])
}
for i := range vm.reg.a {
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(vm.reg.a[i][LOW]))
for i := range reg.A {
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(reg.A[i][LOW]))
hash512.Write(buf[:])
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(vm.reg.a[i][HIGH]))
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(reg.A[i][HIGH]))
hash512.Write(buf[:])
}
@@ -229,7 +214,22 @@ func (vm *VM) CalculateHash(input []byte, output *[32]byte) {
}
// final loop executes here
vm.Run(tempHash)
reg := vm.Run(tempHash, roundingMode)
roundingMode = reg.FPRC
reg.SetRoundingMode(softfloat.RoundingModeToNearest)
return reg
}
func (vm *VM) CalculateHash(input []byte, output *[32]byte) {
var buf [8]byte
tempHash := blake2b.Sum512(input)
vm.InitScratchpad(&tempHash)
reg := vm.RunLoops(tempHash)
// now hash the scratch pad and place into register a
aes.HashAes1Rx4(vm.ScratchPad[:], &tempHash)
@@ -238,22 +238,22 @@ func (vm *VM) CalculateHash(input []byte, output *[32]byte) {
hash256.Reset()
for i := range vm.reg.r {
binary.LittleEndian.PutUint64(buf[:], vm.reg.r[i])
for i := range reg.R {
binary.LittleEndian.PutUint64(buf[:], reg.R[i])
hash256.Write(buf[:])
}
for i := range vm.reg.f {
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(vm.reg.f[i][LOW]))
for i := range reg.F {
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(reg.F[i][LOW]))
hash256.Write(buf[:])
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(vm.reg.f[i][HIGH]))
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(reg.F[i][HIGH]))
hash256.Write(buf[:])
}
for i := range vm.reg.e {
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(vm.reg.e[i][LOW]))
for i := range reg.E {
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(reg.E[i][LOW]))
hash256.Write(buf[:])
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(vm.reg.e[i][HIGH]))
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(reg.E[i][HIGH]))
hash256.Write(buf[:])
}
@@ -262,25 +262,3 @@ func (vm *VM) CalculateHash(input []byte, output *[32]byte) {
hash256.Sum(output[:0])
}
const mask22bit = (uint64(1) << 22) - 1
func getSmallPositiveFloatBits(entropy uint64) uint64 {
exponent := entropy >> 59 //0..31
mantissa := entropy & mantissaMask
exponent += exponentBias
exponent &= exponentMask
exponent = exponent << mantissaSize
return exponent | mantissa
}
func getStaticExponent(entropy uint64) uint64 {
exponent := constExponentBits
exponent |= (entropy >> (64 - staticExponentBits)) << dynamicExponentBits
exponent <<= mantissaSize
return exponent
}
func getFloatMask(entropy uint64) uint64 {
return (entropy & mask22bit) | getStaticExponent(entropy)
}

vm_bytecode.go (new file, 207 lines)

@@ -0,0 +1,207 @@
package randomx
import (
"git.gammaspectra.live/P2Pool/go-randomx/v2/softfloat"
"math"
"math/bits"
)
type ByteCodeInstruction struct {
Dst, Src byte
ImmB uint8
Opcode ByteCodeInstructionOp
MemMask uint32
Imm uint64
/*
union {
int_reg_t* idst;
rx_vec_f128* fdst;
};
union {
int_reg_t* isrc;
rx_vec_f128* fsrc;
};
union {
uint64_t imm;
int64_t simm;
};
InstructionType type;
union {
int16_t target;
uint16_t shift;
};
uint32_t memMask;
*/
}
func (i ByteCodeInstruction) jumpTarget() int {
return int(int16((uint16(i.ImmB) << 8) | uint16(i.Dst)))
}
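The branch target is a signed 16-bit program counter packed into the Dst and ImmB bytes by the compiler (see the VM_CBRANCH case further down). A standalone round-trip check, assuming the -1 initialization that the reference compiler uses for registerUsage:

package main

import "fmt"

func main() {
    target := int16(-1) // "jump to program start": pc = -1, then pc++ resumes at 0
    packed := uint16(target)
    dst, immB := uint8(packed), uint8(packed>>8)
    decoded := int(int16(uint16(immB)<<8 | uint16(dst))) // mirrors jumpTarget above
    fmt.Println(decoded)                                 // -1
}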
func (i ByteCodeInstruction) getScratchpadAddress(ptr uint64) uint32 {
return uint32(ptr+i.Imm) & i.MemMask
}
func (i ByteCodeInstruction) getScratchpadZeroAddress() uint32 {
return uint32(i.Imm) & i.MemMask
}
type ByteCode [RANDOMX_PROGRAM_SIZE]ByteCodeInstruction
// Execute runs a RandomX program with the given register file and scratchpad
// Warning: This will call asm.SetRoundingMode directly
// It is the caller's responsibility to set and restore the mode to softfloat.RoundingModeToNearest between full executions
// Additionally, runtime.LockOSThread and defer runtime.UnlockOSThread are recommended to prevent other goroutines from sharing these changes
func (c *ByteCode) Execute(f *RegisterFile, pad *ScratchPad, eMask [2]uint64) {
for pc := 0; pc < RANDOMX_PROGRAM_SIZE; pc++ {
i := &c[pc]
switch i.Opcode {
case VM_NOP: // we do nothing
case VM_IADD_RS:
f.R[i.Dst] += (f.R[i.Src] << i.ImmB) + i.Imm
case VM_IADD_M:
f.R[i.Dst] += pad.Load64(i.getScratchpadAddress(f.R[i.Src]))
case VM_IADD_MZ:
f.R[i.Dst] += pad.Load64(uint32(i.Imm))
case VM_ISUB_R:
f.R[i.Dst] -= f.R[i.Src]
case VM_ISUB_I:
f.R[i.Dst] -= i.Imm
case VM_ISUB_M:
f.R[i.Dst] -= pad.Load64(i.getScratchpadAddress(f.R[i.Src]))
case VM_ISUB_MZ:
f.R[i.Dst] -= pad.Load64(uint32(i.Imm))
case VM_IMUL_R:
f.R[i.Dst] *= f.R[i.Src]
case VM_IMUL_I:
// also handles imul_rcp
f.R[i.Dst] *= i.Imm
case VM_IMUL_M:
f.R[i.Dst] *= pad.Load64(i.getScratchpadAddress(f.R[i.Src]))
case VM_IMUL_MZ:
f.R[i.Dst] *= pad.Load64(uint32(i.Imm))
case VM_IMULH_R:
f.R[i.Dst], _ = bits.Mul64(f.R[i.Dst], f.R[i.Src])
case VM_IMULH_M:
f.R[i.Dst], _ = bits.Mul64(f.R[i.Dst], pad.Load64(i.getScratchpadAddress(f.R[i.Src])))
case VM_IMULH_MZ:
f.R[i.Dst], _ = bits.Mul64(f.R[i.Dst], pad.Load64(uint32(i.Imm)))
case VM_ISMULH_R:
f.R[i.Dst] = smulh(int64(f.R[i.Dst]), int64(f.R[i.Src]))
case VM_ISMULH_M:
f.R[i.Dst] = smulh(int64(f.R[i.Dst]), int64(pad.Load64(i.getScratchpadAddress(f.R[i.Src]))))
case VM_ISMULH_MZ:
f.R[i.Dst] = smulh(int64(f.R[i.Dst]), int64(pad.Load64(uint32(i.Imm))))
case VM_INEG_R:
f.R[i.Dst] = -f.R[i.Dst]
case VM_IXOR_R:
f.R[i.Dst] ^= f.R[i.Src]
case VM_IXOR_I:
f.R[i.Dst] ^= i.Imm
case VM_IXOR_M:
f.R[i.Dst] ^= pad.Load64(i.getScratchpadAddress(f.R[i.Src]))
case VM_IXOR_MZ:
f.R[i.Dst] ^= pad.Load64(uint32(i.Imm))
case VM_IROR_R:
f.R[i.Dst] = bits.RotateLeft64(f.R[i.Dst], 0-int(f.R[i.Src]&63))
case VM_IROR_I:
//todo: can merge into VM_IROL_I
f.R[i.Dst] = bits.RotateLeft64(f.R[i.Dst], 0-int(i.Imm&63))
case VM_IROL_R:
f.R[i.Dst] = bits.RotateLeft64(f.R[i.Dst], int(f.R[i.Src]&63))
case VM_IROL_I:
f.R[i.Dst] = bits.RotateLeft64(f.R[i.Dst], int(i.Imm&63))
case VM_ISWAP_R:
f.R[i.Dst], f.R[i.Src] = f.R[i.Src], f.R[i.Dst]
case VM_FSWAP_RF:
f.F[i.Dst][HIGH], f.F[i.Dst][LOW] = f.F[i.Dst][LOW], f.F[i.Dst][HIGH]
case VM_FSWAP_RE:
f.E[i.Dst][HIGH], f.E[i.Dst][LOW] = f.E[i.Dst][LOW], f.E[i.Dst][HIGH]
case VM_FADD_R:
f.F[i.Dst][LOW] += f.A[i.Src][LOW]
f.F[i.Dst][HIGH] += f.A[i.Src][HIGH]
case VM_FADD_M:
lo, hi := pad.Load32F(i.getScratchpadAddress(f.R[i.Src]))
f.F[i.Dst][LOW] += lo
f.F[i.Dst][HIGH] += hi
case VM_FSUB_R:
f.F[i.Dst][LOW] -= f.A[i.Src][LOW]
f.F[i.Dst][HIGH] -= f.A[i.Src][HIGH]
case VM_FSUB_M:
lo, hi := pad.Load32F(i.getScratchpadAddress(f.R[i.Src]))
f.F[i.Dst][LOW] -= lo
f.F[i.Dst][HIGH] -= hi
case VM_FSCAL_R:
// not dependent on rounding mode
f.F[i.Dst][LOW] = softfloat.ScaleNegate(f.F[i.Dst][LOW])
f.F[i.Dst][HIGH] = softfloat.ScaleNegate(f.F[i.Dst][HIGH])
case VM_FMUL_R:
f.E[i.Dst][LOW] *= f.A[i.Src][LOW]
f.E[i.Dst][HIGH] *= f.A[i.Src][HIGH]
case VM_FDIV_M:
lo, hi := pad.Load32F(i.getScratchpadAddress(f.R[i.Src]))
f.E[i.Dst][LOW] /= softfloat.MaskRegisterExponentMantissa(lo, eMask[LOW])
f.E[i.Dst][HIGH] /= softfloat.MaskRegisterExponentMantissa(hi, eMask[HIGH])
case VM_FSQRT_R:
f.E[i.Dst][LOW] = math.Sqrt(f.E[i.Dst][LOW])
f.E[i.Dst][HIGH] = math.Sqrt(f.E[i.Dst][HIGH])
case VM_CBRANCH:
f.R[i.Src] += i.Imm
if (f.R[i.Src] & uint64(i.MemMask)) == 0 {
pc = i.jumpTarget()
}
case VM_CFROUND:
tmp := (bits.RotateLeft64(f.R[i.Src], 0-int(i.Imm))) % 4 // rotate right
f.SetRoundingMode(softfloat.RoundingMode(tmp))
case VM_ISTORE:
pad.Store64(i.getScratchpadAddress(f.R[i.Dst]), f.R[i.Src])
}
}
}
type ByteCodeInstructionOp int
const (
VM_NOP = ByteCodeInstructionOp(iota)
VM_IADD_RS
VM_IADD_M
VM_IADD_MZ
VM_ISUB_R
VM_ISUB_I
VM_ISUB_M
VM_ISUB_MZ
VM_IMUL_R
VM_IMUL_I
VM_IMUL_M
VM_IMUL_MZ
VM_IMULH_R
VM_IMULH_M
VM_IMULH_MZ
VM_ISMULH_R
VM_ISMULH_M
VM_ISMULH_MZ
VM_INEG_R
VM_IXOR_R
VM_IXOR_I
VM_IXOR_M
VM_IXOR_MZ
VM_IROR_R
VM_IROR_I
VM_IROL_R
VM_IROL_I
VM_ISWAP_R
VM_FSWAP_RF
VM_FSWAP_RE
VM_FADD_R
VM_FADD_M
VM_FSUB_R
VM_FSUB_M
VM_FSCAL_R
VM_FMUL_R
VM_FDIV_M
VM_FSQRT_R
VM_CBRANCH
VM_CFROUND
VM_ISTORE
)


@@ -30,14 +30,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package randomx
import (
"git.gammaspectra.live/P2Pool/go-randomx/v2/aes"
"unsafe"
)
import "encoding/binary"
//reference https://github.com/tevador/RandomX/blob/master/doc/specs.md#51-instruction-encoding
// since go does not have union, use byte array
type VM_Instruction []byte // it is hardcode 8 bytes
// VM_Instruction uses a byte array since go does not have unions
type VM_Instruction [8]byte // hardcoded to 8 bytes
func (ins VM_Instruction) IMM() uint32 {
return binary.LittleEndian.Uint32(ins[4:])
@@ -55,9 +56,9 @@ func (ins VM_Instruction) Opcode() byte {
return ins[0]
}
// CompileToBytecode this will interpret single vm instruction
// CompileProgramToByteCode interprets each VM instruction into executable opcodes
// reference https://github.com/tevador/RandomX/blob/master/doc/specs.md#52-integer-instructions
func (vm *VM) CompileToBytecode() {
func CompileProgramToByteCode(prog []byte) (bc ByteCode) {
var registerUsage [RegistersCount]int
for i := range registerUsage {
@@ -65,150 +66,130 @@ func (vm *VM) CompileToBytecode() {
}
for i := 0; i < RANDOMX_PROGRAM_SIZE; i++ {
instr := VM_Instruction(vm.Prog[i*8:])
ibc := &vm.ByteCode[i]
instr := VM_Instruction(prog[i*8:])
ibc := &bc[i]
opcode := instr.Opcode()
dst := instr.Dst() % RegistersCount // bit shift optimization
src := instr.Src() % RegistersCount
ibc.dst = dst
ibc.src = src
ibc.Dst = dst
ibc.Src = src
switch opcode {
case 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15: // 16 frequency
ibc.Opcode = VM_IADD_RS
ibc.idst = &vm.reg.r[dst]
if dst != RegisterNeedsDisplacement {
ibc.isrc = &vm.reg.r[src]
ibc.shift = (instr.Mod() >> 2) % 4
ibc.imm = 0
//shift
ibc.ImmB = (instr.Mod() >> 2) % 4
ibc.Imm = 0
} else {
ibc.isrc = &vm.reg.r[src]
ibc.shift = (instr.Mod() >> 2) % 4
ibc.imm = signExtend2sCompl(instr.IMM())
//shift
ibc.ImmB = (instr.Mod() >> 2) % 4
ibc.Imm = signExtend2sCompl(instr.IMM())
}
registerUsage[dst] = i
case 16, 17, 18, 19, 20, 21, 22: // 7
ibc.Opcode = VM_IADD_M
ibc.idst = &vm.reg.r[dst]
ibc.imm = signExtend2sCompl(instr.IMM())
ibc.Imm = signExtend2sCompl(instr.IMM())
if src != dst {
ibc.isrc = &vm.reg.r[src]
if (instr.Mod() % 4) != 0 {
ibc.memMask = ScratchpadL1Mask
ibc.MemMask = ScratchpadL1Mask
} else {
ibc.memMask = ScratchpadL2Mask
ibc.MemMask = ScratchpadL2Mask
}
} else {
ibc.Opcode = VM_IADD_MZ
ibc.memMask = ScratchpadL3Mask
ibc.MemMask = ScratchpadL3Mask
ibc.Imm = uint64(ibc.getScratchpadZeroAddress())
}
registerUsage[dst] = i
case 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38: // 16
ibc.Opcode = VM_ISUB_R
ibc.idst = &vm.reg.r[dst]
if src != dst {
ibc.isrc = &vm.reg.r[src]
} else {
ibc.imm = signExtend2sCompl(instr.IMM())
ibc.isrc = &ibc.imm // we are pointing within bytecode
if src == dst {
ibc.Imm = signExtend2sCompl(instr.IMM())
ibc.Opcode = VM_ISUB_I
}
registerUsage[dst] = i
case 39, 40, 41, 42, 43, 44, 45: // 7
ibc.Opcode = VM_ISUB_M
ibc.idst = &vm.reg.r[dst]
ibc.imm = signExtend2sCompl(instr.IMM())
ibc.Imm = signExtend2sCompl(instr.IMM())
if src != dst {
ibc.isrc = &vm.reg.r[src]
if (instr.Mod() % 4) != 0 {
ibc.memMask = ScratchpadL1Mask
ibc.MemMask = ScratchpadL1Mask
} else {
ibc.memMask = ScratchpadL2Mask
ibc.MemMask = ScratchpadL2Mask
}
} else {
ibc.Opcode = VM_ISUB_MZ
ibc.memMask = ScratchpadL3Mask
ibc.MemMask = ScratchpadL3Mask
ibc.Imm = uint64(ibc.getScratchpadZeroAddress())
}
registerUsage[dst] = i
case 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61: // 16
ibc.Opcode = VM_IMUL_R
ibc.idst = &vm.reg.r[dst]
if src != dst {
ibc.isrc = &vm.reg.r[src]
} else {
ibc.imm = signExtend2sCompl(instr.IMM())
ibc.isrc = &ibc.imm // we are pointing within bytecode
if src == dst {
ibc.Imm = signExtend2sCompl(instr.IMM())
ibc.Opcode = VM_IMUL_I
}
registerUsage[dst] = i
case 62, 63, 64, 65: //4
ibc.Opcode = VM_IMUL_M
ibc.idst = &vm.reg.r[dst]
ibc.imm = signExtend2sCompl(instr.IMM())
ibc.Imm = signExtend2sCompl(instr.IMM())
if src != dst {
ibc.isrc = &vm.reg.r[src]
if (instr.Mod() % 4) != 0 {
ibc.memMask = ScratchpadL1Mask
ibc.MemMask = ScratchpadL1Mask
} else {
ibc.memMask = ScratchpadL2Mask
ibc.MemMask = ScratchpadL2Mask
}
} else {
ibc.Opcode = VM_IMUL_MZ
ibc.memMask = ScratchpadL3Mask
ibc.MemMask = ScratchpadL3Mask
ibc.Imm = uint64(ibc.getScratchpadZeroAddress())
}
registerUsage[dst] = i
case 66, 67, 68, 69: //4
ibc.Opcode = VM_IMULH_R
ibc.idst = &vm.reg.r[dst]
ibc.isrc = &vm.reg.r[src]
registerUsage[dst] = i
case 70: //1
ibc.Opcode = VM_IMULH_M
ibc.idst = &vm.reg.r[dst]
ibc.imm = signExtend2sCompl(instr.IMM())
ibc.Imm = signExtend2sCompl(instr.IMM())
if src != dst {
ibc.isrc = &vm.reg.r[src]
if (instr.Mod() % 4) != 0 {
ibc.memMask = ScratchpadL1Mask
ibc.MemMask = ScratchpadL1Mask
} else {
ibc.memMask = ScratchpadL2Mask
ibc.MemMask = ScratchpadL2Mask
}
} else {
ibc.Opcode = VM_IMULH_MZ
ibc.memMask = ScratchpadL3Mask
ibc.MemMask = ScratchpadL3Mask
ibc.Imm = uint64(ibc.getScratchpadZeroAddress())
}
registerUsage[dst] = i
case 71, 72, 73, 74: //4
ibc.Opcode = VM_ISMULH_R
ibc.idst = &vm.reg.r[dst]
ibc.isrc = &vm.reg.r[src]
registerUsage[dst] = i
case 75: //1
ibc.Opcode = VM_ISMULH_M
ibc.idst = &vm.reg.r[dst]
ibc.imm = signExtend2sCompl(instr.IMM())
ibc.Imm = signExtend2sCompl(instr.IMM())
if src != dst {
ibc.isrc = &vm.reg.r[src]
if (instr.Mod() % 4) != 0 {
ibc.memMask = ScratchpadL1Mask
ibc.MemMask = ScratchpadL1Mask
} else {
ibc.memMask = ScratchpadL2Mask
ibc.MemMask = ScratchpadL2Mask
}
} else {
ibc.Opcode = VM_ISMULH_MZ
ibc.memMask = ScratchpadL3Mask
ibc.MemMask = ScratchpadL3Mask
ibc.Imm = uint64(ibc.getScratchpadZeroAddress())
}
registerUsage[dst] = i
case 76, 77, 78, 79, 80, 81, 82, 83: // 8
divisor := instr.IMM()
if !isZeroOrPowerOf2(divisor) {
ibc.Opcode = VM_IMUL_R
ibc.idst = &vm.reg.r[dst]
ibc.imm = randomx_reciprocal(divisor)
ibc.isrc = &ibc.imm
ibc.Opcode = VM_IMUL_I
ibc.Imm = randomx_reciprocal(divisor)
registerUsage[dst] = i
} else {
ibc.Opcode = VM_NOP
@@ -216,66 +197,49 @@ func (vm *VM) CompileToBytecode() {
case 84, 85: //2
ibc.Opcode = VM_INEG_R
ibc.idst = &vm.reg.r[dst]
registerUsage[dst] = i
case 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100: //15
ibc.Opcode = VM_IXOR_R
ibc.idst = &vm.reg.r[dst]
if src != dst {
ibc.isrc = &vm.reg.r[src]
} else {
ibc.imm = signExtend2sCompl(instr.IMM())
ibc.isrc = &ibc.imm // we are pointing within bytecode
if src == dst {
ibc.Imm = signExtend2sCompl(instr.IMM())
ibc.Opcode = VM_IXOR_I
}
registerUsage[dst] = i
case 101, 102, 103, 104, 105: //5
ibc.Opcode = VM_IXOR_M
ibc.idst = &vm.reg.r[dst]
ibc.imm = signExtend2sCompl(instr.IMM())
ibc.Imm = signExtend2sCompl(instr.IMM())
if src != dst {
ibc.isrc = &vm.reg.r[src]
if (instr.Mod() % 4) != 0 {
ibc.memMask = ScratchpadL1Mask
ibc.MemMask = ScratchpadL1Mask
} else {
ibc.memMask = ScratchpadL2Mask
ibc.MemMask = ScratchpadL2Mask
}
} else {
ibc.Opcode = VM_IXOR_MZ
ibc.memMask = ScratchpadL3Mask
ibc.MemMask = ScratchpadL3Mask
ibc.Imm = uint64(ibc.getScratchpadZeroAddress())
}
registerUsage[dst] = i
case 106, 107, 108, 109, 110, 111, 112, 113: //8
ibc.Opcode = VM_IROR_R
ibc.idst = &vm.reg.r[dst]
if src != dst {
ibc.isrc = &vm.reg.r[src]
} else {
ibc.imm = signExtend2sCompl(instr.IMM())
ibc.isrc = &ibc.imm // we are pointing within bytecode
if src == dst {
ibc.Imm = signExtend2sCompl(instr.IMM())
ibc.Opcode = VM_IROR_I
}
registerUsage[dst] = i
case 114, 115: // 2 IROL_R
ibc.Opcode = VM_IROL_R
ibc.idst = &vm.reg.r[dst]
if src != dst {
ibc.isrc = &vm.reg.r[src]
} else {
ibc.imm = signExtend2sCompl(instr.IMM())
ibc.isrc = &ibc.imm // we are pointing within bytecode
if src == dst {
ibc.Imm = signExtend2sCompl(instr.IMM())
ibc.Opcode = VM_IROL_I
}
registerUsage[dst] = i
case 116, 117, 118, 119: //4
if src != dst {
ibc.Opcode = VM_ISWAP_R
ibc.idst = &vm.reg.r[dst]
ibc.isrc = &vm.reg.r[src]
registerUsage[dst] = i
registerUsage[src] = i
} else {
@@ -285,87 +249,77 @@ func (vm *VM) CompileToBytecode() {
// below are floating point instructions
case 120, 121, 122, 123: // 4
ibc.Opcode = VM_FSWAP_R
//ibc.Opcode = VM_FSWAP_R
if dst < RegistersCountFloat {
ibc.fdst = &vm.reg.f[dst]
ibc.Opcode = VM_FSWAP_RF
} else {
ibc.fdst = &vm.reg.e[dst-RegistersCountFloat]
ibc.Opcode = VM_FSWAP_RE
ibc.Dst = dst - RegistersCountFloat
}
case 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139: //16
dst := instr.Dst() % RegistersCountFloat // bit shift optimization
src := instr.Src() % RegistersCountFloat
ibc.Dst = instr.Dst() % RegistersCountFloat // bit shift optimization
ibc.Src = instr.Src() % RegistersCountFloat
ibc.Opcode = VM_FADD_R
ibc.fdst = &vm.reg.f[dst]
ibc.fsrc = &vm.reg.a[src]
case 140, 141, 142, 143, 144: //5
dst := instr.Dst() % RegistersCountFloat // bit shift optimization
ibc.Dst = instr.Dst() % RegistersCountFloat // bit shift optimization
ibc.Opcode = VM_FADD_M
ibc.fdst = &vm.reg.f[dst]
ibc.isrc = &vm.reg.r[src]
if (instr.Mod() % 4) != 0 {
ibc.memMask = ScratchpadL1Mask
ibc.MemMask = ScratchpadL1Mask
} else {
ibc.memMask = ScratchpadL2Mask
ibc.MemMask = ScratchpadL2Mask
}
ibc.imm = signExtend2sCompl(instr.IMM())
ibc.Imm = signExtend2sCompl(instr.IMM())
case 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160: //16
dst := instr.Dst() % RegistersCountFloat // bit shift optimization
src := instr.Src() % RegistersCountFloat
ibc.Dst = instr.Dst() % RegistersCountFloat // bit shift optimization
ibc.Src = instr.Src() % RegistersCountFloat
ibc.Opcode = VM_FSUB_R
ibc.fdst = &vm.reg.f[dst]
ibc.fsrc = &vm.reg.a[src]
case 161, 162, 163, 164, 165: //5
dst := instr.Dst() % RegistersCountFloat // bit shift optimization
ibc.Dst = instr.Dst() % RegistersCountFloat // bit shift optimization
ibc.Opcode = VM_FSUB_M
ibc.fdst = &vm.reg.f[dst]
ibc.isrc = &vm.reg.r[src]
if (instr.Mod() % 4) != 0 {
ibc.memMask = ScratchpadL1Mask
ibc.MemMask = ScratchpadL1Mask
} else {
ibc.memMask = ScratchpadL2Mask
ibc.MemMask = ScratchpadL2Mask
}
ibc.imm = signExtend2sCompl(instr.IMM())
ibc.Imm = signExtend2sCompl(instr.IMM())
case 166, 167, 168, 169, 170, 171: //6
dst := instr.Dst() % RegistersCountFloat // bit shift optimization
ibc.Dst = instr.Dst() % RegistersCountFloat // bit shift optimization
ibc.Opcode = VM_FSCAL_R
ibc.fdst = &vm.reg.f[dst]
case 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203: //32
dst := instr.Dst() % RegistersCountFloat // bit shift optimization
src := instr.Src() % RegistersCountFloat
ibc.Dst = instr.Dst() % RegistersCountFloat // bit shift optimization
ibc.Src = instr.Src() % RegistersCountFloat
ibc.Opcode = VM_FMUL_R
ibc.fdst = &vm.reg.e[dst]
ibc.fsrc = &vm.reg.a[src]
case 204, 205, 206, 207: //4
dst := instr.Dst() % RegistersCountFloat // bit shift optimization
ibc.Dst = instr.Dst() % RegistersCountFloat // bit shift optimization
ibc.Opcode = VM_FDIV_M
ibc.fdst = &vm.reg.e[dst]
ibc.isrc = &vm.reg.r[src]
if (instr.Mod() % 4) != 0 {
ibc.memMask = ScratchpadL1Mask
ibc.MemMask = ScratchpadL1Mask
} else {
ibc.memMask = ScratchpadL2Mask
ibc.MemMask = ScratchpadL2Mask
}
ibc.imm = signExtend2sCompl(instr.IMM())
ibc.Imm = signExtend2sCompl(instr.IMM())
case 208, 209, 210, 211, 212, 213: //6
dst := instr.Dst() % RegistersCountFloat // bit shift optimization
ibc.Dst = instr.Dst() % RegistersCountFloat // bit shift optimization
ibc.Opcode = VM_FSQRT_R
ibc.fdst = &vm.reg.e[dst]
case 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238: //25 // CBRANCH and CFROUND are interchanged
ibc.Opcode = VM_CBRANCH
reg := instr.Dst() % RegistersCount
ibc.isrc = &vm.reg.r[reg]
ibc.target = int16(registerUsage[reg])
ibc.Src = instr.Dst() % RegistersCount
target := uint16(int16(registerUsage[ibc.Src]))
ibc.Dst = uint8(target)
ibc.ImmB = uint8(target >> 8)
shift := uint64(instr.Mod()>>4) + CONDITIONOFFSET
//conditionmask := CONDITIONMASK << shift
ibc.imm = signExtend2sCompl(instr.IMM()) | (uint64(1) << shift)
ibc.Imm = signExtend2sCompl(instr.IMM()) | (uint64(1) << shift)
if CONDITIONOFFSET > 0 || shift > 0 {
ibc.imm &= (^(uint64(1) << (shift - 1)))
ibc.Imm &= ^(uint64(1) << (shift - 1))
}
ibc.memMask = CONDITIONMASK << shift
ibc.MemMask = CONDITIONMASK << shift
for j := 0; j < RegistersCount; j++ {
registerUsage[j] = i
@@ -373,23 +327,20 @@ func (vm *VM) CompileToBytecode() {
case 239: //1
ibc.Opcode = VM_CFROUND
ibc.isrc = &vm.reg.r[src]
ibc.imm = uint64(instr.IMM() & 63)
ibc.Imm = uint64(instr.IMM() & 63)
case 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255: //16
ibc.Opcode = VM_ISTORE
ibc.idst = &vm.reg.r[dst]
ibc.isrc = &vm.reg.r[src]
ibc.imm = signExtend2sCompl(instr.IMM())
ibc.Imm = signExtend2sCompl(instr.IMM())
if (instr.Mod() >> 4) < STOREL3CONDITION {
if (instr.Mod() % 4) != 0 {
ibc.memMask = ScratchpadL1Mask
ibc.MemMask = ScratchpadL1Mask
} else {
ibc.memMask = ScratchpadL2Mask
ibc.MemMask = ScratchpadL2Mask
}
} else {
ibc.memMask = ScratchpadL3Mask
ibc.MemMask = ScratchpadL3Mask
}
default:
@@ -398,21 +349,34 @@ func (vm *VM) CompileToBytecode() {
}
}
return bc
}
func (vm *VM) Load64(addr uint64) uint64 {
return *(*uint64)(unsafe.Pointer(&vm.ScratchPad[addr]))
type ScratchPad [ScratchpadSize]byte
func (pad *ScratchPad) Init(seed *[64]byte) {
// calculate and fill scratchpad
clear(pad[:])
aes.FillAes1Rx4(seed, pad[:])
}
func (vm *VM) Load32(addr uint64) uint32 {
return *(*uint32)(unsafe.Pointer(&vm.ScratchPad[addr]))
func (pad *ScratchPad) Store64(addr uint32, val uint64) {
*(*uint64)(unsafe.Pointer(&pad[addr])) = val
//binary.LittleEndian.PutUint64(pad[addr:], val)
}
func (pad *ScratchPad) Load64(addr uint32) uint64 {
return *(*uint64)(unsafe.Pointer(&pad[addr]))
}
func (pad *ScratchPad) Load32(addr uint32) uint32 {
return *(*uint32)(unsafe.Pointer(&pad[addr]))
}
func (vm *VM) Load32F(addr uint64) (lo, hi float64) {
a := *(*[2]int32)(unsafe.Pointer(&vm.ScratchPad[addr]))
func (pad *ScratchPad) Load32F(addr uint32) (lo, hi float64) {
a := *(*[2]int32)(unsafe.Pointer(&pad[addr]))
return float64(a[LOW]), float64(a[HIGH])
}
func (vm *VM) Load32FA(addr uint64) [2]float64 {
a := *(*[2]int32)(unsafe.Pointer(&vm.ScratchPad[addr]))
func (pad *ScratchPad) Load32FA(addr uint32) [2]float64 {
a := *(*[2]int32)(unsafe.Pointer(&pad[addr]))
return [2]float64{float64(a[LOW]), float64(a[HIGH])}
}
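The pointer casts above assume a little-endian host; the commented-out encoding/binary form in Store64 is the portable equivalent. A standalone illustration:

package main

import (
    "encoding/binary"
    "fmt"
    "unsafe"
)

func main() {
    var pad [16]byte
    binary.LittleEndian.PutUint64(pad[0:], 0xdeadbeef)
    fast := *(*uint64)(unsafe.Pointer(&pad[0])) // valid only on little-endian hosts
    portable := binary.LittleEndian.Uint64(pad[0:])
    fmt.Println(fast == portable) // true on amd64/arm64
}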