Use direct register and scratchpad under bytecode execution
This commit is contained in:
parent b72f79a653
commit 5b9b3c3565
153 bytecode.go
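The core of this change is visible in the bytecode.go diff below: instead of each ByteCodeInstruction carrying pointers (idst, isrc, fdst, fsrc) back into the VM's registers, the interpreter loop Execute now receives the register file by value and the scratchpad by pointer, and hands the updated registers back to the caller (vm.reg = vm.ByteCode.Execute(vm.reg, &vm.ScratchPad, vm.config.eMask)). A minimal, self-contained sketch of that calling pattern, with the types reduced to toy stand-ins and a single illustrative opcode handler (not the real definitions):

```go
package main

import "fmt"

// Reduced stand-ins for the types in the diff; the real definitions live in
// bytecode.go, register.go and vm.go.
type RegisterFile struct {
	r [8]uint64
}

type ScratchPad [64]byte // the real scratchpad is ScratchpadSize bytes

type ByteCodeInstruction struct {
	Dst, Src byte
	Imm      uint64
}

type ByteCode []ByteCodeInstruction

// Execute mirrors the new signature: registers in by value, scratchpad by
// pointer, updated registers returned to the caller.
func (c ByteCode) Execute(f RegisterFile, pad *ScratchPad, eMask [2]uint64) RegisterFile {
	for i := range c {
		// illustrative handler only (roughly VM_IADD_RS with shift 0)
		f.r[c[i].Dst] += f.r[c[i].Src] + c[i].Imm
	}
	return f
}

func main() {
	var pad ScratchPad
	regs := RegisterFile{}
	regs.r[1] = 40
	prog := ByteCode{{Dst: 0, Src: 1, Imm: 2}}
	regs = prog.Execute(regs, &pad, [2]uint64{})
	fmt.Println(regs.r[0]) // 42
}
```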
|
@ -1,22 +1,17 @@
|
|||
package randomx
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"git.gammaspectra.live/P2Pool/go-randomx/v2/asm"
|
||||
"math"
|
||||
"math/bits"
|
||||
)
|
||||
|
||||
type ByteCodeInstruction struct {
|
||||
dst, src byte
|
||||
idst, isrc *uint64
|
||||
fdst, fsrc *[2]float64
|
||||
imm uint64
|
||||
simm int64
|
||||
Opcode ByteCodeInstructionOp
|
||||
target int16
|
||||
shift uint8
|
||||
memMask uint32
|
||||
Dst, Src byte
|
||||
ImmB uint8
|
||||
Opcode ByteCodeInstructionOp
|
||||
MemMask uint32
|
||||
Imm uint64
|
||||
/*
|
||||
union {
|
||||
int_reg_t* idst;
|
||||
|
@ -40,112 +35,127 @@ type ByteCodeInstruction struct {
|
|||
|
||||
}
|
||||
|
||||
func (i ByteCodeInstruction) getScratchpadSrcAddress() uint64 {
|
||||
return (*i.isrc + i.imm) & uint64(i.memMask)
|
||||
func (i ByteCodeInstruction) jumpTarget() int {
|
||||
return int(int16((uint16(i.ImmB) << 8) | uint16(i.Dst)))
|
||||
}
|
||||
|
||||
func (i ByteCodeInstruction) getScratchpadZeroAddress() uint64 {
|
||||
return i.imm & uint64(i.memMask)
|
||||
func (i ByteCodeInstruction) getScratchpadAddress(ptr uint64) uint32 {
|
||||
return uint32(ptr+i.Imm) & i.MemMask
|
||||
}
|
||||
|
||||
func (i ByteCodeInstruction) getScratchpadDestAddress() uint64 {
|
||||
return (*i.idst + i.imm) & uint64(i.memMask)
|
||||
func (i ByteCodeInstruction) getScratchpadZeroAddress() uint32 {
|
||||
return uint32(i.Imm) & i.MemMask
|
||||
}
|
||||
|
||||
type ByteCode [RANDOMX_PROGRAM_SIZE]ByteCodeInstruction
|
||||
|
||||
func (c *ByteCode) Interpret(vm *VM) {
|
||||
func (c *ByteCode) Execute(f RegisterFile, pad *ScratchPad, eMask [2]uint64) RegisterFile {
|
||||
for pc := 0; pc < RANDOMX_PROGRAM_SIZE; pc++ {
|
||||
ibc := c[pc]
|
||||
switch ibc.Opcode {
|
||||
i := &c[pc]
|
||||
switch i.Opcode {
|
||||
case VM_IADD_RS:
|
||||
*ibc.idst += (*ibc.isrc << ibc.shift) + ibc.imm
|
||||
f.r[i.Dst] += (f.r[i.Src] << i.ImmB) + i.Imm
|
||||
case VM_IADD_M:
|
||||
*ibc.idst += vm.Load64(ibc.getScratchpadSrcAddress())
|
||||
f.r[i.Dst] += pad.Load64(i.getScratchpadAddress(f.r[i.Src]))
|
||||
case VM_IADD_MZ:
|
||||
*ibc.idst += vm.Load64(ibc.getScratchpadZeroAddress())
|
||||
f.r[i.Dst] += pad.Load64(uint32(i.Imm))
|
||||
case VM_ISUB_R:
|
||||
*ibc.idst -= *ibc.isrc
|
||||
f.r[i.Dst] -= f.r[i.Src]
|
||||
case VM_ISUB_I:
|
||||
f.r[i.Dst] -= i.Imm
|
||||
case VM_ISUB_M:
|
||||
*ibc.idst -= vm.Load64(ibc.getScratchpadSrcAddress())
|
||||
f.r[i.Dst] -= pad.Load64(i.getScratchpadAddress(f.r[i.Src]))
|
||||
case VM_ISUB_MZ:
|
||||
*ibc.idst -= vm.Load64(ibc.getScratchpadZeroAddress())
|
||||
f.r[i.Dst] -= pad.Load64(uint32(i.Imm))
|
||||
case VM_IMUL_R:
|
||||
f.r[i.Dst] *= f.r[i.Src]
|
||||
case VM_IMUL_I:
|
||||
// also handles imul_rcp
|
||||
*ibc.idst *= *ibc.isrc
|
||||
f.r[i.Dst] *= i.Imm
|
||||
case VM_IMUL_M:
|
||||
*ibc.idst *= vm.Load64(ibc.getScratchpadSrcAddress())
|
||||
f.r[i.Dst] *= pad.Load64(i.getScratchpadAddress(f.r[i.Src]))
|
||||
case VM_IMUL_MZ:
|
||||
*ibc.idst *= vm.Load64(ibc.getScratchpadZeroAddress())
|
||||
f.r[i.Dst] *= pad.Load64(uint32(i.Imm))
|
||||
case VM_IMULH_R:
|
||||
*ibc.idst, _ = bits.Mul64(*ibc.idst, *ibc.isrc)
|
||||
f.r[i.Dst], _ = bits.Mul64(f.r[i.Dst], f.r[i.Src])
|
||||
case VM_IMULH_M:
|
||||
*ibc.idst, _ = bits.Mul64(*ibc.idst, vm.Load64(ibc.getScratchpadSrcAddress()))
|
||||
f.r[i.Dst], _ = bits.Mul64(f.r[i.Dst], pad.Load64(i.getScratchpadAddress(f.r[i.Src])))
|
||||
case VM_IMULH_MZ:
|
||||
*ibc.idst, _ = bits.Mul64(*ibc.idst, vm.Load64(ibc.getScratchpadZeroAddress()))
|
||||
f.r[i.Dst], _ = bits.Mul64(f.r[i.Dst], pad.Load64(uint32(i.Imm)))
|
||||
case VM_ISMULH_R:
|
||||
*ibc.idst = smulh(int64(*ibc.idst), int64(*ibc.isrc))
|
||||
f.r[i.Dst] = smulh(int64(f.r[i.Dst]), int64(f.r[i.Src]))
|
||||
case VM_ISMULH_M:
|
||||
*ibc.idst = smulh(int64(*ibc.idst), int64(vm.Load64(ibc.getScratchpadSrcAddress())))
|
||||
f.r[i.Dst] = smulh(int64(f.r[i.Dst]), int64(pad.Load64(i.getScratchpadAddress(f.r[i.Src]))))
|
||||
case VM_ISMULH_MZ:
|
||||
*ibc.idst = smulh(int64(*ibc.idst), int64(vm.Load64(ibc.getScratchpadZeroAddress())))
|
||||
f.r[i.Dst] = smulh(int64(f.r[i.Dst]), int64(pad.Load64(uint32(i.Imm))))
|
||||
case VM_INEG_R:
|
||||
*ibc.idst = (^(*ibc.idst)) + 1 // 2's complement negative
|
||||
//f.r[i.Dst] = (^(f.r[i.Dst])) + 1 // 2's complement negative
|
||||
f.r[i.Dst] = -f.r[i.Dst]
|
||||
case VM_IXOR_R:
|
||||
*ibc.idst ^= *ibc.isrc
|
||||
f.r[i.Dst] ^= f.r[i.Src]
|
||||
case VM_IXOR_I:
|
||||
f.r[i.Dst] ^= i.Imm
|
||||
case VM_IXOR_M:
|
||||
*ibc.idst ^= vm.Load64(ibc.getScratchpadSrcAddress())
|
||||
f.r[i.Dst] ^= pad.Load64(i.getScratchpadAddress(f.r[i.Src]))
|
||||
case VM_IXOR_MZ:
|
||||
*ibc.idst ^= vm.Load64(ibc.getScratchpadZeroAddress())
|
||||
f.r[i.Dst] ^= pad.Load64(uint32(i.Imm))
|
||||
case VM_IROR_R:
|
||||
*ibc.idst = bits.RotateLeft64(*ibc.idst, 0-int(*ibc.isrc&63))
|
||||
f.r[i.Dst] = bits.RotateLeft64(f.r[i.Dst], 0-int(f.r[i.Src]&63))
|
||||
case VM_IROR_I:
|
||||
//todo: can merge into VM_IROL_I
|
||||
f.r[i.Dst] = bits.RotateLeft64(f.r[i.Dst], 0-int(i.Imm&63))
|
||||
case VM_IROL_R:
|
||||
*ibc.idst = bits.RotateLeft64(*ibc.idst, int(*ibc.isrc&63))
|
||||
f.r[i.Dst] = bits.RotateLeft64(f.r[i.Dst], int(f.r[i.Src]&63))
|
||||
case VM_IROL_I:
|
||||
f.r[i.Dst] = bits.RotateLeft64(f.r[i.Dst], int(i.Imm&63))
|
||||
case VM_ISWAP_R:
|
||||
*ibc.idst, *ibc.isrc = *ibc.isrc, *ibc.idst
|
||||
case VM_FSWAP_R:
|
||||
ibc.fdst[HIGH], ibc.fdst[LOW] = ibc.fdst[LOW], ibc.fdst[HIGH]
|
||||
f.r[i.Dst], f.r[i.Src] = f.r[i.Src], f.r[i.Dst]
|
||||
case VM_FSWAP_RF:
|
||||
f.f[i.Dst][HIGH], f.f[i.Dst][LOW] = f.f[i.Dst][LOW], f.f[i.Dst][HIGH]
|
||||
case VM_FSWAP_RE:
|
||||
f.e[i.Dst][HIGH], f.e[i.Dst][LOW] = f.e[i.Dst][LOW], f.e[i.Dst][HIGH]
|
||||
case VM_FADD_R:
|
||||
ibc.fdst[LOW] += ibc.fsrc[LOW]
|
||||
ibc.fdst[HIGH] += ibc.fsrc[HIGH]
|
||||
f.f[i.Dst][LOW] += f.a[i.Src][LOW]
|
||||
f.f[i.Dst][HIGH] += f.a[i.Src][HIGH]
|
||||
case VM_FADD_M:
|
||||
lo, hi := vm.Load32F(ibc.getScratchpadSrcAddress())
|
||||
ibc.fdst[LOW] += lo
|
||||
ibc.fdst[HIGH] += hi
|
||||
lo, hi := pad.Load32F(i.getScratchpadAddress(f.r[i.Src]))
|
||||
f.f[i.Dst][LOW] += lo
|
||||
f.f[i.Dst][HIGH] += hi
|
||||
case VM_FSUB_R:
|
||||
ibc.fdst[LOW] -= ibc.fsrc[LOW]
|
||||
ibc.fdst[HIGH] -= ibc.fsrc[HIGH]
|
||||
f.f[i.Dst][LOW] -= f.a[i.Src][LOW]
|
||||
f.f[i.Dst][HIGH] -= f.a[i.Src][HIGH]
|
||||
case VM_FSUB_M:
|
||||
lo, hi := vm.Load32F(ibc.getScratchpadSrcAddress())
|
||||
ibc.fdst[LOW] -= lo
|
||||
ibc.fdst[HIGH] -= hi
|
||||
lo, hi := pad.Load32F(i.getScratchpadAddress(f.r[i.Src]))
|
||||
f.f[i.Dst][LOW] -= lo
|
||||
f.f[i.Dst][HIGH] -= hi
|
||||
case VM_FSCAL_R:
|
||||
// not dependent on rounding mode
|
||||
ibc.fdst[LOW] = math.Float64frombits(math.Float64bits(ibc.fdst[LOW]) ^ 0x80F0000000000000)
|
||||
ibc.fdst[HIGH] = math.Float64frombits(math.Float64bits(ibc.fdst[HIGH]) ^ 0x80F0000000000000)
|
||||
f.f[i.Dst][LOW] = math.Float64frombits(math.Float64bits(f.f[i.Dst][LOW]) ^ 0x80F0000000000000)
|
||||
f.f[i.Dst][HIGH] = math.Float64frombits(math.Float64bits(f.f[i.Dst][HIGH]) ^ 0x80F0000000000000)
|
||||
case VM_FMUL_R:
|
||||
ibc.fdst[LOW] *= ibc.fsrc[LOW]
|
||||
ibc.fdst[HIGH] *= ibc.fsrc[HIGH]
|
||||
f.e[i.Dst][LOW] *= f.a[i.Src][LOW]
|
||||
f.e[i.Dst][HIGH] *= f.a[i.Src][HIGH]
|
||||
case VM_FDIV_M:
|
||||
lo, hi := vm.Load32F(ibc.getScratchpadSrcAddress())
|
||||
ibc.fdst[LOW] /= MaskRegisterExponentMantissa(lo, vm.config.eMask[LOW])
|
||||
ibc.fdst[HIGH] /= MaskRegisterExponentMantissa(hi, vm.config.eMask[HIGH])
|
||||
lo, hi := pad.Load32F(i.getScratchpadAddress(f.r[i.Src]))
|
||||
f.e[i.Dst][LOW] /= MaskRegisterExponentMantissa(lo, eMask[LOW])
|
||||
f.e[i.Dst][HIGH] /= MaskRegisterExponentMantissa(hi, eMask[HIGH])
|
||||
case VM_FSQRT_R:
|
||||
ibc.fdst[LOW] = math.Sqrt(ibc.fdst[LOW])
|
||||
ibc.fdst[HIGH] = math.Sqrt(ibc.fdst[HIGH])
|
||||
f.e[i.Dst][LOW] = math.Sqrt(f.e[i.Dst][LOW])
|
||||
f.e[i.Dst][HIGH] = math.Sqrt(f.e[i.Dst][HIGH])
|
||||
case VM_CBRANCH:
|
||||
*ibc.isrc += ibc.imm
|
||||
if (*ibc.isrc & uint64(ibc.memMask)) == 0 {
|
||||
pc = int(ibc.target)
|
||||
f.r[i.Src] += i.Imm
|
||||
if (f.r[i.Src] & uint64(i.MemMask)) == 0 {
|
||||
pc = i.jumpTarget()
|
||||
}
|
||||
case VM_CFROUND:
|
||||
tmp := (bits.RotateLeft64(*ibc.isrc, 0-int(ibc.imm))) % 4 // rotate right
|
||||
tmp := (bits.RotateLeft64(f.r[i.Src], 0-int(i.Imm))) % 4 // rotate right
|
||||
asm.SetRoundingMode(asm.RoundingMode(tmp))
|
||||
case VM_ISTORE:
|
||||
binary.LittleEndian.PutUint64(vm.ScratchPad[(*ibc.idst+ibc.imm)&uint64(ibc.memMask):], *ibc.isrc)
|
||||
pad.Store64(i.getScratchpadAddress(f.r[i.Dst]), f.r[i.Src])
|
||||
case VM_NOP: // we do nothing
|
||||
}
|
||||
}
|
||||
return f
|
||||
}
|
||||
|
||||
type ByteCodeInstructionOp int
|
||||
|
@ -156,9 +166,11 @@ const (
|
|||
VM_IADD_M
|
||||
VM_IADD_MZ
|
||||
VM_ISUB_R
|
||||
VM_ISUB_I
|
||||
VM_ISUB_M
|
||||
VM_ISUB_MZ
|
||||
VM_IMUL_R
|
||||
VM_IMUL_I
|
||||
VM_IMUL_M
|
||||
VM_IMUL_MZ
|
||||
VM_IMULH_R
|
||||
|
@ -167,15 +179,18 @@ const (
|
|||
VM_ISMULH_R
|
||||
VM_ISMULH_M
|
||||
VM_ISMULH_MZ
|
||||
VM_IMUL_RCP
|
||||
VM_INEG_R
|
||||
VM_IXOR_R
|
||||
VM_IXOR_I
|
||||
VM_IXOR_M
|
||||
VM_IXOR_MZ
|
||||
VM_IROR_R
|
||||
VM_IROR_I
|
||||
VM_IROL_R
|
||||
VM_IROL_I
|
||||
VM_ISWAP_R
|
||||
VM_FSWAP_R
|
||||
VM_FSWAP_RF
|
||||
VM_FSWAP_RE
|
||||
VM_FADD_R
|
||||
VM_FADD_M
|
||||
VM_FSUB_R
|
||||
|
|
|
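A recurring pattern in the compile step further down: when src == dst, the old code aliased isrc to &ibc.imm so the generic register handler would read the immediate through the pointer; the new code instead emits dedicated immediate opcodes (VM_ISUB_I, VM_IMUL_I, VM_IXOR_I, VM_IROR_I, VM_IROL_I) and dispatches on them. A reduced sketch of that dispatch (opcode names and fields here are trimmed stand-ins, not the real definitions):

```go
package main

import "fmt"

type op int

const (
	opISubR op = iota // register operand
	opISubI           // immediate operand, replaces the old &imm aliasing
)

type instr struct {
	Op       op
	Dst, Src byte
	Imm      uint64
}

// step executes one instruction against a toy register line.
func step(r *[8]uint64, in instr) {
	switch in.Op {
	case opISubR:
		r[in.Dst] -= r[in.Src]
	case opISubI:
		r[in.Dst] -= in.Imm
	}
}

func main() {
	var r [8]uint64
	r[0] = 100
	step(&r, instr{Op: opISubI, Dst: 0, Imm: 58})
	fmt.Println(r[0]) // 42
}
```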
@ -87,7 +87,7 @@ const ScratchpadL3Mask = (ScratchpadL3 - 1) * 8
|
|||
const ScratchpadL3Mask64 = (ScratchpadL3/8 - 1) * 64
|
||||
|
||||
const CONDITIONOFFSET = RANDOMX_JUMP_OFFSET
|
||||
const CONDITIONMASK = ((1 << RANDOMX_JUMP_BITS) - 1)
|
||||
const CONDITIONMASK = (1 << RANDOMX_JUMP_BITS) - 1
|
||||
const STOREL3CONDITION = 14
|
||||
|
||||
const mantissaSize = 52
|
||||
|
|
|
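CONDITIONOFFSET and CONDITIONMASK feed the VM_CBRANCH encoding further down: the compiler picks shift = (Mod >> 4) + CONDITIONOFFSET, forces one bit of the immediate at that position, clears the bit just below it, and stores CONDITIONMASK << shift in MemMask; at run time the immediate is added to the source register and the branch is taken only when that masked window is all zeros. A toy illustration of the test (the jump-bit width and offset values here are assumptions for the sketch, not taken from this diff):

```go
package main

import "fmt"

// Assumed parameter values, for illustration only.
const (
	jumpBits        = 8
	conditionOffset = 8
	conditionMask   = (1 << jumpBits) - 1
)

func main() {
	var modHi uint64 = 3 // instr.Mod() >> 4, some value in 0..15
	shift := modHi + conditionOffset

	imm := uint64(0x12345678)
	imm |= uint64(1) << shift        // force one bit inside the tested window
	imm &^= uint64(1) << (shift - 1) // clear the bit just below it
	memMask := uint64(conditionMask) << shift

	// The execution side adds imm to the source register and branches when
	// the masked window is zero; the forced bit makes that impossible on the
	// very first pass from a zeroed register.
	reg := uint64(0)
	reg += imm
	fmt.Println("taken on first pass:", reg&memMask == 0) // false
}
```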
@ -3,6 +3,9 @@ package randomx
|
|||
const RegistersCount = 8
|
||||
const RegistersCountFloat = 4
|
||||
|
||||
const LOW = 0
|
||||
const HIGH = 1
|
||||
|
||||
type RegisterLine [RegistersCount]uint64
|
||||
|
||||
type RegisterFile struct {
|
||||
|
|
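With the pointers gone, scratchpad addressing is explicit: getScratchpadAddress(ptr) masks ptr + Imm down to a uint32 offset in the selected scratchpad level, and for the src == dst (*_MZ) opcodes the zero address is folded into Imm at compile time via getScratchpadZeroAddress, so execution is just pad.Load64(uint32(i.Imm)). A small sketch of the masked addressing, assuming a toy 16 KiB scratchpad (the real code uses the ScratchpadL1/L2/L3 masks):

```go
package main

import (
	"encoding/binary"
	"fmt"
)

const padSize = 16 * 1024 // toy size; the real scratchpad is much larger

type scratchPad [padSize]byte

func (p *scratchPad) load64(addr uint32) uint64 {
	return binary.LittleEndian.Uint64(p[addr:])
}

type inst struct {
	Imm     uint64
	MemMask uint32
}

// addr mirrors getScratchpadAddress: register value plus immediate, masked
// down to an aligned offset inside the selected scratchpad level.
func (i inst) addr(ptr uint64) uint32 {
	return uint32(ptr+i.Imm) & i.MemMask
}

func main() {
	var pad scratchPad
	binary.LittleEndian.PutUint64(pad[0x38:], 0xdeadbeef)

	i := inst{Imm: 8, MemMask: (padSize - 1) &^ 7} // keep 8-byte alignment
	fmt.Printf("%#x\n", pad.load64(i.addr(0x30)))  // reads offset 0x38
}
```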
30 vm.go
|
@ -47,7 +47,7 @@ type VM struct {
|
|||
StateStart [64]byte
|
||||
buffer [RANDOMX_PROGRAM_SIZE*8 + 16*8]byte // first 128 bytes are the entropy, the rest are program bytes
|
||||
Prog []byte
|
||||
ScratchPad [ScratchpadSize]byte
|
||||
ScratchPad ScratchPad
|
||||
|
||||
ByteCode ByteCode
|
||||
|
||||
|
@ -75,13 +75,10 @@ type Config struct {
|
|||
readReg [4]uint64
|
||||
}
|
||||
|
||||
const LOW = 0
|
||||
const HIGH = 1
|
||||
// Run calculates the hash based on the input
|
||||
func (vm *VM) Run(inputHash [64]byte) {
|
||||
|
||||
// calculate hash based on input
|
||||
func (vm *VM) Run(input_hash [64]byte) {
|
||||
|
||||
aes.FillAes4Rx4(input_hash, vm.buffer[:])
|
||||
aes.FillAes4Rx4(inputHash, vm.buffer[:])
|
||||
|
||||
for i := range vm.entropy {
|
||||
vm.entropy[i] = binary.LittleEndian.Uint64(vm.buffer[i*8:])
|
||||
|
@ -126,22 +123,21 @@ func (vm *VM) Run(input_hash [64]byte) {
|
|||
spAddr1 &= ScratchpadL3Mask64
|
||||
|
||||
for i := uint64(0); i < RegistersCount; i++ {
|
||||
vm.reg.r[i] ^= vm.Load64(spAddr0 + 8*i)
|
||||
vm.reg.r[i] ^= vm.ScratchPad.Load64(uint32(spAddr0 + 8*i))
|
||||
}
|
||||
|
||||
for i := uint64(0); i < RegistersCountFloat; i++ {
|
||||
vm.reg.f[i] = vm.Load32FA(spAddr1 + 8*i)
|
||||
vm.reg.f[i] = vm.ScratchPad.Load32FA(uint32(spAddr1 + 8*i))
|
||||
}
|
||||
|
||||
for i := uint64(0); i < RegistersCountFloat; i++ {
|
||||
vm.reg.e[i] = vm.Load32FA(spAddr1 + 8*(i+RegistersCountFloat))
|
||||
vm.reg.e[i] = vm.ScratchPad.Load32FA(uint32(spAddr1 + 8*(i+RegistersCountFloat)))
|
||||
|
||||
vm.reg.e[i][LOW] = MaskRegisterExponentMantissa(vm.reg.e[i][LOW], vm.config.eMask[LOW])
|
||||
vm.reg.e[i][HIGH] = MaskRegisterExponentMantissa(vm.reg.e[i][HIGH], vm.config.eMask[HIGH])
|
||||
}
|
||||
|
||||
// todo: pass register file directly!
|
||||
vm.ByteCode.Interpret(vm)
|
||||
vm.reg = vm.ByteCode.Execute(vm.reg, &vm.ScratchPad, vm.config.eMask)
|
||||
|
||||
vm.mem.mx ^= vm.reg.r[vm.config.readReg[2]] ^ vm.reg.r[vm.config.readReg[3]]
|
||||
vm.mem.mx &= CacheLineAlignMask
|
||||
|
@ -154,15 +150,15 @@ func (vm *VM) Run(input_hash [64]byte) {
|
|||
vm.mem.mx, vm.mem.ma = vm.mem.ma, vm.mem.mx
|
||||
|
||||
for i := uint64(0); i < RegistersCount; i++ {
|
||||
binary.LittleEndian.PutUint64(vm.ScratchPad[spAddr1+8*i:], vm.reg.r[i])
|
||||
vm.ScratchPad.Store64(uint32(spAddr1+8*i), vm.reg.r[i])
|
||||
}
|
||||
|
||||
for i := uint64(0); i < RegistersCountFloat; i++ {
|
||||
vm.reg.f[i][LOW] = math.Float64frombits(math.Float64bits(vm.reg.f[i][LOW]) ^ math.Float64bits(vm.reg.e[i][LOW]))
|
||||
vm.reg.f[i][HIGH] = math.Float64frombits(math.Float64bits(vm.reg.f[i][HIGH]) ^ math.Float64bits(vm.reg.e[i][HIGH]))
|
||||
|
||||
binary.LittleEndian.PutUint64(vm.ScratchPad[spAddr0+16*i:], math.Float64bits(vm.reg.f[i][LOW]))
|
||||
binary.LittleEndian.PutUint64(vm.ScratchPad[spAddr0+16*i+8:], math.Float64bits(vm.reg.f[i][HIGH]))
|
||||
vm.ScratchPad.Store64(uint32(spAddr0+16*i), math.Float64bits(vm.reg.f[i][LOW]))
|
||||
vm.ScratchPad.Store64(uint32(spAddr0+16*i+8), math.Float64bits(vm.reg.f[i][HIGH]))
|
||||
}
|
||||
|
||||
spAddr0 = 0
|
||||
|
@ -173,9 +169,7 @@ func (vm *VM) Run(input_hash [64]byte) {
|
|||
}
|
||||
|
||||
func (vm *VM) InitScratchpad(seed *[64]byte) {
|
||||
// calculate and fill scratchpad
|
||||
clear(vm.ScratchPad[:])
|
||||
aes.FillAes1Rx4(seed, vm.ScratchPad[:])
|
||||
vm.ScratchPad.Init(seed)
|
||||
}
|
||||
|
||||
func (vm *VM) CalculateHash(input []byte, output *[32]byte) {
|
||||
|
|
|
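The Load/Store helpers likewise move off the VM and become methods on a ScratchPad array type taking uint32 offsets; the diff keeps the unsafe pointer reinterpretation and leaves the equivalent encoding/binary call as a comment. A reduced sketch of the two styles side by side (toy scratchpad size; on little-endian hosts, and for the 8-byte-aligned offsets the masks produce, they agree):

```go
package main

import (
	"encoding/binary"
	"fmt"
	"unsafe"
)

type ScratchPad [1 << 10]byte // toy size for the sketch

// load64Unsafe matches the diff's approach: reinterpret the bytes at addr as
// a uint64. Valid here because the masked offsets stay in bounds and aligned,
// and the host is assumed little-endian.
func (pad *ScratchPad) load64Unsafe(addr uint32) uint64 {
	return *(*uint64)(unsafe.Pointer(&pad[addr]))
}

// load64Portable is the byte-order-explicit equivalent.
func (pad *ScratchPad) load64Portable(addr uint32) uint64 {
	return binary.LittleEndian.Uint64(pad[addr:])
}

func main() {
	var pad ScratchPad
	binary.LittleEndian.PutUint64(pad[64:], 12345)
	fmt.Println(pad.load64Unsafe(64) == pad.load64Portable(64)) // true on little-endian
}
```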
@ -30,6 +30,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
package randomx
|
||||
|
||||
import (
|
||||
"git.gammaspectra.live/P2Pool/go-randomx/v2/aes"
|
||||
"unsafe"
|
||||
)
|
||||
import "encoding/binary"
|
||||
|
@ -71,144 +72,124 @@ func (vm *VM) CompileToBytecode() {
|
|||
opcode := instr.Opcode()
|
||||
dst := instr.Dst() % RegistersCount // bit shift optimization
|
||||
src := instr.Src() % RegistersCount
|
||||
ibc.dst = dst
|
||||
ibc.src = src
|
||||
ibc.Dst = dst
|
||||
ibc.Src = src
|
||||
switch opcode {
|
||||
case 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15: // 16 frequency
|
||||
ibc.Opcode = VM_IADD_RS
|
||||
ibc.idst = &vm.reg.r[dst]
|
||||
if dst != RegisterNeedsDisplacement {
|
||||
ibc.isrc = &vm.reg.r[src]
|
||||
ibc.shift = (instr.Mod() >> 2) % 4
|
||||
ibc.imm = 0
|
||||
// shift amount for IADD_RS, carried in ImmB
|
||||
ibc.ImmB = (instr.Mod() >> 2) % 4
|
||||
ibc.Imm = 0
|
||||
} else {
|
||||
ibc.isrc = &vm.reg.r[src]
|
||||
ibc.shift = (instr.Mod() >> 2) % 4
|
||||
ibc.imm = signExtend2sCompl(instr.IMM())
|
||||
// shift amount for IADD_RS, carried in ImmB
|
||||
ibc.ImmB = (instr.Mod() >> 2) % 4
|
||||
ibc.Imm = signExtend2sCompl(instr.IMM())
|
||||
}
|
||||
registerUsage[dst] = i
|
||||
|
||||
case 16, 17, 18, 19, 20, 21, 22: // 7
|
||||
ibc.Opcode = VM_IADD_M
|
||||
ibc.idst = &vm.reg.r[dst]
|
||||
ibc.imm = signExtend2sCompl(instr.IMM())
|
||||
ibc.Imm = signExtend2sCompl(instr.IMM())
|
||||
if src != dst {
|
||||
ibc.isrc = &vm.reg.r[src]
|
||||
if (instr.Mod() % 4) != 0 {
|
||||
ibc.memMask = ScratchpadL1Mask
|
||||
ibc.MemMask = ScratchpadL1Mask
|
||||
} else {
|
||||
ibc.memMask = ScratchpadL2Mask
|
||||
ibc.MemMask = ScratchpadL2Mask
|
||||
}
|
||||
} else {
|
||||
ibc.Opcode = VM_IADD_MZ
|
||||
ibc.memMask = ScratchpadL3Mask
|
||||
ibc.MemMask = ScratchpadL3Mask
|
||||
ibc.Imm = uint64(ibc.getScratchpadZeroAddress())
|
||||
}
|
||||
registerUsage[dst] = i
|
||||
case 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38: // 16
|
||||
ibc.Opcode = VM_ISUB_R
|
||||
ibc.idst = &vm.reg.r[dst]
|
||||
|
||||
if src != dst {
|
||||
ibc.isrc = &vm.reg.r[src]
|
||||
} else {
|
||||
ibc.imm = signExtend2sCompl(instr.IMM())
|
||||
ibc.isrc = &ibc.imm // we are pointing within bytecode
|
||||
|
||||
if src == dst {
|
||||
ibc.Imm = signExtend2sCompl(instr.IMM())
|
||||
ibc.Opcode = VM_ISUB_I
|
||||
}
|
||||
registerUsage[dst] = i
|
||||
case 39, 40, 41, 42, 43, 44, 45: // 7
|
||||
ibc.Opcode = VM_ISUB_M
|
||||
ibc.idst = &vm.reg.r[dst]
|
||||
ibc.imm = signExtend2sCompl(instr.IMM())
|
||||
ibc.Imm = signExtend2sCompl(instr.IMM())
|
||||
if src != dst {
|
||||
ibc.isrc = &vm.reg.r[src]
|
||||
if (instr.Mod() % 4) != 0 {
|
||||
ibc.memMask = ScratchpadL1Mask
|
||||
ibc.MemMask = ScratchpadL1Mask
|
||||
} else {
|
||||
ibc.memMask = ScratchpadL2Mask
|
||||
ibc.MemMask = ScratchpadL2Mask
|
||||
}
|
||||
} else {
|
||||
ibc.Opcode = VM_ISUB_MZ
|
||||
ibc.memMask = ScratchpadL3Mask
|
||||
ibc.MemMask = ScratchpadL3Mask
|
||||
ibc.Imm = uint64(ibc.getScratchpadZeroAddress())
|
||||
}
|
||||
registerUsage[dst] = i
|
||||
case 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61: // 16
|
||||
ibc.Opcode = VM_IMUL_R
|
||||
ibc.idst = &vm.reg.r[dst]
|
||||
|
||||
if src != dst {
|
||||
ibc.isrc = &vm.reg.r[src]
|
||||
} else {
|
||||
ibc.imm = signExtend2sCompl(instr.IMM())
|
||||
ibc.isrc = &ibc.imm // we are pointing within bytecode
|
||||
|
||||
if src == dst {
|
||||
ibc.Imm = signExtend2sCompl(instr.IMM())
|
||||
ibc.Opcode = VM_IMUL_I
|
||||
}
|
||||
registerUsage[dst] = i
|
||||
case 62, 63, 64, 65: //4
|
||||
ibc.Opcode = VM_IMUL_M
|
||||
ibc.idst = &vm.reg.r[dst]
|
||||
ibc.imm = signExtend2sCompl(instr.IMM())
|
||||
ibc.Imm = signExtend2sCompl(instr.IMM())
|
||||
if src != dst {
|
||||
ibc.isrc = &vm.reg.r[src]
|
||||
if (instr.Mod() % 4) != 0 {
|
||||
ibc.memMask = ScratchpadL1Mask
|
||||
ibc.MemMask = ScratchpadL1Mask
|
||||
} else {
|
||||
ibc.memMask = ScratchpadL2Mask
|
||||
ibc.MemMask = ScratchpadL2Mask
|
||||
}
|
||||
} else {
|
||||
ibc.Opcode = VM_IMUL_MZ
|
||||
ibc.memMask = ScratchpadL3Mask
|
||||
ibc.MemMask = ScratchpadL3Mask
|
||||
ibc.Imm = uint64(ibc.getScratchpadZeroAddress())
|
||||
}
|
||||
registerUsage[dst] = i
|
||||
case 66, 67, 68, 69: //4
|
||||
ibc.Opcode = VM_IMULH_R
|
||||
ibc.idst = &vm.reg.r[dst]
|
||||
ibc.isrc = &vm.reg.r[src]
|
||||
registerUsage[dst] = i
|
||||
case 70: //1
|
||||
ibc.Opcode = VM_IMULH_M
|
||||
ibc.idst = &vm.reg.r[dst]
|
||||
ibc.imm = signExtend2sCompl(instr.IMM())
|
||||
ibc.Imm = signExtend2sCompl(instr.IMM())
|
||||
if src != dst {
|
||||
ibc.isrc = &vm.reg.r[src]
|
||||
if (instr.Mod() % 4) != 0 {
|
||||
ibc.memMask = ScratchpadL1Mask
|
||||
ibc.MemMask = ScratchpadL1Mask
|
||||
} else {
|
||||
ibc.memMask = ScratchpadL2Mask
|
||||
ibc.MemMask = ScratchpadL2Mask
|
||||
}
|
||||
} else {
|
||||
ibc.Opcode = VM_IMULH_MZ
|
||||
ibc.memMask = ScratchpadL3Mask
|
||||
ibc.MemMask = ScratchpadL3Mask
|
||||
ibc.Imm = uint64(ibc.getScratchpadZeroAddress())
|
||||
}
|
||||
registerUsage[dst] = i
|
||||
case 71, 72, 73, 74: //4
|
||||
ibc.Opcode = VM_ISMULH_R
|
||||
ibc.idst = &vm.reg.r[dst]
|
||||
ibc.isrc = &vm.reg.r[src]
|
||||
registerUsage[dst] = i
|
||||
case 75: //1
|
||||
ibc.Opcode = VM_ISMULH_M
|
||||
ibc.idst = &vm.reg.r[dst]
|
||||
ibc.imm = signExtend2sCompl(instr.IMM())
|
||||
ibc.Imm = signExtend2sCompl(instr.IMM())
|
||||
if src != dst {
|
||||
ibc.isrc = &vm.reg.r[src]
|
||||
if (instr.Mod() % 4) != 0 {
|
||||
ibc.memMask = ScratchpadL1Mask
|
||||
ibc.MemMask = ScratchpadL1Mask
|
||||
} else {
|
||||
ibc.memMask = ScratchpadL2Mask
|
||||
ibc.MemMask = ScratchpadL2Mask
|
||||
}
|
||||
} else {
|
||||
ibc.Opcode = VM_ISMULH_MZ
|
||||
ibc.memMask = ScratchpadL3Mask
|
||||
ibc.MemMask = ScratchpadL3Mask
|
||||
ibc.Imm = uint64(ibc.getScratchpadZeroAddress())
|
||||
}
|
||||
registerUsage[dst] = i
|
||||
case 76, 77, 78, 79, 80, 81, 82, 83: // 8
|
||||
divisor := instr.IMM()
|
||||
if !isZeroOrPowerOf2(divisor) {
|
||||
ibc.Opcode = VM_IMUL_R
|
||||
ibc.idst = &vm.reg.r[dst]
|
||||
ibc.imm = randomx_reciprocal(divisor)
|
||||
ibc.isrc = &ibc.imm
|
||||
ibc.Opcode = VM_IMUL_I
|
||||
ibc.Imm = randomx_reciprocal(divisor)
|
||||
registerUsage[dst] = i
|
||||
} else {
|
||||
ibc.Opcode = VM_NOP
|
||||
|
@ -216,66 +197,49 @@ func (vm *VM) CompileToBytecode() {
|
|||
|
||||
case 84, 85: //2
|
||||
ibc.Opcode = VM_INEG_R
|
||||
ibc.idst = &vm.reg.r[dst]
|
||||
registerUsage[dst] = i
|
||||
case 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100: //15
|
||||
ibc.Opcode = VM_IXOR_R
|
||||
ibc.idst = &vm.reg.r[dst]
|
||||
|
||||
if src != dst {
|
||||
ibc.isrc = &vm.reg.r[src]
|
||||
} else {
|
||||
ibc.imm = signExtend2sCompl(instr.IMM())
|
||||
ibc.isrc = &ibc.imm // we are pointing within bytecode
|
||||
|
||||
if src == dst {
|
||||
ibc.Imm = signExtend2sCompl(instr.IMM())
|
||||
ibc.Opcode = VM_IXOR_I
|
||||
}
|
||||
registerUsage[dst] = i
|
||||
case 101, 102, 103, 104, 105: //5
|
||||
ibc.Opcode = VM_IXOR_M
|
||||
ibc.idst = &vm.reg.r[dst]
|
||||
ibc.imm = signExtend2sCompl(instr.IMM())
|
||||
ibc.Imm = signExtend2sCompl(instr.IMM())
|
||||
if src != dst {
|
||||
ibc.isrc = &vm.reg.r[src]
|
||||
if (instr.Mod() % 4) != 0 {
|
||||
ibc.memMask = ScratchpadL1Mask
|
||||
ibc.MemMask = ScratchpadL1Mask
|
||||
} else {
|
||||
ibc.memMask = ScratchpadL2Mask
|
||||
ibc.MemMask = ScratchpadL2Mask
|
||||
}
|
||||
} else {
|
||||
ibc.Opcode = VM_IXOR_MZ
|
||||
ibc.memMask = ScratchpadL3Mask
|
||||
ibc.MemMask = ScratchpadL3Mask
|
||||
ibc.Imm = uint64(ibc.getScratchpadZeroAddress())
|
||||
}
|
||||
registerUsage[dst] = i
|
||||
case 106, 107, 108, 109, 110, 111, 112, 113: //8
|
||||
ibc.Opcode = VM_IROR_R
|
||||
ibc.idst = &vm.reg.r[dst]
|
||||
|
||||
if src != dst {
|
||||
ibc.isrc = &vm.reg.r[src]
|
||||
} else {
|
||||
ibc.imm = signExtend2sCompl(instr.IMM())
|
||||
ibc.isrc = &ibc.imm // we are pointing within bytecode
|
||||
|
||||
if src == dst {
|
||||
ibc.Imm = signExtend2sCompl(instr.IMM())
|
||||
ibc.Opcode = VM_IROR_I
|
||||
}
|
||||
registerUsage[dst] = i
|
||||
case 114, 115: // 2 IROL_R
|
||||
ibc.Opcode = VM_IROL_R
|
||||
ibc.idst = &vm.reg.r[dst]
|
||||
|
||||
if src != dst {
|
||||
ibc.isrc = &vm.reg.r[src]
|
||||
} else {
|
||||
ibc.imm = signExtend2sCompl(instr.IMM())
|
||||
ibc.isrc = &ibc.imm // we are pointing within bytecode
|
||||
|
||||
if src == dst {
|
||||
ibc.Imm = signExtend2sCompl(instr.IMM())
|
||||
ibc.Opcode = VM_IROL_I
|
||||
}
|
||||
registerUsage[dst] = i
|
||||
|
||||
case 116, 117, 118, 119: //4
|
||||
if src != dst {
|
||||
ibc.Opcode = VM_ISWAP_R
|
||||
ibc.idst = &vm.reg.r[dst]
|
||||
ibc.isrc = &vm.reg.r[src]
|
||||
registerUsage[dst] = i
|
||||
registerUsage[src] = i
|
||||
} else {
|
||||
|
@ -285,87 +249,77 @@ func (vm *VM) CompileToBytecode() {
|
|||
|
||||
// below are floating point instructions
|
||||
case 120, 121, 122, 123: // 4
|
||||
ibc.Opcode = VM_FSWAP_R
|
||||
//ibc.Opcode = VM_FSWAP_R
|
||||
if dst < RegistersCountFloat {
|
||||
ibc.fdst = &vm.reg.f[dst]
|
||||
ibc.Opcode = VM_FSWAP_RF
|
||||
} else {
|
||||
ibc.fdst = &vm.reg.e[dst-RegistersCountFloat]
|
||||
ibc.Opcode = VM_FSWAP_RE
|
||||
ibc.Dst = dst - RegistersCountFloat
|
||||
}
|
||||
case 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139: //16
|
||||
dst := instr.Dst() % RegistersCountFloat // bit shift optimization
|
||||
src := instr.Src() % RegistersCountFloat
|
||||
ibc.Dst = instr.Dst() % RegistersCountFloat // bit shift optimization
|
||||
ibc.Src = instr.Src() % RegistersCountFloat
|
||||
ibc.Opcode = VM_FADD_R
|
||||
ibc.fdst = &vm.reg.f[dst]
|
||||
ibc.fsrc = &vm.reg.a[src]
|
||||
|
||||
case 140, 141, 142, 143, 144: //5
|
||||
dst := instr.Dst() % RegistersCountFloat // bit shift optimization
|
||||
ibc.Dst = instr.Dst() % RegistersCountFloat // bit shift optimization
|
||||
ibc.Opcode = VM_FADD_M
|
||||
ibc.fdst = &vm.reg.f[dst]
|
||||
ibc.isrc = &vm.reg.r[src]
|
||||
if (instr.Mod() % 4) != 0 {
|
||||
ibc.memMask = ScratchpadL1Mask
|
||||
ibc.MemMask = ScratchpadL1Mask
|
||||
} else {
|
||||
ibc.memMask = ScratchpadL2Mask
|
||||
ibc.MemMask = ScratchpadL2Mask
|
||||
}
|
||||
ibc.imm = signExtend2sCompl(instr.IMM())
|
||||
ibc.Imm = signExtend2sCompl(instr.IMM())
|
||||
|
||||
case 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160: //16
|
||||
dst := instr.Dst() % RegistersCountFloat // bit shift optimization
|
||||
src := instr.Src() % RegistersCountFloat
|
||||
ibc.Dst = instr.Dst() % RegistersCountFloat // bit shift optimization
|
||||
ibc.Src = instr.Src() % RegistersCountFloat
|
||||
ibc.Opcode = VM_FSUB_R
|
||||
ibc.fdst = &vm.reg.f[dst]
|
||||
ibc.fsrc = &vm.reg.a[src]
|
||||
case 161, 162, 163, 164, 165: //5
|
||||
dst := instr.Dst() % RegistersCountFloat // bit shift optimization
|
||||
ibc.Dst = instr.Dst() % RegistersCountFloat // bit shift optimization
|
||||
ibc.Opcode = VM_FSUB_M
|
||||
ibc.fdst = &vm.reg.f[dst]
|
||||
ibc.isrc = &vm.reg.r[src]
|
||||
if (instr.Mod() % 4) != 0 {
|
||||
ibc.memMask = ScratchpadL1Mask
|
||||
ibc.MemMask = ScratchpadL1Mask
|
||||
} else {
|
||||
ibc.memMask = ScratchpadL2Mask
|
||||
ibc.MemMask = ScratchpadL2Mask
|
||||
}
|
||||
ibc.imm = signExtend2sCompl(instr.IMM())
|
||||
ibc.Imm = signExtend2sCompl(instr.IMM())
|
||||
|
||||
case 166, 167, 168, 169, 170, 171: //6
|
||||
dst := instr.Dst() % RegistersCountFloat // bit shift optimization
|
||||
ibc.Dst = instr.Dst() % RegistersCountFloat // bit shift optimization
|
||||
ibc.Opcode = VM_FSCAL_R
|
||||
ibc.fdst = &vm.reg.f[dst]
|
||||
case 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203: //32
|
||||
dst := instr.Dst() % RegistersCountFloat // bit shift optimization
|
||||
src := instr.Src() % RegistersCountFloat
|
||||
ibc.Dst = instr.Dst() % RegistersCountFloat // bit shift optimization
|
||||
ibc.Src = instr.Src() % RegistersCountFloat
|
||||
ibc.Opcode = VM_FMUL_R
|
||||
ibc.fdst = &vm.reg.e[dst]
|
||||
ibc.fsrc = &vm.reg.a[src]
|
||||
case 204, 205, 206, 207: //4
|
||||
dst := instr.Dst() % RegistersCountFloat // bit shift optimization
|
||||
ibc.Dst = instr.Dst() % RegistersCountFloat // bit shift optimization
|
||||
ibc.Opcode = VM_FDIV_M
|
||||
ibc.fdst = &vm.reg.e[dst]
|
||||
ibc.isrc = &vm.reg.r[src]
|
||||
if (instr.Mod() % 4) != 0 {
|
||||
ibc.memMask = ScratchpadL1Mask
|
||||
ibc.MemMask = ScratchpadL1Mask
|
||||
} else {
|
||||
ibc.memMask = ScratchpadL2Mask
|
||||
ibc.MemMask = ScratchpadL2Mask
|
||||
}
|
||||
ibc.imm = signExtend2sCompl(instr.IMM())
|
||||
ibc.Imm = signExtend2sCompl(instr.IMM())
|
||||
case 208, 209, 210, 211, 212, 213: //6
|
||||
dst := instr.Dst() % RegistersCountFloat // bit shift optimization
|
||||
ibc.Dst = instr.Dst() % RegistersCountFloat // bit shift optimization
|
||||
ibc.Opcode = VM_FSQRT_R
|
||||
ibc.fdst = &vm.reg.e[dst]
|
||||
|
||||
case 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238: //25 // CBRANCH and CFROUND are interchanged
|
||||
ibc.Opcode = VM_CBRANCH
|
||||
reg := instr.Dst() % RegistersCount
|
||||
ibc.isrc = &vm.reg.r[reg]
|
||||
ibc.target = int16(registerUsage[reg])
|
||||
ibc.Src = instr.Dst() % RegistersCount
|
||||
|
||||
target := uint16(int16(registerUsage[ibc.Src]))
|
||||
ibc.Dst = uint8(target)
|
||||
ibc.ImmB = uint8(target >> 8)
|
||||
|
||||
shift := uint64(instr.Mod()>>4) + CONDITIONOFFSET
|
||||
//conditionmask := CONDITIONMASK << shift
|
||||
ibc.imm = signExtend2sCompl(instr.IMM()) | (uint64(1) << shift)
|
||||
ibc.Imm = signExtend2sCompl(instr.IMM()) | (uint64(1) << shift)
|
||||
if CONDITIONOFFSET > 0 || shift > 0 {
|
||||
ibc.imm &= (^(uint64(1) << (shift - 1)))
|
||||
ibc.Imm &= (^(uint64(1) << (shift - 1)))
|
||||
}
|
||||
ibc.memMask = CONDITIONMASK << shift
|
||||
ibc.MemMask = CONDITIONMASK << shift
|
||||
|
||||
for j := 0; j < RegistersCount; j++ {
|
||||
registerUsage[j] = i
|
||||
|
@ -373,23 +327,20 @@ func (vm *VM) CompileToBytecode() {
|
|||
|
||||
case 239: //1
|
||||
ibc.Opcode = VM_CFROUND
|
||||
ibc.isrc = &vm.reg.r[src]
|
||||
ibc.imm = uint64(instr.IMM() & 63)
|
||||
ibc.Imm = uint64(instr.IMM() & 63)
|
||||
|
||||
case 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255: //16
|
||||
ibc.Opcode = VM_ISTORE
|
||||
ibc.idst = &vm.reg.r[dst]
|
||||
ibc.isrc = &vm.reg.r[src]
|
||||
ibc.imm = signExtend2sCompl(instr.IMM())
|
||||
ibc.Imm = signExtend2sCompl(instr.IMM())
|
||||
if (instr.Mod() >> 4) < STOREL3CONDITION {
|
||||
if (instr.Mod() % 4) != 0 {
|
||||
ibc.memMask = ScratchpadL1Mask
|
||||
ibc.MemMask = ScratchpadL1Mask
|
||||
} else {
|
||||
ibc.memMask = ScratchpadL2Mask
|
||||
ibc.MemMask = ScratchpadL2Mask
|
||||
}
|
||||
|
||||
} else {
|
||||
ibc.memMask = ScratchpadL3Mask
|
||||
ibc.MemMask = ScratchpadL3Mask
|
||||
}
|
||||
|
||||
default:
|
||||
|
@ -400,19 +351,30 @@ func (vm *VM) CompileToBytecode() {
|
|||
|
||||
}
|
||||
|
||||
func (vm *VM) Load64(addr uint64) uint64 {
|
||||
return *(*uint64)(unsafe.Pointer(&vm.ScratchPad[addr]))
|
||||
type ScratchPad [ScratchpadSize]byte
|
||||
|
||||
func (pad *ScratchPad) Init(seed *[64]byte) {
|
||||
// calculate and fill scratchpad
|
||||
clear(pad[:])
|
||||
aes.FillAes1Rx4(seed, pad[:])
|
||||
}
|
||||
func (vm *VM) Load32(addr uint64) uint32 {
|
||||
return *(*uint32)(unsafe.Pointer(&vm.ScratchPad[addr]))
|
||||
func (pad *ScratchPad) Store64(addr uint32, val uint64) {
|
||||
*(*uint64)(unsafe.Pointer(&pad[addr])) = val
|
||||
//binary.LittleEndian.PutUint64(pad[addr:], val)
|
||||
}
|
||||
func (pad *ScratchPad) Load64(addr uint32) uint64 {
|
||||
return *(*uint64)(unsafe.Pointer(&pad[addr]))
|
||||
}
|
||||
func (pad *ScratchPad) Load32(addr uint32) uint32 {
|
||||
return *(*uint32)(unsafe.Pointer(&pad[addr]))
|
||||
}
|
||||
|
||||
func (vm *VM) Load32F(addr uint64) (lo, hi float64) {
|
||||
a := *(*[2]int32)(unsafe.Pointer(&vm.ScratchPad[addr]))
|
||||
func (pad *ScratchPad) Load32F(addr uint32) (lo, hi float64) {
|
||||
a := *(*[2]int32)(unsafe.Pointer(&pad[addr]))
|
||||
return float64(a[LOW]), float64(a[HIGH])
|
||||
}
|
||||
|
||||
func (vm *VM) Load32FA(addr uint64) [2]float64 {
|
||||
a := *(*[2]int32)(unsafe.Pointer(&vm.ScratchPad[addr]))
|
||||
func (pad *ScratchPad) Load32FA(addr uint32) [2]float64 {
|
||||
a := *(*[2]int32)(unsafe.Pointer(&pad[addr]))
|
||||
return [2]float64{float64(a[LOW]), float64(a[HIGH])}
|
||||
}
|
||||
|
|
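Since ByteCodeInstruction no longer has a separate target int16 field, the VM_CBRANCH compile step above packs the 16-bit jump target into the otherwise unused Dst (low byte) and ImmB (high byte), and jumpTarget() in bytecode.go reassembles it with sign extension. A quick round-trip check of that packing, using the same expressions as the diff (the pack helper here is illustrative, not part of the code):

```go
package main

import "fmt"

type inst struct {
	Dst  byte
	ImmB uint8
}

// pack stores a signed 16-bit jump target across Dst (low byte) and ImmB (high byte),
// as the compile step does for VM_CBRANCH.
func pack(target int16) inst {
	t := uint16(target)
	return inst{Dst: uint8(t), ImmB: uint8(t >> 8)}
}

// jumpTarget mirrors the accessor added in bytecode.go.
func (i inst) jumpTarget() int {
	return int(int16((uint16(i.ImmB) << 8) | uint16(i.Dst)))
}

func main() {
	for _, t := range []int16{-1, 0, 137, 255, 2047} {
		fmt.Println(t, pack(t).jumpTarget()) // each pair matches
	}
}
```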