Use direct register and scratchpad under bytecode execution

This commit is contained in:
DataHoarder 2024-04-15 02:00:44 +02:00
parent b72f79a653
commit e4866b5bfd
Signed by: DataHoarder
SSH key fingerprint: SHA256:OLTRf6Fl87G52SiR7sWLGNzlJt4WOX+tfI2yxo0z7xk
4 changed files with 155 additions and 180 deletions


@@ -1,22 +1,17 @@
package randomx
import (
"encoding/binary"
"git.gammaspectra.live/P2Pool/go-randomx/v2/asm"
"math"
"math/bits"
)
type ByteCodeInstruction struct {
dst, src byte
idst, isrc *uint64
fdst, fsrc *[2]float64
imm uint64
simm int64
Opcode ByteCodeInstructionOp
target int16
shift uint8
memMask uint32
dst, src byte
immB uint8
Opcode ByteCodeInstructionOp
memMask uint32
imm uint64
/*
union {
int_reg_t* idst;
@@ -40,112 +35,127 @@ type ByteCodeInstruction struct {
}
func (i ByteCodeInstruction) getScratchpadSrcAddress() uint64 {
return (*i.isrc + i.imm) & uint64(i.memMask)
func (i ByteCodeInstruction) jumpTarget() int {
return int(int16((uint16(i.immB) << 8) | uint16(i.dst)))
}
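Note: the branch target round-trips through two bytes of the instruction; the CBRANCH compile case further down stores the low byte in dst and the high byte in immB, and jumpTarget reassembles and sign-extends them. A minimal round-trip sketch, with packTarget as a hypothetical helper:
func packTarget(target int16) (dst, immB uint8) {
	t := uint16(target)
	return uint8(t), uint8(t >> 8) // low byte into dst, high byte into immB
}
func unpackTarget(dst, immB uint8) int {
	return int(int16((uint16(immB) << 8) | uint16(dst))) // same as jumpTarget above
}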
func (i ByteCodeInstruction) getScratchpadZeroAddress() uint64 {
return i.imm & uint64(i.memMask)
func (i ByteCodeInstruction) getScratchpadAddress(ptr uint64) uint32 {
return uint32(ptr+i.imm) & i.memMask
}
func (i ByteCodeInstruction) getScratchpadDestAddress() uint64 {
return (*i.idst + i.imm) & uint64(i.memMask)
func (i ByteCodeInstruction) getScratchpadZeroAddress() uint32 {
return uint32(i.imm) & i.memMask
}
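The zero-address form exists because the *_MZ opcodes (src == dst) address the scratchpad with the immediate alone, so the mask can be applied once at compile time instead of on every execution. A condensed sketch of what each *_MZ compile case below does:
// Pre-mask the immediate so Execute can call pad.Load64(uint32(ibc.imm))
// without touching memMask again.
ibc.Opcode = VM_IADD_MZ
ibc.memMask = ScratchpadL3Mask
ibc.imm = uint64(ibc.getScratchpadZeroAddress())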
type ByteCode [RANDOMX_PROGRAM_SIZE]ByteCodeInstruction
func (c *ByteCode) Interpret(vm *VM) {
func (c *ByteCode) Execute(f RegisterFile, pad *ScratchPad, eMask [2]uint64) RegisterFile {
for pc := 0; pc < RANDOMX_PROGRAM_SIZE; pc++ {
ibc := c[pc]
ibc := &c[pc]
switch ibc.Opcode {
case VM_IADD_RS:
*ibc.idst += (*ibc.isrc << ibc.shift) + ibc.imm
f.r[ibc.dst] += (f.r[ibc.src] << ibc.immB) + ibc.imm
case VM_IADD_M:
*ibc.idst += vm.Load64(ibc.getScratchpadSrcAddress())
f.r[ibc.dst] += pad.Load64(ibc.getScratchpadAddress(f.r[ibc.src]))
case VM_IADD_MZ:
*ibc.idst += vm.Load64(ibc.getScratchpadZeroAddress())
f.r[ibc.dst] += pad.Load64(uint32(ibc.imm))
case VM_ISUB_R:
*ibc.idst -= *ibc.isrc
f.r[ibc.dst] -= f.r[ibc.src]
case VM_ISUB_I:
f.r[ibc.dst] -= ibc.imm
case VM_ISUB_M:
*ibc.idst -= vm.Load64(ibc.getScratchpadSrcAddress())
f.r[ibc.dst] -= pad.Load64(ibc.getScratchpadAddress(f.r[ibc.src]))
case VM_ISUB_MZ:
*ibc.idst -= vm.Load64(ibc.getScratchpadZeroAddress())
f.r[ibc.dst] -= pad.Load64(uint32(ibc.imm))
case VM_IMUL_R:
f.r[ibc.dst] *= f.r[ibc.src]
case VM_IMUL_I:
// also handles imul_rcp
*ibc.idst *= *ibc.isrc
f.r[ibc.dst] *= ibc.imm
case VM_IMUL_M:
*ibc.idst *= vm.Load64(ibc.getScratchpadSrcAddress())
f.r[ibc.dst] *= pad.Load64(ibc.getScratchpadAddress(f.r[ibc.src]))
case VM_IMUL_MZ:
*ibc.idst *= vm.Load64(ibc.getScratchpadZeroAddress())
f.r[ibc.dst] *= pad.Load64(uint32(ibc.imm))
case VM_IMULH_R:
*ibc.idst, _ = bits.Mul64(*ibc.idst, *ibc.isrc)
f.r[ibc.dst], _ = bits.Mul64(f.r[ibc.dst], f.r[ibc.src])
case VM_IMULH_M:
*ibc.idst, _ = bits.Mul64(*ibc.idst, vm.Load64(ibc.getScratchpadSrcAddress()))
f.r[ibc.dst], _ = bits.Mul64(f.r[ibc.dst], pad.Load64(ibc.getScratchpadAddress(f.r[ibc.src])))
case VM_IMULH_MZ:
*ibc.idst, _ = bits.Mul64(*ibc.idst, vm.Load64(ibc.getScratchpadZeroAddress()))
f.r[ibc.dst], _ = bits.Mul64(f.r[ibc.dst], pad.Load64(uint32(ibc.imm)))
case VM_ISMULH_R:
*ibc.idst = smulh(int64(*ibc.idst), int64(*ibc.isrc))
f.r[ibc.dst] = smulh(int64(f.r[ibc.dst]), int64(f.r[ibc.src]))
case VM_ISMULH_M:
*ibc.idst = smulh(int64(*ibc.idst), int64(vm.Load64(ibc.getScratchpadSrcAddress())))
f.r[ibc.dst] = smulh(int64(f.r[ibc.dst]), int64(pad.Load64(ibc.getScratchpadAddress(f.r[ibc.src]))))
case VM_ISMULH_MZ:
*ibc.idst = smulh(int64(*ibc.idst), int64(vm.Load64(ibc.getScratchpadZeroAddress())))
f.r[ibc.dst] = smulh(int64(f.r[ibc.dst]), int64(pad.Load64(uint32(ibc.imm))))
case VM_INEG_R:
*ibc.idst = (^(*ibc.idst)) + 1 // 2's complement negative
//f.r[ibc.dst] = (^(f.r[ibc.dst])) + 1 // 2's complement negative
f.r[ibc.dst] = -f.r[ibc.dst]
case VM_IXOR_R:
*ibc.idst ^= *ibc.isrc
f.r[ibc.dst] ^= f.r[ibc.src]
case VM_IXOR_I:
f.r[ibc.dst] ^= ibc.imm
case VM_IXOR_M:
*ibc.idst ^= vm.Load64(ibc.getScratchpadSrcAddress())
f.r[ibc.dst] ^= pad.Load64(ibc.getScratchpadAddress(f.r[ibc.src]))
case VM_IXOR_MZ:
*ibc.idst ^= vm.Load64(ibc.getScratchpadZeroAddress())
f.r[ibc.dst] ^= pad.Load64(uint32(ibc.imm))
case VM_IROR_R:
*ibc.idst = bits.RotateLeft64(*ibc.idst, 0-int(*ibc.isrc&63))
f.r[ibc.dst] = bits.RotateLeft64(f.r[ibc.dst], 0-int(f.r[ibc.src]&63))
case VM_IROR_I:
//todo: can merge into VM_IROL_I
f.r[ibc.dst] = bits.RotateLeft64(f.r[ibc.dst], 0-int(ibc.imm&63))
case VM_IROL_R:
*ibc.idst = bits.RotateLeft64(*ibc.idst, int(*ibc.isrc&63))
f.r[ibc.dst] = bits.RotateLeft64(f.r[ibc.dst], int(f.r[ibc.src]&63))
case VM_IROL_I:
f.r[ibc.dst] = bits.RotateLeft64(f.r[ibc.dst], int(ibc.imm&63))
case VM_ISWAP_R:
*ibc.idst, *ibc.isrc = *ibc.isrc, *ibc.idst
case VM_FSWAP_R:
ibc.fdst[HIGH], ibc.fdst[LOW] = ibc.fdst[LOW], ibc.fdst[HIGH]
f.r[ibc.dst], f.r[ibc.src] = f.r[ibc.src], f.r[ibc.dst]
case VM_FSWAP_RF:
f.f[ibc.dst][HIGH], f.f[ibc.dst][LOW] = f.f[ibc.dst][LOW], f.f[ibc.dst][HIGH]
case VM_FSWAP_RE:
f.e[ibc.dst][HIGH], f.e[ibc.dst][LOW] = f.e[ibc.dst][LOW], f.e[ibc.dst][HIGH]
case VM_FADD_R:
ibc.fdst[LOW] += ibc.fsrc[LOW]
ibc.fdst[HIGH] += ibc.fsrc[HIGH]
f.f[ibc.dst][LOW] += f.a[ibc.src][LOW]
f.f[ibc.dst][HIGH] += f.a[ibc.src][HIGH]
case VM_FADD_M:
lo, hi := vm.Load32F(ibc.getScratchpadSrcAddress())
ibc.fdst[LOW] += lo
ibc.fdst[HIGH] += hi
lo, hi := pad.Load32F(ibc.getScratchpadAddress(f.r[ibc.src]))
f.f[ibc.dst][LOW] += lo
f.f[ibc.dst][HIGH] += hi
case VM_FSUB_R:
ibc.fdst[LOW] -= ibc.fsrc[LOW]
ibc.fdst[HIGH] -= ibc.fsrc[HIGH]
f.f[ibc.dst][LOW] -= f.a[ibc.src][LOW]
f.f[ibc.dst][HIGH] -= f.a[ibc.src][HIGH]
case VM_FSUB_M:
lo, hi := vm.Load32F(ibc.getScratchpadSrcAddress())
ibc.fdst[LOW] -= lo
ibc.fdst[HIGH] -= hi
lo, hi := pad.Load32F(ibc.getScratchpadAddress(f.r[ibc.src]))
f.f[ibc.dst][LOW] -= lo
f.f[ibc.dst][HIGH] -= hi
case VM_FSCAL_R:
// not dependent on rounding mode
ibc.fdst[LOW] = math.Float64frombits(math.Float64bits(ibc.fdst[LOW]) ^ 0x80F0000000000000)
ibc.fdst[HIGH] = math.Float64frombits(math.Float64bits(ibc.fdst[HIGH]) ^ 0x80F0000000000000)
f.f[ibc.dst][LOW] = math.Float64frombits(math.Float64bits(f.f[ibc.dst][LOW]) ^ 0x80F0000000000000)
f.f[ibc.dst][HIGH] = math.Float64frombits(math.Float64bits(f.f[ibc.dst][HIGH]) ^ 0x80F0000000000000)
case VM_FMUL_R:
ibc.fdst[LOW] *= ibc.fsrc[LOW]
ibc.fdst[HIGH] *= ibc.fsrc[HIGH]
f.e[ibc.dst][LOW] *= f.a[ibc.src][LOW]
f.e[ibc.dst][HIGH] *= f.a[ibc.src][HIGH]
case VM_FDIV_M:
lo, hi := vm.Load32F(ibc.getScratchpadSrcAddress())
ibc.fdst[LOW] /= MaskRegisterExponentMantissa(lo, vm.config.eMask[LOW])
ibc.fdst[HIGH] /= MaskRegisterExponentMantissa(hi, vm.config.eMask[HIGH])
lo, hi := pad.Load32F(ibc.getScratchpadAddress(f.r[ibc.src]))
f.e[ibc.dst][LOW] /= MaskRegisterExponentMantissa(lo, eMask[LOW])
f.e[ibc.dst][HIGH] /= MaskRegisterExponentMantissa(hi, eMask[HIGH])
case VM_FSQRT_R:
ibc.fdst[LOW] = math.Sqrt(ibc.fdst[LOW])
ibc.fdst[HIGH] = math.Sqrt(ibc.fdst[HIGH])
f.e[ibc.dst][LOW] = math.Sqrt(f.e[ibc.dst][LOW])
f.e[ibc.dst][HIGH] = math.Sqrt(f.e[ibc.dst][HIGH])
case VM_CBRANCH:
*ibc.isrc += ibc.imm
if (*ibc.isrc & uint64(ibc.memMask)) == 0 {
pc = int(ibc.target)
f.r[ibc.src] += ibc.imm
if (f.r[ibc.src] & uint64(ibc.memMask)) == 0 {
pc = ibc.jumpTarget()
}
case VM_CFROUND:
tmp := (bits.RotateLeft64(*ibc.isrc, 0-int(ibc.imm))) % 4 // rotate right
tmp := (bits.RotateLeft64(f.r[ibc.src], 0-int(ibc.imm))) % 4 // rotate right
asm.SetRoundingMode(asm.RoundingMode(tmp))
case VM_ISTORE:
binary.LittleEndian.PutUint64(vm.ScratchPad[(*ibc.idst+ibc.imm)&uint64(ibc.memMask):], *ibc.isrc)
pad.Store64(ibc.getScratchpadAddress(f.r[ibc.dst]), f.r[ibc.src])
case VM_NOP: // we do nothing
}
}
return f
}
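The new signature removes pointer aliasing between bytecode and VM state: the register file goes in by value and the mutated copy comes back, so callers reassign it, as vm.Run does below:
vm.reg = vm.ByteCode.Execute(vm.reg, &vm.ScratchPad, vm.config.eMask)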
type ByteCodeInstructionOp int
@@ -156,9 +166,11 @@ const (
VM_IADD_M
VM_IADD_MZ
VM_ISUB_R
VM_ISUB_I
VM_ISUB_M
VM_ISUB_MZ
VM_IMUL_R
VM_IMUL_I
VM_IMUL_M
VM_IMUL_MZ
VM_IMULH_R
@@ -167,15 +179,18 @@ const (
VM_ISMULH_R
VM_ISMULH_M
VM_ISMULH_MZ
VM_IMUL_RCP
VM_INEG_R
VM_IXOR_R
VM_IXOR_I
VM_IXOR_M
VM_IXOR_MZ
VM_IROR_R
VM_IROR_I
VM_IROL_R
VM_IROL_I
VM_ISWAP_R
VM_FSWAP_R
VM_FSWAP_RF
VM_FSWAP_RE
VM_FADD_R
VM_FADD_M
VM_FSUB_R

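The added *_I opcodes replace the old trick of pointing isrc at the instruction's own imm field. When src == dst the compiler now emits an immediate form instead; a sketch mirroring the ISUB case (the same pattern covers IMUL_I, IXOR_I, IROR_I and IROL_I):
if src == dst {
	ibc.imm = signExtend2sCompl(instr.IMM())
	ibc.Opcode = VM_ISUB_I // Execute then runs f.r[ibc.dst] -= ibc.imm
}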

@@ -87,7 +87,7 @@ const ScratchpadL3Mask = (ScratchpadL3 - 1) * 8
const ScratchpadL3Mask64 = (ScratchpadL3/8 - 1) * 64
const CONDITIONOFFSET = RANDOMX_JUMP_OFFSET
const CONDITIONMASK = ((1 << RANDOMX_JUMP_BITS) - 1)
const CONDITIONMASK = (1 << RANDOMX_JUMP_BITS) - 1
const STOREL3CONDITION = 14
const mantissaSize = 52

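For orientation, the values these constants take under the standard RandomX parameters (2 MiB L3 scratchpad, RANDOMX_JUMP_BITS = 8, RANDOMX_JUMP_OFFSET = 8); the concrete numbers below are assumptions based on those defaults, not taken from this diff:
// ScratchpadL3       = 2 MiB / 8 = 262144 words
// ScratchpadL3Mask   = (262144 - 1) * 8    = 0x1FFFF8 (8-byte-aligned offsets)
// ScratchpadL3Mask64 = (262144/8 - 1) * 64 = 0x1FFFC0 (64-byte-aligned offsets)
// CONDITIONMASK      = (1 << 8) - 1        = 0xFF
// CONDITIONOFFSET    = RANDOMX_JUMP_OFFSET = 8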
vm.go

@@ -47,7 +47,7 @@ type VM struct {
StateStart [64]byte
buffer [RANDOMX_PROGRAM_SIZE*8 + 16*8]byte // first 128 bytes are entropy, the rest are program bytes
Prog []byte
ScratchPad [ScratchpadSize]byte
ScratchPad ScratchPad
ByteCode ByteCode
@@ -126,22 +126,22 @@ func (vm *VM) Run(input_hash [64]byte) {
spAddr1 &= ScratchpadL3Mask64
for i := uint64(0); i < RegistersCount; i++ {
vm.reg.r[i] ^= vm.Load64(spAddr0 + 8*i)
vm.reg.r[i] ^= vm.ScratchPad.Load64(uint32(spAddr0 + 8*i))
}
for i := uint64(0); i < RegistersCountFloat; i++ {
vm.reg.f[i] = vm.Load32FA(spAddr1 + 8*i)
vm.reg.f[i] = vm.ScratchPad.Load32FA(uint32(spAddr1 + 8*i))
}
for i := uint64(0); i < RegistersCountFloat; i++ {
vm.reg.e[i] = vm.Load32FA(spAddr1 + 8*(i+RegistersCountFloat))
vm.reg.e[i] = vm.ScratchPad.Load32FA(uint32(spAddr1 + 8*(i+RegistersCountFloat)))
vm.reg.e[i][LOW] = MaskRegisterExponentMantissa(vm.reg.e[i][LOW], vm.config.eMask[LOW])
vm.reg.e[i][HIGH] = MaskRegisterExponentMantissa(vm.reg.e[i][HIGH], vm.config.eMask[HIGH])
}
// todo: pass register file directly!
vm.ByteCode.Interpret(vm)
vm.reg = vm.ByteCode.Execute(vm.reg, &vm.ScratchPad, vm.config.eMask)
vm.mem.mx ^= vm.reg.r[vm.config.readReg[2]] ^ vm.reg.r[vm.config.readReg[3]]
vm.mem.mx &= CacheLineAlignMask
@@ -154,15 +154,15 @@ func (vm *VM) Run(input_hash [64]byte) {
vm.mem.mx, vm.mem.ma = vm.mem.ma, vm.mem.mx
for i := uint64(0); i < RegistersCount; i++ {
binary.LittleEndian.PutUint64(vm.ScratchPad[spAddr1+8*i:], vm.reg.r[i])
vm.ScratchPad.Store64(uint32(spAddr1+8*i), vm.reg.r[i])
}
for i := uint64(0); i < RegistersCountFloat; i++ {
vm.reg.f[i][LOW] = math.Float64frombits(math.Float64bits(vm.reg.f[i][LOW]) ^ math.Float64bits(vm.reg.e[i][LOW]))
vm.reg.f[i][HIGH] = math.Float64frombits(math.Float64bits(vm.reg.f[i][HIGH]) ^ math.Float64bits(vm.reg.e[i][HIGH]))
binary.LittleEndian.PutUint64(vm.ScratchPad[spAddr0+16*i:], math.Float64bits(vm.reg.f[i][LOW]))
binary.LittleEndian.PutUint64(vm.ScratchPad[spAddr0+16*i+8:], math.Float64bits(vm.reg.f[i][HIGH]))
vm.ScratchPad.Store64(uint32(spAddr0+16*i), math.Float64bits(vm.reg.f[i][LOW]))
vm.ScratchPad.Store64(uint32(spAddr0+16*i+8), math.Float64bits(vm.reg.f[i][HIGH]))
}
spAddr0 = 0
@@ -173,9 +173,7 @@ func (vm *VM) Run(input_hash [64]byte) {
}
func (vm *VM) InitScratchpad(seed *[64]byte) {
// calculate and fill scratchpad
clear(vm.ScratchPad[:])
aes.FillAes1Rx4(seed, vm.ScratchPad[:])
vm.ScratchPad.Init(seed)
}
func (vm *VM) CalculateHash(input []byte, output *[32]byte) {

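MaskRegisterExponentMantissa, used on the e registers above and in FDIV_M, is not part of this diff; per the RandomX spec it constrains group-E values by keeping the mantissa plus the low exponent bits and OR-ing in the per-program eMask. A plausible sketch, with the dynamicMantissaMask name assumed:
// Assumed sketch, not from this diff: clamp a group-E register's exponent.
func maskRegisterExponentMantissaSketch(f float64, eMask uint64) float64 {
	const dynamicMantissaMask = (uint64(1) << (mantissaSize + 3)) - 1
	return math.Float64frombits((math.Float64bits(f) & dynamicMantissaMask) | eMask)
}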

@@ -30,6 +30,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package randomx
import (
"git.gammaspectra.live/P2Pool/go-randomx/v2/aes"
"unsafe"
)
import "encoding/binary"
@@ -76,24 +77,21 @@ func (vm *VM) CompileToBytecode() {
switch opcode {
case 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15: // 16 frequency
ibc.Opcode = VM_IADD_RS
ibc.idst = &vm.reg.r[dst]
if dst != RegisterNeedsDisplacement {
ibc.isrc = &vm.reg.r[src]
ibc.shift = (instr.Mod() >> 2) % 4
//shift
ibc.immB = (instr.Mod() >> 2) % 4
ibc.imm = 0
} else {
ibc.isrc = &vm.reg.r[src]
ibc.shift = (instr.Mod() >> 2) % 4
//shift
ibc.immB = (instr.Mod() >> 2) % 4
ibc.imm = signExtend2sCompl(instr.IMM())
}
registerUsage[dst] = i
case 16, 17, 18, 19, 20, 21, 22: // 7
ibc.Opcode = VM_IADD_M
ibc.idst = &vm.reg.r[dst]
ibc.imm = signExtend2sCompl(instr.IMM())
if src != dst {
ibc.isrc = &vm.reg.r[src]
if (instr.Mod() % 4) != 0 {
ibc.memMask = ScratchpadL1Mask
} else {
@@ -102,26 +100,21 @@ func (vm *VM) CompileToBytecode() {
} else {
ibc.Opcode = VM_IADD_MZ
ibc.memMask = ScratchpadL3Mask
ibc.imm = uint64(ibc.getScratchpadZeroAddress())
}
registerUsage[dst] = i
case 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38: // 16
ibc.Opcode = VM_ISUB_R
ibc.idst = &vm.reg.r[dst]
if src != dst {
ibc.isrc = &vm.reg.r[src]
} else {
if src == dst {
ibc.imm = signExtend2sCompl(instr.IMM())
ibc.isrc = &ibc.imm // we are pointing within bytecode
ibc.Opcode = VM_ISUB_I
}
registerUsage[dst] = i
case 39, 40, 41, 42, 43, 44, 45: // 7
ibc.Opcode = VM_ISUB_M
ibc.idst = &vm.reg.r[dst]
ibc.imm = signExtend2sCompl(instr.IMM())
if src != dst {
ibc.isrc = &vm.reg.r[src]
if (instr.Mod() % 4) != 0 {
ibc.memMask = ScratchpadL1Mask
} else {
@@ -130,26 +123,21 @@ func (vm *VM) CompileToBytecode() {
} else {
ibc.Opcode = VM_ISUB_MZ
ibc.memMask = ScratchpadL3Mask
ibc.imm = uint64(ibc.getScratchpadZeroAddress())
}
registerUsage[dst] = i
case 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61: // 16
ibc.Opcode = VM_IMUL_R
ibc.idst = &vm.reg.r[dst]
if src != dst {
ibc.isrc = &vm.reg.r[src]
} else {
if src == dst {
ibc.imm = signExtend2sCompl(instr.IMM())
ibc.isrc = &ibc.imm // we are pointing within bytecode
ibc.Opcode = VM_IMUL_I
}
registerUsage[dst] = i
case 62, 63, 64, 65: //4
ibc.Opcode = VM_IMUL_M
ibc.idst = &vm.reg.r[dst]
ibc.imm = signExtend2sCompl(instr.IMM())
if src != dst {
ibc.isrc = &vm.reg.r[src]
if (instr.Mod() % 4) != 0 {
ibc.memMask = ScratchpadL1Mask
} else {
@@ -158,19 +146,16 @@ func (vm *VM) CompileToBytecode() {
} else {
ibc.Opcode = VM_IMUL_MZ
ibc.memMask = ScratchpadL3Mask
ibc.imm = uint64(ibc.getScratchpadZeroAddress())
}
registerUsage[dst] = i
case 66, 67, 68, 69: //4
ibc.Opcode = VM_IMULH_R
ibc.idst = &vm.reg.r[dst]
ibc.isrc = &vm.reg.r[src]
registerUsage[dst] = i
case 70: //1
ibc.Opcode = VM_IMULH_M
ibc.idst = &vm.reg.r[dst]
ibc.imm = signExtend2sCompl(instr.IMM())
if src != dst {
ibc.isrc = &vm.reg.r[src]
if (instr.Mod() % 4) != 0 {
ibc.memMask = ScratchpadL1Mask
} else {
@@ -179,19 +164,16 @@ func (vm *VM) CompileToBytecode() {
} else {
ibc.Opcode = VM_IMULH_MZ
ibc.memMask = ScratchpadL3Mask
ibc.imm = uint64(ibc.getScratchpadZeroAddress())
}
registerUsage[dst] = i
case 71, 72, 73, 74: //4
ibc.Opcode = VM_ISMULH_R
ibc.idst = &vm.reg.r[dst]
ibc.isrc = &vm.reg.r[src]
registerUsage[dst] = i
case 75: //1
ibc.Opcode = VM_ISMULH_M
ibc.idst = &vm.reg.r[dst]
ibc.imm = signExtend2sCompl(instr.IMM())
if src != dst {
ibc.isrc = &vm.reg.r[src]
if (instr.Mod() % 4) != 0 {
ibc.memMask = ScratchpadL1Mask
} else {
@@ -200,15 +182,14 @@ func (vm *VM) CompileToBytecode() {
} else {
ibc.Opcode = VM_ISMULH_MZ
ibc.memMask = ScratchpadL3Mask
ibc.imm = uint64(ibc.getScratchpadZeroAddress())
}
registerUsage[dst] = i
case 76, 77, 78, 79, 80, 81, 82, 83: // 8
divisor := instr.IMM()
if !isZeroOrPowerOf2(divisor) {
ibc.Opcode = VM_IMUL_R
ibc.idst = &vm.reg.r[dst]
ibc.Opcode = VM_IMUL_I
ibc.imm = randomx_reciprocal(divisor)
ibc.isrc = &ibc.imm
registerUsage[dst] = i
} else {
ibc.Opcode = VM_NOP
@@ -216,26 +197,19 @@ func (vm *VM) CompileToBytecode() {
case 84, 85: //2
ibc.Opcode = VM_INEG_R
ibc.idst = &vm.reg.r[dst]
registerUsage[dst] = i
case 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100: //15
ibc.Opcode = VM_IXOR_R
ibc.idst = &vm.reg.r[dst]
if src != dst {
ibc.isrc = &vm.reg.r[src]
} else {
if src == dst {
ibc.imm = signExtend2sCompl(instr.IMM())
ibc.isrc = &ibc.imm // we are pointing within bytecode
ibc.Opcode = VM_IXOR_I
}
registerUsage[dst] = i
case 101, 102, 103, 104, 105: //5
ibc.Opcode = VM_IXOR_M
ibc.idst = &vm.reg.r[dst]
ibc.imm = signExtend2sCompl(instr.IMM())
if src != dst {
ibc.isrc = &vm.reg.r[src]
if (instr.Mod() % 4) != 0 {
ibc.memMask = ScratchpadL1Mask
} else {
@@ -244,38 +218,28 @@ func (vm *VM) CompileToBytecode() {
} else {
ibc.Opcode = VM_IXOR_MZ
ibc.memMask = ScratchpadL3Mask
ibc.imm = uint64(ibc.getScratchpadZeroAddress())
}
registerUsage[dst] = i
case 106, 107, 108, 109, 110, 111, 112, 113: //8
ibc.Opcode = VM_IROR_R
ibc.idst = &vm.reg.r[dst]
if src != dst {
ibc.isrc = &vm.reg.r[src]
} else {
if src == dst {
ibc.imm = signExtend2sCompl(instr.IMM())
ibc.isrc = &ibc.imm // we are pointing within bytecode
ibc.Opcode = VM_IROR_I
}
registerUsage[dst] = i
case 114, 115: // 2 IROL_R
ibc.Opcode = VM_IROL_R
ibc.idst = &vm.reg.r[dst]
if src != dst {
ibc.isrc = &vm.reg.r[src]
} else {
if src == dst {
ibc.imm = signExtend2sCompl(instr.IMM())
ibc.isrc = &ibc.imm // we are pointing within bytecode
ibc.Opcode = VM_IROL_I
}
registerUsage[dst] = i
case 116, 117, 118, 119: //4
if src != dst {
ibc.Opcode = VM_ISWAP_R
ibc.idst = &vm.reg.r[dst]
ibc.isrc = &vm.reg.r[src]
registerUsage[dst] = i
registerUsage[src] = i
} else {
@@ -285,24 +249,21 @@ func (vm *VM) CompileToBytecode() {
// below are floating point instructions
case 120, 121, 122, 123: // 4
ibc.Opcode = VM_FSWAP_R
//ibc.Opcode = VM_FSWAP_R
if dst < RegistersCountFloat {
ibc.fdst = &vm.reg.f[dst]
ibc.Opcode = VM_FSWAP_RF
} else {
ibc.fdst = &vm.reg.e[dst-RegistersCountFloat]
ibc.Opcode = VM_FSWAP_RE
ibc.dst = dst - RegistersCountFloat
}
case 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139: //16
dst := instr.Dst() % RegistersCountFloat // bit shift optimization
src := instr.Src() % RegistersCountFloat
ibc.dst = instr.Dst() % RegistersCountFloat // bit shift optimization
ibc.src = instr.Src() % RegistersCountFloat
ibc.Opcode = VM_FADD_R
ibc.fdst = &vm.reg.f[dst]
ibc.fsrc = &vm.reg.a[src]
case 140, 141, 142, 143, 144: //5
dst := instr.Dst() % RegistersCountFloat // bit shift optimization
ibc.dst = instr.Dst() % RegistersCountFloat // bit shift optimization
ibc.Opcode = VM_FADD_M
ibc.fdst = &vm.reg.f[dst]
ibc.isrc = &vm.reg.r[src]
if (instr.Mod() % 4) != 0 {
ibc.memMask = ScratchpadL1Mask
} else {
@@ -311,16 +272,12 @@ func (vm *VM) CompileToBytecode() {
ibc.imm = signExtend2sCompl(instr.IMM())
case 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160: //16
dst := instr.Dst() % RegistersCountFloat // bit shift optimization
src := instr.Src() % RegistersCountFloat
ibc.dst = instr.Dst() % RegistersCountFloat // bit shift optimization
ibc.src = instr.Src() % RegistersCountFloat
ibc.Opcode = VM_FSUB_R
ibc.fdst = &vm.reg.f[dst]
ibc.fsrc = &vm.reg.a[src]
case 161, 162, 163, 164, 165: //5
dst := instr.Dst() % RegistersCountFloat // bit shift optimization
ibc.dst = instr.Dst() % RegistersCountFloat // bit shift optimization
ibc.Opcode = VM_FSUB_M
ibc.fdst = &vm.reg.f[dst]
ibc.isrc = &vm.reg.r[src]
if (instr.Mod() % 4) != 0 {
ibc.memMask = ScratchpadL1Mask
} else {
@@ -329,20 +286,15 @@ func (vm *VM) CompileToBytecode() {
ibc.imm = signExtend2sCompl(instr.IMM())
case 166, 167, 168, 169, 170, 171: //6
dst := instr.Dst() % RegistersCountFloat // bit shift optimization
ibc.dst = instr.Dst() % RegistersCountFloat // bit shift optimization
ibc.Opcode = VM_FSCAL_R
ibc.fdst = &vm.reg.f[dst]
case 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203: //32
dst := instr.Dst() % RegistersCountFloat // bit shift optimization
src := instr.Src() % RegistersCountFloat
ibc.dst = instr.Dst() % RegistersCountFloat // bit shift optimization
ibc.src = instr.Src() % RegistersCountFloat
ibc.Opcode = VM_FMUL_R
ibc.fdst = &vm.reg.e[dst]
ibc.fsrc = &vm.reg.a[src]
case 204, 205, 206, 207: //4
dst := instr.Dst() % RegistersCountFloat // bit shift optimization
ibc.dst = instr.Dst() % RegistersCountFloat // bit shift optimization
ibc.Opcode = VM_FDIV_M
ibc.fdst = &vm.reg.e[dst]
ibc.isrc = &vm.reg.r[src]
if (instr.Mod() % 4) != 0 {
ibc.memMask = ScratchpadL1Mask
} else {
@@ -350,15 +302,17 @@ func (vm *VM) CompileToBytecode() {
}
ibc.imm = signExtend2sCompl(instr.IMM())
case 208, 209, 210, 211, 212, 213: //6
dst := instr.Dst() % RegistersCountFloat // bit shift optimization
ibc.dst = instr.Dst() % RegistersCountFloat // bit shift optimization
ibc.Opcode = VM_FSQRT_R
ibc.fdst = &vm.reg.e[dst]
case 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238: //25 // CBRANCH and CFROUND are interchanged
ibc.Opcode = VM_CBRANCH
reg := instr.Dst() % RegistersCount
ibc.isrc = &vm.reg.r[reg]
ibc.target = int16(registerUsage[reg])
ibc.src = instr.Dst() % RegistersCount
target := uint16(int16(registerUsage[ibc.src]))
ibc.dst = uint8(target)
ibc.immB = uint8(target >> 8)
shift := uint64(instr.Mod()>>4) + CONDITIONOFFSET
//conditionmask := CONDITIONMASK << shift
ibc.imm = signExtend2sCompl(instr.IMM()) | (uint64(1) << shift)
@@ -373,13 +327,10 @@ func (vm *VM) CompileToBytecode() {
case 239: //1
ibc.Opcode = VM_CFROUND
ibc.isrc = &vm.reg.r[src]
ibc.imm = uint64(instr.IMM() & 63)
case 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255: //16
ibc.Opcode = VM_ISTORE
ibc.idst = &vm.reg.r[dst]
ibc.isrc = &vm.reg.r[src]
ibc.imm = signExtend2sCompl(instr.IMM())
if (instr.Mod() >> 4) < STOREL3CONDITION {
if (instr.Mod() % 4) != 0 {
@@ -400,19 +351,30 @@ func (vm *VM) CompileToBytecode() {
}
func (vm *VM) Load64(addr uint64) uint64 {
return *(*uint64)(unsafe.Pointer(&vm.ScratchPad[addr]))
type ScratchPad [ScratchpadSize]byte
func (pad *ScratchPad) Init(seed *[64]byte) {
// calculate and fill scratchpad
clear(pad[:])
aes.FillAes1Rx4(seed, pad[:])
}
func (vm *VM) Load32(addr uint64) uint32 {
return *(*uint32)(unsafe.Pointer(&vm.ScratchPad[addr]))
func (pad *ScratchPad) Store64(addr uint32, val uint64) {
*(*uint64)(unsafe.Pointer(&pad[addr])) = val
//binary.LittleEndian.PutUint64(pad[addr:], val)
}
func (pad *ScratchPad) Load64(addr uint32) uint64 {
return *(*uint64)(unsafe.Pointer(&pad[addr]))
}
func (pad *ScratchPad) Load32(addr uint32) uint32 {
return *(*uint32)(unsafe.Pointer(&pad[addr]))
}
func (vm *VM) Load32F(addr uint64) (lo, hi float64) {
a := *(*[2]int32)(unsafe.Pointer(&vm.ScratchPad[addr]))
func (pad *ScratchPad) Load32F(addr uint32) (lo, hi float64) {
a := *(*[2]int32)(unsafe.Pointer(&pad[addr]))
return float64(a[LOW]), float64(a[HIGH])
}
func (vm *VM) Load32FA(addr uint64) [2]float64 {
a := *(*[2]int32)(unsafe.Pointer(&vm.ScratchPad[addr]))
func (pad *ScratchPad) Load32FA(addr uint32) [2]float64 {
a := *(*[2]int32)(unsafe.Pointer(&pad[addr]))
return [2]float64{float64(a[LOW]), float64(a[HIGH])}
}
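The unsafe pointer loads and stores above assume a little-endian host, which holds on the platforms RandomX targets (the commented-out PutUint64 line in Store64 shows the equivalence). A portable variant, method name assumed, would go through encoding/binary:
// Assumed portable counterpart of Load64; identical results on little-endian hosts.
func (pad *ScratchPad) load64Portable(addr uint32) uint64 {
	return binary.LittleEndian.Uint64(pad[addr:])
}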