Compare commits

...

2 commits

Author SHA1 Message Date
DataHoarder e4866b5bfd
Use direct register and scratchpad under bytecode execution
All checks were successful
continuous-integration/drone/push Build is passing
2024-04-15 02:14:01 +02:00
DataHoarder b72f79a653
Remove zero register from vm bytecode 2024-04-14 15:43:54 +02:00
6 changed files with 315 additions and 346 deletions

205
bytecode.go Normal file
View file

@ -0,0 +1,205 @@
package randomx
import (
"git.gammaspectra.live/P2Pool/go-randomx/v2/asm"
"math"
"math/bits"
)
// ByteCodeInstruction is one pre-decoded RandomX instruction. The compiler
// packs differently-typed operands into the shared fields below, so the
// meaning of each field depends on Opcode (see the original C union kept in
// the comment at the bottom of the struct).
type ByteCodeInstruction struct {
	// dst and src are register indices. For VM_CBRANCH, dst instead carries
	// the low byte of the 16-bit jump target (see jumpTarget).
	dst, src byte
	// immB is the shift amount for VM_IADD_RS, or the high byte of the
	// jump target for VM_CBRANCH.
	immB uint8
	Opcode ByteCodeInstructionOp
	// memMask masks scratchpad addresses for memory operands; for
	// VM_CBRANCH it is the branch condition mask instead.
	memMask uint32
	// imm is the sign-extended immediate. For the *_MZ opcodes it holds a
	// pre-masked scratchpad address computed at compile time.
	imm uint64
	/*
	union {
	int_reg_t* idst;
	rx_vec_f128* fdst;
	};
	union {
	int_reg_t* isrc;
	rx_vec_f128* fsrc;
	};
	union {
	uint64_t imm;
	int64_t simm;
	};
	InstructionType type;
	union {
	int16_t target;
	uint16_t shift;
	};
	uint32_t memMask;
	*/
}
// jumpTarget reconstructs the signed 16-bit branch target that the compiler
// split across dst (low byte) and immB (high byte).
func (i ByteCodeInstruction) jumpTarget() int {
	raw := uint16(i.immB)<<8 | uint16(i.dst)
	return int(int16(raw))
}
// getScratchpadAddress computes the scratchpad offset of a memory operand:
// (ptr + imm) truncated to 32 bits and clipped to the level mask.
func (i ByteCodeInstruction) getScratchpadAddress(ptr uint64) uint32 {
	addr := ptr + i.imm
	return uint32(addr) & i.memMask
}
// getScratchpadZeroAddress is getScratchpadAddress with a zero register
// operand; only the immediate contributes to the address.
func (i ByteCodeInstruction) getScratchpadZeroAddress() uint32 {
	return i.memMask & uint32(i.imm)
}
// ByteCode is a compiled RandomX program: a fixed-size array of pre-decoded
// instructions run sequentially by Execute.
type ByteCode [RANDOMX_PROGRAM_SIZE]ByteCodeInstruction

// Execute interprets the whole program against register file f and the given
// scratchpad, returning the resulting register file. f is passed and
// returned by value, so the caller's registers are only updated through the
// return value; the scratchpad is read and written in place.
//
// eMask holds the two per-lane masks (LOW, HIGH) applied to the memory
// operand of VM_FDIV_M.
func (c *ByteCode) Execute(f RegisterFile, pad *ScratchPad, eMask [2]uint64) RegisterFile {
	for pc := 0; pc < RANDOMX_PROGRAM_SIZE; pc++ {
		ibc := &c[pc]
		switch ibc.Opcode {
		case VM_IADD_RS:
			// immB carries the shift amount for this opcode.
			f.r[ibc.dst] += (f.r[ibc.src] << ibc.immB) + ibc.imm
		case VM_IADD_M:
			f.r[ibc.dst] += pad.Load64(ibc.getScratchpadAddress(f.r[ibc.src]))
		case VM_IADD_MZ:
			// *_MZ variants: imm already holds the masked scratchpad
			// address, computed at compile time.
			f.r[ibc.dst] += pad.Load64(uint32(ibc.imm))
		case VM_ISUB_R:
			f.r[ibc.dst] -= f.r[ibc.src]
		case VM_ISUB_I:
			f.r[ibc.dst] -= ibc.imm
		case VM_ISUB_M:
			f.r[ibc.dst] -= pad.Load64(ibc.getScratchpadAddress(f.r[ibc.src]))
		case VM_ISUB_MZ:
			f.r[ibc.dst] -= pad.Load64(uint32(ibc.imm))
		case VM_IMUL_R:
			f.r[ibc.dst] *= f.r[ibc.src]
		case VM_IMUL_I:
			// also handles imul_rcp
			f.r[ibc.dst] *= ibc.imm
		case VM_IMUL_M:
			f.r[ibc.dst] *= pad.Load64(ibc.getScratchpadAddress(f.r[ibc.src]))
		case VM_IMUL_MZ:
			f.r[ibc.dst] *= pad.Load64(uint32(ibc.imm))
		case VM_IMULH_R:
			// keep the high 64 bits of the unsigned 128-bit product
			f.r[ibc.dst], _ = bits.Mul64(f.r[ibc.dst], f.r[ibc.src])
		case VM_IMULH_M:
			f.r[ibc.dst], _ = bits.Mul64(f.r[ibc.dst], pad.Load64(ibc.getScratchpadAddress(f.r[ibc.src])))
		case VM_IMULH_MZ:
			f.r[ibc.dst], _ = bits.Mul64(f.r[ibc.dst], pad.Load64(uint32(ibc.imm)))
		case VM_ISMULH_R:
			// high 64 bits of the signed 128-bit product
			f.r[ibc.dst] = smulh(int64(f.r[ibc.dst]), int64(f.r[ibc.src]))
		case VM_ISMULH_M:
			f.r[ibc.dst] = smulh(int64(f.r[ibc.dst]), int64(pad.Load64(ibc.getScratchpadAddress(f.r[ibc.src]))))
		case VM_ISMULH_MZ:
			f.r[ibc.dst] = smulh(int64(f.r[ibc.dst]), int64(pad.Load64(uint32(ibc.imm))))
		case VM_INEG_R:
			//f.r[ibc.dst] = (^(f.r[ibc.dst])) + 1 // 2's complement negative
			f.r[ibc.dst] = -f.r[ibc.dst]
		case VM_IXOR_R:
			f.r[ibc.dst] ^= f.r[ibc.src]
		case VM_IXOR_I:
			f.r[ibc.dst] ^= ibc.imm
		case VM_IXOR_M:
			f.r[ibc.dst] ^= pad.Load64(ibc.getScratchpadAddress(f.r[ibc.src]))
		case VM_IXOR_MZ:
			f.r[ibc.dst] ^= pad.Load64(uint32(ibc.imm))
		case VM_IROR_R:
			// rotate right = rotate left by the negated count
			f.r[ibc.dst] = bits.RotateLeft64(f.r[ibc.dst], 0-int(f.r[ibc.src]&63))
		case VM_IROR_I:
			//todo: can merge into VM_IROL_I
			f.r[ibc.dst] = bits.RotateLeft64(f.r[ibc.dst], 0-int(ibc.imm&63))
		case VM_IROL_R:
			f.r[ibc.dst] = bits.RotateLeft64(f.r[ibc.dst], int(f.r[ibc.src]&63))
		case VM_IROL_I:
			f.r[ibc.dst] = bits.RotateLeft64(f.r[ibc.dst], int(ibc.imm&63))
		case VM_ISWAP_R:
			f.r[ibc.dst], f.r[ibc.src] = f.r[ibc.src], f.r[ibc.dst]
		case VM_FSWAP_RF:
			// swap the two lanes of an f register
			f.f[ibc.dst][HIGH], f.f[ibc.dst][LOW] = f.f[ibc.dst][LOW], f.f[ibc.dst][HIGH]
		case VM_FSWAP_RE:
			// swap the two lanes of an e register
			f.e[ibc.dst][HIGH], f.e[ibc.dst][LOW] = f.e[ibc.dst][LOW], f.e[ibc.dst][HIGH]
		case VM_FADD_R:
			f.f[ibc.dst][LOW] += f.a[ibc.src][LOW]
			f.f[ibc.dst][HIGH] += f.a[ibc.src][HIGH]
		case VM_FADD_M:
			lo, hi := pad.Load32F(ibc.getScratchpadAddress(f.r[ibc.src]))
			f.f[ibc.dst][LOW] += lo
			f.f[ibc.dst][HIGH] += hi
		case VM_FSUB_R:
			f.f[ibc.dst][LOW] -= f.a[ibc.src][LOW]
			f.f[ibc.dst][HIGH] -= f.a[ibc.src][HIGH]
		case VM_FSUB_M:
			lo, hi := pad.Load32F(ibc.getScratchpadAddress(f.r[ibc.src]))
			f.f[ibc.dst][LOW] -= lo
			f.f[ibc.dst][HIGH] -= hi
		case VM_FSCAL_R:
			// not dependent on rounding modes: pure sign/exponent bit flip
			f.f[ibc.dst][LOW] = math.Float64frombits(math.Float64bits(f.f[ibc.dst][LOW]) ^ 0x80F0000000000000)
			f.f[ibc.dst][HIGH] = math.Float64frombits(math.Float64bits(f.f[ibc.dst][HIGH]) ^ 0x80F0000000000000)
		case VM_FMUL_R:
			f.e[ibc.dst][LOW] *= f.a[ibc.src][LOW]
			f.e[ibc.dst][HIGH] *= f.a[ibc.src][HIGH]
		case VM_FDIV_M:
			lo, hi := pad.Load32F(ibc.getScratchpadAddress(f.r[ibc.src]))
			f.e[ibc.dst][LOW] /= MaskRegisterExponentMantissa(lo, eMask[LOW])
			f.e[ibc.dst][HIGH] /= MaskRegisterExponentMantissa(hi, eMask[HIGH])
		case VM_FSQRT_R:
			f.e[ibc.dst][LOW] = math.Sqrt(f.e[ibc.dst][LOW])
			f.e[ibc.dst][HIGH] = math.Sqrt(f.e[ibc.dst][HIGH])
		case VM_CBRANCH:
			// memMask holds the condition mask; the jump target was packed
			// into dst/immB at compile time (see jumpTarget).
			f.r[ibc.src] += ibc.imm
			if (f.r[ibc.src] & uint64(ibc.memMask)) == 0 {
				pc = ibc.jumpTarget()
			}
		case VM_CFROUND:
			tmp := (bits.RotateLeft64(f.r[ibc.src], 0-int(ibc.imm))) % 4 // rotate right
			asm.SetRoundingMode(asm.RoundingMode(tmp))
		case VM_ISTORE:
			pad.Store64(ibc.getScratchpadAddress(f.r[ibc.dst]), f.r[ibc.src])
		case VM_NOP: // we do nothing
		}
	}
	return f
}
// ByteCodeInstructionOp identifies the operation of a pre-decoded
// ByteCodeInstruction; see ByteCode.Execute for the semantics of each.
type ByteCodeInstructionOp int

// NOTE: values come from iota, so the order below is significant.
const (
	VM_NOP = ByteCodeInstructionOp(iota)
	// integer ops; *_I variants use the immediate as source, *_MZ variants
	// use a pre-masked scratchpad address stored in imm.
	VM_IADD_RS
	VM_IADD_M
	VM_IADD_MZ
	VM_ISUB_R
	VM_ISUB_I
	VM_ISUB_M
	VM_ISUB_MZ
	VM_IMUL_R
	VM_IMUL_I
	VM_IMUL_M
	VM_IMUL_MZ
	VM_IMULH_R
	VM_IMULH_M
	VM_IMULH_MZ
	VM_ISMULH_R
	VM_ISMULH_M
	VM_ISMULH_MZ
	VM_INEG_R
	VM_IXOR_R
	VM_IXOR_I
	VM_IXOR_M
	VM_IXOR_MZ
	VM_IROR_R
	VM_IROR_I
	VM_IROL_R
	VM_IROL_I
	VM_ISWAP_R
	// floating-point ops; _RF targets the f registers, _RE the e registers.
	VM_FSWAP_RF
	VM_FSWAP_RE
	VM_FADD_R
	VM_FADD_M
	VM_FSUB_R
	VM_FSUB_M
	VM_FSCAL_R
	VM_FMUL_R
	VM_FDIV_M
	VM_FSQRT_R
	// control and store ops
	VM_CBRANCH
	VM_CFROUND
	VM_ISTORE
)

View file

@ -87,12 +87,9 @@ const ScratchpadL3Mask = (ScratchpadL3 - 1) * 8
const ScratchpadL3Mask64 = (ScratchpadL3/8 - 1) * 64
const CONDITIONOFFSET = RANDOMX_JUMP_OFFSET
const CONDITIONMASK = ((1 << RANDOMX_JUMP_BITS) - 1)
const CONDITIONMASK = (1 << RANDOMX_JUMP_BITS) - 1
const STOREL3CONDITION = 14
const REGISTERSCOUNT = 8
const REGISTERCOUNTFLT = 4
const mantissaSize = 52
const exponentSize = 11
const mantissaMask = (uint64(1) << mantissaSize) - 1

View file

@ -1,3 +1,17 @@
package randomx
type RegisterLine [REGISTERSCOUNT]uint64
const RegistersCount = 8
const RegistersCountFloat = 4
type RegisterLine [RegistersCount]uint64
type RegisterFile struct {
r RegisterLine
f [RegistersCountFloat][2]float64
e [RegistersCountFloat][2]float64
a [RegistersCountFloat][2]float64
}
type MemoryRegisters struct {
mx, ma uint64
}

View file

@ -85,8 +85,8 @@ func generateSuperscalarCode(scalarProgram SuperScalarProgram) ProgramFunc {
for i := range p {
instr := &p[i]
dst := instr.Dst_Reg % REGISTERSCOUNT
src := instr.Src_Reg % REGISTERSCOUNT
dst := instr.Dst_Reg % RegistersCount
src := instr.Src_Reg % RegistersCount
switch instr.Opcode {
case S_ISUB_R:

46
vm.go
View file

@ -47,15 +47,15 @@ type VM struct {
StateStart [64]byte
buffer [RANDOMX_PROGRAM_SIZE*8 + 16*8]byte // first 128 bytes are entropy below rest are program bytes
Prog []byte
ScratchPad [ScratchpadSize]byte
ScratchPad ScratchPad
ByteCode [RANDOMX_PROGRAM_SIZE]InstructionByteCode
ByteCode ByteCode
// program configuration see program.hpp
entropy [16]uint64
reg REGISTER_FILE // the register file
reg RegisterFile // the register file
mem MemoryRegisters
config Config // configuration
datasetOffset uint64
@ -75,16 +75,6 @@ type Config struct {
readReg [4]uint64
}
type REGISTER_FILE struct {
r RegisterLine
f [4][2]float64
e [4][2]float64
a [4][2]float64
}
type MemoryRegisters struct {
mx, ma uint64
}
const LOW = 0
const HIGH = 1
@ -120,7 +110,7 @@ func (vm *VM) Run(input_hash [64]byte) {
vm.config.eMask[LOW] = getFloatMask(vm.entropy[14])
vm.config.eMask[HIGH] = getFloatMask(vm.entropy[15])
vm.Compile_TO_Bytecode()
vm.CompileToBytecode()
spAddr0 := vm.mem.mx
spAddr1 := vm.mem.ma
@ -135,23 +125,23 @@ func (vm *VM) Run(input_hash [64]byte) {
spAddr1 ^= spMix >> 32
spAddr1 &= ScratchpadL3Mask64
for i := uint64(0); i < REGISTERSCOUNT; i++ {
vm.reg.r[i] ^= vm.Load64(spAddr0 + 8*i)
for i := uint64(0); i < RegistersCount; i++ {
vm.reg.r[i] ^= vm.ScratchPad.Load64(uint32(spAddr0 + 8*i))
}
for i := uint64(0); i < REGISTERCOUNTFLT; i++ {
vm.reg.f[i] = vm.Load32FA(spAddr1 + 8*i)
for i := uint64(0); i < RegistersCountFloat; i++ {
vm.reg.f[i] = vm.ScratchPad.Load32FA(uint32(spAddr1 + 8*i))
}
for i := uint64(0); i < REGISTERCOUNTFLT; i++ {
vm.reg.e[i] = vm.Load32FA(spAddr1 + 8*(i+REGISTERCOUNTFLT))
for i := uint64(0); i < RegistersCountFloat; i++ {
vm.reg.e[i] = vm.ScratchPad.Load32FA(uint32(spAddr1 + 8*(i+RegistersCountFloat)))
vm.reg.e[i][LOW] = MaskRegisterExponentMantissa(vm.reg.e[i][LOW], vm.config.eMask[LOW])
vm.reg.e[i][HIGH] = MaskRegisterExponentMantissa(vm.reg.e[i][HIGH], vm.config.eMask[HIGH])
}
// todo: pass register file directly!
vm.InterpretByteCode()
vm.reg = vm.ByteCode.Execute(vm.reg, &vm.ScratchPad, vm.config.eMask)
vm.mem.mx ^= vm.reg.r[vm.config.readReg[2]] ^ vm.reg.r[vm.config.readReg[3]]
vm.mem.mx &= CacheLineAlignMask
@ -163,16 +153,16 @@ func (vm *VM) Run(input_hash [64]byte) {
// swap the elements
vm.mem.mx, vm.mem.ma = vm.mem.ma, vm.mem.mx
for i := uint64(0); i < REGISTERSCOUNT; i++ {
binary.LittleEndian.PutUint64(vm.ScratchPad[spAddr1+8*i:], vm.reg.r[i])
for i := uint64(0); i < RegistersCount; i++ {
vm.ScratchPad.Store64(uint32(spAddr1+8*i), vm.reg.r[i])
}
for i := uint64(0); i < REGISTERCOUNTFLT; i++ {
for i := uint64(0); i < RegistersCountFloat; i++ {
vm.reg.f[i][LOW] = math.Float64frombits(math.Float64bits(vm.reg.f[i][LOW]) ^ math.Float64bits(vm.reg.e[i][LOW]))
vm.reg.f[i][HIGH] = math.Float64frombits(math.Float64bits(vm.reg.f[i][HIGH]) ^ math.Float64bits(vm.reg.e[i][HIGH]))
binary.LittleEndian.PutUint64(vm.ScratchPad[spAddr0+16*i:], math.Float64bits(vm.reg.f[i][LOW]))
binary.LittleEndian.PutUint64(vm.ScratchPad[spAddr0+16*i+8:], math.Float64bits(vm.reg.f[i][HIGH]))
vm.ScratchPad.Store64(uint32(spAddr0+16*i), math.Float64bits(vm.reg.f[i][LOW]))
vm.ScratchPad.Store64(uint32(spAddr0+16*i+8), math.Float64bits(vm.reg.f[i][HIGH]))
}
spAddr0 = 0
@ -183,9 +173,7 @@ func (vm *VM) Run(input_hash [64]byte) {
}
func (vm *VM) InitScratchpad(seed *[64]byte) {
// calculate and fill scratchpad
clear(vm.ScratchPad[:])
aes.FillAes1Rx4(seed, vm.ScratchPad[:])
vm.ScratchPad.Init(seed)
}
func (vm *VM) CalculateHash(input []byte, output *[32]byte) {

View file

@ -30,17 +30,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package randomx
import (
"git.gammaspectra.live/P2Pool/go-randomx/v2/asm"
"math"
"math/bits"
"git.gammaspectra.live/P2Pool/go-randomx/v2/aes"
"unsafe"
)
import "encoding/binary"
//reference https://github.com/tevador/RandomX/blob/master/doc/specs.md#51-instruction-encoding
var Zero uint64 = 0
// since go does not have union, use byte array
type VM_Instruction []byte // it is hardcode 8 bytes
@ -60,80 +56,11 @@ func (ins VM_Instruction) Opcode() byte {
return ins[0]
}
type VM_Instruction_Type int
const (
VM_IADD_RS VM_Instruction_Type = 0
VM_IADD_M VM_Instruction_Type = 1
VM_ISUB_R VM_Instruction_Type = 2
VM_ISUB_M VM_Instruction_Type = 3
VM_IMUL_R VM_Instruction_Type = 4
VM_IMUL_M VM_Instruction_Type = 5
VM_IMULH_R VM_Instruction_Type = 6
VM_IMULH_M VM_Instruction_Type = 7
VM_ISMULH_R VM_Instruction_Type = 8
VM_ISMULH_M VM_Instruction_Type = 9
VM_IMUL_RCP VM_Instruction_Type = 10
VM_INEG_R VM_Instruction_Type = 11
VM_IXOR_R VM_Instruction_Type = 12
VM_IXOR_M VM_Instruction_Type = 13
VM_IROR_R VM_Instruction_Type = 14
VM_IROL_R VM_Instruction_Type = 15
VM_ISWAP_R VM_Instruction_Type = 16
VM_FSWAP_R VM_Instruction_Type = 17
VM_FADD_R VM_Instruction_Type = 18
VM_FADD_M VM_Instruction_Type = 19
VM_FSUB_R VM_Instruction_Type = 20
VM_FSUB_M VM_Instruction_Type = 21
VM_FSCAL_R VM_Instruction_Type = 22
VM_FMUL_R VM_Instruction_Type = 23
VM_FDIV_M VM_Instruction_Type = 24
VM_FSQRT_R VM_Instruction_Type = 25
VM_CBRANCH VM_Instruction_Type = 26
VM_CFROUND VM_Instruction_Type = 27
VM_ISTORE VM_Instruction_Type = 28
VM_NOP VM_Instruction_Type = 29
)
var Names = map[VM_Instruction_Type]string{
VM_IADD_RS: "VM_IADD_RS",
VM_IADD_M: "VM_IADD_M",
VM_ISUB_R: "VM_ISUB_R",
VM_ISUB_M: "VM_ISUB_M",
VM_IMUL_R: "VM_IMUL_R",
VM_IMUL_M: "VM_IMUL_M",
VM_IMULH_R: "VM_IMULH_R",
VM_IMULH_M: "VM_IMULH_M",
VM_ISMULH_R: "VM_ISMULH_R",
VM_ISMULH_M: "VM_ISMULH_M",
VM_IMUL_RCP: "VM_IMUL_RCP",
VM_INEG_R: "VM_INEG_R",
VM_IXOR_R: "VM_IXOR_R",
VM_IXOR_M: "VM_IXOR_M",
VM_IROR_R: "VM_IROR_R",
VM_IROL_R: "VM_IROL_R",
VM_ISWAP_R: "VM_ISWAP_R",
VM_FSWAP_R: "VM_FSWAP_R",
VM_FADD_R: "VM_FADD_R",
VM_FADD_M: "VM_FADD_M",
VM_FSUB_R: "VM_FSUB_R",
VM_FSUB_M: "VM_FSUB_M",
VM_FSCAL_R: "VM_FSCAL_R",
VM_FMUL_R: "VM_FMUL_R",
VM_FDIV_M: "VM_FDIV_M",
VM_FSQRT_R: "VM_FSQRT_R",
VM_CBRANCH: "VM_CBRANCH",
VM_CFROUND: "VM_CFROUND",
VM_ISTORE: "VM_ISTORE",
VM_NOP: "VM_NOP",
}
// this will interpret single vm instruction
// CompileToBytecode this will interpret single vm instruction
// reference https://github.com/tevador/RandomX/blob/master/doc/specs.md#52-integer-instructions
func (vm *VM) Compile_TO_Bytecode() {
func (vm *VM) CompileToBytecode() {
var registerUsage [REGISTERSCOUNT]int
var registerUsage [RegistersCount]int
for i := range registerUsage {
registerUsage[i] = -1
}
@ -143,146 +70,126 @@ func (vm *VM) Compile_TO_Bytecode() {
ibc := &vm.ByteCode[i]
opcode := instr.Opcode()
dst := instr.Dst() % REGISTERSCOUNT // bit shift optimization
src := instr.Src() % REGISTERSCOUNT
dst := instr.Dst() % RegistersCount // bit shift optimization
src := instr.Src() % RegistersCount
ibc.dst = dst
ibc.src = src
switch opcode {
case 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15: // 16 frequency
ibc.Opcode = VM_IADD_RS
ibc.idst = &vm.reg.r[dst]
if dst != RegisterNeedsDisplacement {
ibc.isrc = &vm.reg.r[src]
ibc.shift = (instr.Mod() >> 2) % 4
//shift
ibc.immB = (instr.Mod() >> 2) % 4
ibc.imm = 0
} else {
ibc.isrc = &vm.reg.r[src]
ibc.shift = (instr.Mod() >> 2) % 4
//shift
ibc.immB = (instr.Mod() >> 2) % 4
ibc.imm = signExtend2sCompl(instr.IMM())
}
registerUsage[dst] = i
case 16, 17, 18, 19, 20, 21, 22: // 7
ibc.Opcode = VM_IADD_M
ibc.idst = &vm.reg.r[dst]
ibc.imm = signExtend2sCompl(instr.IMM())
if src != dst {
ibc.isrc = &vm.reg.r[src]
if (instr.Mod() % 4) != 0 {
ibc.memMask = ScratchpadL1Mask
} else {
ibc.memMask = ScratchpadL2Mask
}
} else {
ibc.isrc = &Zero
ibc.Opcode = VM_IADD_MZ
ibc.memMask = ScratchpadL3Mask
ibc.imm = uint64(ibc.getScratchpadZeroAddress())
}
registerUsage[dst] = i
case 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38: // 16
ibc.Opcode = VM_ISUB_R
ibc.idst = &vm.reg.r[dst]
if src != dst {
ibc.isrc = &vm.reg.r[src]
} else {
if src == dst {
ibc.imm = signExtend2sCompl(instr.IMM())
ibc.isrc = &ibc.imm // we are pointing within bytecode
ibc.Opcode = VM_ISUB_I
}
registerUsage[dst] = i
case 39, 40, 41, 42, 43, 44, 45: // 7
ibc.Opcode = VM_ISUB_M
ibc.idst = &vm.reg.r[dst]
ibc.imm = signExtend2sCompl(instr.IMM())
if src != dst {
ibc.isrc = &vm.reg.r[src]
if (instr.Mod() % 4) != 0 {
ibc.memMask = ScratchpadL1Mask
} else {
ibc.memMask = ScratchpadL2Mask
}
} else {
ibc.isrc = &Zero
ibc.Opcode = VM_ISUB_MZ
ibc.memMask = ScratchpadL3Mask
ibc.imm = uint64(ibc.getScratchpadZeroAddress())
}
registerUsage[dst] = i
case 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61: // 16
ibc.Opcode = VM_IMUL_R
ibc.idst = &vm.reg.r[dst]
if src != dst {
ibc.isrc = &vm.reg.r[src]
} else {
if src == dst {
ibc.imm = signExtend2sCompl(instr.IMM())
ibc.isrc = &ibc.imm // we are pointing within bytecode
ibc.Opcode = VM_IMUL_I
}
registerUsage[dst] = i
case 62, 63, 64, 65: //4
ibc.Opcode = VM_IMUL_M
ibc.idst = &vm.reg.r[dst]
ibc.imm = signExtend2sCompl(instr.IMM())
if src != dst {
ibc.isrc = &vm.reg.r[src]
if (instr.Mod() % 4) != 0 {
ibc.memMask = ScratchpadL1Mask
} else {
ibc.memMask = ScratchpadL2Mask
}
} else {
ibc.isrc = &Zero
ibc.Opcode = VM_IMUL_MZ
ibc.memMask = ScratchpadL3Mask
ibc.imm = uint64(ibc.getScratchpadZeroAddress())
}
registerUsage[dst] = i
case 66, 67, 68, 69: //4
ibc.Opcode = VM_IMULH_R
ibc.idst = &vm.reg.r[dst]
ibc.isrc = &vm.reg.r[src]
registerUsage[dst] = i
case 70: //1
ibc.Opcode = VM_IMULH_M
ibc.idst = &vm.reg.r[dst]
ibc.imm = signExtend2sCompl(instr.IMM())
if src != dst {
ibc.isrc = &vm.reg.r[src]
if (instr.Mod() % 4) != 0 {
ibc.memMask = ScratchpadL1Mask
} else {
ibc.memMask = ScratchpadL2Mask
}
} else {
ibc.isrc = &Zero
ibc.Opcode = VM_IMULH_MZ
ibc.memMask = ScratchpadL3Mask
ibc.imm = uint64(ibc.getScratchpadZeroAddress())
}
registerUsage[dst] = i
case 71, 72, 73, 74: //4
ibc.Opcode = VM_ISMULH_R
ibc.idst = &vm.reg.r[dst]
ibc.isrc = &vm.reg.r[src]
registerUsage[dst] = i
case 75: //1
ibc.Opcode = VM_ISMULH_M
ibc.idst = &vm.reg.r[dst]
ibc.imm = signExtend2sCompl(instr.IMM())
if src != dst {
ibc.isrc = &vm.reg.r[src]
if (instr.Mod() % 4) != 0 {
ibc.memMask = ScratchpadL1Mask
} else {
ibc.memMask = ScratchpadL2Mask
}
} else {
ibc.isrc = &Zero
ibc.Opcode = VM_ISMULH_MZ
ibc.memMask = ScratchpadL3Mask
ibc.imm = uint64(ibc.getScratchpadZeroAddress())
}
registerUsage[dst] = i
case 76, 77, 78, 79, 80, 81, 82, 83: // 8
divisor := instr.IMM()
if !isZeroOrPowerOf2(divisor) {
ibc.Opcode = VM_IMUL_R
ibc.idst = &vm.reg.r[dst]
ibc.Opcode = VM_IMUL_I
ibc.imm = randomx_reciprocal(divisor)
ibc.isrc = &ibc.imm
registerUsage[dst] = i
} else {
ibc.Opcode = VM_NOP
@ -290,66 +197,49 @@ func (vm *VM) Compile_TO_Bytecode() {
case 84, 85: //2
ibc.Opcode = VM_INEG_R
ibc.idst = &vm.reg.r[dst]
registerUsage[dst] = i
case 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100: //15
ibc.Opcode = VM_IXOR_R
ibc.idst = &vm.reg.r[dst]
if src != dst {
ibc.isrc = &vm.reg.r[src]
} else {
if src == dst {
ibc.imm = signExtend2sCompl(instr.IMM())
ibc.isrc = &ibc.imm // we are pointing within bytecode
ibc.Opcode = VM_IXOR_I
}
registerUsage[dst] = i
case 101, 102, 103, 104, 105: //5
ibc.Opcode = VM_IXOR_M
ibc.idst = &vm.reg.r[dst]
ibc.imm = signExtend2sCompl(instr.IMM())
if src != dst {
ibc.isrc = &vm.reg.r[src]
if (instr.Mod() % 4) != 0 {
ibc.memMask = ScratchpadL1Mask
} else {
ibc.memMask = ScratchpadL2Mask
}
} else {
ibc.isrc = &Zero
ibc.Opcode = VM_IXOR_MZ
ibc.memMask = ScratchpadL3Mask
ibc.imm = uint64(ibc.getScratchpadZeroAddress())
}
registerUsage[dst] = i
case 106, 107, 108, 109, 110, 111, 112, 113: //8
ibc.Opcode = VM_IROR_R
ibc.idst = &vm.reg.r[dst]
if src != dst {
ibc.isrc = &vm.reg.r[src]
} else {
if src == dst {
ibc.imm = signExtend2sCompl(instr.IMM())
ibc.isrc = &ibc.imm // we are pointing within bytecode
ibc.Opcode = VM_IROR_I
}
registerUsage[dst] = i
case 114, 115: // 2 IROL_R
ibc.Opcode = VM_IROL_R
ibc.idst = &vm.reg.r[dst]
if src != dst {
ibc.isrc = &vm.reg.r[src]
} else {
if src == dst {
ibc.imm = signExtend2sCompl(instr.IMM())
ibc.isrc = &ibc.imm // we are pointing within bytecode
ibc.Opcode = VM_IROL_I
}
registerUsage[dst] = i
case 116, 117, 118, 119: //4
if src != dst {
ibc.Opcode = VM_ISWAP_R
ibc.idst = &vm.reg.r[dst]
ibc.isrc = &vm.reg.r[src]
registerUsage[dst] = i
registerUsage[src] = i
} else {
@ -359,24 +249,21 @@ func (vm *VM) Compile_TO_Bytecode() {
// below are floating point instructions
case 120, 121, 122, 123: // 4
ibc.Opcode = VM_FSWAP_R
if dst < REGISTERCOUNTFLT {
ibc.fdst = &vm.reg.f[dst]
//ibc.Opcode = VM_FSWAP_R
if dst < RegistersCountFloat {
ibc.Opcode = VM_FSWAP_RF
} else {
ibc.fdst = &vm.reg.e[dst-REGISTERCOUNTFLT]
ibc.Opcode = VM_FSWAP_RE
ibc.dst = dst - RegistersCountFloat
}
case 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139: //16
dst := instr.Dst() % REGISTERCOUNTFLT // bit shift optimization
src := instr.Src() % REGISTERCOUNTFLT
ibc.dst = instr.Dst() % RegistersCountFloat // bit shift optimization
ibc.src = instr.Src() % RegistersCountFloat
ibc.Opcode = VM_FADD_R
ibc.fdst = &vm.reg.f[dst]
ibc.fsrc = &vm.reg.a[src]
case 140, 141, 142, 143, 144: //5
dst := instr.Dst() % REGISTERCOUNTFLT // bit shift optimization
ibc.dst = instr.Dst() % RegistersCountFloat // bit shift optimization
ibc.Opcode = VM_FADD_M
ibc.fdst = &vm.reg.f[dst]
ibc.isrc = &vm.reg.r[src]
if (instr.Mod() % 4) != 0 {
ibc.memMask = ScratchpadL1Mask
} else {
@ -385,16 +272,12 @@ func (vm *VM) Compile_TO_Bytecode() {
ibc.imm = signExtend2sCompl(instr.IMM())
case 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160: //16
dst := instr.Dst() % REGISTERCOUNTFLT // bit shift optimization
src := instr.Src() % REGISTERCOUNTFLT
ibc.dst = instr.Dst() % RegistersCountFloat // bit shift optimization
ibc.src = instr.Src() % RegistersCountFloat
ibc.Opcode = VM_FSUB_R
ibc.fdst = &vm.reg.f[dst]
ibc.fsrc = &vm.reg.a[src]
case 161, 162, 163, 164, 165: //5
dst := instr.Dst() % REGISTERCOUNTFLT // bit shift optimization
ibc.dst = instr.Dst() % RegistersCountFloat // bit shift optimization
ibc.Opcode = VM_FSUB_M
ibc.fdst = &vm.reg.f[dst]
ibc.isrc = &vm.reg.r[src]
if (instr.Mod() % 4) != 0 {
ibc.memMask = ScratchpadL1Mask
} else {
@ -403,20 +286,15 @@ func (vm *VM) Compile_TO_Bytecode() {
ibc.imm = signExtend2sCompl(instr.IMM())
case 166, 167, 168, 169, 170, 171: //6
dst := instr.Dst() % REGISTERCOUNTFLT // bit shift optimization
ibc.dst = instr.Dst() % RegistersCountFloat // bit shift optimization
ibc.Opcode = VM_FSCAL_R
ibc.fdst = &vm.reg.f[dst]
case 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203: //32
dst := instr.Dst() % REGISTERCOUNTFLT // bit shift optimization
src := instr.Src() % REGISTERCOUNTFLT
ibc.dst = instr.Dst() % RegistersCountFloat // bit shift optimization
ibc.src = instr.Src() % RegistersCountFloat
ibc.Opcode = VM_FMUL_R
ibc.fdst = &vm.reg.e[dst]
ibc.fsrc = &vm.reg.a[src]
case 204, 205, 206, 207: //4
dst := instr.Dst() % REGISTERCOUNTFLT // bit shift optimization
ibc.dst = instr.Dst() % RegistersCountFloat // bit shift optimization
ibc.Opcode = VM_FDIV_M
ibc.fdst = &vm.reg.e[dst]
ibc.isrc = &vm.reg.r[src]
if (instr.Mod() % 4) != 0 {
ibc.memMask = ScratchpadL1Mask
} else {
@ -424,15 +302,17 @@ func (vm *VM) Compile_TO_Bytecode() {
}
ibc.imm = signExtend2sCompl(instr.IMM())
case 208, 209, 210, 211, 212, 213: //6
dst := instr.Dst() % REGISTERCOUNTFLT // bit shift optimization
ibc.dst = instr.Dst() % RegistersCountFloat // bit shift optimization
ibc.Opcode = VM_FSQRT_R
ibc.fdst = &vm.reg.e[dst]
case 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238: //25 // CBRANCH and CFROUND are interchanged
ibc.Opcode = VM_CBRANCH
reg := instr.Dst() % REGISTERSCOUNT
ibc.isrc = &vm.reg.r[reg]
ibc.target = int16(registerUsage[reg])
ibc.src = instr.Dst() % RegistersCount
target := uint16(int16(registerUsage[ibc.src]))
ibc.dst = uint8(target)
ibc.immB = uint8(target >> 8)
shift := uint64(instr.Mod()>>4) + CONDITIONOFFSET
//conditionmask := CONDITIONMASK << shift
ibc.imm = signExtend2sCompl(instr.IMM()) | (uint64(1) << shift)
@ -441,19 +321,16 @@ func (vm *VM) Compile_TO_Bytecode() {
}
ibc.memMask = CONDITIONMASK << shift
for j := 0; j < REGISTERSCOUNT; j++ {
for j := 0; j < RegistersCount; j++ {
registerUsage[j] = i
}
case 239: //1
ibc.Opcode = VM_CFROUND
ibc.isrc = &vm.reg.r[src]
ibc.imm = uint64(instr.IMM() & 63)
case 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255: //16
ibc.Opcode = VM_ISTORE
ibc.idst = &vm.reg.r[dst]
ibc.isrc = &vm.reg.r[src]
ibc.imm = signExtend2sCompl(instr.IMM())
if (instr.Mod() >> 4) < STOREL3CONDITION {
if (instr.Mod() % 4) != 0 {
@ -474,142 +351,30 @@ func (vm *VM) Compile_TO_Bytecode() {
}
type InstructionByteCode struct {
dst, src byte
idst, isrc *uint64
fdst, fsrc *[2]float64
imm uint64
simm int64
Opcode VM_Instruction_Type
target int16
shift uint8
memMask uint32
/*
union {
int_reg_t* idst;
rx_vec_f128* fdst;
};
union {
int_reg_t* isrc;
rx_vec_f128* fsrc;
};
union {
uint64_t imm;
int64_t simm;
};
InstructionType type;
union {
int16_t target;
uint16_t shift;
};
uint32_t memMask;
*/
type ScratchPad [ScratchpadSize]byte
func (pad *ScratchPad) Init(seed *[64]byte) {
// calculate and fill scratchpad
clear(pad[:])
aes.FillAes1Rx4(seed, pad[:])
}
func (pad *ScratchPad) Store64(addr uint32, val uint64) {
*(*uint64)(unsafe.Pointer(&pad[addr])) = val
//binary.LittleEndian.PutUint64(pad[addr:], val)
}
func (pad *ScratchPad) Load64(addr uint32) uint64 {
return *(*uint64)(unsafe.Pointer(&pad[addr]))
}
func (pad *ScratchPad) Load32(addr uint32) uint32 {
return *(*uint32)(unsafe.Pointer(&pad[addr]))
}
func (ibc *InstructionByteCode) getScratchpadAddress() uint64 {
return (*ibc.isrc + ibc.imm) & uint64(ibc.memMask)
}
func (ibc *InstructionByteCode) getScratchpadDestAddress() uint64 {
return (*ibc.idst + ibc.imm) & uint64(ibc.memMask)
}
func (vm *VM) Load64(addr uint64) uint64 {
return *(*uint64)(unsafe.Pointer(&vm.ScratchPad[addr]))
}
func (vm *VM) Load32(addr uint64) uint32 {
return *(*uint32)(unsafe.Pointer(&vm.ScratchPad[addr]))
}
func (vm *VM) Load32F(addr uint64) (lo, hi float64) {
a := *(*[2]int32)(unsafe.Pointer(&vm.ScratchPad[addr]))
func (pad *ScratchPad) Load32F(addr uint32) (lo, hi float64) {
a := *(*[2]int32)(unsafe.Pointer(&pad[addr]))
return float64(a[LOW]), float64(a[HIGH])
}
func (vm *VM) Load32FA(addr uint64) [2]float64 {
a := *(*[2]int32)(unsafe.Pointer(&vm.ScratchPad[addr]))
func (pad *ScratchPad) Load32FA(addr uint32) [2]float64 {
a := *(*[2]int32)(unsafe.Pointer(&pad[addr]))
return [2]float64{float64(a[LOW]), float64(a[HIGH])}
}
func (vm *VM) InterpretByteCode() {
for pc := 0; pc < RANDOMX_PROGRAM_SIZE; pc++ {
ibc := &vm.ByteCode[pc]
switch ibc.Opcode {
case VM_IADD_RS:
*ibc.idst += (*ibc.isrc << ibc.shift) + ibc.imm
case VM_IADD_M:
*ibc.idst += vm.Load64(ibc.getScratchpadAddress())
case VM_ISUB_R:
*ibc.idst -= *ibc.isrc
case VM_ISUB_M:
*ibc.idst -= vm.Load64(ibc.getScratchpadAddress())
case VM_IMUL_R:
// also handles imul_rcp
*ibc.idst *= *ibc.isrc
case VM_IMUL_M:
*ibc.idst *= vm.Load64(ibc.getScratchpadAddress())
case VM_IMULH_R:
*ibc.idst, _ = bits.Mul64(*ibc.idst, *ibc.isrc)
case VM_IMULH_M:
*ibc.idst, _ = bits.Mul64(*ibc.idst, vm.Load64(ibc.getScratchpadAddress()))
case VM_ISMULH_R:
*ibc.idst = smulh(int64(*ibc.idst), int64(*ibc.isrc))
case VM_ISMULH_M:
*ibc.idst = smulh(int64(*ibc.idst), int64(vm.Load64(ibc.getScratchpadAddress())))
case VM_INEG_R:
*ibc.idst = (^(*ibc.idst)) + 1 // 2's complement negative
case VM_IXOR_R:
*ibc.idst ^= *ibc.isrc
case VM_IXOR_M:
*ibc.idst ^= vm.Load64(ibc.getScratchpadAddress())
case VM_IROR_R:
*ibc.idst = bits.RotateLeft64(*ibc.idst, 0-int(*ibc.isrc&63))
case VM_IROL_R:
*ibc.idst = bits.RotateLeft64(*ibc.idst, int(*ibc.isrc&63))
case VM_ISWAP_R:
*ibc.idst, *ibc.isrc = *ibc.isrc, *ibc.idst
case VM_FSWAP_R:
ibc.fdst[HIGH], ibc.fdst[LOW] = ibc.fdst[LOW], ibc.fdst[HIGH]
case VM_FADD_R:
ibc.fdst[LOW] += ibc.fsrc[LOW]
ibc.fdst[HIGH] += ibc.fsrc[HIGH]
case VM_FADD_M:
lo, hi := vm.Load32F(ibc.getScratchpadAddress())
ibc.fdst[LOW] += lo
ibc.fdst[HIGH] += hi
case VM_FSUB_R:
ibc.fdst[LOW] -= ibc.fsrc[LOW]
ibc.fdst[HIGH] -= ibc.fsrc[HIGH]
case VM_FSUB_M:
lo, hi := vm.Load32F(ibc.getScratchpadAddress())
ibc.fdst[LOW] -= lo
ibc.fdst[HIGH] -= hi
case VM_FSCAL_R:
// no dependent on rounding modes
ibc.fdst[LOW] = math.Float64frombits(math.Float64bits(ibc.fdst[LOW]) ^ 0x80F0000000000000)
ibc.fdst[HIGH] = math.Float64frombits(math.Float64bits(ibc.fdst[HIGH]) ^ 0x80F0000000000000)
case VM_FMUL_R:
ibc.fdst[LOW] *= ibc.fsrc[LOW]
ibc.fdst[HIGH] *= ibc.fsrc[HIGH]
case VM_FDIV_M:
lo, hi := vm.Load32F(ibc.getScratchpadAddress())
ibc.fdst[LOW] /= MaskRegisterExponentMantissa(lo, vm.config.eMask[LOW])
ibc.fdst[HIGH] /= MaskRegisterExponentMantissa(hi, vm.config.eMask[HIGH])
case VM_FSQRT_R:
ibc.fdst[LOW] = math.Sqrt(ibc.fdst[LOW])
ibc.fdst[HIGH] = math.Sqrt(ibc.fdst[HIGH])
case VM_CBRANCH:
*ibc.isrc += ibc.imm
if (*ibc.isrc & uint64(ibc.memMask)) == 0 {
pc = int(ibc.target)
}
case VM_CFROUND:
tmp := (bits.RotateLeft64(*ibc.isrc, 0-int(ibc.imm))) % 4 // rotate right
asm.SetRoundingMode(asm.RoundingMode(tmp))
case VM_ISTORE:
binary.LittleEndian.PutUint64(vm.ScratchPad[(*ibc.idst+ibc.imm)&uint64(ibc.memMask):], *ibc.isrc)
case VM_NOP: // we do nothing
}
}
}