Compare commits

...

4 commits

Author       SHA1        Message                                                    Date
DataHoarder  80f473de54  General cleanup of jit / VM / mmap usage                   2024-04-18 07:57:15 +02:00
                         (some checks failed: continuous-integration/drone/push and
                         continuous-integration/drone/tag builds are failing)
DataHoarder  fe253fb825  cleanup vm_instruction IMM with sign extension             2024-04-18 07:11:51 +02:00
DataHoarder  699ce02f2d  hash register file memory at once instead on loop calls   2024-04-17 09:53:24 +02:00
DataHoarder  b35751462b  hack: reserve stack on JIT call                            2024-04-17 09:40:54 +02:00
11 changed files with 295 additions and 184 deletions

View file

@ -84,12 +84,12 @@ func (cache *Randomx_Cache) Init(key []byte) {
}
const Mask = CacheSize/CacheLineSize - 1
// GetMixBlock fetch a 64 byte block in uint64 form
func (cache *Randomx_Cache) GetMixBlock(addr uint64) *RegisterLine {
mask := CacheSize/CacheLineSize - 1
addr = (addr & mask) * CacheLineSize
addr = (addr & Mask) * CacheLineSize
block := addr / 1024
return cache.Blocks[block].GetLine(addr % 1024)
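
As a standalone illustration of the mapping GetMixBlock performs (the cache size below is an assumed value for the example; the 64-byte line and 1 KiB block granularity come from the code above):

// Sketch only: exampleCacheSize is an assumed value, not the package constant.
const (
    exampleCacheSize     = 256 * 1024 * 1024
    exampleCacheLineSize = 64
    exampleMask          = exampleCacheSize/exampleCacheLineSize - 1
    exampleBlockSize     = 1024
)

// exampleMixBlockAddress mirrors GetMixBlock: reduce the 64-bit register
// value to a cache-line index, turn it into a byte offset, then split it
// into a 1 KiB block index and an offset inside that block.
func exampleMixBlockAddress(registerValue uint64) (block, offset uint64) {
    byteAddr := (registerValue & exampleMask) * exampleCacheLineSize
    return byteAddr / exampleBlockSize, byteAddr % exampleBlockSize
}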
@ -138,7 +138,7 @@ func (cache *Randomx_Cache) InitDatasetItemJIT(rl *RegisterLine, itemNumber uint
for i := 0; i < RANDOMX_CACHE_ACCESSES; i++ {
mix := cache.GetMixBlock(registerValue)
cache.JitPrograms[i].Execute(rl)
cache.JitPrograms[i].Execute(uintptr(unsafe.Pointer(rl)))
for q := range rl {
rl[q] ^= mix[q]

View file

@ -4,17 +4,28 @@ package randomx
import (
"golang.org/x/sys/unix"
"runtime"
"unsafe"
)
func (f ProgramFunc) Execute(rl *RegisterLine) {
func (f ProgramFunc) Execute(v uintptr) {
if f == nil {
panic("program is nil")
}
memoryPtr := &f
fun := *(*func(rl *RegisterLine))(unsafe.Pointer(&memoryPtr))
fun(rl)
var reservedStackHack [8 * 8]byte
for i := range reservedStackHack {
reservedStackHack[i] = uint8(i)
}
memoryPtr := &f
fun := *(*func(v uintptr))(unsafe.Pointer(&memoryPtr))
fun(v)
for i := range reservedStackHack {
reservedStackHack[i] = uint8(-i)
}
runtime.KeepAlive(reservedStackHack)
}
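
The mapping side is not part of this diff; for context, a ProgramFunc-style executable buffer can be created with golang.org/x/sys/unix roughly as in the sketch below (an illustration of the general mmap pattern, not the package's actual constructor):

// Sketch: map an anonymous read/write region, copy the generated machine
// code in, then flip it to read+execute before it is ever called (W^X).
func exampleMapProgram(code []byte) ([]byte, error) {
    mem, err := unix.Mmap(-1, 0, len(code),
        unix.PROT_READ|unix.PROT_WRITE,
        unix.MAP_PRIVATE|unix.MAP_ANONYMOUS)
    if err != nil {
        return nil, err
    }
    copy(mem, code)
    if err := unix.Mprotect(mem, unix.PROT_READ|unix.PROT_EXEC); err != nil {
        _ = unix.Munmap(mem)
        return nil, err
    }
    return mem, nil
}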
func (f ProgramFunc) Close() error {

jit_amd64.go (new file, 206 lines)
View file

@ -0,0 +1,206 @@
//go:build unix && amd64 && !disable_jit && !purego
package randomx
/*
REGISTER ALLOCATION:
; rax -> temporary
; rbx -> iteration counter "ic"
; rcx -> temporary
; rdx -> temporary
; rsi -> scratchpad pointer
; rdi -> dataset pointer
; rbp -> memory registers "ma" (high 32 bits), "mx" (low 32 bits)
; rsp -> stack pointer
; r8 -> "r0"
; r9 -> "r1"
; r10 -> "r2"
; r11 -> "r3"
; r12 -> "r4"
; r13 -> "r5"
; r14 -> "r6"
; r15 -> "r7"
; xmm0 -> "f0"
; xmm1 -> "f1"
; xmm2 -> "f2"
; xmm3 -> "f3"
; xmm4 -> "e0"
; xmm5 -> "e1"
; xmm6 -> "e2"
; xmm7 -> "e3"
; xmm8 -> "a0"
; xmm9 -> "a1"
; xmm10 -> "a2"
; xmm11 -> "a3"
; xmm12 -> temporary
; xmm13 -> E 'and' mask = 0x00ffffffffffffff00ffffffffffffff
; xmm14 -> E 'or' mask = 0x3*00000000******3*00000000******
; xmm15 -> scale mask = 0x81f000000000000081f0000000000000
*/
const MaxRandomXInstrCodeSize = 32 //FDIV_M requires up to 32 bytes of x86 code
const MaxSuperscalarInstrSize = 14 //IMUL_RCP requires 14 bytes of x86 code
const SuperscalarProgramHeader = 128 //overhead per superscalar program
const CodeAlign = 4096 //align code size to a multiple of 4 KiB
const ReserveCodeSize = CodeAlign //function prologue/epilogue + reserve
func alignSize[T ~uintptr | ~uint32 | ~uint64 | ~int64 | ~int32 | ~int](pos, align T) T {
return ((pos-1)/align + 1) * align
}
var RandomXCodeSize = alignSize[uint64](ReserveCodeSize+MaxRandomXInstrCodeSize*RANDOMX_PROGRAM_SIZE, CodeAlign)
var SuperscalarSize = alignSize[uint64](ReserveCodeSize+(SuperscalarProgramHeader+MaxSuperscalarInstrSize*SuperscalarMaxSize)*RANDOMX_CACHE_ACCESSES, CodeAlign)
var CodeSize = uint32(RandomXCodeSize + SuperscalarSize)
var superScalarHashOffset = int32(RandomXCodeSize)
var REX_ADD_RR = []byte{0x4d, 0x03}
var REX_ADD_RM = []byte{0x4c, 0x03}
var REX_SUB_RR = []byte{0x4d, 0x2b}
var REX_SUB_RM = []byte{0x4c, 0x2b}
var REX_MOV_RR = []byte{0x41, 0x8b}
var REX_MOV_RR64 = []byte{0x49, 0x8b}
var REX_MOV_R64R = []byte{0x4c, 0x8b}
var REX_IMUL_RR = []byte{0x4d, 0x0f, 0xaf}
var REX_IMUL_RRI = []byte{0x4d, 0x69}
var REX_IMUL_RM = []byte{0x4c, 0x0f, 0xaf}
var REX_MUL_R = []byte{0x49, 0xf7}
var REX_MUL_M = []byte{0x48, 0xf7}
var REX_81 = []byte{0x49, 0x81}
var AND_EAX_I byte = 0x25
var MOV_EAX_I byte = 0xb8
var MOV_RAX_I = []byte{0x48, 0xb8}
var MOV_RCX_I = []byte{0x48, 0xb9}
var REX_LEA = []byte{0x4f, 0x8d}
var REX_MUL_MEM = []byte{0x48, 0xf7, 0x24, 0x0e}
var REX_IMUL_MEM = []byte{0x48, 0xf7, 0x2c, 0x0e}
var REX_SHR_RAX = []byte{0x48, 0xc1, 0xe8}
var RAX_ADD_SBB_1 = []byte{0x48, 0x83, 0xC0, 0x01, 0x48, 0x83, 0xD8, 0x00}
var MUL_RCX = []byte{0x48, 0xf7, 0xe1}
var REX_SHR_RDX = []byte{0x48, 0xc1, 0xea}
var REX_SH = []byte{0x49, 0xc1}
var MOV_RCX_RAX_SAR_RCX_63 = []byte{0x48, 0x89, 0xc1, 0x48, 0xc1, 0xf9, 0x3f}
var AND_ECX_I = []byte{0x81, 0xe1}
var ADD_RAX_RCX = []byte{0x48, 0x01, 0xC8}
var SAR_RAX_I8 = []byte{0x48, 0xC1, 0xF8}
var NEG_RAX = []byte{0x48, 0xF7, 0xD8}
var ADD_R_RAX = []byte{0x4C, 0x03}
var XOR_EAX_EAX = []byte{0x33, 0xC0}
var ADD_RDX_R = []byte{0x4c, 0x01}
var SUB_RDX_R = []byte{0x4c, 0x29}
var SAR_RDX_I8 = []byte{0x48, 0xC1, 0xFA}
var TEST_RDX_RDX = []byte{0x48, 0x85, 0xD2}
var SETS_AL_ADD_RDX_RAX = []byte{0x0F, 0x98, 0xC0, 0x48, 0x03, 0xD0}
var REX_NEG = []byte{0x49, 0xF7}
var REX_XOR_RR = []byte{0x4D, 0x33}
var REX_XOR_RI = []byte{0x49, 0x81}
var REX_XOR_RM = []byte{0x4c, 0x33}
var REX_ROT_CL = []byte{0x49, 0xd3}
var REX_ROT_I8 = []byte{0x49, 0xc1}
var SHUFPD = []byte{0x66, 0x0f, 0xc6}
var REX_ADDPD = []byte{0x66, 0x41, 0x0f, 0x58}
var REX_CVTDQ2PD_XMM12 = []byte{0xf3, 0x44, 0x0f, 0xe6, 0x24, 0x06}
var REX_SUBPD = []byte{0x66, 0x41, 0x0f, 0x5c}
var REX_XORPS = []byte{0x41, 0x0f, 0x57}
var REX_MULPD = []byte{0x66, 0x41, 0x0f, 0x59}
var REX_MAXPD = []byte{0x66, 0x41, 0x0f, 0x5f}
var REX_DIVPD = []byte{0x66, 0x41, 0x0f, 0x5e}
var SQRTPD = []byte{0x66, 0x0f, 0x51}
var AND_OR_MOV_LDMXCSR = []byte{0x25, 0x00, 0x60, 0x00, 0x00, 0x0D, 0xC0, 0x9F, 0x00, 0x00, 0x50, 0x0F, 0xAE, 0x14, 0x24, 0x58}
var ROL_RAX = []byte{0x48, 0xc1, 0xc0}
var XOR_ECX_ECX = []byte{0x33, 0xC9}
var REX_CMP_R32I = []byte{0x41, 0x81}
var REX_CMP_M32I = []byte{0x81, 0x3c, 0x06}
var MOVAPD = []byte{0x66, 0x0f, 0x29}
var REX_MOV_MR = []byte{0x4c, 0x89}
var REX_XOR_EAX = []byte{0x41, 0x33}
var SUB_EBX = []byte{0x83, 0xEB, 0x01}
var JNZ = []byte{0x0f, 0x85}
var JMP = 0xe9
var REX_XOR_RAX_R64 = []byte{0x49, 0x33}
var REX_XCHG = []byte{0x4d, 0x87}
var REX_ANDPS_XMM12 = []byte{0x45, 0x0F, 0x54, 0xE5, 0x45, 0x0F, 0x56, 0xE6}
var REX_PADD = []byte{0x66, 0x44, 0x0f}
var PADD_OPCODES = []byte{0xfc, 0xfd, 0xfe, 0xd4}
var CALL = 0xe8
var REX_ADD_I = []byte{0x49, 0x81}
var REX_TEST = []byte{0x49, 0xF7}
var JZ = []byte{0x0f, 0x84}
var JZ_SHORT = 0x74
var RET byte = 0xc3
var LEA_32 = []byte{0x41, 0x8d}
var MOVNTI = []byte{0x4c, 0x0f, 0xc3}
var ADD_EBX_I = []byte{0x81, 0xc3}
var NOP1 = []byte{0x90}
var NOP2 = []byte{0x66, 0x90}
var NOP3 = []byte{0x66, 0x66, 0x90}
var NOP4 = []byte{0x0F, 0x1F, 0x40, 0x00}
var NOP5 = []byte{0x0F, 0x1F, 0x44, 0x00, 0x00}
var NOP6 = []byte{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00}
var NOP7 = []byte{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00}
var NOP8 = []byte{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}
func genSIB(scale, index, base int) byte {
return byte((scale << 6) | (index << 3) | base)
}
/*
push rbp
push rbx
push rsi
push r12
push r13
push r14
push r15
mov rbp,rsp
sub rsp,(0x8*7)
mov rsi, rax; # register dataset
prefetchnta byte ptr [rsi]
mov r8, qword ptr [rsi+0]
mov r9, qword ptr [rsi+8]
mov r10, qword ptr [rsi+16]
mov r11, qword ptr [rsi+24]
mov r12, qword ptr [rsi+32]
mov r13, qword ptr [rsi+40]
mov r14, qword ptr [rsi+48]
mov r15, qword ptr [rsi+56]
*/
var codeInitBlock = []byte{0x55, 0x53, 0x56, 0x41, 0x54, 0x41, 0x55, 0x41, 0x56, 0x41, 0x57, 0x48, 0x89, 0xE5, 0x48, 0x83, 0xEC, 0x38, 0x48, 0x89, 0xC6, 0x0F, 0x18, 0x06, 0x4C, 0x8B, 0x06, 0x4C, 0x8B, 0x4E, 0x08, 0x4C, 0x8B, 0x56, 0x10, 0x4C, 0x8B, 0x5E, 0x18, 0x4C, 0x8B, 0x66, 0x20, 0x4C, 0x8B, 0x6E, 0x28, 0x4C, 0x8B, 0x76, 0x30, 0x4C, 0x8B, 0x7E, 0x38}
/*
prefetchw byte ptr [rsi]
mov qword ptr [rsi+0], r8
mov qword ptr [rsi+8], r9
mov qword ptr [rsi+16], r10
mov qword ptr [rsi+24], r11
mov qword ptr [rsi+32], r12
mov qword ptr [rsi+40], r13
mov qword ptr [rsi+48], r14
mov qword ptr [rsi+56], r15
add rsp,(0x8*7)
pop r15
pop r14
pop r13
pop r12
pop rsi
pop rbx
pop rbp
ret
*/
var codeRetBlock = []byte{0x0F, 0x0D, 0x0E, 0x4C, 0x89, 0x06, 0x4C, 0x89, 0x4E, 0x08, 0x4C, 0x89, 0x56, 0x10, 0x4C, 0x89, 0x5E, 0x18, 0x4C, 0x89, 0x66, 0x20, 0x4C, 0x89, 0x6E, 0x28, 0x4C, 0x89, 0x76, 0x30, 0x4C, 0x89, 0x7E, 0x38, 0x48, 0x83, 0xC4, 0x38, 0x41, 0x5F, 0x41, 0x5E, 0x41, 0x5D, 0x41, 0x5C, 0x5E, 0x5B, 0x5D, 0xC3}
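
A quick worked illustration of the two helpers in this file (values chosen only for the example, assuming it compiles in the same package):

func exampleJITHelpers() {
    // alignSize rounds a size up to the next multiple of align (4 KiB pages here).
    _ = alignSize[uint64](1, 4096)    // 4096
    _ = alignSize[uint64](4096, 4096) // 4096
    _ = alignSize[uint64](4097, 4096) // 8192

    // genSIB packs the x86 SIB byte: scale (2 bits), index (3 bits), base (3 bits).
    // index = 4 (none) and base = 4 (rsp) give 0x24, the usual [rsp] SIB byte.
    _ = genSIB(0, 4, 4) // 0x24
}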

View file

@ -1,5 +1,7 @@
package randomx
import "unsafe"
const RegistersCount = 8
const RegistersCountFloat = 4
@ -17,6 +19,12 @@ type RegisterFile struct {
FPRC uint8
}
const RegisterFileSize = RegistersCount*8 + RegistersCountFloat*2*8*3
func (rf *RegisterFile) Memory() *[RegisterFileSize]byte {
return (*[RegisterFileSize]byte)(unsafe.Pointer(rf))
}
type MemoryRegisters struct {
mx, ma uint64
}
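
Memory() reinterprets the register file as raw bytes so callers can hash it in one Write instead of serializing each register. A minimal usage sketch, assuming a little-endian host so the raw bytes match the previous per-register little-endian encoding, and the golang.org/x/crypto/blake2b import already used in vm.go:

// Sketch: hash the whole register file at once (see the vm.go changes below).
func exampleHashRegisterFile(rf *RegisterFile) [64]byte {
    return blake2b.Sum512(rf.Memory()[:])
}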

View file

@ -766,8 +766,6 @@ func selectRegister(available_registers []int, gen *Blake2Generator, reg *int) b
return true
}
const Mask = CacheSize/CacheLineSize - 1
// executeSuperscalar execute the superscalar program
func executeSuperscalar(p []SuperScalarInstruction, r *RegisterLine) {
@ -813,7 +811,7 @@ func randomx_reciprocal(divisor uint32) uint64 {
quotient := p2exp63 / uint64(divisor)
remainder := p2exp63 % uint64(divisor)
shift := uint32(bits.Len32(divisor))
shift := bits.Len32(divisor)
return (quotient << shift) + ((remainder << shift) / uint64(divisor))
}
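
For reference, a worked example of what the function above computes (the dropped uint32 cast was redundant because bits.Len32 already returns an int that fits the shift):

func exampleReciprocal() uint64 {
    // floor(2^(63+bits.Len32(3)) / 3), built without 128-bit arithmetic:
    //   quotient  = 2^63 / 3 = 3074457345618258602
    //   remainder = 2^63 % 3 = 2
    //   shift     = bits.Len32(3) = 2
    //   result    = quotient<<2 + (2<<2)/3 = 12297829382473034410 (0xAAAAAAAAAAAAAAAA)
    return randomx_reciprocal(3)
}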

View file

@ -6,74 +6,6 @@ import (
"encoding/binary"
)
var REX_SUB_RR = []byte{0x4d, 0x2b}
var REX_MOV_RR64 = []byte{0x49, 0x8b}
var REX_MOV_R64R = []byte{0x4c, 0x8b}
var REX_IMUL_RR = []byte{0x4d, 0x0f, 0xaf}
var REX_IMUL_RM = []byte{0x4c, 0x0f, 0xaf}
var REX_MUL_R = []byte{0x49, 0xf7}
var REX_81 = []byte{0x49, 0x81}
var MOV_RAX_I = []byte{0x48, 0xb8}
var REX_LEA = []byte{0x4f, 0x8d}
var REX_XOR_RR = []byte{0x4D, 0x33}
var REX_XOR_RI = []byte{0x49, 0x81}
var REX_ROT_I8 = []byte{0x49, 0xc1}
func genSIB(scale, index, base int) byte {
return byte((scale << 6) | (index << 3) | base)
}
/*
push rbp
push rbx
push rsi
push r12
push r13
push r14
push r15
mov rbp,rsp
sub rsp,(0x8*7)
mov rsi, rax; # register dataset
prefetchnta byte ptr [rsi]
mov r8, qword ptr [rsi+0]
mov r9, qword ptr [rsi+8]
mov r10, qword ptr [rsi+16]
mov r11, qword ptr [rsi+24]
mov r12, qword ptr [rsi+32]
mov r13, qword ptr [rsi+40]
mov r14, qword ptr [rsi+48]
mov r15, qword ptr [rsi+56]
*/
var codeInitBlock = []byte{0x55, 0x53, 0x56, 0x41, 0x54, 0x41, 0x55, 0x41, 0x56, 0x41, 0x57, 0x48, 0x89, 0xE5, 0x48, 0x83, 0xEC, 0x38, 0x48, 0x89, 0xC6, 0x0F, 0x18, 0x06, 0x4C, 0x8B, 0x06, 0x4C, 0x8B, 0x4E, 0x08, 0x4C, 0x8B, 0x56, 0x10, 0x4C, 0x8B, 0x5E, 0x18, 0x4C, 0x8B, 0x66, 0x20, 0x4C, 0x8B, 0x6E, 0x28, 0x4C, 0x8B, 0x76, 0x30, 0x4C, 0x8B, 0x7E, 0x38}
/*
prefetchw byte ptr [rsi]
mov qword ptr [rsi+0], r8
mov qword ptr [rsi+8], r9
mov qword ptr [rsi+16], r10
mov qword ptr [rsi+24], r11
mov qword ptr [rsi+32], r12
mov qword ptr [rsi+40], r13
mov qword ptr [rsi+48], r14
mov qword ptr [rsi+56], r15
add rsp,(0x8*7)
pop r15
pop r14
pop r13
pop r12
pop rsi
pop rbx
pop rbp
ret
*/
var codeRetBlock = []byte{0x0F, 0x0D, 0x0E, 0x4C, 0x89, 0x06, 0x4C, 0x89, 0x4E, 0x08, 0x4C, 0x89, 0x56, 0x10, 0x4C, 0x89, 0x5E, 0x18, 0x4C, 0x89, 0x66, 0x20, 0x4C, 0x89, 0x6E, 0x28, 0x4C, 0x89, 0x76, 0x30, 0x4C, 0x89, 0x7E, 0x38, 0x48, 0x83, 0xC4, 0x38, 0x41, 0x5F, 0x41, 0x5E, 0x41, 0x5D, 0x41, 0x5C, 0x5E, 0x5B, 0x5D, 0xC3}
// generateSuperscalarCode
func generateSuperscalarCode(scalarProgram SuperScalarProgram) ProgramFunc {

vm.go (116 changed lines)
View file

@ -35,7 +35,6 @@ import (
"runtime"
"unsafe"
)
import "encoding/binary"
import "golang.org/x/crypto/blake2b"
type REG struct {
@ -44,29 +43,14 @@ type REG struct {
}
type VM struct {
StateStart [64]byte
ScratchPad ScratchPad
ByteCode ByteCode
mem MemoryRegisters
config Config // configuration
datasetOffset uint64
Dataset Randomx_Dataset
Cache *Randomx_Cache // randomx cache
}
type Config struct {
eMask [2]uint64
readReg [4]uint64
}
// Run calculate hash based on input
// Warning: Underlying callers will run asm.SetRoundingMode directly
// It is the caller's responsibility to set and restore the mode to softfloat64.RoundingModeToNearest between full executions
// Warning: Underlying callers will run float64 SetRoundingMode directly
// It is the caller's responsibility to set and restore the mode to IEEE 754 roundTiesToEven between full executions
// Additionally, runtime.LockOSThread and defer runtime.UnlockOSThread is recommended to prevent other goroutines sharing these changes
func (vm *VM) Run(inputHash [64]byte, roundingMode uint8) (reg RegisterFile) {
@ -86,28 +70,33 @@ func (vm *VM) Run(inputHash [64]byte, roundingMode uint8) (reg RegisterFile) {
reg.A[i/2][i%2] = SmallPositiveFloatBits(entropy[i])
}
vm.mem.ma = entropy[8] & CacheLineAlignMask
vm.mem.mx = entropy[10]
var mem MemoryRegisters
mem.ma = entropy[8] & CacheLineAlignMask
mem.mx = entropy[10]
addressRegisters := entropy[12]
for i := range vm.config.readReg {
vm.config.readReg[i] = uint64(i*2) + (addressRegisters & 1)
var readReg [4]uint64
for i := range readReg {
readReg[i] = uint64(i*2) + (addressRegisters & 1)
addressRegisters >>= 1
}
vm.datasetOffset = (entropy[13] % (DATASETEXTRAITEMS + 1)) * CacheLineSize
vm.config.eMask[LOW] = EMask(entropy[14])
vm.config.eMask[HIGH] = EMask(entropy[15])
datasetOffset := (entropy[13] % (DATASETEXTRAITEMS + 1)) * CacheLineSize
vm.ByteCode = CompileProgramToByteCode(prog)
eMask := [2]uint64{EMask(entropy[14]), EMask(entropy[15])}
spAddr0 := vm.mem.mx
spAddr1 := vm.mem.ma
byteCode := CompileProgramToByteCode(prog)
spAddr0 := mem.mx
spAddr1 := mem.ma
var rlCache RegisterLine
for ic := 0; ic < RANDOMX_PROGRAM_ITERATIONS; ic++ {
spMix := reg.R[vm.config.readReg[0]] ^ reg.R[vm.config.readReg[1]]
spMix := reg.R[readReg[0]] ^ reg.R[readReg[1]]
spAddr0 ^= spMix
spAddr0 &= ScratchpadL3Mask64
@ -126,22 +115,22 @@ func (vm *VM) Run(inputHash [64]byte, roundingMode uint8) (reg RegisterFile) {
for i := uint64(0); i < RegistersCountFloat; i++ {
reg.E[i] = vm.ScratchPad.Load32FA(uint32(spAddr1 + 8*(i+RegistersCountFloat)))
reg.E[i][LOW] = MaskRegisterExponentMantissa(reg.E[i][LOW], vm.config.eMask[LOW])
reg.E[i][HIGH] = MaskRegisterExponentMantissa(reg.E[i][HIGH], vm.config.eMask[HIGH])
reg.E[i][LOW] = MaskRegisterExponentMantissa(reg.E[i][LOW], eMask[LOW])
reg.E[i][HIGH] = MaskRegisterExponentMantissa(reg.E[i][HIGH], eMask[HIGH])
}
// Run the actual bytecode
vm.ByteCode.Execute(&reg, &vm.ScratchPad, vm.config.eMask)
byteCode.Execute(&reg, &vm.ScratchPad, eMask)
vm.mem.mx ^= reg.R[vm.config.readReg[2]] ^ reg.R[vm.config.readReg[3]]
vm.mem.mx &= CacheLineAlignMask
mem.mx ^= reg.R[readReg[2]] ^ reg.R[readReg[3]]
mem.mx &= CacheLineAlignMask
vm.Dataset.PrefetchDataset(vm.datasetOffset + vm.mem.mx)
vm.Dataset.PrefetchDataset(datasetOffset + mem.mx)
// execute diffuser superscalar program to get dataset 64 bytes
vm.Dataset.ReadDataset(vm.datasetOffset+vm.mem.ma, &reg.R, &rlCache)
vm.Dataset.ReadDataset(datasetOffset+mem.ma, &reg.R, &rlCache)
// swap the elements
vm.mem.mx, vm.mem.ma = vm.mem.ma, vm.mem.mx
mem.mx, mem.ma = mem.ma, mem.mx
for i := uint64(0); i < RegistersCount; i++ {
vm.ScratchPad.Store64(uint32(spAddr1+8*i), reg.R[i])
@ -169,8 +158,6 @@ func (vm *VM) InitScratchpad(seed *[64]byte) {
}
func (vm *VM) RunLoops(tempHash [64]byte) RegisterFile {
var buf [8]byte
hash512, _ := blake2b.New512(nil)
// Lock thread due to rounding mode flags
@ -184,30 +171,10 @@ func (vm *VM) RunLoops(tempHash [64]byte) RegisterFile {
roundingMode = reg.FPRC
hash512.Reset()
for i := range reg.R {
binary.LittleEndian.PutUint64(buf[:], reg.R[i])
hash512.Write(buf[:])
}
for i := range reg.F {
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(reg.F[i][LOW]))
hash512.Write(buf[:])
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(reg.F[i][HIGH]))
hash512.Write(buf[:])
}
for i := range reg.E {
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(reg.E[i][LOW]))
hash512.Write(buf[:])
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(reg.E[i][HIGH]))
hash512.Write(buf[:])
}
for i := range reg.A {
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(reg.A[i][LOW]))
hash512.Write(buf[:])
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(reg.A[i][HIGH]))
hash512.Write(buf[:])
}
// write R, F, E, A registers
hash512.Write(reg.Memory()[:])
runtime.KeepAlive(reg)
hash512.Sum(tempHash[:0])
}
@ -217,14 +184,12 @@ func (vm *VM) RunLoops(tempHash [64]byte) RegisterFile {
roundingMode = reg.FPRC
//restore rounding mode
vm.ByteCode.SetRoundingMode(&reg, 0)
SetRoundingMode(&reg, 0)
return reg
}
func (vm *VM) CalculateHash(input []byte, output *[32]byte) {
var buf [8]byte
tempHash := blake2b.Sum512(input)
vm.InitScratchpad(&tempHash)
@ -238,24 +203,9 @@ func (vm *VM) CalculateHash(input []byte, output *[32]byte) {
hash256.Reset()
for i := range reg.R {
binary.LittleEndian.PutUint64(buf[:], reg.R[i])
hash256.Write(buf[:])
}
for i := range reg.F {
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(reg.F[i][LOW]))
hash256.Write(buf[:])
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(reg.F[i][HIGH]))
hash256.Write(buf[:])
}
for i := range reg.E {
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(reg.E[i][LOW]))
hash256.Write(buf[:])
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(reg.E[i][HIGH]))
hash256.Write(buf[:])
}
// write R, F, E registers
hash256.Write(reg.Memory()[:RegisterFileSize-RegistersCountFloat*2*8])
runtime.KeepAlive(reg)
// copy tempHash as it first copied to register and then hashed
hash256.Write(tempHash[:])
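
A sketch of the caller discipline the Run warning above asks for (hypothetical usage, not part of this diff; assumes the runtime import already present in vm.go):

// Pin the OS thread while the FPU rounding mode is being changed, and put it
// back to round-to-nearest-even (mode 0) before anything else runs on it.
func exampleRunOnce(vm *VM, inputHash [64]byte) RegisterFile {
    runtime.LockOSThread()
    defer runtime.UnlockOSThread()

    reg := vm.Run(inputHash, 0) // start from roundTiesToEven
    SetRoundingMode(&reg, 0)    // restore the mode for later executions
    return reg
}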

View file

@ -6,6 +6,7 @@ type ByteCodeInstruction struct {
Opcode ByteCodeInstructionOp
MemMask uint32
Imm uint64
EMask uint64
/*
union {
int_reg_t* idst;

View file

@ -108,7 +108,7 @@ func (c *ByteCode) Execute(f *RegisterFile, pad *ScratchPad, eMask [2]uint64) {
f.E[i.Dst][HIGH] = math.Sqrt(f.E[i.Dst][HIGH])
case VM_CFROUND:
tmp := (bits.RotateLeft64(f.R[i.Src], 0-int(i.Imm))) % 4 // rotate right
c.SetRoundingMode(f, uint8(tmp))
SetRoundingMode(f, uint8(tmp))
case VM_CBRANCH:
f.R[i.Src] += i.Imm
@ -121,7 +121,7 @@ func (c *ByteCode) Execute(f *RegisterFile, pad *ScratchPad, eMask [2]uint64) {
}
}
func (c *ByteCode) SetRoundingMode(f *RegisterFile, mode uint8) {
func SetRoundingMode(f *RegisterFile, mode uint8) {
if f.FPRC == mode {
return
}

View file

@ -8,8 +8,8 @@ import (
)
// Execute Runs a RandomX program with the given register file and scratchpad
// Warning: This will call asm.SetRoundingMode directly
// It is the caller's responsibility to set and restore the mode to softfloat64.RoundingModeToNearest between full executions
// Warning: This will call float64 SetRoundingMode directly
// It is the caller's responsibility to set and restore the mode to IEEE 754 roundTiesToEven between full executions
// Additionally, runtime.LockOSThread and defer runtime.UnlockOSThread is recommended to prevent other goroutines sharing these changes
func (c *ByteCode) Execute(f *RegisterFile, pad *ScratchPad, eMask [2]uint64) {
for pc := 0; pc < RANDOMX_PROGRAM_SIZE; pc++ {
@ -120,6 +120,6 @@ func (c *ByteCode) Execute(f *RegisterFile, pad *ScratchPad, eMask [2]uint64) {
}
}
func (c *ByteCode) SetRoundingMode(f *RegisterFile, mode uint8) {
func SetRoundingMode(f *RegisterFile, mode uint8) {
f.FPRC = mode
}

View file

@ -43,6 +43,11 @@ type VM_Instruction [8]byte // it is hardcode 8 bytes
func (ins VM_Instruction) IMM() uint32 {
return binary.LittleEndian.Uint32(ins[4:])
}
func (ins VM_Instruction) IMM64() uint64 {
return signExtend2sCompl(ins.IMM())
}
func (ins VM_Instruction) Mod() byte {
return ins[3]
}
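
IMM64 is a convenience wrapper around the existing signExtend2sCompl helper; for reference, 32-to-64-bit two's-complement sign extension is just a widening through int32 (a sketch, assuming the helper behaves as its name says):

func exampleSignExtend(imm uint32) uint64 {
    // Reinterpret as signed 32-bit, widen to 64-bit, reinterpret as unsigned.
    return uint64(int64(int32(imm)))
}

// exampleSignExtend(0xFFFFFFFF) == 0xFFFFFFFFFFFFFFFF (-1)
// exampleSignExtend(0x7FFFFFFF) == 0x000000007FFFFFFF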
@ -84,13 +89,13 @@ func CompileProgramToByteCode(prog []byte) (bc ByteCode) {
} else {
//shift
ibc.ImmB = (instr.Mod() >> 2) % 4
ibc.Imm = signExtend2sCompl(instr.IMM())
ibc.Imm = instr.IMM64()
}
registerUsage[dst] = i
case 16, 17, 18, 19, 20, 21, 22: // 7
ibc.Opcode = VM_IADD_M
ibc.Imm = signExtend2sCompl(instr.IMM())
ibc.Imm = instr.IMM64()
if src != dst {
if (instr.Mod() % 4) != 0 {
ibc.MemMask = ScratchpadL1Mask
@ -107,13 +112,13 @@ func CompileProgramToByteCode(prog []byte) (bc ByteCode) {
ibc.Opcode = VM_ISUB_R
if src == dst {
ibc.Imm = signExtend2sCompl(instr.IMM())
ibc.Imm = instr.IMM64()
ibc.Opcode = VM_ISUB_I
}
registerUsage[dst] = i
case 39, 40, 41, 42, 43, 44, 45: // 7
ibc.Opcode = VM_ISUB_M
ibc.Imm = signExtend2sCompl(instr.IMM())
ibc.Imm = instr.IMM64()
if src != dst {
if (instr.Mod() % 4) != 0 {
ibc.MemMask = ScratchpadL1Mask
@ -130,13 +135,13 @@ func CompileProgramToByteCode(prog []byte) (bc ByteCode) {
ibc.Opcode = VM_IMUL_R
if src == dst {
ibc.Imm = signExtend2sCompl(instr.IMM())
ibc.Imm = instr.IMM64()
ibc.Opcode = VM_IMUL_I
}
registerUsage[dst] = i
case 62, 63, 64, 65: //4
ibc.Opcode = VM_IMUL_M
ibc.Imm = signExtend2sCompl(instr.IMM())
ibc.Imm = instr.IMM64()
if src != dst {
if (instr.Mod() % 4) != 0 {
ibc.MemMask = ScratchpadL1Mask
@ -154,7 +159,7 @@ func CompileProgramToByteCode(prog []byte) (bc ByteCode) {
registerUsage[dst] = i
case 70: //1
ibc.Opcode = VM_IMULH_M
ibc.Imm = signExtend2sCompl(instr.IMM())
ibc.Imm = instr.IMM64()
if src != dst {
if (instr.Mod() % 4) != 0 {
ibc.MemMask = ScratchpadL1Mask
@ -172,7 +177,7 @@ func CompileProgramToByteCode(prog []byte) (bc ByteCode) {
registerUsage[dst] = i
case 75: //1
ibc.Opcode = VM_ISMULH_M
ibc.Imm = signExtend2sCompl(instr.IMM())
ibc.Imm = instr.IMM64()
if src != dst {
if (instr.Mod() % 4) != 0 {
ibc.MemMask = ScratchpadL1Mask
@ -202,13 +207,13 @@ func CompileProgramToByteCode(prog []byte) (bc ByteCode) {
ibc.Opcode = VM_IXOR_R
if src == dst {
ibc.Imm = signExtend2sCompl(instr.IMM())
ibc.Imm = instr.IMM64()
ibc.Opcode = VM_IXOR_I
}
registerUsage[dst] = i
case 101, 102, 103, 104, 105: //5
ibc.Opcode = VM_IXOR_M
ibc.Imm = signExtend2sCompl(instr.IMM())
ibc.Imm = instr.IMM64()
if src != dst {
if (instr.Mod() % 4) != 0 {
ibc.MemMask = ScratchpadL1Mask
@ -224,7 +229,7 @@ func CompileProgramToByteCode(prog []byte) (bc ByteCode) {
case 106, 107, 108, 109, 110, 111, 112, 113: //8
ibc.Opcode = VM_IROR_R
if src == dst {
ibc.Imm = signExtend2sCompl(instr.IMM())
ibc.Imm = instr.IMM64()
ibc.Opcode = VM_IROR_I
}
registerUsage[dst] = i
@ -232,7 +237,7 @@ func CompileProgramToByteCode(prog []byte) (bc ByteCode) {
ibc.Opcode = VM_IROL_R
if src == dst {
ibc.Imm = signExtend2sCompl(instr.IMM())
ibc.Imm = instr.IMM64()
ibc.Opcode = VM_IROL_I
}
registerUsage[dst] = i
@ -269,7 +274,7 @@ func CompileProgramToByteCode(prog []byte) (bc ByteCode) {
} else {
ibc.MemMask = ScratchpadL2Mask
}
ibc.Imm = signExtend2sCompl(instr.IMM())
ibc.Imm = instr.IMM64()
case 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160: //16
ibc.Dst = instr.Dst() % RegistersCountFloat // bit shift optimization
@ -283,7 +288,7 @@ func CompileProgramToByteCode(prog []byte) (bc ByteCode) {
} else {
ibc.MemMask = ScratchpadL2Mask
}
ibc.Imm = signExtend2sCompl(instr.IMM())
ibc.Imm = instr.IMM64()
case 166, 167, 168, 169, 170, 171: //6
ibc.Dst = instr.Dst() % RegistersCountFloat // bit shift optimization
@ -300,7 +305,7 @@ func CompileProgramToByteCode(prog []byte) (bc ByteCode) {
} else {
ibc.MemMask = ScratchpadL2Mask
}
ibc.Imm = signExtend2sCompl(instr.IMM())
ibc.Imm = instr.IMM64()
case 208, 209, 210, 211, 212, 213: //6
ibc.Dst = instr.Dst() % RegistersCountFloat // bit shift optimization
ibc.Opcode = VM_FSQRT_R
@ -315,7 +320,7 @@ func CompileProgramToByteCode(prog []byte) (bc ByteCode) {
shift := uint64(instr.Mod()>>4) + CONDITIONOFFSET
//conditionmask := CONDITIONMASK << shift
ibc.Imm = signExtend2sCompl(instr.IMM()) | (uint64(1) << shift)
ibc.Imm = instr.IMM64() | (uint64(1) << shift)
if CONDITIONOFFSET > 0 || shift > 0 {
ibc.Imm &= ^(uint64(1) << (shift - 1))
}
@ -331,7 +336,7 @@ func CompileProgramToByteCode(prog []byte) (bc ByteCode) {
case 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255: //16
ibc.Opcode = VM_ISTORE
ibc.Imm = signExtend2sCompl(instr.IMM())
ibc.Imm = instr.IMM64()
if (instr.Mod() >> 4) < STOREL3CONDITION {
if (instr.Mod() % 4) != 0 {
ibc.MemMask = ScratchpadL1Mask