General cleanup of jit / VM / mmap usage
This commit is contained in:
parent
fe253fb825
commit
80f473de54
2
cache.go
2
cache.go
|
@ -138,7 +138,7 @@ func (cache *Randomx_Cache) InitDatasetItemJIT(rl *RegisterLine, itemNumber uint
|
|||
for i := 0; i < RANDOMX_CACHE_ACCESSES; i++ {
|
||||
mix := cache.GetMixBlock(registerValue)
|
||||
|
||||
cache.JitPrograms[i].Execute(rl)
|
||||
cache.JitPrograms[i].Execute(uintptr(unsafe.Pointer(rl)))
|
||||
|
||||
for q := range rl {
|
||||
rl[q] ^= mix[q]
|
||||
|
|
|
@ -8,7 +8,7 @@ import (
|
|||
"unsafe"
|
||||
)
|
||||
|
||||
func (f ProgramFunc) Execute(rl *RegisterLine) {
|
||||
func (f ProgramFunc) Execute(v uintptr) {
|
||||
if f == nil {
|
||||
panic("program is nil")
|
||||
}
|
||||
|
@ -19,8 +19,8 @@ func (f ProgramFunc) Execute(rl *RegisterLine) {
|
|||
}
|
||||
|
||||
memoryPtr := &f
|
||||
fun := *(*func(rl *RegisterLine))(unsafe.Pointer(&memoryPtr))
|
||||
fun(rl)
|
||||
fun := *(*func(v uintptr))(unsafe.Pointer(&memoryPtr))
|
||||
fun(v)
|
||||
|
||||
for i := range reservedStackHack {
|
||||
reservedStackHack[i] = uint8(-i)
|
||||
|
|
206
jit_amd64.go
Normal file
206
jit_amd64.go
Normal file
|
@ -0,0 +1,206 @@
|
|||
//go:build unix && amd64 && !disable_jit && !purego
|
||||
|
||||
package randomx
|
||||
|
||||
/*
|
||||
|
||||
REGISTER ALLOCATION:
|
||||
|
||||
; rax -> temporary
|
||||
; rbx -> iteration counter "ic"
|
||||
; rcx -> temporary
|
||||
; rdx -> temporary
|
||||
; rsi -> scratchpad pointer
|
||||
; rdi -> dataset pointer
|
||||
; rbp -> memory registers "ma" (high 32 bits), "mx" (low 32 bits)
|
||||
; rsp -> stack pointer
|
||||
; r8 -> "r0"
|
||||
; r9 -> "r1"
|
||||
; r10 -> "r2"
|
||||
; r11 -> "r3"
|
||||
; r12 -> "r4"
|
||||
; r13 -> "r5"
|
||||
; r14 -> "r6"
|
||||
; r15 -> "r7"
|
||||
; xmm0 -> "f0"
|
||||
; xmm1 -> "f1"
|
||||
; xmm2 -> "f2"
|
||||
; xmm3 -> "f3"
|
||||
; xmm4 -> "e0"
|
||||
; xmm5 -> "e1"
|
||||
; xmm6 -> "e2"
|
||||
; xmm7 -> "e3"
|
||||
; xmm8 -> "a0"
|
||||
; xmm9 -> "a1"
|
||||
; xmm10 -> "a2"
|
||||
; xmm11 -> "a3"
|
||||
; xmm12 -> temporary
|
||||
; xmm13 -> E 'and' mask = 0x00ffffffffffffff00ffffffffffffff
|
||||
; xmm14 -> E 'or' mask = 0x3*00000000******3*00000000******
|
||||
; xmm15 -> scale mask = 0x81f000000000000081f0000000000000
|
||||
|
||||
*/
|
||||
|
||||
const MaxRandomXInstrCodeSize = 32 //FDIV_M requires up to 32 bytes of x86 code
|
||||
const MaxSuperscalarInstrSize = 14 //IMUL_RCP requires 14 bytes of x86 code
|
||||
const SuperscalarProgramHeader = 128 //overhead per superscalar program
|
||||
const CodeAlign = 4096 //align code size to a multiple of 4 KiB
|
||||
const ReserveCodeSize = CodeAlign //function prologue/epilogue + reserve
|
||||
|
||||
func alignSize[T ~uintptr | ~uint32 | ~uint64 | ~int64 | ~int32 | ~int](pos, align T) T {
|
||||
return ((pos-1)/align + 1) * align
|
||||
}
|
||||
|
||||
var RandomXCodeSize = alignSize[uint64](ReserveCodeSize+MaxRandomXInstrCodeSize*RANDOMX_PROGRAM_SIZE, CodeAlign)
|
||||
var SuperscalarSize = alignSize[uint64](ReserveCodeSize+(SuperscalarProgramHeader+MaxSuperscalarInstrSize*SuperscalarMaxSize)*RANDOMX_CACHE_ACCESSES, CodeAlign)
|
||||
|
||||
var CodeSize = uint32(RandomXCodeSize + SuperscalarSize)
|
||||
|
||||
var superScalarHashOffset = int32(RandomXCodeSize)
|
||||
|
||||
var REX_ADD_RR = []byte{0x4d, 0x03}
|
||||
var REX_ADD_RM = []byte{0x4c, 0x03}
|
||||
var REX_SUB_RR = []byte{0x4d, 0x2b}
|
||||
var REX_SUB_RM = []byte{0x4c, 0x2b}
|
||||
var REX_MOV_RR = []byte{0x41, 0x8b}
|
||||
var REX_MOV_RR64 = []byte{0x49, 0x8b}
|
||||
var REX_MOV_R64R = []byte{0x4c, 0x8b}
|
||||
var REX_IMUL_RR = []byte{0x4d, 0x0f, 0xaf}
|
||||
var REX_IMUL_RRI = []byte{0x4d, 0x69}
|
||||
var REX_IMUL_RM = []byte{0x4c, 0x0f, 0xaf}
|
||||
var REX_MUL_R = []byte{0x49, 0xf7}
|
||||
var REX_MUL_M = []byte{0x48, 0xf7}
|
||||
var REX_81 = []byte{0x49, 0x81}
|
||||
var AND_EAX_I byte = 0x25
|
||||
|
||||
var MOV_EAX_I byte = 0xb8
|
||||
|
||||
var MOV_RAX_I = []byte{0x48, 0xb8}
|
||||
var MOV_RCX_I = []byte{0x48, 0xb9}
|
||||
var REX_LEA = []byte{0x4f, 0x8d}
|
||||
var REX_MUL_MEM = []byte{0x48, 0xf7, 0x24, 0x0e}
|
||||
var REX_IMUL_MEM = []byte{0x48, 0xf7, 0x2c, 0x0e}
|
||||
var REX_SHR_RAX = []byte{0x48, 0xc1, 0xe8}
|
||||
var RAX_ADD_SBB_1 = []byte{0x48, 0x83, 0xC0, 0x01, 0x48, 0x83, 0xD8, 0x00}
|
||||
var MUL_RCX = []byte{0x48, 0xf7, 0xe1}
|
||||
var REX_SHR_RDX = []byte{0x48, 0xc1, 0xea}
|
||||
var REX_SH = []byte{0x49, 0xc1}
|
||||
var MOV_RCX_RAX_SAR_RCX_63 = []byte{0x48, 0x89, 0xc1, 0x48, 0xc1, 0xf9, 0x3f}
|
||||
var AND_ECX_I = []byte{0x81, 0xe1}
|
||||
var ADD_RAX_RCX = []byte{0x48, 0x01, 0xC8}
|
||||
var SAR_RAX_I8 = []byte{0x48, 0xC1, 0xF8}
|
||||
var NEG_RAX = []byte{0x48, 0xF7, 0xD8}
|
||||
var ADD_R_RAX = []byte{0x4C, 0x03}
|
||||
var XOR_EAX_EAX = []byte{0x33, 0xC0}
|
||||
var ADD_RDX_R = []byte{0x4c, 0x01}
|
||||
var SUB_RDX_R = []byte{0x4c, 0x29}
|
||||
var SAR_RDX_I8 = []byte{0x48, 0xC1, 0xFA}
|
||||
var TEST_RDX_RDX = []byte{0x48, 0x85, 0xD2}
|
||||
var SETS_AL_ADD_RDX_RAX = []byte{0x0F, 0x98, 0xC0, 0x48, 0x03, 0xD0}
|
||||
var REX_NEG = []byte{0x49, 0xF7}
|
||||
var REX_XOR_RR = []byte{0x4D, 0x33}
|
||||
var REX_XOR_RI = []byte{0x49, 0x81}
|
||||
var REX_XOR_RM = []byte{0x4c, 0x33}
|
||||
var REX_ROT_CL = []byte{0x49, 0xd3}
|
||||
var REX_ROT_I8 = []byte{0x49, 0xc1}
|
||||
var SHUFPD = []byte{0x66, 0x0f, 0xc6}
|
||||
var REX_ADDPD = []byte{0x66, 0x41, 0x0f, 0x58}
|
||||
var REX_CVTDQ2PD_XMM12 = []byte{0xf3, 0x44, 0x0f, 0xe6, 0x24, 0x06}
|
||||
var REX_SUBPD = []byte{0x66, 0x41, 0x0f, 0x5c}
|
||||
var REX_XORPS = []byte{0x41, 0x0f, 0x57}
|
||||
var REX_MULPD = []byte{0x66, 0x41, 0x0f, 0x59}
|
||||
var REX_MAXPD = []byte{0x66, 0x41, 0x0f, 0x5f}
|
||||
var REX_DIVPD = []byte{0x66, 0x41, 0x0f, 0x5e}
|
||||
var SQRTPD = []byte{0x66, 0x0f, 0x51}
|
||||
var AND_OR_MOV_LDMXCSR = []byte{0x25, 0x00, 0x60, 0x00, 0x00, 0x0D, 0xC0, 0x9F, 0x00, 0x00, 0x50, 0x0F, 0xAE, 0x14, 0x24, 0x58}
|
||||
var ROL_RAX = []byte{0x48, 0xc1, 0xc0}
|
||||
var XOR_ECX_ECX = []byte{0x33, 0xC9}
|
||||
var REX_CMP_R32I = []byte{0x41, 0x81}
|
||||
var REX_CMP_M32I = []byte{0x81, 0x3c, 0x06}
|
||||
var MOVAPD = []byte{0x66, 0x0f, 0x29}
|
||||
var REX_MOV_MR = []byte{0x4c, 0x89}
|
||||
var REX_XOR_EAX = []byte{0x41, 0x33}
|
||||
var SUB_EBX = []byte{0x83, 0xEB, 0x01}
|
||||
var JNZ = []byte{0x0f, 0x85}
|
||||
var JMP = 0xe9
|
||||
|
||||
var REX_XOR_RAX_R64 = []byte{0x49, 0x33}
|
||||
var REX_XCHG = []byte{0x4d, 0x87}
|
||||
var REX_ANDPS_XMM12 = []byte{0x45, 0x0F, 0x54, 0xE5, 0x45, 0x0F, 0x56, 0xE6}
|
||||
var REX_PADD = []byte{0x66, 0x44, 0x0f}
|
||||
var PADD_OPCODES = []byte{0xfc, 0xfd, 0xfe, 0xd4}
|
||||
var CALL = 0xe8
|
||||
|
||||
var REX_ADD_I = []byte{0x49, 0x81}
|
||||
var REX_TEST = []byte{0x49, 0xF7}
|
||||
var JZ = []byte{0x0f, 0x84}
|
||||
var JZ_SHORT = 0x74
|
||||
|
||||
var RET byte = 0xc3
|
||||
|
||||
var LEA_32 = []byte{0x41, 0x8d}
|
||||
var MOVNTI = []byte{0x4c, 0x0f, 0xc3}
|
||||
var ADD_EBX_I = []byte{0x81, 0xc3}
|
||||
|
||||
var NOP1 = []byte{0x90}
|
||||
var NOP2 = []byte{0x66, 0x90}
|
||||
var NOP3 = []byte{0x66, 0x66, 0x90}
|
||||
var NOP4 = []byte{0x0F, 0x1F, 0x40, 0x00}
|
||||
var NOP5 = []byte{0x0F, 0x1F, 0x44, 0x00, 0x00}
|
||||
var NOP6 = []byte{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00}
|
||||
var NOP7 = []byte{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00}
|
||||
var NOP8 = []byte{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}
|
||||
|
||||
func genSIB(scale, index, base int) byte {
|
||||
return byte((scale << 6) | (index << 3) | base)
|
||||
}
|
||||
|
||||
/*
|
||||
push rbp
|
||||
push rbx
|
||||
push rsi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
mov rbp,rsp
|
||||
sub rsp,(0x8*7)
|
||||
|
||||
mov rsi, rax; # register dataset
|
||||
|
||||
prefetchnta byte ptr [rsi]
|
||||
|
||||
mov r8, qword ptr [rsi+0]
|
||||
mov r9, qword ptr [rsi+8]
|
||||
mov r10, qword ptr [rsi+16]
|
||||
mov r11, qword ptr [rsi+24]
|
||||
mov r12, qword ptr [rsi+32]
|
||||
mov r13, qword ptr [rsi+40]
|
||||
mov r14, qword ptr [rsi+48]
|
||||
mov r15, qword ptr [rsi+56]
|
||||
*/
|
||||
var codeInitBlock = []byte{0x55, 0x53, 0x56, 0x41, 0x54, 0x41, 0x55, 0x41, 0x56, 0x41, 0x57, 0x48, 0x89, 0xE5, 0x48, 0x83, 0xEC, 0x38, 0x48, 0x89, 0xC6, 0x0F, 0x18, 0x06, 0x4C, 0x8B, 0x06, 0x4C, 0x8B, 0x4E, 0x08, 0x4C, 0x8B, 0x56, 0x10, 0x4C, 0x8B, 0x5E, 0x18, 0x4C, 0x8B, 0x66, 0x20, 0x4C, 0x8B, 0x6E, 0x28, 0x4C, 0x8B, 0x76, 0x30, 0x4C, 0x8B, 0x7E, 0x38}
|
||||
|
||||
/*
|
||||
prefetchw byte ptr [rsi]
|
||||
|
||||
mov qword ptr [rsi+0], r8
|
||||
mov qword ptr [rsi+8], r9
|
||||
mov qword ptr [rsi+16], r10
|
||||
mov qword ptr [rsi+24], r11
|
||||
mov qword ptr [rsi+32], r12
|
||||
mov qword ptr [rsi+40], r13
|
||||
mov qword ptr [rsi+48], r14
|
||||
mov qword ptr [rsi+56], r15
|
||||
|
||||
add rsp,(0x8*7)
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rsi
|
||||
pop rbx
|
||||
pop rbp
|
||||
ret
|
||||
*/
|
||||
var codeRetBlock = []byte{0x0F, 0x0D, 0x0E, 0x4C, 0x89, 0x06, 0x4C, 0x89, 0x4E, 0x08, 0x4C, 0x89, 0x56, 0x10, 0x4C, 0x89, 0x5E, 0x18, 0x4C, 0x89, 0x66, 0x20, 0x4C, 0x89, 0x6E, 0x28, 0x4C, 0x89, 0x76, 0x30, 0x4C, 0x89, 0x7E, 0x38, 0x48, 0x83, 0xC4, 0x38, 0x41, 0x5F, 0x41, 0x5E, 0x41, 0x5D, 0x41, 0x5C, 0x5E, 0x5B, 0x5D, 0xC3}
|
|
@ -6,74 +6,6 @@ import (
|
|||
"encoding/binary"
|
||||
)
|
||||
|
||||
var REX_SUB_RR = []byte{0x4d, 0x2b}
|
||||
var REX_MOV_RR64 = []byte{0x49, 0x8b}
|
||||
var REX_MOV_R64R = []byte{0x4c, 0x8b}
|
||||
var REX_IMUL_RR = []byte{0x4d, 0x0f, 0xaf}
|
||||
var REX_IMUL_RM = []byte{0x4c, 0x0f, 0xaf}
|
||||
var REX_MUL_R = []byte{0x49, 0xf7}
|
||||
var REX_81 = []byte{0x49, 0x81}
|
||||
|
||||
var MOV_RAX_I = []byte{0x48, 0xb8}
|
||||
var REX_LEA = []byte{0x4f, 0x8d}
|
||||
var REX_XOR_RR = []byte{0x4D, 0x33}
|
||||
var REX_XOR_RI = []byte{0x49, 0x81}
|
||||
var REX_ROT_I8 = []byte{0x49, 0xc1}
|
||||
|
||||
func genSIB(scale, index, base int) byte {
|
||||
return byte((scale << 6) | (index << 3) | base)
|
||||
}
|
||||
|
||||
/*
|
||||
push rbp
|
||||
push rbx
|
||||
push rsi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
mov rbp,rsp
|
||||
sub rsp,(0x8*7)
|
||||
|
||||
mov rsi, rax; # register dataset
|
||||
|
||||
prefetchnta byte ptr [rsi]
|
||||
|
||||
mov r8, qword ptr [rsi+0]
|
||||
mov r9, qword ptr [rsi+8]
|
||||
mov r10, qword ptr [rsi+16]
|
||||
mov r11, qword ptr [rsi+24]
|
||||
mov r12, qword ptr [rsi+32]
|
||||
mov r13, qword ptr [rsi+40]
|
||||
mov r14, qword ptr [rsi+48]
|
||||
mov r15, qword ptr [rsi+56]
|
||||
*/
|
||||
var codeInitBlock = []byte{0x55, 0x53, 0x56, 0x41, 0x54, 0x41, 0x55, 0x41, 0x56, 0x41, 0x57, 0x48, 0x89, 0xE5, 0x48, 0x83, 0xEC, 0x38, 0x48, 0x89, 0xC6, 0x0F, 0x18, 0x06, 0x4C, 0x8B, 0x06, 0x4C, 0x8B, 0x4E, 0x08, 0x4C, 0x8B, 0x56, 0x10, 0x4C, 0x8B, 0x5E, 0x18, 0x4C, 0x8B, 0x66, 0x20, 0x4C, 0x8B, 0x6E, 0x28, 0x4C, 0x8B, 0x76, 0x30, 0x4C, 0x8B, 0x7E, 0x38}
|
||||
|
||||
/*
|
||||
prefetchw byte ptr [rsi]
|
||||
|
||||
mov qword ptr [rsi+0], r8
|
||||
mov qword ptr [rsi+8], r9
|
||||
mov qword ptr [rsi+16], r10
|
||||
mov qword ptr [rsi+24], r11
|
||||
mov qword ptr [rsi+32], r12
|
||||
mov qword ptr [rsi+40], r13
|
||||
mov qword ptr [rsi+48], r14
|
||||
mov qword ptr [rsi+56], r15
|
||||
|
||||
add rsp,(0x8*7)
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rsi
|
||||
pop rbx
|
||||
pop rbp
|
||||
ret
|
||||
*/
|
||||
var codeRetBlock = []byte{0x0F, 0x0D, 0x0E, 0x4C, 0x89, 0x06, 0x4C, 0x89, 0x4E, 0x08, 0x4C, 0x89, 0x56, 0x10, 0x4C, 0x89, 0x5E, 0x18, 0x4C, 0x89, 0x66, 0x20, 0x4C, 0x89, 0x6E, 0x28, 0x4C, 0x89, 0x76, 0x30, 0x4C, 0x89, 0x7E, 0x38, 0x48, 0x83, 0xC4, 0x38, 0x41, 0x5F, 0x41, 0x5E, 0x41, 0x5D, 0x41, 0x5C, 0x5E, 0x5B, 0x5D, 0xC3}
|
||||
|
||||
// generateSuperscalarCode
|
||||
func generateSuperscalarCode(scalarProgram SuperScalarProgram) ProgramFunc {
|
||||
|
60
vm.go
60
vm.go
|
@ -43,24 +43,9 @@ type REG struct {
|
|||
}
|
||||
|
||||
type VM struct {
|
||||
StateStart [64]byte
|
||||
ScratchPad ScratchPad
|
||||
|
||||
ByteCode ByteCode
|
||||
|
||||
mem MemoryRegisters
|
||||
config Config // configuration
|
||||
datasetOffset uint64
|
||||
|
||||
Dataset Randomx_Dataset
|
||||
|
||||
Cache *Randomx_Cache // randomx cache
|
||||
|
||||
}
|
||||
|
||||
type Config struct {
|
||||
eMask [2]uint64
|
||||
readReg [4]uint64
|
||||
}
|
||||
|
||||
// Run calculate hash based on input
|
||||
|
@ -85,28 +70,33 @@ func (vm *VM) Run(inputHash [64]byte, roundingMode uint8) (reg RegisterFile) {
|
|||
reg.A[i/2][i%2] = SmallPositiveFloatBits(entropy[i])
|
||||
}
|
||||
|
||||
vm.mem.ma = entropy[8] & CacheLineAlignMask
|
||||
vm.mem.mx = entropy[10]
|
||||
var mem MemoryRegisters
|
||||
|
||||
mem.ma = entropy[8] & CacheLineAlignMask
|
||||
mem.mx = entropy[10]
|
||||
|
||||
addressRegisters := entropy[12]
|
||||
for i := range vm.config.readReg {
|
||||
vm.config.readReg[i] = uint64(i*2) + (addressRegisters & 1)
|
||||
|
||||
var readReg [4]uint64
|
||||
|
||||
for i := range readReg {
|
||||
readReg[i] = uint64(i*2) + (addressRegisters & 1)
|
||||
addressRegisters >>= 1
|
||||
}
|
||||
|
||||
vm.datasetOffset = (entropy[13] % (DATASETEXTRAITEMS + 1)) * CacheLineSize
|
||||
vm.config.eMask[LOW] = EMask(entropy[14])
|
||||
vm.config.eMask[HIGH] = EMask(entropy[15])
|
||||
datasetOffset := (entropy[13] % (DATASETEXTRAITEMS + 1)) * CacheLineSize
|
||||
|
||||
vm.ByteCode = CompileProgramToByteCode(prog)
|
||||
eMask := [2]uint64{EMask(entropy[14]), EMask(entropy[15])}
|
||||
|
||||
spAddr0 := vm.mem.mx
|
||||
spAddr1 := vm.mem.ma
|
||||
byteCode := CompileProgramToByteCode(prog)
|
||||
|
||||
spAddr0 := mem.mx
|
||||
spAddr1 := mem.ma
|
||||
|
||||
var rlCache RegisterLine
|
||||
|
||||
for ic := 0; ic < RANDOMX_PROGRAM_ITERATIONS; ic++ {
|
||||
spMix := reg.R[vm.config.readReg[0]] ^ reg.R[vm.config.readReg[1]]
|
||||
spMix := reg.R[readReg[0]] ^ reg.R[readReg[1]]
|
||||
|
||||
spAddr0 ^= spMix
|
||||
spAddr0 &= ScratchpadL3Mask64
|
||||
|
@ -125,22 +115,22 @@ func (vm *VM) Run(inputHash [64]byte, roundingMode uint8) (reg RegisterFile) {
|
|||
for i := uint64(0); i < RegistersCountFloat; i++ {
|
||||
reg.E[i] = vm.ScratchPad.Load32FA(uint32(spAddr1 + 8*(i+RegistersCountFloat)))
|
||||
|
||||
reg.E[i][LOW] = MaskRegisterExponentMantissa(reg.E[i][LOW], vm.config.eMask[LOW])
|
||||
reg.E[i][HIGH] = MaskRegisterExponentMantissa(reg.E[i][HIGH], vm.config.eMask[HIGH])
|
||||
reg.E[i][LOW] = MaskRegisterExponentMantissa(reg.E[i][LOW], eMask[LOW])
|
||||
reg.E[i][HIGH] = MaskRegisterExponentMantissa(reg.E[i][HIGH], eMask[HIGH])
|
||||
}
|
||||
|
||||
// Run the actual bytecode
|
||||
vm.ByteCode.Execute(®, &vm.ScratchPad, vm.config.eMask)
|
||||
byteCode.Execute(®, &vm.ScratchPad, eMask)
|
||||
|
||||
vm.mem.mx ^= reg.R[vm.config.readReg[2]] ^ reg.R[vm.config.readReg[3]]
|
||||
vm.mem.mx &= CacheLineAlignMask
|
||||
mem.mx ^= reg.R[readReg[2]] ^ reg.R[readReg[3]]
|
||||
mem.mx &= CacheLineAlignMask
|
||||
|
||||
vm.Dataset.PrefetchDataset(vm.datasetOffset + vm.mem.mx)
|
||||
vm.Dataset.PrefetchDataset(datasetOffset + mem.mx)
|
||||
// execute diffuser superscalar program to get dataset 64 bytes
|
||||
vm.Dataset.ReadDataset(vm.datasetOffset+vm.mem.ma, ®.R, &rlCache)
|
||||
vm.Dataset.ReadDataset(datasetOffset+mem.ma, ®.R, &rlCache)
|
||||
|
||||
// swap the elements
|
||||
vm.mem.mx, vm.mem.ma = vm.mem.ma, vm.mem.mx
|
||||
mem.mx, mem.ma = mem.ma, mem.mx
|
||||
|
||||
for i := uint64(0); i < RegistersCount; i++ {
|
||||
vm.ScratchPad.Store64(uint32(spAddr1+8*i), reg.R[i])
|
||||
|
@ -194,7 +184,7 @@ func (vm *VM) RunLoops(tempHash [64]byte) RegisterFile {
|
|||
roundingMode = reg.FPRC
|
||||
|
||||
//restore rounding mode
|
||||
vm.ByteCode.SetRoundingMode(®, 0)
|
||||
SetRoundingMode(®, 0)
|
||||
|
||||
return reg
|
||||
}
|
||||
|
|
|
@ -6,6 +6,7 @@ type ByteCodeInstruction struct {
|
|||
Opcode ByteCodeInstructionOp
|
||||
MemMask uint32
|
||||
Imm uint64
|
||||
EMask uint64
|
||||
/*
|
||||
union {
|
||||
int_reg_t* idst;
|
||||
|
|
|
@ -108,7 +108,7 @@ func (c *ByteCode) Execute(f *RegisterFile, pad *ScratchPad, eMask [2]uint64) {
|
|||
f.E[i.Dst][HIGH] = math.Sqrt(f.E[i.Dst][HIGH])
|
||||
case VM_CFROUND:
|
||||
tmp := (bits.RotateLeft64(f.R[i.Src], 0-int(i.Imm))) % 4 // rotate right
|
||||
c.SetRoundingMode(f, uint8(tmp))
|
||||
SetRoundingMode(f, uint8(tmp))
|
||||
|
||||
case VM_CBRANCH:
|
||||
f.R[i.Src] += i.Imm
|
||||
|
@ -121,7 +121,7 @@ func (c *ByteCode) Execute(f *RegisterFile, pad *ScratchPad, eMask [2]uint64) {
|
|||
}
|
||||
}
|
||||
|
||||
func (c *ByteCode) SetRoundingMode(f *RegisterFile, mode uint8) {
|
||||
func SetRoundingMode(f *RegisterFile, mode uint8) {
|
||||
if f.FPRC == mode {
|
||||
return
|
||||
}
|
||||
|
|
|
@ -120,6 +120,6 @@ func (c *ByteCode) Execute(f *RegisterFile, pad *ScratchPad, eMask [2]uint64) {
|
|||
}
|
||||
}
|
||||
|
||||
func (c *ByteCode) SetRoundingMode(f *RegisterFile, mode uint8) {
|
||||
func SetRoundingMode(f *RegisterFile, mode uint8) {
|
||||
f.FPRC = mode
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue