Compare commits
4 commits
1ce9bff7d3
...
80f473de54
Author | SHA1 | Date | |
---|---|---|---|
DataHoarder | 80f473de54 | ||
DataHoarder | fe253fb825 | ||
DataHoarder | 699ce02f2d | ||
DataHoarder | b35751462b |
8
cache.go
8
cache.go
|
@ -84,12 +84,12 @@ func (cache *Randomx_Cache) Init(key []byte) {
|
|||
|
||||
}
|
||||
|
||||
const Mask = CacheSize/CacheLineSize - 1
|
||||
|
||||
// GetMixBlock fetches a 64-byte block in uint64 form
|
||||
func (cache *Randomx_Cache) GetMixBlock(addr uint64) *RegisterLine {
|
||||
|
||||
mask := CacheSize/CacheLineSize - 1
|
||||
|
||||
addr = (addr & mask) * CacheLineSize
|
||||
addr = (addr & Mask) * CacheLineSize
|
||||
|
||||
block := addr / 1024
|
||||
return cache.Blocks[block].GetLine(addr % 1024)
|
||||
|
@ -138,7 +138,7 @@ func (cache *Randomx_Cache) InitDatasetItemJIT(rl *RegisterLine, itemNumber uint
|
|||
for i := 0; i < RANDOMX_CACHE_ACCESSES; i++ {
|
||||
mix := cache.GetMixBlock(registerValue)
|
||||
|
||||
cache.JitPrograms[i].Execute(rl)
|
||||
cache.JitPrograms[i].Execute(uintptr(unsafe.Pointer(rl)))
|
||||
|
||||
for q := range rl {
|
||||
rl[q] ^= mix[q]
|
||||
|
|
|
@ -4,17 +4,28 @@ package randomx
|
|||
|
||||
import (
|
||||
"golang.org/x/sys/unix"
|
||||
"runtime"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
func (f ProgramFunc) Execute(rl *RegisterLine) {
|
||||
func (f ProgramFunc) Execute(v uintptr) {
|
||||
if f == nil {
|
||||
panic("program is nil")
|
||||
}
|
||||
memoryPtr := &f
|
||||
fun := *(*func(rl *RegisterLine))(unsafe.Pointer(&memoryPtr))
|
||||
|
||||
fun(rl)
|
||||
var reservedStackHack [8 * 8]byte
|
||||
for i := range reservedStackHack {
|
||||
reservedStackHack[i] = uint8(i)
|
||||
}
|
||||
|
||||
memoryPtr := &f
|
||||
fun := *(*func(v uintptr))(unsafe.Pointer(&memoryPtr))
|
||||
fun(v)
|
||||
|
||||
for i := range reservedStackHack {
|
||||
reservedStackHack[i] = uint8(-i)
|
||||
}
|
||||
runtime.KeepAlive(reservedStackHack)
|
||||
}
|
||||
|
||||
func (f ProgramFunc) Close() error {
|
||||
|
|
206
jit_amd64.go
Normal file
206
jit_amd64.go
Normal file
|
@ -0,0 +1,206 @@
|
|||
//go:build unix && amd64 && !disable_jit && !purego
|
||||
|
||||
package randomx
|
||||
|
||||
/*
|
||||
|
||||
REGISTER ALLOCATION:
|
||||
|
||||
; rax -> temporary
|
||||
; rbx -> iteration counter "ic"
|
||||
; rcx -> temporary
|
||||
; rdx -> temporary
|
||||
; rsi -> scratchpad pointer
|
||||
; rdi -> dataset pointer
|
||||
; rbp -> memory registers "ma" (high 32 bits), "mx" (low 32 bits)
|
||||
; rsp -> stack pointer
|
||||
; r8 -> "r0"
|
||||
; r9 -> "r1"
|
||||
; r10 -> "r2"
|
||||
; r11 -> "r3"
|
||||
; r12 -> "r4"
|
||||
; r13 -> "r5"
|
||||
; r14 -> "r6"
|
||||
; r15 -> "r7"
|
||||
; xmm0 -> "f0"
|
||||
; xmm1 -> "f1"
|
||||
; xmm2 -> "f2"
|
||||
; xmm3 -> "f3"
|
||||
; xmm4 -> "e0"
|
||||
; xmm5 -> "e1"
|
||||
; xmm6 -> "e2"
|
||||
; xmm7 -> "e3"
|
||||
; xmm8 -> "a0"
|
||||
; xmm9 -> "a1"
|
||||
; xmm10 -> "a2"
|
||||
; xmm11 -> "a3"
|
||||
; xmm12 -> temporary
|
||||
; xmm13 -> E 'and' mask = 0x00ffffffffffffff00ffffffffffffff
|
||||
; xmm14 -> E 'or' mask = 0x3*00000000******3*00000000******
|
||||
; xmm15 -> scale mask = 0x81f000000000000081f0000000000000
|
||||
|
||||
*/
|
||||
|
||||
const MaxRandomXInstrCodeSize = 32 //FDIV_M requires up to 32 bytes of x86 code
|
||||
const MaxSuperscalarInstrSize = 14 //IMUL_RCP requires 14 bytes of x86 code
|
||||
const SuperscalarProgramHeader = 128 //overhead per superscalar program
|
||||
const CodeAlign = 4096 //align code size to a multiple of 4 KiB
|
||||
const ReserveCodeSize = CodeAlign //function prologue/epilogue + reserve
|
||||
|
||||
// alignSize rounds pos up to the nearest multiple of align and returns it.
//
// A pos that is already a multiple of align is returned unchanged. A pos of
// zero yields zero for every supported type: without the explicit guard the
// pos-1 term wraps around on unsigned types (giving a wrong result for
// alignments that are not a power of two) and rounds up to align on signed
// types, so the two families disagreed.
//
// align must be non-zero for a non-zero pos; a zero align divides by zero.
func alignSize[T ~uintptr | ~uint32 | ~uint64 | ~int64 | ~int32 | ~int](pos, align T) T {
	if pos == 0 {
		// Zero is a multiple of every alignment; avoid the pos-1 underflow.
		return 0
	}
	return ((pos-1)/align + 1) * align
}
|
||||
|
||||
var RandomXCodeSize = alignSize[uint64](ReserveCodeSize+MaxRandomXInstrCodeSize*RANDOMX_PROGRAM_SIZE, CodeAlign)
|
||||
var SuperscalarSize = alignSize[uint64](ReserveCodeSize+(SuperscalarProgramHeader+MaxSuperscalarInstrSize*SuperscalarMaxSize)*RANDOMX_CACHE_ACCESSES, CodeAlign)
|
||||
|
||||
var CodeSize = uint32(RandomXCodeSize + SuperscalarSize)
|
||||
|
||||
var superScalarHashOffset = int32(RandomXCodeSize)
|
||||
|
||||
var REX_ADD_RR = []byte{0x4d, 0x03}
|
||||
var REX_ADD_RM = []byte{0x4c, 0x03}
|
||||
var REX_SUB_RR = []byte{0x4d, 0x2b}
|
||||
var REX_SUB_RM = []byte{0x4c, 0x2b}
|
||||
var REX_MOV_RR = []byte{0x41, 0x8b}
|
||||
var REX_MOV_RR64 = []byte{0x49, 0x8b}
|
||||
var REX_MOV_R64R = []byte{0x4c, 0x8b}
|
||||
var REX_IMUL_RR = []byte{0x4d, 0x0f, 0xaf}
|
||||
var REX_IMUL_RRI = []byte{0x4d, 0x69}
|
||||
var REX_IMUL_RM = []byte{0x4c, 0x0f, 0xaf}
|
||||
var REX_MUL_R = []byte{0x49, 0xf7}
|
||||
var REX_MUL_M = []byte{0x48, 0xf7}
|
||||
var REX_81 = []byte{0x49, 0x81}
|
||||
var AND_EAX_I byte = 0x25
|
||||
|
||||
var MOV_EAX_I byte = 0xb8
|
||||
|
||||
var MOV_RAX_I = []byte{0x48, 0xb8}
|
||||
var MOV_RCX_I = []byte{0x48, 0xb9}
|
||||
var REX_LEA = []byte{0x4f, 0x8d}
|
||||
var REX_MUL_MEM = []byte{0x48, 0xf7, 0x24, 0x0e}
|
||||
var REX_IMUL_MEM = []byte{0x48, 0xf7, 0x2c, 0x0e}
|
||||
var REX_SHR_RAX = []byte{0x48, 0xc1, 0xe8}
|
||||
var RAX_ADD_SBB_1 = []byte{0x48, 0x83, 0xC0, 0x01, 0x48, 0x83, 0xD8, 0x00}
|
||||
var MUL_RCX = []byte{0x48, 0xf7, 0xe1}
|
||||
var REX_SHR_RDX = []byte{0x48, 0xc1, 0xea}
|
||||
var REX_SH = []byte{0x49, 0xc1}
|
||||
var MOV_RCX_RAX_SAR_RCX_63 = []byte{0x48, 0x89, 0xc1, 0x48, 0xc1, 0xf9, 0x3f}
|
||||
var AND_ECX_I = []byte{0x81, 0xe1}
|
||||
var ADD_RAX_RCX = []byte{0x48, 0x01, 0xC8}
|
||||
var SAR_RAX_I8 = []byte{0x48, 0xC1, 0xF8}
|
||||
var NEG_RAX = []byte{0x48, 0xF7, 0xD8}
|
||||
var ADD_R_RAX = []byte{0x4C, 0x03}
|
||||
var XOR_EAX_EAX = []byte{0x33, 0xC0}
|
||||
var ADD_RDX_R = []byte{0x4c, 0x01}
|
||||
var SUB_RDX_R = []byte{0x4c, 0x29}
|
||||
var SAR_RDX_I8 = []byte{0x48, 0xC1, 0xFA}
|
||||
var TEST_RDX_RDX = []byte{0x48, 0x85, 0xD2}
|
||||
var SETS_AL_ADD_RDX_RAX = []byte{0x0F, 0x98, 0xC0, 0x48, 0x03, 0xD0}
|
||||
var REX_NEG = []byte{0x49, 0xF7}
|
||||
var REX_XOR_RR = []byte{0x4D, 0x33}
|
||||
var REX_XOR_RI = []byte{0x49, 0x81}
|
||||
var REX_XOR_RM = []byte{0x4c, 0x33}
|
||||
var REX_ROT_CL = []byte{0x49, 0xd3}
|
||||
var REX_ROT_I8 = []byte{0x49, 0xc1}
|
||||
var SHUFPD = []byte{0x66, 0x0f, 0xc6}
|
||||
var REX_ADDPD = []byte{0x66, 0x41, 0x0f, 0x58}
|
||||
var REX_CVTDQ2PD_XMM12 = []byte{0xf3, 0x44, 0x0f, 0xe6, 0x24, 0x06}
|
||||
var REX_SUBPD = []byte{0x66, 0x41, 0x0f, 0x5c}
|
||||
var REX_XORPS = []byte{0x41, 0x0f, 0x57}
|
||||
var REX_MULPD = []byte{0x66, 0x41, 0x0f, 0x59}
|
||||
var REX_MAXPD = []byte{0x66, 0x41, 0x0f, 0x5f}
|
||||
var REX_DIVPD = []byte{0x66, 0x41, 0x0f, 0x5e}
|
||||
var SQRTPD = []byte{0x66, 0x0f, 0x51}
|
||||
var AND_OR_MOV_LDMXCSR = []byte{0x25, 0x00, 0x60, 0x00, 0x00, 0x0D, 0xC0, 0x9F, 0x00, 0x00, 0x50, 0x0F, 0xAE, 0x14, 0x24, 0x58}
|
||||
var ROL_RAX = []byte{0x48, 0xc1, 0xc0}
|
||||
var XOR_ECX_ECX = []byte{0x33, 0xC9}
|
||||
var REX_CMP_R32I = []byte{0x41, 0x81}
|
||||
var REX_CMP_M32I = []byte{0x81, 0x3c, 0x06}
|
||||
var MOVAPD = []byte{0x66, 0x0f, 0x29}
|
||||
var REX_MOV_MR = []byte{0x4c, 0x89}
|
||||
var REX_XOR_EAX = []byte{0x41, 0x33}
|
||||
var SUB_EBX = []byte{0x83, 0xEB, 0x01}
|
||||
var JNZ = []byte{0x0f, 0x85}
|
||||
var JMP = 0xe9
|
||||
|
||||
var REX_XOR_RAX_R64 = []byte{0x49, 0x33}
|
||||
var REX_XCHG = []byte{0x4d, 0x87}
|
||||
var REX_ANDPS_XMM12 = []byte{0x45, 0x0F, 0x54, 0xE5, 0x45, 0x0F, 0x56, 0xE6}
|
||||
var REX_PADD = []byte{0x66, 0x44, 0x0f}
|
||||
var PADD_OPCODES = []byte{0xfc, 0xfd, 0xfe, 0xd4}
|
||||
var CALL = 0xe8
|
||||
|
||||
var REX_ADD_I = []byte{0x49, 0x81}
|
||||
var REX_TEST = []byte{0x49, 0xF7}
|
||||
var JZ = []byte{0x0f, 0x84}
|
||||
var JZ_SHORT = 0x74
|
||||
|
||||
var RET byte = 0xc3
|
||||
|
||||
var LEA_32 = []byte{0x41, 0x8d}
|
||||
var MOVNTI = []byte{0x4c, 0x0f, 0xc3}
|
||||
var ADD_EBX_I = []byte{0x81, 0xc3}
|
||||
|
||||
var NOP1 = []byte{0x90}
|
||||
var NOP2 = []byte{0x66, 0x90}
|
||||
var NOP3 = []byte{0x66, 0x66, 0x90}
|
||||
var NOP4 = []byte{0x0F, 0x1F, 0x40, 0x00}
|
||||
var NOP5 = []byte{0x0F, 0x1F, 0x44, 0x00, 0x00}
|
||||
var NOP6 = []byte{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00}
|
||||
var NOP7 = []byte{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00}
|
||||
var NOP8 = []byte{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}
|
||||
|
||||
// genSIB packs scale, index and base into one byte as
// (scale << 6) | (index << 3) | base; the byte conversion keeps only the low
// 8 bits of the result.
// The arguments are not masked, so a value wider than its field spills into
// the neighbouring bits.
// NOTE(review): presumably this is the x86 SIB byte layout (2-bit scale,
// 3-bit index, 3-bit base) — confirm callers only pass in-range values.
func genSIB(scale, index, base int) byte {
|
||||
return byte((scale << 6) | (index << 3) | base)
|
||||
}
|
||||
|
||||
/*
|
||||
push rbp
|
||||
push rbx
|
||||
push rsi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
mov rbp,rsp
|
||||
sub rsp,(0x8*7)
|
||||
|
||||
mov rsi, rax; # register dataset
|
||||
|
||||
prefetchnta byte ptr [rsi]
|
||||
|
||||
mov r8, qword ptr [rsi+0]
|
||||
mov r9, qword ptr [rsi+8]
|
||||
mov r10, qword ptr [rsi+16]
|
||||
mov r11, qword ptr [rsi+24]
|
||||
mov r12, qword ptr [rsi+32]
|
||||
mov r13, qword ptr [rsi+40]
|
||||
mov r14, qword ptr [rsi+48]
|
||||
mov r15, qword ptr [rsi+56]
|
||||
*/
|
||||
var codeInitBlock = []byte{0x55, 0x53, 0x56, 0x41, 0x54, 0x41, 0x55, 0x41, 0x56, 0x41, 0x57, 0x48, 0x89, 0xE5, 0x48, 0x83, 0xEC, 0x38, 0x48, 0x89, 0xC6, 0x0F, 0x18, 0x06, 0x4C, 0x8B, 0x06, 0x4C, 0x8B, 0x4E, 0x08, 0x4C, 0x8B, 0x56, 0x10, 0x4C, 0x8B, 0x5E, 0x18, 0x4C, 0x8B, 0x66, 0x20, 0x4C, 0x8B, 0x6E, 0x28, 0x4C, 0x8B, 0x76, 0x30, 0x4C, 0x8B, 0x7E, 0x38}
|
||||
|
||||
/*
|
||||
prefetchw byte ptr [rsi]
|
||||
|
||||
mov qword ptr [rsi+0], r8
|
||||
mov qword ptr [rsi+8], r9
|
||||
mov qword ptr [rsi+16], r10
|
||||
mov qword ptr [rsi+24], r11
|
||||
mov qword ptr [rsi+32], r12
|
||||
mov qword ptr [rsi+40], r13
|
||||
mov qword ptr [rsi+48], r14
|
||||
mov qword ptr [rsi+56], r15
|
||||
|
||||
add rsp,(0x8*7)
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rsi
|
||||
pop rbx
|
||||
pop rbp
|
||||
ret
|
||||
*/
|
||||
var codeRetBlock = []byte{0x0F, 0x0D, 0x0E, 0x4C, 0x89, 0x06, 0x4C, 0x89, 0x4E, 0x08, 0x4C, 0x89, 0x56, 0x10, 0x4C, 0x89, 0x5E, 0x18, 0x4C, 0x89, 0x66, 0x20, 0x4C, 0x89, 0x6E, 0x28, 0x4C, 0x89, 0x76, 0x30, 0x4C, 0x89, 0x7E, 0x38, 0x48, 0x83, 0xC4, 0x38, 0x41, 0x5F, 0x41, 0x5E, 0x41, 0x5D, 0x41, 0x5C, 0x5E, 0x5B, 0x5D, 0xC3}
|
|
@ -1,5 +1,7 @@
|
|||
package randomx
|
||||
|
||||
import "unsafe"
|
||||
|
||||
const RegistersCount = 8
|
||||
const RegistersCountFloat = 4
|
||||
|
||||
|
@ -17,6 +19,12 @@ type RegisterFile struct {
|
|||
FPRC uint8
|
||||
}
|
||||
|
||||
const RegisterFileSize = RegistersCount*8 + RegistersCountFloat*2*8*3
|
||||
|
||||
// Memory returns a pointer that views the first RegisterFileSize bytes of rf
// as a byte array. No copy is made: the conversion goes through
// unsafe.Pointer, so the returned array aliases rf itself.
// NOTE(review): assumes the leading fields of RegisterFile occupy exactly
// RegisterFileSize contiguous bytes with no padding — confirm against the
// struct definition.
// NOTE(review): the bytes are exposed in host byte order; confirm that
// consumers needing a fixed byte order only run on matching targets.
func (rf *RegisterFile) Memory() *[RegisterFileSize]byte {
|
||||
return (*[RegisterFileSize]byte)(unsafe.Pointer(rf))
|
||||
}
|
||||
|
||||
type MemoryRegisters struct {
|
||||
mx, ma uint64
|
||||
}
|
||||
|
|
|
@ -766,8 +766,6 @@ func selectRegister(available_registers []int, gen *Blake2Generator, reg *int) b
|
|||
return true
|
||||
}
|
||||
|
||||
const Mask = CacheSize/CacheLineSize - 1
|
||||
|
||||
// executeSuperscalar executes the superscalar program
|
||||
func executeSuperscalar(p []SuperScalarInstruction, r *RegisterLine) {
|
||||
|
||||
|
@ -813,7 +811,7 @@ func randomx_reciprocal(divisor uint32) uint64 {
|
|||
quotient := p2exp63 / uint64(divisor)
|
||||
remainder := p2exp63 % uint64(divisor)
|
||||
|
||||
shift := uint32(bits.Len32(divisor))
|
||||
shift := bits.Len32(divisor)
|
||||
|
||||
return (quotient << shift) + ((remainder << shift) / uint64(divisor))
|
||||
}
|
||||
|
|
|
@ -6,74 +6,6 @@ import (
|
|||
"encoding/binary"
|
||||
)
|
||||
|
||||
var REX_SUB_RR = []byte{0x4d, 0x2b}
|
||||
var REX_MOV_RR64 = []byte{0x49, 0x8b}
|
||||
var REX_MOV_R64R = []byte{0x4c, 0x8b}
|
||||
var REX_IMUL_RR = []byte{0x4d, 0x0f, 0xaf}
|
||||
var REX_IMUL_RM = []byte{0x4c, 0x0f, 0xaf}
|
||||
var REX_MUL_R = []byte{0x49, 0xf7}
|
||||
var REX_81 = []byte{0x49, 0x81}
|
||||
|
||||
var MOV_RAX_I = []byte{0x48, 0xb8}
|
||||
var REX_LEA = []byte{0x4f, 0x8d}
|
||||
var REX_XOR_RR = []byte{0x4D, 0x33}
|
||||
var REX_XOR_RI = []byte{0x49, 0x81}
|
||||
var REX_ROT_I8 = []byte{0x49, 0xc1}
|
||||
|
||||
func genSIB(scale, index, base int) byte {
|
||||
return byte((scale << 6) | (index << 3) | base)
|
||||
}
|
||||
|
||||
/*
|
||||
push rbp
|
||||
push rbx
|
||||
push rsi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
mov rbp,rsp
|
||||
sub rsp,(0x8*7)
|
||||
|
||||
mov rsi, rax; # register dataset
|
||||
|
||||
prefetchnta byte ptr [rsi]
|
||||
|
||||
mov r8, qword ptr [rsi+0]
|
||||
mov r9, qword ptr [rsi+8]
|
||||
mov r10, qword ptr [rsi+16]
|
||||
mov r11, qword ptr [rsi+24]
|
||||
mov r12, qword ptr [rsi+32]
|
||||
mov r13, qword ptr [rsi+40]
|
||||
mov r14, qword ptr [rsi+48]
|
||||
mov r15, qword ptr [rsi+56]
|
||||
*/
|
||||
var codeInitBlock = []byte{0x55, 0x53, 0x56, 0x41, 0x54, 0x41, 0x55, 0x41, 0x56, 0x41, 0x57, 0x48, 0x89, 0xE5, 0x48, 0x83, 0xEC, 0x38, 0x48, 0x89, 0xC6, 0x0F, 0x18, 0x06, 0x4C, 0x8B, 0x06, 0x4C, 0x8B, 0x4E, 0x08, 0x4C, 0x8B, 0x56, 0x10, 0x4C, 0x8B, 0x5E, 0x18, 0x4C, 0x8B, 0x66, 0x20, 0x4C, 0x8B, 0x6E, 0x28, 0x4C, 0x8B, 0x76, 0x30, 0x4C, 0x8B, 0x7E, 0x38}
|
||||
|
||||
/*
|
||||
prefetchw byte ptr [rsi]
|
||||
|
||||
mov qword ptr [rsi+0], r8
|
||||
mov qword ptr [rsi+8], r9
|
||||
mov qword ptr [rsi+16], r10
|
||||
mov qword ptr [rsi+24], r11
|
||||
mov qword ptr [rsi+32], r12
|
||||
mov qword ptr [rsi+40], r13
|
||||
mov qword ptr [rsi+48], r14
|
||||
mov qword ptr [rsi+56], r15
|
||||
|
||||
add rsp,(0x8*7)
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rsi
|
||||
pop rbx
|
||||
pop rbp
|
||||
ret
|
||||
*/
|
||||
var codeRetBlock = []byte{0x0F, 0x0D, 0x0E, 0x4C, 0x89, 0x06, 0x4C, 0x89, 0x4E, 0x08, 0x4C, 0x89, 0x56, 0x10, 0x4C, 0x89, 0x5E, 0x18, 0x4C, 0x89, 0x66, 0x20, 0x4C, 0x89, 0x6E, 0x28, 0x4C, 0x89, 0x76, 0x30, 0x4C, 0x89, 0x7E, 0x38, 0x48, 0x83, 0xC4, 0x38, 0x41, 0x5F, 0x41, 0x5E, 0x41, 0x5D, 0x41, 0x5C, 0x5E, 0x5B, 0x5D, 0xC3}
|
||||
|
||||
// generateSuperscalarCode
|
||||
func generateSuperscalarCode(scalarProgram SuperScalarProgram) ProgramFunc {
|
||||
|
116
vm.go
116
vm.go
|
@ -35,7 +35,6 @@ import (
|
|||
"runtime"
|
||||
"unsafe"
|
||||
)
|
||||
import "encoding/binary"
|
||||
import "golang.org/x/crypto/blake2b"
|
||||
|
||||
type REG struct {
|
||||
|
@ -44,29 +43,14 @@ type REG struct {
|
|||
}
|
||||
|
||||
type VM struct {
|
||||
StateStart [64]byte
|
||||
ScratchPad ScratchPad
|
||||
|
||||
ByteCode ByteCode
|
||||
|
||||
mem MemoryRegisters
|
||||
config Config // configuration
|
||||
datasetOffset uint64
|
||||
|
||||
Dataset Randomx_Dataset
|
||||
|
||||
Cache *Randomx_Cache // randomx cache
|
||||
|
||||
}
|
||||
|
||||
type Config struct {
|
||||
eMask [2]uint64
|
||||
readReg [4]uint64
|
||||
}
|
||||
|
||||
// Run calculates the hash based on the input
|
||||
// Warning: Underlying callers will run asm.SetRoundingMode directly
|
||||
// It is the caller's responsibility to set and restore the mode to softfloat64.RoundingModeToNearest between full executions
|
||||
// Warning: Underlying callers will run float64 SetRoundingMode directly
|
||||
// It is the caller's responsibility to set and restore the mode to IEEE 754 roundTiesToEven between full executions
|
||||
// Additionally, runtime.LockOSThread and defer runtime.UnlockOSThread are recommended to prevent other goroutines from sharing these changes
|
||||
func (vm *VM) Run(inputHash [64]byte, roundingMode uint8) (reg RegisterFile) {
|
||||
|
||||
|
@ -86,28 +70,33 @@ func (vm *VM) Run(inputHash [64]byte, roundingMode uint8) (reg RegisterFile) {
|
|||
reg.A[i/2][i%2] = SmallPositiveFloatBits(entropy[i])
|
||||
}
|
||||
|
||||
vm.mem.ma = entropy[8] & CacheLineAlignMask
|
||||
vm.mem.mx = entropy[10]
|
||||
var mem MemoryRegisters
|
||||
|
||||
mem.ma = entropy[8] & CacheLineAlignMask
|
||||
mem.mx = entropy[10]
|
||||
|
||||
addressRegisters := entropy[12]
|
||||
for i := range vm.config.readReg {
|
||||
vm.config.readReg[i] = uint64(i*2) + (addressRegisters & 1)
|
||||
|
||||
var readReg [4]uint64
|
||||
|
||||
for i := range readReg {
|
||||
readReg[i] = uint64(i*2) + (addressRegisters & 1)
|
||||
addressRegisters >>= 1
|
||||
}
|
||||
|
||||
vm.datasetOffset = (entropy[13] % (DATASETEXTRAITEMS + 1)) * CacheLineSize
|
||||
vm.config.eMask[LOW] = EMask(entropy[14])
|
||||
vm.config.eMask[HIGH] = EMask(entropy[15])
|
||||
datasetOffset := (entropy[13] % (DATASETEXTRAITEMS + 1)) * CacheLineSize
|
||||
|
||||
vm.ByteCode = CompileProgramToByteCode(prog)
|
||||
eMask := [2]uint64{EMask(entropy[14]), EMask(entropy[15])}
|
||||
|
||||
spAddr0 := vm.mem.mx
|
||||
spAddr1 := vm.mem.ma
|
||||
byteCode := CompileProgramToByteCode(prog)
|
||||
|
||||
spAddr0 := mem.mx
|
||||
spAddr1 := mem.ma
|
||||
|
||||
var rlCache RegisterLine
|
||||
|
||||
for ic := 0; ic < RANDOMX_PROGRAM_ITERATIONS; ic++ {
|
||||
spMix := reg.R[vm.config.readReg[0]] ^ reg.R[vm.config.readReg[1]]
|
||||
spMix := reg.R[readReg[0]] ^ reg.R[readReg[1]]
|
||||
|
||||
spAddr0 ^= spMix
|
||||
spAddr0 &= ScratchpadL3Mask64
|
||||
|
@ -126,22 +115,22 @@ func (vm *VM) Run(inputHash [64]byte, roundingMode uint8) (reg RegisterFile) {
|
|||
for i := uint64(0); i < RegistersCountFloat; i++ {
|
||||
reg.E[i] = vm.ScratchPad.Load32FA(uint32(spAddr1 + 8*(i+RegistersCountFloat)))
|
||||
|
||||
reg.E[i][LOW] = MaskRegisterExponentMantissa(reg.E[i][LOW], vm.config.eMask[LOW])
|
||||
reg.E[i][HIGH] = MaskRegisterExponentMantissa(reg.E[i][HIGH], vm.config.eMask[HIGH])
|
||||
reg.E[i][LOW] = MaskRegisterExponentMantissa(reg.E[i][LOW], eMask[LOW])
|
||||
reg.E[i][HIGH] = MaskRegisterExponentMantissa(reg.E[i][HIGH], eMask[HIGH])
|
||||
}
|
||||
|
||||
// Run the actual bytecode
|
||||
vm.ByteCode.Execute(®, &vm.ScratchPad, vm.config.eMask)
|
||||
byteCode.Execute(®, &vm.ScratchPad, eMask)
|
||||
|
||||
vm.mem.mx ^= reg.R[vm.config.readReg[2]] ^ reg.R[vm.config.readReg[3]]
|
||||
vm.mem.mx &= CacheLineAlignMask
|
||||
mem.mx ^= reg.R[readReg[2]] ^ reg.R[readReg[3]]
|
||||
mem.mx &= CacheLineAlignMask
|
||||
|
||||
vm.Dataset.PrefetchDataset(vm.datasetOffset + vm.mem.mx)
|
||||
vm.Dataset.PrefetchDataset(datasetOffset + mem.mx)
|
||||
// execute diffuser superscalar program to get dataset 64 bytes
|
||||
vm.Dataset.ReadDataset(vm.datasetOffset+vm.mem.ma, ®.R, &rlCache)
|
||||
vm.Dataset.ReadDataset(datasetOffset+mem.ma, ®.R, &rlCache)
|
||||
|
||||
// swap the elements
|
||||
vm.mem.mx, vm.mem.ma = vm.mem.ma, vm.mem.mx
|
||||
mem.mx, mem.ma = mem.ma, mem.mx
|
||||
|
||||
for i := uint64(0); i < RegistersCount; i++ {
|
||||
vm.ScratchPad.Store64(uint32(spAddr1+8*i), reg.R[i])
|
||||
|
@ -169,8 +158,6 @@ func (vm *VM) InitScratchpad(seed *[64]byte) {
|
|||
}
|
||||
|
||||
func (vm *VM) RunLoops(tempHash [64]byte) RegisterFile {
|
||||
|
||||
var buf [8]byte
|
||||
hash512, _ := blake2b.New512(nil)
|
||||
|
||||
// Lock thread due to rounding mode flags
|
||||
|
@ -184,30 +171,10 @@ func (vm *VM) RunLoops(tempHash [64]byte) RegisterFile {
|
|||
roundingMode = reg.FPRC
|
||||
|
||||
hash512.Reset()
|
||||
for i := range reg.R {
|
||||
binary.LittleEndian.PutUint64(buf[:], reg.R[i])
|
||||
hash512.Write(buf[:])
|
||||
}
|
||||
for i := range reg.F {
|
||||
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(reg.F[i][LOW]))
|
||||
hash512.Write(buf[:])
|
||||
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(reg.F[i][HIGH]))
|
||||
hash512.Write(buf[:])
|
||||
}
|
||||
|
||||
for i := range reg.E {
|
||||
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(reg.E[i][LOW]))
|
||||
hash512.Write(buf[:])
|
||||
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(reg.E[i][HIGH]))
|
||||
hash512.Write(buf[:])
|
||||
}
|
||||
|
||||
for i := range reg.A {
|
||||
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(reg.A[i][LOW]))
|
||||
hash512.Write(buf[:])
|
||||
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(reg.A[i][HIGH]))
|
||||
hash512.Write(buf[:])
|
||||
}
|
||||
// write R, F, E, A registers
|
||||
hash512.Write(reg.Memory()[:])
|
||||
runtime.KeepAlive(reg)
|
||||
|
||||
hash512.Sum(tempHash[:0])
|
||||
}
|
||||
|
@ -217,14 +184,12 @@ func (vm *VM) RunLoops(tempHash [64]byte) RegisterFile {
|
|||
roundingMode = reg.FPRC
|
||||
|
||||
//restore rounding mode
|
||||
vm.ByteCode.SetRoundingMode(®, 0)
|
||||
SetRoundingMode(®, 0)
|
||||
|
||||
return reg
|
||||
}
|
||||
|
||||
func (vm *VM) CalculateHash(input []byte, output *[32]byte) {
|
||||
var buf [8]byte
|
||||
|
||||
tempHash := blake2b.Sum512(input)
|
||||
|
||||
vm.InitScratchpad(&tempHash)
|
||||
|
@ -238,24 +203,9 @@ func (vm *VM) CalculateHash(input []byte, output *[32]byte) {
|
|||
|
||||
hash256.Reset()
|
||||
|
||||
for i := range reg.R {
|
||||
binary.LittleEndian.PutUint64(buf[:], reg.R[i])
|
||||
hash256.Write(buf[:])
|
||||
}
|
||||
|
||||
for i := range reg.F {
|
||||
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(reg.F[i][LOW]))
|
||||
hash256.Write(buf[:])
|
||||
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(reg.F[i][HIGH]))
|
||||
hash256.Write(buf[:])
|
||||
}
|
||||
|
||||
for i := range reg.E {
|
||||
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(reg.E[i][LOW]))
|
||||
hash256.Write(buf[:])
|
||||
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(reg.E[i][HIGH]))
|
||||
hash256.Write(buf[:])
|
||||
}
|
||||
// write R, F, E registers
|
||||
hash256.Write(reg.Memory()[:RegisterFileSize-RegistersCountFloat*2*8])
|
||||
runtime.KeepAlive(reg)
|
||||
|
||||
// copy tempHash, as it is first copied to the register and then hashed
|
||||
hash256.Write(tempHash[:])
|
||||
|
|
|
@ -6,6 +6,7 @@ type ByteCodeInstruction struct {
|
|||
Opcode ByteCodeInstructionOp
|
||||
MemMask uint32
|
||||
Imm uint64
|
||||
EMask uint64
|
||||
/*
|
||||
union {
|
||||
int_reg_t* idst;
|
||||
|
|
|
@ -108,7 +108,7 @@ func (c *ByteCode) Execute(f *RegisterFile, pad *ScratchPad, eMask [2]uint64) {
|
|||
f.E[i.Dst][HIGH] = math.Sqrt(f.E[i.Dst][HIGH])
|
||||
case VM_CFROUND:
|
||||
tmp := (bits.RotateLeft64(f.R[i.Src], 0-int(i.Imm))) % 4 // rotate right
|
||||
c.SetRoundingMode(f, uint8(tmp))
|
||||
SetRoundingMode(f, uint8(tmp))
|
||||
|
||||
case VM_CBRANCH:
|
||||
f.R[i.Src] += i.Imm
|
||||
|
@ -121,7 +121,7 @@ func (c *ByteCode) Execute(f *RegisterFile, pad *ScratchPad, eMask [2]uint64) {
|
|||
}
|
||||
}
|
||||
|
||||
func (c *ByteCode) SetRoundingMode(f *RegisterFile, mode uint8) {
|
||||
func SetRoundingMode(f *RegisterFile, mode uint8) {
|
||||
if f.FPRC == mode {
|
||||
return
|
||||
}
|
||||
|
|
|
@ -8,8 +8,8 @@ import (
|
|||
)
|
||||
|
||||
// Execute runs a RandomX program with the given register file and scratchpad
|
||||
// Warning: This will call asm.SetRoundingMode directly
|
||||
// It is the caller's responsibility to set and restore the mode to softfloat64.RoundingModeToNearest between full executions
|
||||
// Warning: This will call float64 SetRoundingMode directly
|
||||
// It is the caller's responsibility to set and restore the mode to IEEE 754 roundTiesToEven between full executions
|
||||
// Additionally, runtime.LockOSThread and defer runtime.UnlockOSThread are recommended to prevent other goroutines from sharing these changes
|
||||
func (c *ByteCode) Execute(f *RegisterFile, pad *ScratchPad, eMask [2]uint64) {
|
||||
for pc := 0; pc < RANDOMX_PROGRAM_SIZE; pc++ {
|
||||
|
@ -120,6 +120,6 @@ func (c *ByteCode) Execute(f *RegisterFile, pad *ScratchPad, eMask [2]uint64) {
|
|||
}
|
||||
}
|
||||
|
||||
func (c *ByteCode) SetRoundingMode(f *RegisterFile, mode uint8) {
|
||||
func SetRoundingMode(f *RegisterFile, mode uint8) {
|
||||
f.FPRC = mode
|
||||
}
|
||||
|
|
|
@ -43,6 +43,11 @@ type VM_Instruction [8]byte // it is hardcode 8 bytes
|
|||
func (ins VM_Instruction) IMM() uint32 {
|
||||
return binary.LittleEndian.Uint32(ins[4:])
|
||||
}
|
||||
|
||||
// IMM64 returns the instruction's 32-bit immediate, as read by IMM, widened
// to a uint64 by signExtend2sCompl.
// NOTE(review): presumably signExtend2sCompl performs two's-complement sign
// extension, as its name suggests — its body is not visible here; confirm.
func (ins VM_Instruction) IMM64() uint64 {
|
||||
return signExtend2sCompl(ins.IMM())
|
||||
}
|
||||
|
||||
func (ins VM_Instruction) Mod() byte {
|
||||
return ins[3]
|
||||
}
|
||||
|
@ -84,13 +89,13 @@ func CompileProgramToByteCode(prog []byte) (bc ByteCode) {
|
|||
} else {
|
||||
//shift
|
||||
ibc.ImmB = (instr.Mod() >> 2) % 4
|
||||
ibc.Imm = signExtend2sCompl(instr.IMM())
|
||||
ibc.Imm = instr.IMM64()
|
||||
}
|
||||
registerUsage[dst] = i
|
||||
|
||||
case 16, 17, 18, 19, 20, 21, 22: // 7
|
||||
ibc.Opcode = VM_IADD_M
|
||||
ibc.Imm = signExtend2sCompl(instr.IMM())
|
||||
ibc.Imm = instr.IMM64()
|
||||
if src != dst {
|
||||
if (instr.Mod() % 4) != 0 {
|
||||
ibc.MemMask = ScratchpadL1Mask
|
||||
|
@ -107,13 +112,13 @@ func CompileProgramToByteCode(prog []byte) (bc ByteCode) {
|
|||
ibc.Opcode = VM_ISUB_R
|
||||
|
||||
if src == dst {
|
||||
ibc.Imm = signExtend2sCompl(instr.IMM())
|
||||
ibc.Imm = instr.IMM64()
|
||||
ibc.Opcode = VM_ISUB_I
|
||||
}
|
||||
registerUsage[dst] = i
|
||||
case 39, 40, 41, 42, 43, 44, 45: // 7
|
||||
ibc.Opcode = VM_ISUB_M
|
||||
ibc.Imm = signExtend2sCompl(instr.IMM())
|
||||
ibc.Imm = instr.IMM64()
|
||||
if src != dst {
|
||||
if (instr.Mod() % 4) != 0 {
|
||||
ibc.MemMask = ScratchpadL1Mask
|
||||
|
@ -130,13 +135,13 @@ func CompileProgramToByteCode(prog []byte) (bc ByteCode) {
|
|||
ibc.Opcode = VM_IMUL_R
|
||||
|
||||
if src == dst {
|
||||
ibc.Imm = signExtend2sCompl(instr.IMM())
|
||||
ibc.Imm = instr.IMM64()
|
||||
ibc.Opcode = VM_IMUL_I
|
||||
}
|
||||
registerUsage[dst] = i
|
||||
case 62, 63, 64, 65: //4
|
||||
ibc.Opcode = VM_IMUL_M
|
||||
ibc.Imm = signExtend2sCompl(instr.IMM())
|
||||
ibc.Imm = instr.IMM64()
|
||||
if src != dst {
|
||||
if (instr.Mod() % 4) != 0 {
|
||||
ibc.MemMask = ScratchpadL1Mask
|
||||
|
@ -154,7 +159,7 @@ func CompileProgramToByteCode(prog []byte) (bc ByteCode) {
|
|||
registerUsage[dst] = i
|
||||
case 70: //1
|
||||
ibc.Opcode = VM_IMULH_M
|
||||
ibc.Imm = signExtend2sCompl(instr.IMM())
|
||||
ibc.Imm = instr.IMM64()
|
||||
if src != dst {
|
||||
if (instr.Mod() % 4) != 0 {
|
||||
ibc.MemMask = ScratchpadL1Mask
|
||||
|
@ -172,7 +177,7 @@ func CompileProgramToByteCode(prog []byte) (bc ByteCode) {
|
|||
registerUsage[dst] = i
|
||||
case 75: //1
|
||||
ibc.Opcode = VM_ISMULH_M
|
||||
ibc.Imm = signExtend2sCompl(instr.IMM())
|
||||
ibc.Imm = instr.IMM64()
|
||||
if src != dst {
|
||||
if (instr.Mod() % 4) != 0 {
|
||||
ibc.MemMask = ScratchpadL1Mask
|
||||
|
@ -202,13 +207,13 @@ func CompileProgramToByteCode(prog []byte) (bc ByteCode) {
|
|||
ibc.Opcode = VM_IXOR_R
|
||||
|
||||
if src == dst {
|
||||
ibc.Imm = signExtend2sCompl(instr.IMM())
|
||||
ibc.Imm = instr.IMM64()
|
||||
ibc.Opcode = VM_IXOR_I
|
||||
}
|
||||
registerUsage[dst] = i
|
||||
case 101, 102, 103, 104, 105: //5
|
||||
ibc.Opcode = VM_IXOR_M
|
||||
ibc.Imm = signExtend2sCompl(instr.IMM())
|
||||
ibc.Imm = instr.IMM64()
|
||||
if src != dst {
|
||||
if (instr.Mod() % 4) != 0 {
|
||||
ibc.MemMask = ScratchpadL1Mask
|
||||
|
@ -224,7 +229,7 @@ func CompileProgramToByteCode(prog []byte) (bc ByteCode) {
|
|||
case 106, 107, 108, 109, 110, 111, 112, 113: //8
|
||||
ibc.Opcode = VM_IROR_R
|
||||
if src == dst {
|
||||
ibc.Imm = signExtend2sCompl(instr.IMM())
|
||||
ibc.Imm = instr.IMM64()
|
||||
ibc.Opcode = VM_IROR_I
|
||||
}
|
||||
registerUsage[dst] = i
|
||||
|
@ -232,7 +237,7 @@ func CompileProgramToByteCode(prog []byte) (bc ByteCode) {
|
|||
ibc.Opcode = VM_IROL_R
|
||||
|
||||
if src == dst {
|
||||
ibc.Imm = signExtend2sCompl(instr.IMM())
|
||||
ibc.Imm = instr.IMM64()
|
||||
ibc.Opcode = VM_IROL_I
|
||||
}
|
||||
registerUsage[dst] = i
|
||||
|
@ -269,7 +274,7 @@ func CompileProgramToByteCode(prog []byte) (bc ByteCode) {
|
|||
} else {
|
||||
ibc.MemMask = ScratchpadL2Mask
|
||||
}
|
||||
ibc.Imm = signExtend2sCompl(instr.IMM())
|
||||
ibc.Imm = instr.IMM64()
|
||||
|
||||
case 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160: //16
|
||||
ibc.Dst = instr.Dst() % RegistersCountFloat // bit shift optimization
|
||||
|
@ -283,7 +288,7 @@ func CompileProgramToByteCode(prog []byte) (bc ByteCode) {
|
|||
} else {
|
||||
ibc.MemMask = ScratchpadL2Mask
|
||||
}
|
||||
ibc.Imm = signExtend2sCompl(instr.IMM())
|
||||
ibc.Imm = instr.IMM64()
|
||||
|
||||
case 166, 167, 168, 169, 170, 171: //6
|
||||
ibc.Dst = instr.Dst() % RegistersCountFloat // bit shift optimization
|
||||
|
@ -300,7 +305,7 @@ func CompileProgramToByteCode(prog []byte) (bc ByteCode) {
|
|||
} else {
|
||||
ibc.MemMask = ScratchpadL2Mask
|
||||
}
|
||||
ibc.Imm = signExtend2sCompl(instr.IMM())
|
||||
ibc.Imm = instr.IMM64()
|
||||
case 208, 209, 210, 211, 212, 213: //6
|
||||
ibc.Dst = instr.Dst() % RegistersCountFloat // bit shift optimization
|
||||
ibc.Opcode = VM_FSQRT_R
|
||||
|
@ -315,7 +320,7 @@ func CompileProgramToByteCode(prog []byte) (bc ByteCode) {
|
|||
|
||||
shift := uint64(instr.Mod()>>4) + CONDITIONOFFSET
|
||||
//conditionmask := CONDITIONMASK << shift
|
||||
ibc.Imm = signExtend2sCompl(instr.IMM()) | (uint64(1) << shift)
|
||||
ibc.Imm = instr.IMM64() | (uint64(1) << shift)
|
||||
if CONDITIONOFFSET > 0 || shift > 0 {
|
||||
ibc.Imm &= ^(uint64(1) << (shift - 1))
|
||||
}
|
||||
|
@ -331,7 +336,7 @@ func CompileProgramToByteCode(prog []byte) (bc ByteCode) {
|
|||
|
||||
case 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255: //16
|
||||
ibc.Opcode = VM_ISTORE
|
||||
ibc.Imm = signExtend2sCompl(instr.IMM())
|
||||
ibc.Imm = instr.IMM64()
|
||||
if (instr.Mod() >> 4) < STOREL3CONDITION {
|
||||
if (instr.Mod() % 4) != 0 {
|
||||
ibc.MemMask = ScratchpadL1Mask
|
||||
|
|
Loading…
Reference in a new issue