From d20dd880cebca048011076865d3db44ec771df71 Mon Sep 17 00:00:00 2001 From: WeebDataHoarder <57538841+WeebDataHoarder@users.noreply.github.com> Date: Thu, 18 Apr 2024 12:09:05 +0200 Subject: [PATCH] amd64: Implemented VM JIT --- README.md | 27 ++-- aes/round_amd64.go | 29 ++-- asm/aes_amd64.s | 78 +++++----- asm/cpuid_amd64.go | 7 + asm/cpuid_amd64.s | 34 ++++ cache.go | 9 +- config.go | 14 +- dataset.go | 1 + dataset_light.go | 4 + exec.go | 2 + exec_generic.go | 21 +++ exec_mmap_unix.go | 52 ++++++- jit_amd64.go | 176 ++++++++++++++++++++- jit_generic.go | 5 + randomx_test.go | 3 + superscalar.go | 3 + superscalar_jit_amd64.go | 18 +-- superscalar_jit_amd64.s | 1 + vm.go | 30 +++- vm_bytecode.go | 2 +- vm_bytecode_jit_amd64.go | 312 +++++++++++++++++++++++++++++++++++++ vm_bytecode_jit_amd64.s | 91 +++++++++++ vm_bytecode_jit_generic.go | 11 ++ vm_bytecode_native.go | 6 +- vm_bytecode_purego.go | 6 +- vm_instruction.go | 10 +- 26 files changed, 849 insertions(+), 103 deletions(-) create mode 100644 asm/cpuid_amd64.go create mode 100644 asm/cpuid_amd64.s create mode 100644 jit_generic.go create mode 100644 vm_bytecode_jit_amd64.go create mode 100644 vm_bytecode_jit_amd64.s create mode 100644 vm_bytecode_jit_generic.go diff --git a/README.md b/README.md index 884dd3f..8d2eace 100644 --- a/README.md +++ b/README.md @@ -8,21 +8,22 @@ This package implements RandomX without CGO, using only Golang code, pure float6 All test cases pass properly. -Uses minimal Go assembly due to having to set rounding mode natively. Native hard float can be added with supporting rounding mode under _asm_. - JIT is supported on a few platforms but can be hard-disabled via the `disable_jit` build flag, or at runtime. A pure Golang implementation can be used on platforms without hard float support or via the `purego` build flag manually. -| Platform | Supported | Hard Float | SuperScalar JIT | Notes | -|:-----------:|:---------:|:----------:|:---------------:|:----------------:| -| **386** | ✅ | ✅ | ❌ | | -| **amd64** | ✅ | ✅ | ✅* | JIT only on Unix | -| **arm** | ✅* | ❌ | ❌ | | -| **arm64** | ✅ | ✅ | ❌ | | -| **mips** | ✅* | ❌ | ❌ | | -| **mips64** | ✅* | ❌ | ❌ | | -| **riscv64** | ✅* | ❌ | ❌ | | -| **wasm** | ✅* | ❌ | ❌ | | +| Platform | Hard Float | Hard AES | JIT | Native | purego | Notes | +|:-----------:|:----------:|:--------:|:---:|:------:|:------:|:----------------:| +| **386** | ✅ | ❌ | ❌ | ✅ | ✅ | | +| **amd64** | ✅ | ✅ | ✅* | ✅ | ✅ | JIT only on Unix | +| **arm** | ❌ | ❌ | ❌ | ❌ | ✅ | | +| **arm64** | ✅ | ❌ | ❌ | ✅ | ✅ | | +| **mips** | ❌ | ❌ | ❌ | ❌ | ✅ | | +| **mips64** | ❌ | ❌ | ❌ | ❌ | ✅ | | +| **riscv64** | ❌ | ❌ | ❌ | ❌ | ✅ | | +| **wasm** | ❌ | ❌ | ❌ | ❌ | ✅ | | -* these platforms only support software floating point / purego and will not be performant. \ No newline at end of file + + Any platform with no hard float support (soft float via [softfloat64](https://git.gammaspectra.live/P2Pool/softfloat64)) will be vastly slower. + + Native hard float, with rounding mode support, can be added under _asm_. 
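For orientation, a minimal usage sketch of the API this patch touches, pieced together from the tests further down. The cache constructor and key-initialization names (`Randomx_alloc_cache`, `Init`) are assumptions not shown in this diff; `VM_Initialize`, `CalculateHash`, and the new `Close` are.

package main

import (
	"fmt"

	randomx "git.gammaspectra.live/P2Pool/go-randomx/v2"
)

func main() {
	// RANDOMX_FLAG_JIT enables the new VM JIT on supported platforms; adding
	// RANDOMX_FLAG_SECURE keeps W^X by re-protecting the JIT buffer around each
	// code generation (see cache.go / vm.go in this patch).
	// Randomx_alloc_cache and Init are assumed entry points, not shown here.
	c := randomx.Randomx_alloc_cache(randomx.RANDOMX_FLAG_JIT | randomx.RANDOMX_FLAG_SECURE)
	defer c.Close()
	c.Init([]byte("test key 000"))

	vm := c.VM_Initialize()
	defer vm.Close() // releases the mmap'd JIT buffer added by this patch

	var out [32]byte
	vm.CalculateHash([]byte("This is a test"), &out)
	fmt.Printf("%x\n", out)
}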
\ No newline at end of file diff --git a/aes/round_amd64.go b/aes/round_amd64.go index 8363096..c63f6ff 100644 --- a/aes/round_amd64.go +++ b/aes/round_amd64.go @@ -8,18 +8,23 @@ import ( _ "unsafe" ) +//go:noescape //go:linkname hard_aesdec git.gammaspectra.live/P2Pool/go-randomx/v2/asm.aesdec func hard_aesdec(state *[4]uint32, key *[4]uint32) +//go:noescape //go:linkname hard_aesenc git.gammaspectra.live/P2Pool/go-randomx/v2/asm.aesenc func hard_aesenc(state *[4]uint32, key *[4]uint32) +//go:noescape //go:linkname hard_aesroundtrip_decenc git.gammaspectra.live/P2Pool/go-randomx/v2/asm.aesroundtrip_decenc func hard_aesroundtrip_decenc(states *[4][4]uint32, keys *[4][4]uint32) +//go:noescape //go:linkname hard_aesroundtrip_encdec git.gammaspectra.live/P2Pool/go-randomx/v2/asm.aesroundtrip_encdec func hard_aesroundtrip_encdec(states *[4][4]uint32, keys *[4][4]uint32) +//go:noescape //go:linkname hard_aesroundtrip_encdec1 git.gammaspectra.live/P2Pool/go-randomx/v2/asm.aesroundtrip_encdec1 func hard_aesroundtrip_encdec1(states *[4][4]uint32, key *[4]uint32) @@ -45,10 +50,10 @@ func aesroundtrip_decenc(states *[4][4]uint32, keys *[4][4]uint32) { if supportsAES { hard_aesroundtrip_decenc(states, keys) } else { - aesdec(&states[0], &keys[0]) - aesenc(&states[1], &keys[1]) - aesdec(&states[2], &keys[2]) - aesenc(&states[3], &keys[3]) + soft_aesdec(&states[0], &keys[0]) + soft_aesenc(&states[1], &keys[1]) + soft_aesdec(&states[2], &keys[2]) + soft_aesenc(&states[3], &keys[3]) } } @@ -56,10 +61,10 @@ func aesroundtrip_encdec(states *[4][4]uint32, keys *[4][4]uint32) { if supportsAES { hard_aesroundtrip_encdec(states, keys) } else { - aesenc(&states[0], &keys[0]) - aesdec(&states[1], &keys[1]) - aesenc(&states[2], &keys[2]) - aesdec(&states[3], &keys[3]) + soft_aesenc(&states[0], &keys[0]) + soft_aesdec(&states[1], &keys[1]) + soft_aesenc(&states[2], &keys[2]) + soft_aesdec(&states[3], &keys[3]) } } @@ -67,9 +72,9 @@ func aesroundtrip_encdec1(states *[4][4]uint32, key *[4]uint32) { if supportsAES { hard_aesroundtrip_encdec1(states, key) } else { - aesenc(&states[0], key) - aesdec(&states[1], key) - aesenc(&states[2], key) - aesdec(&states[3], key) + soft_aesenc(&states[0], key) + soft_aesdec(&states[1], key) + soft_aesenc(&states[2], key) + soft_aesdec(&states[3], key) } } diff --git a/asm/aes_amd64.s b/asm/aes_amd64.s index 3c161fe..40ac125 100644 --- a/asm/aes_amd64.s +++ b/asm/aes_amd64.s @@ -5,43 +5,43 @@ TEXT ·aesenc(SB),NOSPLIT|NOFRAME,$0-16 MOVQ state+0(FP), AX MOVQ key+8(FP), BX - MOVUPS 0(AX), X0 - MOVUPS 0(BX), X1 + VMOVDQU32 0(AX), X0 + VMOVDQU32 0(BX), X1 AESENC X1, X0 - MOVUPS X0, 0(AX) + VMOVDQU32 X0, 0(AX) RET TEXT ·aesdec(SB),NOSPLIT|NOFRAME,$0-16 MOVQ state+0(FP), AX MOVQ key+8(FP), BX - MOVUPS 0(AX), X0 - MOVUPS 0(BX), X1 + VMOVDQU32 0(AX), X0 + VMOVDQU32 0(BX), X1 AESDEC X1, X0 - MOVUPS X0, 0(AX) + VMOVDQU32 X0, 0(AX) RET TEXT ·aesroundtrip_decenc(SB),NOSPLIT|NOFRAME,$0-16 MOVQ states+0(FP), AX MOVQ keys+8(FP), BX - MOVUPS 0(AX), X0 - MOVUPS 0(BX), X1 - MOVUPS 16(AX), X2 - MOVUPS 16(BX), X3 - MOVUPS 32(AX), X4 - MOVUPS 32(BX), X5 - MOVUPS 48(AX), X6 - MOVUPS 48(BX), X7 + VMOVDQU32 0(AX), X0 + VMOVDQU32 0(BX), X1 + VMOVDQU32 16(AX), X2 + VMOVDQU32 16(BX), X3 + VMOVDQU32 32(AX), X4 + VMOVDQU32 32(BX), X5 + VMOVDQU32 48(AX), X6 + VMOVDQU32 48(BX), X7 AESDEC X1, X0 AESENC X3, X2 AESDEC X5, X4 AESENC X7, X6 - MOVUPS X0, 0(AX) - MOVUPS X2, 16(AX) - MOVUPS X4, 32(AX) - MOVUPS X6, 48(AX) + VMOVDQU32 X0, 0(AX) + VMOVDQU32 X2, 16(AX) + VMOVDQU32 X4, 32(AX) + VMOVDQU32 X6, 48(AX) RET @@ 
-49,24 +49,24 @@ TEXT ·aesroundtrip_encdec(SB),NOSPLIT|NOFRAME,$0-16 MOVQ states+0(FP), AX MOVQ keys+8(FP), BX - MOVUPS 0(AX), X0 - MOVUPS 0(BX), X1 - MOVUPS 16(AX), X2 - MOVUPS 16(BX), X3 - MOVUPS 32(AX), X4 - MOVUPS 32(BX), X5 - MOVUPS 48(AX), X6 - MOVUPS 48(BX), X7 + VMOVDQU32 0(AX), X0 + VMOVDQU32 0(BX), X1 + VMOVDQU32 16(AX), X2 + VMOVDQU32 16(BX), X3 + VMOVDQU32 32(AX), X4 + VMOVDQU32 32(BX), X5 + VMOVDQU32 48(AX), X6 + VMOVDQU32 48(BX), X7 AESENC X1, X0 AESDEC X3, X2 AESENC X5, X4 AESDEC X7, X6 - MOVUPS X0, 0(AX) - MOVUPS X2, 16(AX) - MOVUPS X4, 32(AX) - MOVUPS X6, 48(AX) + VMOVDQU32 X0, 0(AX) + VMOVDQU32 X2, 16(AX) + VMOVDQU32 X4, 32(AX) + VMOVDQU32 X6, 48(AX) RET @@ -74,20 +74,20 @@ TEXT ·aesroundtrip_encdec1(SB),NOSPLIT|NOFRAME,$0-16 MOVQ states+0(FP), AX MOVQ key+8(FP), BX - MOVUPS 0(BX), X0 - MOVUPS 0(AX), X1 - MOVUPS 16(AX), X2 - MOVUPS 32(AX), X3 - MOVUPS 48(AX), X4 + VMOVDQU32 0(BX), X0 + VMOVDQU32 0(AX), X1 + VMOVDQU32 16(AX), X2 + VMOVDQU32 32(AX), X3 + VMOVDQU32 48(AX), X4 AESENC X0, X1 AESDEC X0, X2 AESENC X0, X3 AESDEC X0, X4 - MOVUPS X1, 0(AX) - MOVUPS X2, 16(AX) - MOVUPS X3, 32(AX) - MOVUPS X4, 48(AX) + VMOVDQU32 X1, 0(AX) + VMOVDQU32 X2, 16(AX) + VMOVDQU32 X3, 32(AX) + VMOVDQU32 X4, 48(AX) RET diff --git a/asm/cpuid_amd64.go b/asm/cpuid_amd64.go new file mode 100644 index 0000000..494e896 --- /dev/null +++ b/asm/cpuid_amd64.go @@ -0,0 +1,7 @@ +//go:build amd64 && !purego + +package asm + +func Cpuid(op uint32) (eax, ebx, ecx, edx uint32) +func Cpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32) +func Xgetbv(index uint32) (eax, edx uint32) diff --git a/asm/cpuid_amd64.s b/asm/cpuid_amd64.s new file mode 100644 index 0000000..aa5073d --- /dev/null +++ b/asm/cpuid_amd64.s @@ -0,0 +1,34 @@ +//go:build amd64 && !purego + +#include "textflag.h" + +// func Cpuid(op uint32) (eax, ebx, ecx, edx uint32) +TEXT ·Cpuid(SB), 7, $0 + XORQ CX, CX + MOVL op+0(FP), AX + CPUID + MOVL AX, eax+8(FP) + MOVL BX, ebx+12(FP) + MOVL CX, ecx+16(FP) + MOVL DX, edx+20(FP) + RET + + +// func Cpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32) +TEXT ·Cpuidex(SB), 7, $0 + MOVL op+0(FP), AX + MOVL op2+4(FP), CX + CPUID + MOVL AX, eax+8(FP) + MOVL BX, ebx+12(FP) + MOVL CX, ecx+16(FP) + MOVL DX, edx+20(FP) + RET + +// func xgetbv(index uint32) (eax, edx uint32) +TEXT ·Xgetbv(SB), 7, $0 + MOVL index+0(FP), CX + BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV + MOVL AX, eax+8(FP) + MOVL DX, edx+12(FP) + RET diff --git a/cache.go b/cache.go index 67b65cf..7bd43e2 100644 --- a/cache.go +++ b/cache.go @@ -40,11 +40,18 @@ func (cache *Randomx_Cache) HasJIT() bool { func (cache *Randomx_Cache) VM_Initialize() *VM { - return &VM{ + vm := &VM{ Dataset: &Randomx_DatasetLight{ Cache: cache, }, } + if cache.HasJIT() { + vm.JITProgram = mapProgram(nil, int(RandomXCodeSize)) + if cache.Flags&RANDOMX_FLAG_SECURE == 0 { + mapProgramRWX(vm.JITProgram) + } + } + return vm } func (cache *Randomx_Cache) Close() error { diff --git a/config.go b/config.go index 7fe7c20..92aa9a2 100644 --- a/config.go +++ b/config.go @@ -106,8 +106,18 @@ const CONDITIONOFFSET = RANDOMX_JUMP_OFFSET const CONDITIONMASK = (1 << RANDOMX_JUMP_BITS) - 1 const STOREL3CONDITION = 14 -const RANDOMX_FLAG_DEFAULT = uint64(0) -const RANDOMX_FLAG_JIT = uint64(1 << iota) +const RANDOMX_FLAG_DEFAULT = 0 + +const ( + RANDOMX_FLAG_LARGE_PAGES = 1 << iota + RANDOMX_FLAG_HARD_AES + RANDOMX_FLAG_FULL_MEM + RANDOMX_FLAG_JIT + RANDOMX_FLAG_SECURE + RANDOMX_FLAG_ARGON2_SSSE3 + RANDOMX_FLAG_ARGON2_AVX2 + RANDOMX_FLAG_ARGON2 +) func isZeroOrPowerOf2(x uint32) bool { 
return (x & (x - 1)) == 0 diff --git a/dataset.go b/dataset.go index 3614177..a642f9d 100644 --- a/dataset.go +++ b/dataset.go @@ -4,4 +4,5 @@ type Randomx_Dataset interface { InitDataset(startItem, endItem uint64) ReadDataset(address uint64, r, cache *RegisterLine) PrefetchDataset(address uint64) + Flags() uint64 } diff --git a/dataset_light.go b/dataset_light.go index 21cb681..5a88d92 100644 --- a/dataset_light.go +++ b/dataset_light.go @@ -21,6 +21,10 @@ func (d *Randomx_DatasetLight) ReadDataset(address uint64, r, cache *RegisterLin } } +func (d *Randomx_DatasetLight) Flags() uint64 { + return d.Cache.Flags +} + func (d *Randomx_DatasetLight) InitDataset(startItem, endItem uint64) { //d.Cache.initDataset(d.Cache.Programs) } diff --git a/exec.go b/exec.go index c10b7b5..be707f3 100644 --- a/exec.go +++ b/exec.go @@ -1,3 +1,5 @@ package randomx type SuperScalarProgramFunc []byte + +type VMProgramFunc []byte diff --git a/exec_generic.go b/exec_generic.go index 00d14aa..b9ab294 100644 --- a/exec_generic.go +++ b/exec_generic.go @@ -5,3 +5,24 @@ package randomx func (f SuperScalarProgramFunc) Close() error { return nil } + +func (f VMProgramFunc) Close() error { + return nil +} + +func mapProgram(program []byte, size int) []byte { + return nil +} + +func mapProgramRW(execFunc []byte) { + +} + +func mapProgramRX(execFunc []byte) { + +} + +// mapProgramRWX insecure! +func mapProgramRWX(execFunc []byte) { + +} diff --git a/exec_mmap_unix.go b/exec_mmap_unix.go index 5471cd5..f20a95f 100644 --- a/exec_mmap_unix.go +++ b/exec_mmap_unix.go @@ -9,10 +9,56 @@ import ( func (f SuperScalarProgramFunc) Close() error { return unix.Munmap(f) } +func (f VMProgramFunc) Close() error { + return unix.Munmap(f) +} -func mapProgram(program []byte) []byte { - // Write only - execFunc, err := unix.Mmap(-1, 0, len(program), unix.PROT_WRITE, unix.MAP_PRIVATE|unix.MAP_ANONYMOUS) +func mapProgramRW(execFunc []byte) { + err := unix.Mprotect(execFunc, unix.PROT_READ|unix.PROT_WRITE) + if err != nil { + defer func() { + // unmap if we err + err := unix.Munmap(execFunc) + if err != nil { + panic(err) + } + }() + panic(err) + } +} + +func mapProgramRX(execFunc []byte) { + err := unix.Mprotect(execFunc, unix.PROT_READ|unix.PROT_EXEC) + if err != nil { + defer func() { + // unmap if we err + err := unix.Munmap(execFunc) + if err != nil { + panic(err) + } + }() + panic(err) + } +} + +// mapProgramRWX insecure! 
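The mprotect helpers above implement the usual W^X JIT lifecycle. A condensed standalone sketch of that flow, using only the x/sys/unix calls this file already imports (illustration, not part of the patch):

package main

import "golang.org/x/sys/unix"

// writeThenExec mirrors mapProgram + mapProgramRX: allocate anonymous RW
// memory, emit code into it, then flip it to RX before it is ever called.
func writeThenExec(code []byte) []byte {
	buf, err := unix.Mmap(-1, 0, len(code),
		unix.PROT_READ|unix.PROT_WRITE,
		unix.MAP_PRIVATE|unix.MAP_ANONYMOUS)
	if err != nil {
		panic(err)
	}
	copy(buf, code)
	// never writable and executable at the same time
	if err := unix.Mprotect(buf, unix.PROT_READ|unix.PROT_EXEC); err != nil {
		_ = unix.Munmap(buf)
		panic(err)
	}
	return buf // caller executes, then unix.Munmap(buf) to release
}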
+func mapProgramRWX(execFunc []byte) { + err := unix.Mprotect(execFunc, unix.PROT_READ|unix.PROT_WRITE|unix.PROT_EXEC) + if err != nil { + defer func() { + // unmap if we err + err := unix.Munmap(execFunc) + if err != nil { + panic(err) + } + }() + panic(err) + } +} + +func mapProgram(program []byte, size int) []byte { + // Read and Write only + execFunc, err := unix.Mmap(-1, 0, max(size, len(program)), unix.PROT_READ|unix.PROT_WRITE, unix.MAP_PRIVATE|unix.MAP_ANONYMOUS) if err != nil { panic(err) } diff --git a/jit_amd64.go b/jit_amd64.go index 96fe7c7..cc9b4a2 100644 --- a/jit_amd64.go +++ b/jit_amd64.go @@ -2,6 +2,12 @@ package randomx +import ( + "bytes" + "encoding/binary" + "git.gammaspectra.live/P2Pool/go-randomx/v2/asm" +) + /* REGISTER ALLOCATION: @@ -11,7 +17,7 @@ package randomx ; rcx -> temporary ; rdx -> temporary ; rsi -> scratchpad pointer - ; rdi -> return address // dataset pointer + ; rdi -> (not used) ; rbp -> (do not use, it's used by Golang sampling) jump target //todo: memory registers "ma" (high 32 bits), "mx" (low 32 bits) ; rsp -> stack pointer ; r8 -> "r0" @@ -134,7 +140,7 @@ var CALL = 0xe8 var REX_ADD_I = []byte{0x49, 0x81} var REX_TEST = []byte{0x49, 0xF7} var JZ = []byte{0x0f, 0x84} -var JZ_SHORT = 0x74 +var JZ_SHORT byte = 0x74 var RET byte = 0xc3 @@ -151,6 +157,172 @@ var NOP6 = []byte{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00} var NOP7 = []byte{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00} var NOP8 = []byte{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00} +var JMP_ALIGN_PREFIX = [14][]byte{ + {}, + {0x2E}, + {0x2E, 0x2E}, + {0x2E, 0x2E, 0x2E}, + {0x2E, 0x2E, 0x2E, 0x2E}, + {0x2E, 0x2E, 0x2E, 0x2E, 0x2E}, + {0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E}, + {0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E}, + {0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E}, + {0x90, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E}, + {0x66, 0x90, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E}, + {0x66, 0x66, 0x90, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E}, + {0x0F, 0x1F, 0x40, 0x00, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E}, + {0x0F, 0x1F, 0x44, 0x00, 0x00, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E}, +} + func genSIB(scale, index, base int) byte { return byte((scale << 6) | (index << 3) | base) } +func genAddressReg(buf []byte, instr *ByteCodeInstruction, rax bool) []byte { + buf = append(buf, LEA_32...) + if rax { + buf = append(buf, 0x80+instr.Src+0) + } else { + buf = append(buf, 0x80+instr.Src+8) + } + if instr.Src == RegisterNeedsSib { + buf = append(buf, 0x24) + } + buf = binary.LittleEndian.AppendUint32(buf, uint32(instr.Imm)) + if rax { + buf = append(buf, AND_EAX_I) + } else { + buf = append(buf, AND_ECX_I...) + } + buf = binary.LittleEndian.AppendUint32(buf, instr.MemMask) + return buf +} + +func valAsString(values ...uint32) []byte { + r := make([]byte, 4*len(values)) + for i, v := range values { + dst := r[i*4:] + dst[0] = byte(v & 0xff) + dst[1] = byte((v >> 8) & 0xff) + dst[2] = byte((v >> 16) & 0xff) + dst[3] = byte((v >> 24) & 0xff) + switch { + case dst[0] == 0: + return r[:i*4] + case dst[1] == 0: + return r[:i*4+1] + case dst[2] == 0: + return r[:i*4+2] + case dst[3] == 0: + return r[:i*4+3] + } + } + return r +} + +func familyModel(maxFunctionId uint32) (family, model, stepping int) { + if maxFunctionId < 0x1 { + return 0, 0, 0 + } + eax, _, _, _ := asm.Cpuid(1) + // If BaseFamily[3:0] is less than Fh then ExtendedFamily[7:0] is reserved and Family is equal to BaseFamily[3:0]. 
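`Cpuid`/`Xgetbv` follow the standard amd64 feature-detection recipe. A sketch of how they can gate hard AES and OS-saved AVX state — the bit positions are from the Intel/AMD manuals; the helper itself is illustrative, not part of this patch:

package asm_example // illustrative only

import "git.gammaspectra.live/P2Pool/go-randomx/v2/asm"

func detect() (hasAES, hasOSAVX bool) {
	_, _, ecx, _ := asm.Cpuid(1)
	hasAES = ecx&(1<<25) != 0 // CPUID.1:ECX.AESNI[bit 25]
	// AVX needs CPU support *and* the OS saving YMM state via XSAVE
	if ecx&(1<<27) != 0 && ecx&(1<<28) != 0 { // OSXSAVE, AVX
		eax, _ := asm.Xgetbv(0)
		hasOSAVX = eax&0x6 == 0x6 // XMM+YMM state enabled in XCR0
	}
	return
}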
+ family = int((eax >> 8) & 0xf) + extFam := family == 0x6 // Intel is 0x6, needs extended model. + if family == 0xf { + // Add ExtFamily + family += int((eax >> 20) & 0xff) + extFam = true + } + // If BaseFamily[3:0] is less than 0Fh then ExtendedModel[3:0] is reserved and Model is equal to BaseModel[3:0]. + model = int((eax >> 4) & 0xf) + if extFam { + // Add ExtModel + model += int((eax >> 12) & 0xf0) + } + stepping = int(eax & 0xf) + return family, model, stepping +} + +var BranchesWithin32B = func() bool { + a, b, c, d := asm.Cpuid(0) + v := string(valAsString(b, d, c)) + + if v == "GenuineIntel" { + family, model, stepping := familyModel(a) + + // Intel JCC erratum mitigation + if family == 6 { + // Affected CPU models and stepping numbers are taken from https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf + return ((model == 0x4E) && (stepping == 0x3)) || + ((model == 0x55) && ((stepping == 0x4) || (stepping == 0x7))) || + ((model == 0x5E) && (stepping == 0x3)) || + ((model == 0x8E) && (stepping >= 0x9) && (stepping <= 0xC)) || + ((model == 0x9E) && (stepping >= 0x9) && (stepping <= 0xD)) || + ((model == 0xA6) && (stepping == 0x0)) || + ((model == 0xAE) && (stepping == 0xA)) + } + } + return false +}() + +/* +;# callee-saved registers - Microsoft x64 calling convention +push rbx +push rbp +push rdi +push rsi +push r12 +push r13 +push r14 +push r15 +sub rsp, 80 +movdqu xmmword ptr [rsp+64], xmm6 +movdqu xmmword ptr [rsp+48], xmm7 +movdqu xmmword ptr [rsp+32], xmm8 +movdqu xmmword ptr [rsp+16], xmm9 +movdqu xmmword ptr [rsp+0], xmm10 +sub rsp, 80 +movdqu xmmword ptr [rsp+64], xmm11 +movdqu xmmword ptr [rsp+48], xmm12 +movdqu xmmword ptr [rsp+32], xmm13 +movdqu xmmword ptr [rsp+16], xmm14 +movdqu xmmword ptr [rsp+0], xmm15 + +;# function arguments +push rcx ;# RegisterFile& registerFile +mov rbp, qword ptr [rdx] ;# "mx", "ma" +mov rdi, qword ptr [rdx+8] ;# uint8_t* dataset +mov rsi, r8 ;# uint8_t* scratchpad +mov rbx, r9 ;# loop counter + +mov rax, rbp +ror rbp, 32 + +;# zero integer registers +xor r8, r8 +xor r9, r9 +xor r10, r10 +xor r11, r11 +xor r12, r12 +xor r13, r13 +xor r14, r14 +xor r15, r15 + +;# load constant registers +lea rcx, [rcx+120] +movapd xmm8, xmmword ptr [rcx+72] +movapd xmm9, xmmword ptr [rcx+88] +movapd xmm10, xmmword ptr [rcx+104] +movapd xmm11, xmmword ptr [rcx+120] + +movapd xmm13, xmmword ptr [mantissaMask] +movapd xmm14, xmmword ptr [exp240] +movapd xmm15, xmmword ptr [scaleMask] +mov rdx, rax +and eax, RANDOMX_SCRATCHPAD_MASK +ror rdx, 32 +and edx, RANDOMX_SCRATCHPAD_MASK +jmp rx_program_loop_begin +*/ +var randomx_program_prologue = bytes.Repeat(NOP1, 64) + +var randomx_program_loop_begin = bytes.Repeat(NOP1, 64) diff --git a/jit_generic.go b/jit_generic.go new file mode 100644 index 0000000..e84154f --- /dev/null +++ b/jit_generic.go @@ -0,0 +1,5 @@ +//go:build !unix || !amd64 || disable_jit || purego + +package randomx + +var RandomXCodeSize uint64 = 0 diff --git a/randomx_test.go b/randomx_test.go index 2988bb9..a94e914 100644 --- a/randomx_test.go +++ b/randomx_test.go @@ -63,6 +63,7 @@ func Test_Randomx(t *testing.T) { }() vm := c.VM_Initialize() + defer vm.Close() var output_hash [32]byte vm.CalculateHash(tt.input, &output_hash) @@ -92,6 +93,7 @@ func Benchmark_RandomX(b *testing.B) { }() vm := c.VM_Initialize() + defer vm.Close() b.ResetTimer() for i := 0; i < b.N; i++ { var output_hash [32]byte @@ -119,6 +121,7 @@ func Benchmark_RandomXParallel(b *testing.B) { b.RunParallel(func(pb 
*testing.PB) { var output_hash [32]byte vm := c.VM_Initialize() + defer vm.Close() for pb.Next() { vm.CalculateHash(tt.input, &output_hash) diff --git a/superscalar.go b/superscalar.go index 6cd9c0b..5b83032 100644 --- a/superscalar.go +++ b/superscalar.go @@ -702,7 +702,10 @@ type Register struct { //RegisterNeedsSib = 4; //x86 r12 register } +// RegisterNeedsDisplacement x86 r13 register const RegisterNeedsDisplacement = 5 + +// RegisterNeedsSib x86 r12 register const RegisterNeedsSib = 4 func (sins *SuperScalarInstruction) SelectSource(preAllocatedAvailableRegisters []int, cycle int, Registers []Register, gen *Blake2Generator) bool { diff --git a/superscalar_jit_amd64.go b/superscalar_jit_amd64.go index 562532d..c1f2111 100644 --- a/superscalar_jit_amd64.go +++ b/superscalar_jit_amd64.go @@ -4,7 +4,6 @@ package randomx import ( "encoding/binary" - "runtime" "unsafe" ) @@ -17,21 +16,6 @@ func (f SuperScalarProgramFunc) Execute(rf uintptr) { } superscalar_run(rf, uintptr(unsafe.Pointer(unsafe.SliceData(f)))) - return - - var reservedStackHack [8 * 8]byte - for i := range reservedStackHack { - reservedStackHack[i] = uint8(i) - } - - memoryPtr := &f - fun := *(*func(v uintptr))(unsafe.Pointer(&memoryPtr)) - fun(rf) - - for i := range reservedStackHack { - reservedStackHack[i] = uint8(-i) - } - runtime.KeepAlive(reservedStackHack) } // generateSuperscalarCode @@ -106,5 +90,5 @@ func generateSuperscalarCode(scalarProgram SuperScalarProgram) SuperScalarProgra program = append(program, RET) - return mapProgram(program) + return mapProgram(program, len(program)) } diff --git a/superscalar_jit_amd64.s b/superscalar_jit_amd64.s index 2b078aa..397cef6 100644 --- a/superscalar_jit_amd64.s +++ b/superscalar_jit_amd64.s @@ -25,6 +25,7 @@ TEXT ·superscalar_run(SB),$0-16 // todo: not supported by golang // PREFETCHW 0(SI) + PREFETCHT0 0(SI) // move registers back to register line MOVQ R8, 0(SI) diff --git a/vm.go b/vm.go index a6c6b54..4657825 100644 --- a/vm.go +++ b/vm.go @@ -46,6 +46,8 @@ type VM struct { ScratchPad ScratchPad Dataset Randomx_Dataset + + JITProgram VMProgramFunc } // Run calculate hash based on input @@ -95,6 +97,16 @@ func (vm *VM) Run(inputHash [64]byte, roundingMode uint8) (reg RegisterFile) { var rlCache RegisterLine + if vm.JITProgram != nil { + if vm.Dataset.Flags()&RANDOMX_FLAG_SECURE > 0 { + mapProgramRW(vm.JITProgram) + byteCode.generateCode(vm.JITProgram) + mapProgramRX(vm.JITProgram) + } else { + byteCode.generateCode(vm.JITProgram) + } + } + for ic := 0; ic < RANDOMX_PROGRAM_ITERATIONS; ic++ { spMix := reg.R[readReg[0]] ^ reg.R[readReg[1]] @@ -120,7 +132,11 @@ func (vm *VM) Run(inputHash [64]byte, roundingMode uint8) (reg RegisterFile) { } // Run the actual bytecode - byteCode.Execute(®, &vm.ScratchPad, eMask) + if vm.JITProgram != nil { + vm.JITProgram.Execute(®, &vm.ScratchPad, eMask) + } else { + byteCode.Execute(®, &vm.ScratchPad, eMask) + } mem.mx ^= reg.R[readReg[2]] ^ reg.R[readReg[3]] mem.mx &= CacheLineAlignMask @@ -183,9 +199,10 @@ func (vm *VM) RunLoops(tempHash [64]byte) RegisterFile { // final loop executes here reg := vm.Run(tempHash, roundingMode) - roundingMode = reg.FPRC + // always force a restore + reg.FPRC = 0xff - //restore rounding mode + // restore rounding mode to 0 SetRoundingMode(®, 0) return reg @@ -214,3 +231,10 @@ func (vm *VM) CalculateHash(input []byte, output *[32]byte) { hash256.Sum(output[:0]) } + +func (vm *VM) Close() error { + if vm.JITProgram != nil { + return vm.JITProgram.Close() + } + return nil +} diff --git a/vm_bytecode.go 
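The `reg.FPRC = 0xff` above only forces a restore if `SetRoundingMode` skips redundant updates; that is an assumption about its implementation (not shown in this diff), sketched here:

// Assumed shape of SetRoundingMode: an invalid sentinel like 0xff can never
// equal a requested mode (0-3), so the following restore call always reaches
// the hardware and re-establishes round-to-nearest.
func SetRoundingMode(f *RegisterFile, mode uint8) {
	if f.FPRC == mode {
		return // rounding mode already active; skip the MXCSR/FPCR write
	}
	f.FPRC = mode
	setRoundingModeHW(mode) // hypothetical hardware write (LDMXCSR on amd64)
}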
b/vm_bytecode.go index 556dd3b..8865a5e 100644 --- a/vm_bytecode.go +++ b/vm_bytecode.go @@ -31,7 +31,7 @@ type ByteCodeInstruction struct { } func (i ByteCodeInstruction) jumpTarget() int { - return int(int16((uint16(i.ImmB) << 8) | uint16(i.Dst))) + return int(int16((uint16(i.ImmB) << 8) | uint16(i.Src))) } func (i ByteCodeInstruction) getScratchpadAddress(ptr uint64) uint32 { diff --git a/vm_bytecode_jit_amd64.go b/vm_bytecode_jit_amd64.go new file mode 100644 index 0000000..32f68c5 --- /dev/null +++ b/vm_bytecode_jit_amd64.go @@ -0,0 +1,312 @@ +//go:build unix && amd64 && !disable_jit && !purego + +package randomx + +import ( + "encoding/binary" + "math/bits" + "unsafe" +) + +//go:noescape +func vm_run(rf *RegisterFile, pad *ScratchPad, eMask [2]uint64, jmp uintptr) + +func (f VMProgramFunc) Execute(rf *RegisterFile, pad *ScratchPad, eMask [2]uint64) { + if f == nil { + panic("program is nil") + } + + jmpPtr := uintptr(unsafe.Pointer(unsafe.SliceData(f))) + vm_run(rf, pad, eMask, jmpPtr) +} + +func (c *ByteCode) generateCode(program []byte) { + program = program[:0] + + var instructionOffsets [RANDOMX_PROGRAM_SIZE]int32 + var codePos int32 + + for ix := range c { + instructionOffsets[ix] = codePos + curLen := len(program) + + instr := &c[ix] + switch instr.Opcode { + + case VM_IADD_RS: + program = append(program, REX_LEA...) + if instr.Dst == RegisterNeedsDisplacement { + program = append(program, 0xac) + } else { + program = append(program, 0x04+8*instr.Dst) + } + program = append(program, genSIB(int(instr.ImmB), int(instr.Src), int(instr.Dst))) + if instr.Dst == RegisterNeedsDisplacement { + program = binary.LittleEndian.AppendUint32(program, uint32(instr.Imm)) + } + + case VM_IADD_M: + program = genAddressReg(program, instr, true) + program = append(program, REX_ADD_RM...) + program = append(program, 0x04+8*instr.Dst) + program = append(program, 0x06) + case VM_IADD_MZ: + program = append(program, REX_ADD_RM...) + program = append(program, 0x86+8*instr.Dst) + program = binary.LittleEndian.AppendUint32(program, uint32(instr.Imm)) + + case VM_ISUB_R: + program = append(program, REX_SUB_RR...) + program = append(program, 0xc0+8*instr.Dst+instr.Src) + case VM_ISUB_I: + program = append(program, REX_81...) + program = append(program, 0xe8+instr.Dst) + program = binary.LittleEndian.AppendUint32(program, uint32(instr.Imm)) + + case VM_ISUB_M: + program = genAddressReg(program, instr, true) + program = append(program, REX_SUB_RM...) + program = append(program, 0x04+8*instr.Dst) + program = append(program, 0x06) + case VM_ISUB_MZ: + program = append(program, REX_SUB_RM...) + program = append(program, 0x86+8*instr.Dst) + program = binary.LittleEndian.AppendUint32(program, uint32(instr.Imm)) + + case VM_IMUL_R: + program = append(program, REX_IMUL_RR...) + program = append(program, 0xc0+8*instr.Dst+instr.Src) + case VM_IMUL_I: + // also handles imul_rcp, with 64-bit special + if bits.Len64(instr.Imm) > 32 { + program = append(program, MOV_RAX_I...) + program = binary.LittleEndian.AppendUint64(program, instr.Imm) + program = append(program, REX_IMUL_RM...) + program = append(program, 0xc0+8*instr.Dst) + } else { + program = append(program, REX_IMUL_RRI...) + program = append(program, 0xc0+9*instr.Dst) + program = binary.LittleEndian.AppendUint32(program, uint32(instr.Imm)) + } + + case VM_IMUL_M: + program = genAddressReg(program, instr, true) + program = append(program, REX_IMUL_RM...) 
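Aside on the recurring `0xc0+8*instr.Dst+instr.Src` arithmetic in the generator: it is the x86 ModRM byte for register-direct operands. A tiny helper makes the encoding explicit (illustrative; the generator keeps the inlined form):

// modRM packs the x86 ModRM byte: mod (2 bits) | reg (3 bits) | rm (3 bits).
// mod=0b11 selects register-direct operands, so modRM(3, dst, src) equals
// 0xc0 + 8*dst + src, matching the byte arithmetic used throughout generateCode.
func modRM(mod, reg, rm byte) byte {
	return mod<<6 | reg<<3 | rm
}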
+ program = append(program, 0x04+8*instr.Dst) + program = append(program, 0x06) + case VM_IMUL_MZ: + program = append(program, REX_IMUL_RM...) + program = append(program, 0x86+8*instr.Dst) + program = binary.LittleEndian.AppendUint32(program, uint32(instr.Imm)) + + case VM_IMULH_R: + program = append(program, REX_MOV_RR64...) + program = append(program, 0xc0+instr.Dst) + program = append(program, REX_MUL_R...) + program = append(program, 0xe0+instr.Src) + program = append(program, REX_MOV_R64R...) + program = append(program, 0xc2+8*instr.Dst) + + case VM_IMULH_M: + program = genAddressReg(program, instr, false) + program = append(program, REX_MOV_RR64...) + program = append(program, 0xc0+instr.Dst) + program = append(program, REX_MUL_MEM...) + program = append(program, REX_MOV_R64R...) + program = append(program, 0xc2+8*instr.Dst) + case VM_IMULH_MZ: + program = append(program, REX_MOV_RR64...) + program = append(program, 0xc0+instr.Dst) + program = append(program, REX_MUL_M...) + program = append(program, 0xa6) + program = binary.LittleEndian.AppendUint32(program, uint32(instr.Imm)) + program = append(program, REX_MOV_R64R...) + program = append(program, 0xc2+8*instr.Dst) + + case VM_ISMULH_R: + program = append(program, REX_MOV_RR64...) + program = append(program, 0xc0+instr.Dst) + program = append(program, REX_MUL_R...) + program = append(program, 0xe8+instr.Src) + program = append(program, REX_MOV_R64R...) + program = append(program, 0xc2+8*instr.Dst) + + case VM_ISMULH_M: + program = genAddressReg(program, instr, false) + program = append(program, REX_MOV_RR64...) + program = append(program, 0xc0+instr.Dst) + program = append(program, REX_IMUL_MEM...) + program = append(program, REX_MOV_R64R...) + program = append(program, 0xc2+8*instr.Dst) + case VM_ISMULH_MZ: + program = append(program, REX_MOV_RR64...) + program = append(program, 0xc0+instr.Dst) + program = append(program, REX_MUL_M...) + program = append(program, 0xae) + program = binary.LittleEndian.AppendUint32(program, uint32(instr.Imm)) + program = append(program, REX_MOV_R64R...) + program = append(program, 0xc2+8*instr.Dst) + + case VM_INEG_R: + program = append(program, REX_NEG...) + program = append(program, 0xd8+instr.Dst) + + case VM_IXOR_R: + program = append(program, REX_XOR_RR...) + program = append(program, 0xc0+8*instr.Dst+instr.Src) + case VM_IXOR_I: + program = append(program, REX_XOR_RI...) + program = append(program, 0xf0+instr.Dst) + program = binary.LittleEndian.AppendUint32(program, uint32(instr.Imm)) + + case VM_IXOR_M: + program = genAddressReg(program, instr, true) + program = append(program, REX_XOR_RM...) + program = append(program, 0x04+8*instr.Dst) + program = append(program, 0x06) + case VM_IXOR_MZ: + program = append(program, REX_XOR_RM...) + program = append(program, 0x86+8*instr.Dst) + program = binary.LittleEndian.AppendUint32(program, uint32(instr.Imm)) + + case VM_IROR_R: + program = append(program, REX_MOV_RR...) + program = append(program, 0xc8+instr.Src) + program = append(program, REX_ROT_CL...) + program = append(program, 0xc8+instr.Dst) + case VM_IROR_I: + program = append(program, REX_ROT_I8...) + program = append(program, 0xc8+instr.Dst) + program = append(program, byte(instr.Imm&63)) + + case VM_IROL_R: + program = append(program, REX_MOV_RR...) + program = append(program, 0xc8+instr.Src) + program = append(program, REX_ROT_CL...) + program = append(program, 0xc0+instr.Dst) + case VM_IROL_I: + program = append(program, REX_ROT_I8...) 
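The IMULH/ISMULH sequences above keep the high 64 bits of a 128-bit product (what one-operand MUL/IMUL leaves in RDX). The portable equivalents, as the interpreter paths compute them, look like this (sketch; the helper names are illustrative):

package mulh_example

import "math/bits"

// high 64 bits of the unsigned 128-bit product — MUL's RDX result
func umulh(a, b uint64) uint64 {
	hi, _ := bits.Mul64(a, b)
	return hi
}

// high 64 bits of the signed 128-bit product — IMUL's RDX result,
// derived from the unsigned product by the standard correction terms
func smulh(a, b int64) uint64 {
	hi, _ := bits.Mul64(uint64(a), uint64(b))
	if a < 0 {
		hi -= uint64(b)
	}
	if b < 0 {
		hi -= uint64(a)
	}
	return hi
}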
+ program = append(program, 0xc0+instr.Dst) + program = append(program, byte(instr.Imm&63)) + + case VM_ISWAP_R: + program = append(program, REX_XCHG...) + program = append(program, 0xc0+instr.Src+8*instr.Dst) + + case VM_FSWAP_RF: + program = append(program, SHUFPD...) + program = append(program, 0xc0+9*instr.Dst) + program = append(program, 1) + case VM_FSWAP_RE: + program = append(program, SHUFPD...) + program = append(program, 0xc0+9*(instr.Dst+RegistersCountFloat)) + program = append(program, 1) + + case VM_FADD_R: + program = append(program, REX_ADDPD...) + program = append(program, 0xc0+instr.Src+8*instr.Dst) + + case VM_FADD_M: + program = genAddressReg(program, instr, true) + program = append(program, REX_CVTDQ2PD_XMM12...) + program = append(program, REX_ADDPD...) + program = append(program, 0xc4+8*instr.Dst) + + case VM_FSUB_R: + program = append(program, REX_SUBPD...) + program = append(program, 0xc0+instr.Src+8*instr.Dst) + + case VM_FSUB_M: + program = genAddressReg(program, instr, true) + program = append(program, REX_CVTDQ2PD_XMM12...) + program = append(program, REX_SUBPD...) + program = append(program, 0xc4+8*instr.Dst) + + case VM_FSCAL_R: + program = append(program, REX_XORPS...) + program = append(program, 0xc7+8*instr.Dst) + + case VM_FMUL_R: + program = append(program, REX_MULPD...) + program = append(program, 0xe0+instr.Src+8*instr.Dst) + + case VM_FDIV_M: + program = genAddressReg(program, instr, true) + program = append(program, REX_CVTDQ2PD_XMM12...) + program = append(program, REX_ANDPS_XMM12...) + program = append(program, REX_DIVPD...) + program = append(program, 0xe4+8*instr.Dst) + + case VM_FSQRT_R: + program = append(program, SQRTPD...) + program = append(program, 0xe4+9*instr.Dst) + + case VM_CFROUND: + program = append(program, REX_MOV_RR64...) + program = append(program, 0xc0+instr.Src) + rotate := byte((13 - instr.Imm) & 63) + if rotate != 0 { + program = append(program, ROL_RAX...) + program = append(program, rotate) + } + program = append(program, AND_OR_MOV_LDMXCSR...) + case VM_CBRANCH: + reg := instr.Dst + target := instr.jumpTarget() + 1 + + jmpOffset := instructionOffsets[target] - (codePos + 16) + + if BranchesWithin32B { + branchBegin := uint32(codePos + 7) + branchEnd := branchBegin + if jmpOffset >= -128 { + branchEnd += 9 + } else { + branchEnd += 13 + } + // If the jump crosses or touches 32-byte boundary, align it + if (branchBegin ^ branchEnd) >= 32 { + alignmentSize := 32 - (branchBegin & 31) + // the branch sequence is at most 13 bytes, so alignmentSize (1..13) indexes JMP_ALIGN_PREFIX safely + + program = append(program, JMP_ALIGN_PREFIX[alignmentSize]...) + } + } + program = append(program, REX_ADD_I...) + program = append(program, 0xc0+reg) + program = binary.LittleEndian.AppendUint32(program, uint32(instr.Imm)) + + program = append(program, REX_TEST...) + program = append(program, 0xc0+reg) + program = binary.LittleEndian.AppendUint32(program, instr.MemMask) + + if jmpOffset >= -128 { + program = append(program, JZ_SHORT) + program = append(program, byte(jmpOffset)) + } else { + program = append(program, JZ...) + program = binary.LittleEndian.AppendUint32(program, uint32(jmpOffset-4)) + } + + case VM_ISTORE: + //genAddressRegDst + program = append(program, LEA_32...) 
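On the `(branchBegin ^ branchEnd) >= 32` test above: two code offsets share one 32-byte block exactly when they agree on every bit above bit 4, so their XOR stays below 32 — the cheap way to detect a macro-fused branch crossing or touching a 32-byte boundary, which is the JCC erratum condition. Illustrative restatement:

// crosses32B reports whether a branch spanning [begin, end] crosses or
// touches a 32-byte boundary; offsets within one block differ only in bits 0-4.
func crosses32B(begin, end uint32) bool {
	return (begin ^ end) >= 32
}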
+ program = append(program, 0x80+instr.Dst) + if instr.Dst == RegisterNeedsSib { + program = append(program, 0x24) + } + program = binary.LittleEndian.AppendUint32(program, uint32(instr.Imm)) + program = append(program, AND_EAX_I) + program = binary.LittleEndian.AppendUint32(program, instr.MemMask) + + program = append(program, REX_MOV_MR...) + program = append(program, 0x04+8*instr.Src) + program = append(program, 0x06) + case VM_NOP: + program = append(program, NOP1...) + } + + codePos += int32(len(program) - curLen) + } + program = append(program, RET) +} diff --git a/vm_bytecode_jit_amd64.s b/vm_bytecode_jit_amd64.s new file mode 100644 index 0000000..708581c --- /dev/null +++ b/vm_bytecode_jit_amd64.s @@ -0,0 +1,91 @@ +//go:build unix && amd64 && !disable_jit && !purego + +#include "textflag.h" + +TEXT ·vm_run(SB),$8-40 + + // move register file to registers + MOVQ rf+0(FP), AX + + PREFETCHNTA 0(AX) + // r0-r7 + MOVQ (0*8)(AX), R8 + MOVQ (1*8)(AX), R9 + MOVQ (2*8)(AX), R10 + MOVQ (3*8)(AX), R11 + MOVQ (4*8)(AX), R12 + MOVQ (5*8)(AX), R13 + MOVQ (6*8)(AX), R14 + MOVQ (7*8)(AX), R15 + + // f0-f3 + VMOVUPD (8*8)(AX), X0 + VMOVUPD (10*8)(AX), X1 + VMOVUPD (12*8)(AX), X2 + VMOVUPD (14*8)(AX), X3 + // e0-e3 + VMOVUPD (16*8)(AX), X4 + VMOVUPD (18*8)(AX), X5 + VMOVUPD (20*8)(AX), X6 + VMOVUPD (22*8)(AX), X7 + // a0-a3 + VMOVUPD (24*8)(AX), X8 + VMOVUPD (26*8)(AX), X9 + VMOVUPD (28*8)(AX), X10 + VMOVUPD (30*8)(AX), X11 + + //TODO: rest of init + + // mantissa mask + //VMOVQ $0x00ffffffffffffff, $0x00ffffffffffffff, X13 + MOVQ $0x00ffffffffffffff, AX + VMOVQ AX, X13 + VPBROADCASTQ X13, X13 + + // eMask + VMOVDQU64 eMask+16(FP), X14 + + // scale mask + //VMOVQ $0x80F0000000000000, $0x80F0000000000000, X15 + MOVQ $0x80F0000000000000, AX + VMOVQ AX, X15 + VPBROADCASTQ X15, X15 + + // scratchpad pointer + MOVQ pad+8(FP), SI + + // JIT location + MOVQ jmp+32(FP), AX + + // jump to JIT code + CALL AX + + + // move register file back to registers + MOVQ rf+0(FP), AX + + PREFETCHT0 0(AX) + // r0-r7 + MOVQ R8, (0*8)(AX) + MOVQ R9, (1*8)(AX) + MOVQ R10, (2*8)(AX) + MOVQ R11, (3*8)(AX) + MOVQ R12, (4*8)(AX) + MOVQ R13, (5*8)(AX) + MOVQ R14, (6*8)(AX) + MOVQ R15, (7*8)(AX) + + // f0-f3 + VMOVUPD X0, (8*8)(AX) + VMOVUPD X1, (10*8)(AX) + VMOVUPD X2, (12*8)(AX) + VMOVUPD X3, (14*8)(AX) + // e0-e3 + VMOVUPD X4, (16*8)(AX) + VMOVUPD X5, (18*8)(AX) + VMOVUPD X6, (20*8)(AX) + VMOVUPD X7, (22*8)(AX) + + // a0-a3 are constant, no need to move + + RET diff --git a/vm_bytecode_jit_generic.go b/vm_bytecode_jit_generic.go new file mode 100644 index 0000000..915c989 --- /dev/null +++ b/vm_bytecode_jit_generic.go @@ -0,0 +1,11 @@ +//go:build !unix || !amd64 || disable_jit || purego + +package randomx + +func (c *ByteCode) generateCode(program []byte) { + +} + +func (f VMProgramFunc) Execute(rf *RegisterFile, pad *ScratchPad, eMask [2]uint64) { + +} diff --git a/vm_bytecode_native.go b/vm_bytecode_native.go index 8c5a148..dc25404 100644 --- a/vm_bytecode_native.go +++ b/vm_bytecode_native.go @@ -13,7 +13,7 @@ import ( // It is the caller's responsibility to set and restore the mode to softfloat64.RoundingModeToNearest between full executions // Additionally, runtime.LockOSThread and defer runtime.UnlockOSThread is recommended to prevent other goroutines sharing these changes func (c *ByteCode) Execute(f *RegisterFile, pad *ScratchPad, eMask [2]uint64) { - for pc := 0; pc < RANDOMX_PROGRAM_SIZE; pc++ { + for pc := 0; pc < len(c); pc++ { i := &c[pc] switch i.Opcode { case VM_NOP: // we do nothing @@ -111,8 +111,8 @@ func (c 
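The loads and stores in `vm_run` above hard-code the register file layout. Inferred from those 8-byte offsets, the Go struct must look like the following sketch — the field names are assumptions; only the offsets are fixed by the assembly:

// inferred layout of the 256 bytes of registers that vm_run reads and writes:
type registerFileLayout struct {
	R [8]uint64     // +0:   r0-r7, one GP register per 8 bytes
	F [4][2]float64 // +64:  f0-f3, 16-byte float pairs
	E [4][2]float64 // +128: e0-e3
	A [4][2]float64 // +192: a0-a3 (constant per program; not stored back)
	// FPRC and any further fields are assumed to live past +256,
	// untouched by this routine.
}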
*ByteCode) Execute(f *RegisterFile, pad *ScratchPad, eMask [2]uint64) { SetRoundingMode(f, uint8(tmp)) case VM_CBRANCH: - f.R[i.Src] += i.Imm - if (f.R[i.Src] & uint64(i.MemMask)) == 0 { + f.R[i.Dst] += i.Imm + if (f.R[i.Dst] & uint64(i.MemMask)) == 0 { pc = i.jumpTarget() } case VM_ISTORE: diff --git a/vm_bytecode_purego.go b/vm_bytecode_purego.go index 65b5722..d78582e 100644 --- a/vm_bytecode_purego.go +++ b/vm_bytecode_purego.go @@ -12,7 +12,7 @@ import ( // It is the caller's responsibility to set and restore the mode to IEEE 754 roundTiesToEven between full executions // Additionally, runtime.LockOSThread and defer runtime.UnlockOSThread is recommended to prevent other goroutines sharing these changes func (c *ByteCode) Execute(f *RegisterFile, pad *ScratchPad, eMask [2]uint64) { - for pc := 0; pc < RANDOMX_PROGRAM_SIZE; pc++ { + for pc := 0; pc < len(c); pc++ { i := &c[pc] switch i.Opcode { case VM_NOP: // we do nothing @@ -110,8 +110,8 @@ func (c *ByteCode) Execute(f *RegisterFile, pad *ScratchPad, eMask [2]uint64) { SetRoundingMode(f, uint8(tmp)) case VM_CBRANCH: - f.R[i.Src] += i.Imm - if (f.R[i.Src] & uint64(i.MemMask)) == 0 { + f.R[i.Dst] += i.Imm + if (f.R[i.Dst] & uint64(i.MemMask)) == 0 { pc = i.jumpTarget() } case VM_ISTORE: diff --git a/vm_instruction.go b/vm_instruction.go index 66192b7..46c8d26 100644 --- a/vm_instruction.go +++ b/vm_instruction.go @@ -70,7 +70,7 @@ func CompileProgramToByteCode(prog []byte) (bc ByteCode) { registerUsage[i] = -1 } - for i := 0; i < RANDOMX_PROGRAM_SIZE; i++ { + for i := 0; i < len(bc); i++ { instr := VM_Instruction(prog[i*8:]) ibc := &bc[i] @@ -312,10 +312,12 @@ func CompileProgramToByteCode(prog []byte) (bc ByteCode) { case 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238: //25 // CBRANCH and CFROUND are interchanged ibc.Opcode = VM_CBRANCH - ibc.Src = instr.Dst() % RegistersCount + //TODO:??? it's +1 on other + ibc.Dst = instr.Dst() % RegistersCount - target := uint16(int16(registerUsage[ibc.Src])) - ibc.Dst = uint8(target) + target := uint16(int16(registerUsage[ibc.Dst])) + // set target! + ibc.Src = uint8(target) ibc.ImmB = uint8(target >> 8) shift := uint64(instr.Mod()>>4) + CONDITIONOFFSET
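Tying the two CBRANCH fixes together (jumpTarget in vm_bytecode.go and the compiler above): after this patch the 16-bit jump target is split across `Src` (low byte) and `ImmB` (high byte), with `Dst` holding the branch register. A sketch of the round-trip as established by the diff:

// packTarget stores a signed 16-bit target the way CompileProgramToByteCode does
func packTarget(ibc *ByteCodeInstruction, target int16) {
	ibc.Src = uint8(uint16(target))       // low byte
	ibc.ImmB = uint8(uint16(target) >> 8) // high byte
}

// jumpTarget recovers it, matching the fixed method in vm_bytecode.go
func jumpTarget(ibc *ByteCodeInstruction) int {
	return int(int16(uint16(ibc.ImmB)<<8 | uint16(ibc.Src)))
}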