amd64: Implemented VM JIT
All checks were successful
continuous-integration/drone/push Build is passing
All checks were successful
continuous-integration/drone/push Build is passing
This commit is contained in:
parent
d72726b0fe
commit
d20dd880ce
27
README.md
27
README.md
|
@ -8,21 +8,22 @@ This package implements RandomX without CGO, using only Golang code, pure float6
|
|||
|
||||
All test cases pass properly.
|
||||
|
||||
Uses minimal Go assembly due to having to set rounding mode natively. Native hard float can be added with supporting rounding mode under _asm_.
|
||||
|
||||
JIT is supported on a few platforms but can be hard-disabled via the `disable_jit` build flag, or at runtime.
|
||||
|
||||
A pure Golang implementation can be used on platforms without hard float support or via the `purego` build flag manually.
|
||||
|
||||
| Platform | Supported | Hard Float | SuperScalar JIT | Notes |
|
||||
|:-----------:|:---------:|:----------:|:---------------:|:----------------:|
|
||||
| **386** | ✅ | ✅ | ❌ | |
|
||||
| **amd64** | ✅ | ✅ | ✅* | JIT only on Unix |
|
||||
| **arm** | ✅* | ❌ | ❌ | |
|
||||
| **arm64** | ✅ | ✅ | ❌ | |
|
||||
| **mips** | ✅* | ❌ | ❌ | |
|
||||
| **mips64** | ✅* | ❌ | ❌ | |
|
||||
| **riscv64** | ✅* | ❌ | ❌ | |
|
||||
| **wasm** | ✅* | ❌ | ❌ | |
|
||||
| Platform | Hard Float | Hard AES | JIT | Native | purego | Notes |
|
||||
|:-----------:|:----------:|:--------:|:---:|:------:|:------:|:----------------:|
|
||||
| **386** | ✅ | ❌ | ❌ | ✅ | ✅ | |
|
||||
| **amd64** | ✅ | ✅ | ✅* | ✅ | ✅ | JIT only on Unix |
|
||||
| **arm** | ❌ | ❌ | ❌ | ❌ | ✅ | |
|
||||
| **arm64** | ✅ | ❌ | ❌ | ✅ | ✅ | |
|
||||
| **mips** | ❌ | ❌ | ❌ | ❌ | ✅ | |
|
||||
| **mips64** | ❌ | ❌ | ❌ | ❌ | ✅ | |
|
||||
| **riscv64** | ❌ | ❌ | ❌ | ❌ | ✅ | |
|
||||
| **wasm** | ❌ | ❌ | ❌ | ❌ | ✅ | |
|
||||
|
||||
* these platforms only support software floating point / purego and will not be performant.
|
||||
|
||||
Any platform with no hard float support (soft float using [softfloat64](git.gammaspectra.live/P2Pool/softfloat64)) will be vastly slow.
|
||||
|
||||
Native hard float can be added with supporting rounding mode under _asm_.
|
|
@ -8,18 +8,23 @@ import (
|
|||
_ "unsafe"
|
||||
)
|
||||
|
||||
//go:noescape
|
||||
//go:linkname hard_aesdec git.gammaspectra.live/P2Pool/go-randomx/v2/asm.aesdec
|
||||
func hard_aesdec(state *[4]uint32, key *[4]uint32)
|
||||
|
||||
//go:noescape
|
||||
//go:linkname hard_aesenc git.gammaspectra.live/P2Pool/go-randomx/v2/asm.aesenc
|
||||
func hard_aesenc(state *[4]uint32, key *[4]uint32)
|
||||
|
||||
//go:noescape
|
||||
//go:linkname hard_aesroundtrip_decenc git.gammaspectra.live/P2Pool/go-randomx/v2/asm.aesroundtrip_decenc
|
||||
func hard_aesroundtrip_decenc(states *[4][4]uint32, keys *[4][4]uint32)
|
||||
|
||||
//go:noescape
|
||||
//go:linkname hard_aesroundtrip_encdec git.gammaspectra.live/P2Pool/go-randomx/v2/asm.aesroundtrip_encdec
|
||||
func hard_aesroundtrip_encdec(states *[4][4]uint32, keys *[4][4]uint32)
|
||||
|
||||
//go:noescape
|
||||
//go:linkname hard_aesroundtrip_encdec1 git.gammaspectra.live/P2Pool/go-randomx/v2/asm.aesroundtrip_encdec1
|
||||
func hard_aesroundtrip_encdec1(states *[4][4]uint32, key *[4]uint32)
|
||||
|
||||
|
@ -45,10 +50,10 @@ func aesroundtrip_decenc(states *[4][4]uint32, keys *[4][4]uint32) {
|
|||
if supportsAES {
|
||||
hard_aesroundtrip_decenc(states, keys)
|
||||
} else {
|
||||
aesdec(&states[0], &keys[0])
|
||||
aesenc(&states[1], &keys[1])
|
||||
aesdec(&states[2], &keys[2])
|
||||
aesenc(&states[3], &keys[3])
|
||||
soft_aesdec(&states[0], &keys[0])
|
||||
soft_aesenc(&states[1], &keys[1])
|
||||
soft_aesdec(&states[2], &keys[2])
|
||||
soft_aesenc(&states[3], &keys[3])
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -56,10 +61,10 @@ func aesroundtrip_encdec(states *[4][4]uint32, keys *[4][4]uint32) {
|
|||
if supportsAES {
|
||||
hard_aesroundtrip_encdec(states, keys)
|
||||
} else {
|
||||
aesenc(&states[0], &keys[0])
|
||||
aesdec(&states[1], &keys[1])
|
||||
aesenc(&states[2], &keys[2])
|
||||
aesdec(&states[3], &keys[3])
|
||||
soft_aesenc(&states[0], &keys[0])
|
||||
soft_aesdec(&states[1], &keys[1])
|
||||
soft_aesenc(&states[2], &keys[2])
|
||||
soft_aesdec(&states[3], &keys[3])
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -67,9 +72,9 @@ func aesroundtrip_encdec1(states *[4][4]uint32, key *[4]uint32) {
|
|||
if supportsAES {
|
||||
hard_aesroundtrip_encdec1(states, key)
|
||||
} else {
|
||||
aesenc(&states[0], key)
|
||||
aesdec(&states[1], key)
|
||||
aesenc(&states[2], key)
|
||||
aesdec(&states[3], key)
|
||||
soft_aesenc(&states[0], key)
|
||||
soft_aesdec(&states[1], key)
|
||||
soft_aesenc(&states[2], key)
|
||||
soft_aesdec(&states[3], key)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -5,43 +5,43 @@
|
|||
TEXT ·aesenc(SB),NOSPLIT|NOFRAME,$0-16
|
||||
MOVQ state+0(FP), AX
|
||||
MOVQ key+8(FP), BX
|
||||
MOVUPS 0(AX), X0
|
||||
MOVUPS 0(BX), X1
|
||||
VMOVDQU32 0(AX), X0
|
||||
VMOVDQU32 0(BX), X1
|
||||
AESENC X1, X0
|
||||
MOVUPS X0, 0(AX)
|
||||
VMOVDQU32 X0, 0(AX)
|
||||
RET
|
||||
|
||||
TEXT ·aesdec(SB),NOSPLIT|NOFRAME,$0-16
|
||||
MOVQ state+0(FP), AX
|
||||
MOVQ key+8(FP), BX
|
||||
MOVUPS 0(AX), X0
|
||||
MOVUPS 0(BX), X1
|
||||
VMOVDQU32 0(AX), X0
|
||||
VMOVDQU32 0(BX), X1
|
||||
AESDEC X1, X0
|
||||
MOVUPS X0, 0(AX)
|
||||
VMOVDQU32 X0, 0(AX)
|
||||
RET
|
||||
|
||||
TEXT ·aesroundtrip_decenc(SB),NOSPLIT|NOFRAME,$0-16
|
||||
MOVQ states+0(FP), AX
|
||||
MOVQ keys+8(FP), BX
|
||||
|
||||
MOVUPS 0(AX), X0
|
||||
MOVUPS 0(BX), X1
|
||||
MOVUPS 16(AX), X2
|
||||
MOVUPS 16(BX), X3
|
||||
MOVUPS 32(AX), X4
|
||||
MOVUPS 32(BX), X5
|
||||
MOVUPS 48(AX), X6
|
||||
MOVUPS 48(BX), X7
|
||||
VMOVDQU32 0(AX), X0
|
||||
VMOVDQU32 0(BX), X1
|
||||
VMOVDQU32 16(AX), X2
|
||||
VMOVDQU32 16(BX), X3
|
||||
VMOVDQU32 32(AX), X4
|
||||
VMOVDQU32 32(BX), X5
|
||||
VMOVDQU32 48(AX), X6
|
||||
VMOVDQU32 48(BX), X7
|
||||
|
||||
AESDEC X1, X0
|
||||
AESENC X3, X2
|
||||
AESDEC X5, X4
|
||||
AESENC X7, X6
|
||||
|
||||
MOVUPS X0, 0(AX)
|
||||
MOVUPS X2, 16(AX)
|
||||
MOVUPS X4, 32(AX)
|
||||
MOVUPS X6, 48(AX)
|
||||
VMOVDQU32 X0, 0(AX)
|
||||
VMOVDQU32 X2, 16(AX)
|
||||
VMOVDQU32 X4, 32(AX)
|
||||
VMOVDQU32 X6, 48(AX)
|
||||
RET
|
||||
|
||||
|
||||
|
@ -49,24 +49,24 @@ TEXT ·aesroundtrip_encdec(SB),NOSPLIT|NOFRAME,$0-16
|
|||
MOVQ states+0(FP), AX
|
||||
MOVQ keys+8(FP), BX
|
||||
|
||||
MOVUPS 0(AX), X0
|
||||
MOVUPS 0(BX), X1
|
||||
MOVUPS 16(AX), X2
|
||||
MOVUPS 16(BX), X3
|
||||
MOVUPS 32(AX), X4
|
||||
MOVUPS 32(BX), X5
|
||||
MOVUPS 48(AX), X6
|
||||
MOVUPS 48(BX), X7
|
||||
VMOVDQU32 0(AX), X0
|
||||
VMOVDQU32 0(BX), X1
|
||||
VMOVDQU32 16(AX), X2
|
||||
VMOVDQU32 16(BX), X3
|
||||
VMOVDQU32 32(AX), X4
|
||||
VMOVDQU32 32(BX), X5
|
||||
VMOVDQU32 48(AX), X6
|
||||
VMOVDQU32 48(BX), X7
|
||||
|
||||
AESENC X1, X0
|
||||
AESDEC X3, X2
|
||||
AESENC X5, X4
|
||||
AESDEC X7, X6
|
||||
|
||||
MOVUPS X0, 0(AX)
|
||||
MOVUPS X2, 16(AX)
|
||||
MOVUPS X4, 32(AX)
|
||||
MOVUPS X6, 48(AX)
|
||||
VMOVDQU32 X0, 0(AX)
|
||||
VMOVDQU32 X2, 16(AX)
|
||||
VMOVDQU32 X4, 32(AX)
|
||||
VMOVDQU32 X6, 48(AX)
|
||||
RET
|
||||
|
||||
|
||||
|
@ -74,20 +74,20 @@ TEXT ·aesroundtrip_encdec1(SB),NOSPLIT|NOFRAME,$0-16
|
|||
MOVQ states+0(FP), AX
|
||||
MOVQ key+8(FP), BX
|
||||
|
||||
MOVUPS 0(BX), X0
|
||||
MOVUPS 0(AX), X1
|
||||
MOVUPS 16(AX), X2
|
||||
MOVUPS 32(AX), X3
|
||||
MOVUPS 48(AX), X4
|
||||
VMOVDQU32 0(BX), X0
|
||||
VMOVDQU32 0(AX), X1
|
||||
VMOVDQU32 16(AX), X2
|
||||
VMOVDQU32 32(AX), X3
|
||||
VMOVDQU32 48(AX), X4
|
||||
|
||||
AESENC X0, X1
|
||||
AESDEC X0, X2
|
||||
AESENC X0, X3
|
||||
AESDEC X0, X4
|
||||
|
||||
MOVUPS X1, 0(AX)
|
||||
MOVUPS X2, 16(AX)
|
||||
MOVUPS X3, 32(AX)
|
||||
MOVUPS X4, 48(AX)
|
||||
VMOVDQU32 X1, 0(AX)
|
||||
VMOVDQU32 X2, 16(AX)
|
||||
VMOVDQU32 X3, 32(AX)
|
||||
VMOVDQU32 X4, 48(AX)
|
||||
RET
|
||||
|
||||
|
|
7
asm/cpuid_amd64.go
Normal file
7
asm/cpuid_amd64.go
Normal file
|
@ -0,0 +1,7 @@
|
|||
//go:build amd64 && !purego
|
||||
|
||||
package asm
|
||||
|
||||
func Cpuid(op uint32) (eax, ebx, ecx, edx uint32)
|
||||
func Cpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
|
||||
func Xgetbv(index uint32) (eax, edx uint32)
|
34
asm/cpuid_amd64.s
Normal file
34
asm/cpuid_amd64.s
Normal file
|
@ -0,0 +1,34 @@
|
|||
//go:build amd64 && !purego
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// func Cpuid(op uint32) (eax, ebx, ecx, edx uint32)
|
||||
TEXT ·Cpuid(SB), 7, $0
|
||||
XORQ CX, CX
|
||||
MOVL op+0(FP), AX
|
||||
CPUID
|
||||
MOVL AX, eax+8(FP)
|
||||
MOVL BX, ebx+12(FP)
|
||||
MOVL CX, ecx+16(FP)
|
||||
MOVL DX, edx+20(FP)
|
||||
RET
|
||||
|
||||
|
||||
// func Cpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
|
||||
TEXT ·Cpuidex(SB), 7, $0
|
||||
MOVL op+0(FP), AX
|
||||
MOVL op2+4(FP), CX
|
||||
CPUID
|
||||
MOVL AX, eax+8(FP)
|
||||
MOVL BX, ebx+12(FP)
|
||||
MOVL CX, ecx+16(FP)
|
||||
MOVL DX, edx+20(FP)
|
||||
RET
|
||||
|
||||
// func xgetbv(index uint32) (eax, edx uint32)
|
||||
TEXT ·Xgetbv(SB), 7, $0
|
||||
MOVL index+0(FP), CX
|
||||
BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV
|
||||
MOVL AX, eax+8(FP)
|
||||
MOVL DX, edx+12(FP)
|
||||
RET
|
9
cache.go
9
cache.go
|
@ -40,11 +40,18 @@ func (cache *Randomx_Cache) HasJIT() bool {
|
|||
|
||||
func (cache *Randomx_Cache) VM_Initialize() *VM {
|
||||
|
||||
return &VM{
|
||||
vm := &VM{
|
||||
Dataset: &Randomx_DatasetLight{
|
||||
Cache: cache,
|
||||
},
|
||||
}
|
||||
if cache.HasJIT() {
|
||||
vm.JITProgram = mapProgram(nil, int(RandomXCodeSize))
|
||||
if cache.Flags&RANDOMX_FLAG_SECURE == 0 {
|
||||
mapProgramRWX(vm.JITProgram)
|
||||
}
|
||||
}
|
||||
return vm
|
||||
}
|
||||
|
||||
func (cache *Randomx_Cache) Close() error {
|
||||
|
|
14
config.go
14
config.go
|
@ -106,8 +106,18 @@ const CONDITIONOFFSET = RANDOMX_JUMP_OFFSET
|
|||
const CONDITIONMASK = (1 << RANDOMX_JUMP_BITS) - 1
|
||||
const STOREL3CONDITION = 14
|
||||
|
||||
const RANDOMX_FLAG_DEFAULT = uint64(0)
|
||||
const RANDOMX_FLAG_JIT = uint64(1 << iota)
|
||||
const RANDOMX_FLAG_DEFAULT = 0
|
||||
|
||||
const (
|
||||
RANDOMX_FLAG_LARGE_PAGES = 1 << iota
|
||||
RANDOMX_FLAG_HARD_AES
|
||||
RANDOMX_FLAG_FULL_MEM
|
||||
RANDOMX_FLAG_JIT
|
||||
RANDOMX_FLAG_SECURE
|
||||
RANDOMX_FLAG_ARGON2_SSSE3
|
||||
RANDOMX_FLAG_ARGON2_AVX2
|
||||
RANDOMX_FLAG_ARGON2
|
||||
)
|
||||
|
||||
func isZeroOrPowerOf2(x uint32) bool {
|
||||
return (x & (x - 1)) == 0
|
||||
|
|
|
@ -4,4 +4,5 @@ type Randomx_Dataset interface {
|
|||
InitDataset(startItem, endItem uint64)
|
||||
ReadDataset(address uint64, r, cache *RegisterLine)
|
||||
PrefetchDataset(address uint64)
|
||||
Flags() uint64
|
||||
}
|
||||
|
|
|
@ -21,6 +21,10 @@ func (d *Randomx_DatasetLight) ReadDataset(address uint64, r, cache *RegisterLin
|
|||
}
|
||||
}
|
||||
|
||||
func (d *Randomx_DatasetLight) Flags() uint64 {
|
||||
return d.Cache.Flags
|
||||
}
|
||||
|
||||
func (d *Randomx_DatasetLight) InitDataset(startItem, endItem uint64) {
|
||||
//d.Cache.initDataset(d.Cache.Programs)
|
||||
}
|
||||
|
|
2
exec.go
2
exec.go
|
@ -1,3 +1,5 @@
|
|||
package randomx
|
||||
|
||||
type SuperScalarProgramFunc []byte
|
||||
|
||||
type VMProgramFunc []byte
|
||||
|
|
|
@ -5,3 +5,24 @@ package randomx
|
|||
func (f SuperScalarProgramFunc) Close() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (f VMProgramFunc) Close() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func mapProgram(program []byte, size int) []byte {
|
||||
return nil
|
||||
}
|
||||
|
||||
func mapProgramRW(execFunc []byte) {
|
||||
|
||||
}
|
||||
|
||||
func mapProgramRX(execFunc []byte) {
|
||||
|
||||
}
|
||||
|
||||
// mapProgramRWX insecure!
|
||||
func mapProgramRWX(execFunc []byte) {
|
||||
|
||||
}
|
||||
|
|
|
@ -9,10 +9,56 @@ import (
|
|||
func (f SuperScalarProgramFunc) Close() error {
|
||||
return unix.Munmap(f)
|
||||
}
|
||||
func (f VMProgramFunc) Close() error {
|
||||
return unix.Munmap(f)
|
||||
}
|
||||
|
||||
func mapProgram(program []byte) []byte {
|
||||
// Write only
|
||||
execFunc, err := unix.Mmap(-1, 0, len(program), unix.PROT_WRITE, unix.MAP_PRIVATE|unix.MAP_ANONYMOUS)
|
||||
func mapProgramRW(execFunc []byte) {
|
||||
err := unix.Mprotect(execFunc, unix.PROT_READ|unix.PROT_WRITE)
|
||||
if err != nil {
|
||||
defer func() {
|
||||
// unmap if we err
|
||||
err := unix.Munmap(execFunc)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}()
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
||||
func mapProgramRX(execFunc []byte) {
|
||||
err := unix.Mprotect(execFunc, unix.PROT_READ|unix.PROT_EXEC)
|
||||
if err != nil {
|
||||
defer func() {
|
||||
// unmap if we err
|
||||
err := unix.Munmap(execFunc)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}()
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
||||
// mapProgramRWX insecure!
|
||||
func mapProgramRWX(execFunc []byte) {
|
||||
err := unix.Mprotect(execFunc, unix.PROT_READ|unix.PROT_WRITE|unix.PROT_EXEC)
|
||||
if err != nil {
|
||||
defer func() {
|
||||
// unmap if we err
|
||||
err := unix.Munmap(execFunc)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}()
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
||||
func mapProgram(program []byte, size int) []byte {
|
||||
// Read and Write only
|
||||
execFunc, err := unix.Mmap(-1, 0, max(size, len(program)), unix.PROT_READ|unix.PROT_WRITE, unix.MAP_PRIVATE|unix.MAP_ANONYMOUS)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
|
176
jit_amd64.go
176
jit_amd64.go
|
@ -2,6 +2,12 @@
|
|||
|
||||
package randomx
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"git.gammaspectra.live/P2Pool/go-randomx/v2/asm"
|
||||
)
|
||||
|
||||
/*
|
||||
|
||||
REGISTER ALLOCATION:
|
||||
|
@ -11,7 +17,7 @@ package randomx
|
|||
; rcx -> temporary
|
||||
; rdx -> temporary
|
||||
; rsi -> scratchpad pointer
|
||||
; rdi -> return address // dataset pointer
|
||||
; rdi -> (not used)
|
||||
; rbp -> (do not use, it's used by Golang sampling) jump target //todo: memory registers "ma" (high 32 bits), "mx" (low 32 bits)
|
||||
; rsp -> stack pointer
|
||||
; r8 -> "r0"
|
||||
|
@ -134,7 +140,7 @@ var CALL = 0xe8
|
|||
var REX_ADD_I = []byte{0x49, 0x81}
|
||||
var REX_TEST = []byte{0x49, 0xF7}
|
||||
var JZ = []byte{0x0f, 0x84}
|
||||
var JZ_SHORT = 0x74
|
||||
var JZ_SHORT byte = 0x74
|
||||
|
||||
var RET byte = 0xc3
|
||||
|
||||
|
@ -151,6 +157,172 @@ var NOP6 = []byte{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00}
|
|||
var NOP7 = []byte{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00}
|
||||
var NOP8 = []byte{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}
|
||||
|
||||
var JMP_ALIGN_PREFIX = [14][]byte{
|
||||
{},
|
||||
{0x2E},
|
||||
{0x2E, 0x2E},
|
||||
{0x2E, 0x2E, 0x2E},
|
||||
{0x2E, 0x2E, 0x2E, 0x2E},
|
||||
{0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
|
||||
{0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
|
||||
{0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
|
||||
{0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
|
||||
{0x90, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
|
||||
{0x66, 0x90, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
|
||||
{0x66, 0x66, 0x90, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
|
||||
{0x0F, 0x1F, 0x40, 0x00, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
|
||||
{0x0F, 0x1F, 0x44, 0x00, 0x00, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
|
||||
}
|
||||
|
||||
func genSIB(scale, index, base int) byte {
|
||||
return byte((scale << 6) | (index << 3) | base)
|
||||
}
|
||||
func genAddressReg(buf []byte, instr *ByteCodeInstruction, rax bool) []byte {
|
||||
buf = append(buf, LEA_32...)
|
||||
if rax {
|
||||
buf = append(buf, 0x80+instr.Src+0)
|
||||
} else {
|
||||
buf = append(buf, 0x80+instr.Src+8)
|
||||
}
|
||||
if instr.Src == RegisterNeedsSib {
|
||||
buf = append(buf, 0x24)
|
||||
}
|
||||
buf = binary.LittleEndian.AppendUint32(buf, uint32(instr.Imm))
|
||||
if rax {
|
||||
buf = append(buf, AND_EAX_I)
|
||||
} else {
|
||||
buf = append(buf, AND_ECX_I...)
|
||||
}
|
||||
buf = binary.LittleEndian.AppendUint32(buf, instr.MemMask)
|
||||
return buf
|
||||
}
|
||||
|
||||
func valAsString(values ...uint32) []byte {
|
||||
r := make([]byte, 4*len(values))
|
||||
for i, v := range values {
|
||||
dst := r[i*4:]
|
||||
dst[0] = byte(v & 0xff)
|
||||
dst[1] = byte((v >> 8) & 0xff)
|
||||
dst[2] = byte((v >> 16) & 0xff)
|
||||
dst[3] = byte((v >> 24) & 0xff)
|
||||
switch {
|
||||
case dst[0] == 0:
|
||||
return r[:i*4]
|
||||
case dst[1] == 0:
|
||||
return r[:i*4+1]
|
||||
case dst[2] == 0:
|
||||
return r[:i*4+2]
|
||||
case dst[3] == 0:
|
||||
return r[:i*4+3]
|
||||
}
|
||||
}
|
||||
return r
|
||||
}
|
||||
|
||||
func familyModel(maxFunctionId uint32) (family, model, stepping int) {
|
||||
if maxFunctionId < 0x1 {
|
||||
return 0, 0, 0
|
||||
}
|
||||
eax, _, _, _ := asm.Cpuid(1)
|
||||
// If BaseFamily[3:0] is less than Fh then ExtendedFamily[7:0] is reserved and Family is equal to BaseFamily[3:0].
|
||||
family = int((eax >> 8) & 0xf)
|
||||
extFam := family == 0x6 // Intel is 0x6, needs extended model.
|
||||
if family == 0xf {
|
||||
// Add ExtFamily
|
||||
family += int((eax >> 20) & 0xff)
|
||||
extFam = true
|
||||
}
|
||||
// If BaseFamily[3:0] is less than 0Fh then ExtendedModel[3:0] is reserved and Model is equal to BaseModel[3:0].
|
||||
model = int((eax >> 4) & 0xf)
|
||||
if extFam {
|
||||
// Add ExtModel
|
||||
model += int((eax >> 12) & 0xf0)
|
||||
}
|
||||
stepping = int(eax & 0xf)
|
||||
return family, model, stepping
|
||||
}
|
||||
|
||||
var BranchesWithin32B = func() bool {
|
||||
a, b, c, d := asm.Cpuid(0)
|
||||
v := string(valAsString(b, d, c))
|
||||
|
||||
if v == "GenuineIntel" {
|
||||
family, model, stepping := familyModel(a)
|
||||
|
||||
// Intel JCC erratum mitigation
|
||||
if family == 6 {
|
||||
// Affected CPU models and stepping numbers are taken from https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
|
||||
return ((model == 0x4E) && (stepping == 0x3)) ||
|
||||
((model == 0x55) && ((stepping == 0x4) || (stepping == 0x7))) ||
|
||||
((model == 0x5E) && (stepping == 0x3)) ||
|
||||
((model == 0x8E) && (stepping >= 0x9) && (stepping <= 0xC)) ||
|
||||
((model == 0x9E) && (stepping >= 0x9) && (stepping <= 0xD)) ||
|
||||
((model == 0xA6) && (stepping == 0x0)) ||
|
||||
((model == 0xAE) && (stepping == 0xA))
|
||||
}
|
||||
}
|
||||
return false
|
||||
}()
|
||||
|
||||
/*
|
||||
;# callee-saved registers - Microsoft x64 calling convention
|
||||
push rbx
|
||||
push rbp
|
||||
push rdi
|
||||
push rsi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 80
|
||||
movdqu xmmword ptr [rsp+64], xmm6
|
||||
movdqu xmmword ptr [rsp+48], xmm7
|
||||
movdqu xmmword ptr [rsp+32], xmm8
|
||||
movdqu xmmword ptr [rsp+16], xmm9
|
||||
movdqu xmmword ptr [rsp+0], xmm10
|
||||
sub rsp, 80
|
||||
movdqu xmmword ptr [rsp+64], xmm11
|
||||
movdqu xmmword ptr [rsp+48], xmm12
|
||||
movdqu xmmword ptr [rsp+32], xmm13
|
||||
movdqu xmmword ptr [rsp+16], xmm14
|
||||
movdqu xmmword ptr [rsp+0], xmm15
|
||||
|
||||
;# function arguments
|
||||
push rcx ;# RegisterFile& registerFile
|
||||
mov rbp, qword ptr [rdx] ;# "mx", "ma"
|
||||
mov rdi, qword ptr [rdx+8] ;# uint8_t* dataset
|
||||
mov rsi, r8 ;# uint8_t* scratchpad
|
||||
mov rbx, r9 ;# loop counter
|
||||
|
||||
mov rax, rbp
|
||||
ror rbp, 32
|
||||
|
||||
;# zero integer registers
|
||||
xor r8, r8
|
||||
xor r9, r9
|
||||
xor r10, r10
|
||||
xor r11, r11
|
||||
xor r12, r12
|
||||
xor r13, r13
|
||||
xor r14, r14
|
||||
xor r15, r15
|
||||
|
||||
;# load constant registers
|
||||
lea rcx, [rcx+120]
|
||||
movapd xmm8, xmmword ptr [rcx+72]
|
||||
movapd xmm9, xmmword ptr [rcx+88]
|
||||
movapd xmm10, xmmword ptr [rcx+104]
|
||||
movapd xmm11, xmmword ptr [rcx+120]
|
||||
|
||||
movapd xmm13, xmmword ptr [mantissaMask]
|
||||
movapd xmm14, xmmword ptr [exp240]
|
||||
movapd xmm15, xmmword ptr [scaleMask]
|
||||
mov rdx, rax
|
||||
and eax, RANDOMX_SCRATCHPAD_MASK
|
||||
ror rdx, 32
|
||||
and edx, RANDOMX_SCRATCHPAD_MASK
|
||||
jmp rx_program_loop_begin
|
||||
*/
|
||||
var randomx_program_prologue = bytes.Repeat(NOP1, 64)
|
||||
|
||||
var randomx_program_loop_begin = bytes.Repeat(NOP1, 64)
|
||||
|
|
5
jit_generic.go
Normal file
5
jit_generic.go
Normal file
|
@ -0,0 +1,5 @@
|
|||
//go:build !unix || !amd64 || disable_jit || purego
|
||||
|
||||
package randomx
|
||||
|
||||
var RandomXCodeSize uint64 = 0
|
|
@ -63,6 +63,7 @@ func Test_Randomx(t *testing.T) {
|
|||
}()
|
||||
|
||||
vm := c.VM_Initialize()
|
||||
defer vm.Close()
|
||||
|
||||
var output_hash [32]byte
|
||||
vm.CalculateHash(tt.input, &output_hash)
|
||||
|
@ -92,6 +93,7 @@ func Benchmark_RandomX(b *testing.B) {
|
|||
}()
|
||||
|
||||
vm := c.VM_Initialize()
|
||||
defer vm.Close()
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
var output_hash [32]byte
|
||||
|
@ -119,6 +121,7 @@ func Benchmark_RandomXParallel(b *testing.B) {
|
|||
b.RunParallel(func(pb *testing.PB) {
|
||||
var output_hash [32]byte
|
||||
vm := c.VM_Initialize()
|
||||
defer vm.Close()
|
||||
|
||||
for pb.Next() {
|
||||
vm.CalculateHash(tt.input, &output_hash)
|
||||
|
|
|
@ -702,7 +702,10 @@ type Register struct {
|
|||
//RegisterNeedsSib = 4; //x86 r12 register
|
||||
}
|
||||
|
||||
// RegisterNeedsDisplacement x86 r13 register
|
||||
const RegisterNeedsDisplacement = 5
|
||||
|
||||
// RegisterNeedsSib x86 r12 register
|
||||
const RegisterNeedsSib = 4
|
||||
|
||||
func (sins *SuperScalarInstruction) SelectSource(preAllocatedAvailableRegisters []int, cycle int, Registers []Register, gen *Blake2Generator) bool {
|
||||
|
|
|
@ -4,7 +4,6 @@ package randomx
|
|||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"runtime"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
|
@ -17,21 +16,6 @@ func (f SuperScalarProgramFunc) Execute(rf uintptr) {
|
|||
}
|
||||
|
||||
superscalar_run(rf, uintptr(unsafe.Pointer(unsafe.SliceData(f))))
|
||||
return
|
||||
|
||||
var reservedStackHack [8 * 8]byte
|
||||
for i := range reservedStackHack {
|
||||
reservedStackHack[i] = uint8(i)
|
||||
}
|
||||
|
||||
memoryPtr := &f
|
||||
fun := *(*func(v uintptr))(unsafe.Pointer(&memoryPtr))
|
||||
fun(rf)
|
||||
|
||||
for i := range reservedStackHack {
|
||||
reservedStackHack[i] = uint8(-i)
|
||||
}
|
||||
runtime.KeepAlive(reservedStackHack)
|
||||
}
|
||||
|
||||
// generateSuperscalarCode
|
||||
|
@ -106,5 +90,5 @@ func generateSuperscalarCode(scalarProgram SuperScalarProgram) SuperScalarProgra
|
|||
|
||||
program = append(program, RET)
|
||||
|
||||
return mapProgram(program)
|
||||
return mapProgram(program, len(program))
|
||||
}
|
||||
|
|
|
@ -25,6 +25,7 @@ TEXT ·superscalar_run(SB),$0-16
|
|||
|
||||
// todo: not supported by golang
|
||||
// PREFETCHW 0(SI)
|
||||
PREFETCHT0 0(SI)
|
||||
|
||||
// move registers back to register line
|
||||
MOVQ R8, 0(SI)
|
||||
|
|
30
vm.go
30
vm.go
|
@ -46,6 +46,8 @@ type VM struct {
|
|||
ScratchPad ScratchPad
|
||||
|
||||
Dataset Randomx_Dataset
|
||||
|
||||
JITProgram VMProgramFunc
|
||||
}
|
||||
|
||||
// Run calculate hash based on input
|
||||
|
@ -95,6 +97,16 @@ func (vm *VM) Run(inputHash [64]byte, roundingMode uint8) (reg RegisterFile) {
|
|||
|
||||
var rlCache RegisterLine
|
||||
|
||||
if vm.JITProgram != nil {
|
||||
if vm.Dataset.Flags()&RANDOMX_FLAG_SECURE > 0 {
|
||||
mapProgramRW(vm.JITProgram)
|
||||
byteCode.generateCode(vm.JITProgram)
|
||||
mapProgramRX(vm.JITProgram)
|
||||
} else {
|
||||
byteCode.generateCode(vm.JITProgram)
|
||||
}
|
||||
}
|
||||
|
||||
for ic := 0; ic < RANDOMX_PROGRAM_ITERATIONS; ic++ {
|
||||
spMix := reg.R[readReg[0]] ^ reg.R[readReg[1]]
|
||||
|
||||
|
@ -120,7 +132,11 @@ func (vm *VM) Run(inputHash [64]byte, roundingMode uint8) (reg RegisterFile) {
|
|||
}
|
||||
|
||||
// Run the actual bytecode
|
||||
byteCode.Execute(®, &vm.ScratchPad, eMask)
|
||||
if vm.JITProgram != nil {
|
||||
vm.JITProgram.Execute(®, &vm.ScratchPad, eMask)
|
||||
} else {
|
||||
byteCode.Execute(®, &vm.ScratchPad, eMask)
|
||||
}
|
||||
|
||||
mem.mx ^= reg.R[readReg[2]] ^ reg.R[readReg[3]]
|
||||
mem.mx &= CacheLineAlignMask
|
||||
|
@ -183,9 +199,10 @@ func (vm *VM) RunLoops(tempHash [64]byte) RegisterFile {
|
|||
|
||||
// final loop executes here
|
||||
reg := vm.Run(tempHash, roundingMode)
|
||||
roundingMode = reg.FPRC
|
||||
// always force a restore
|
||||
reg.FPRC = 0xff
|
||||
|
||||
//restore rounding mode
|
||||
// restore rounding mode to 0
|
||||
SetRoundingMode(®, 0)
|
||||
|
||||
return reg
|
||||
|
@ -214,3 +231,10 @@ func (vm *VM) CalculateHash(input []byte, output *[32]byte) {
|
|||
|
||||
hash256.Sum(output[:0])
|
||||
}
|
||||
|
||||
func (vm *VM) Close() error {
|
||||
if vm.JITProgram != nil {
|
||||
return vm.JITProgram.Close()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -31,7 +31,7 @@ type ByteCodeInstruction struct {
|
|||
}
|
||||
|
||||
func (i ByteCodeInstruction) jumpTarget() int {
|
||||
return int(int16((uint16(i.ImmB) << 8) | uint16(i.Dst)))
|
||||
return int(int16((uint16(i.ImmB) << 8) | uint16(i.Src)))
|
||||
}
|
||||
|
||||
func (i ByteCodeInstruction) getScratchpadAddress(ptr uint64) uint32 {
|
||||
|
|
312
vm_bytecode_jit_amd64.go
Normal file
312
vm_bytecode_jit_amd64.go
Normal file
|
@ -0,0 +1,312 @@
|
|||
//go:build unix && amd64 && !disable_jit && !purego
|
||||
|
||||
package randomx
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"math/bits"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
//go:noescape
|
||||
func vm_run(rf *RegisterFile, pad *ScratchPad, eMask [2]uint64, jmp uintptr)
|
||||
|
||||
func (f VMProgramFunc) Execute(rf *RegisterFile, pad *ScratchPad, eMask [2]uint64) {
|
||||
if f == nil {
|
||||
panic("program is nil")
|
||||
}
|
||||
|
||||
jmpPtr := uintptr(unsafe.Pointer(unsafe.SliceData(f)))
|
||||
vm_run(rf, pad, eMask, jmpPtr)
|
||||
}
|
||||
|
||||
func (c *ByteCode) generateCode(program []byte) {
|
||||
program = program[:0]
|
||||
|
||||
var instructionOffsets [RANDOMX_PROGRAM_SIZE]int32
|
||||
var codePos int32
|
||||
|
||||
for ix := range c {
|
||||
instructionOffsets[ix] = codePos
|
||||
curLen := len(program)
|
||||
|
||||
instr := &c[ix]
|
||||
switch instr.Opcode {
|
||||
|
||||
case VM_IADD_RS:
|
||||
program = append(program, REX_LEA...)
|
||||
if instr.Dst == RegisterNeedsDisplacement {
|
||||
program = append(program, 0xac)
|
||||
} else {
|
||||
program = append(program, 0x04+8*instr.Dst)
|
||||
}
|
||||
program = append(program, genSIB(int(instr.ImmB), int(instr.Src), int(instr.Dst)))
|
||||
if instr.Dst == RegisterNeedsDisplacement {
|
||||
program = binary.LittleEndian.AppendUint32(program, uint32(instr.Imm))
|
||||
}
|
||||
|
||||
case VM_IADD_M:
|
||||
program = genAddressReg(program, instr, true)
|
||||
program = append(program, REX_ADD_RM...)
|
||||
program = append(program, 0x04+8*instr.Dst)
|
||||
program = append(program, 0x06)
|
||||
case VM_IADD_MZ:
|
||||
program = append(program, REX_ADD_RM...)
|
||||
program = append(program, 0x86+8*instr.Dst)
|
||||
program = binary.LittleEndian.AppendUint32(program, uint32(instr.Imm))
|
||||
|
||||
case VM_ISUB_R:
|
||||
program = append(program, REX_SUB_RR...)
|
||||
program = append(program, 0xc0+8*instr.Dst+instr.Src)
|
||||
case VM_ISUB_I:
|
||||
program = append(program, REX_81...)
|
||||
program = append(program, 0xe8+instr.Dst)
|
||||
program = binary.LittleEndian.AppendUint32(program, uint32(instr.Imm))
|
||||
|
||||
case VM_ISUB_M:
|
||||
program = genAddressReg(program, instr, true)
|
||||
program = append(program, REX_SUB_RM...)
|
||||
program = append(program, 0x04+8*instr.Dst)
|
||||
program = append(program, 0x06)
|
||||
case VM_ISUB_MZ:
|
||||
program = append(program, REX_SUB_RM...)
|
||||
program = append(program, 0x86+8*instr.Dst)
|
||||
program = binary.LittleEndian.AppendUint32(program, uint32(instr.Imm))
|
||||
|
||||
case VM_IMUL_R:
|
||||
program = append(program, REX_IMUL_RR...)
|
||||
program = append(program, 0xc0+8*instr.Dst+instr.Src)
|
||||
case VM_IMUL_I:
|
||||
// also handles imul_rcp, with 64-bit special
|
||||
if bits.Len64(instr.Imm) > 32 {
|
||||
program = append(program, MOV_RAX_I...)
|
||||
program = binary.LittleEndian.AppendUint64(program, instr.Imm)
|
||||
program = append(program, REX_IMUL_RM...)
|
||||
program = append(program, 0xc0+8*instr.Dst)
|
||||
} else {
|
||||
program = append(program, REX_IMUL_RRI...)
|
||||
program = append(program, 0xc0+9*instr.Dst)
|
||||
program = binary.LittleEndian.AppendUint32(program, uint32(instr.Imm))
|
||||
}
|
||||
|
||||
case VM_IMUL_M:
|
||||
program = genAddressReg(program, instr, true)
|
||||
program = append(program, REX_IMUL_RM...)
|
||||
program = append(program, 0x04+8*instr.Dst)
|
||||
program = append(program, 0x06)
|
||||
case VM_IMUL_MZ:
|
||||
program = append(program, REX_IMUL_RM...)
|
||||
program = append(program, 0x86+8*instr.Dst)
|
||||
program = binary.LittleEndian.AppendUint32(program, uint32(instr.Imm))
|
||||
|
||||
case VM_IMULH_R:
|
||||
program = append(program, REX_MOV_RR64...)
|
||||
program = append(program, 0xc0+instr.Dst)
|
||||
program = append(program, REX_MUL_R...)
|
||||
program = append(program, 0xe0+instr.Src)
|
||||
program = append(program, REX_MOV_R64R...)
|
||||
program = append(program, 0xc2+8*instr.Dst)
|
||||
|
||||
case VM_IMULH_M:
|
||||
program = genAddressReg(program, instr, false)
|
||||
program = append(program, REX_MOV_RR64...)
|
||||
program = append(program, 0xc0+instr.Dst)
|
||||
program = append(program, REX_MUL_MEM...)
|
||||
program = append(program, REX_MOV_R64R...)
|
||||
program = append(program, 0xc2+8*instr.Dst)
|
||||
case VM_IMULH_MZ:
|
||||
program = append(program, REX_MOV_RR64...)
|
||||
program = append(program, 0xc0+instr.Dst)
|
||||
program = append(program, REX_MUL_M...)
|
||||
program = append(program, 0xa6)
|
||||
program = binary.LittleEndian.AppendUint32(program, uint32(instr.Imm))
|
||||
program = append(program, REX_MOV_R64R...)
|
||||
program = append(program, 0xc2+8*instr.Dst)
|
||||
|
||||
case VM_ISMULH_R:
|
||||
program = append(program, REX_MOV_RR64...)
|
||||
program = append(program, 0xc0+instr.Dst)
|
||||
program = append(program, REX_MUL_R...)
|
||||
program = append(program, 0xe8+instr.Src)
|
||||
program = append(program, REX_MOV_R64R...)
|
||||
program = append(program, 0xc2+8*instr.Dst)
|
||||
|
||||
case VM_ISMULH_M:
|
||||
program = genAddressReg(program, instr, false)
|
||||
program = append(program, REX_MOV_RR64...)
|
||||
program = append(program, 0xc0+instr.Dst)
|
||||
program = append(program, REX_IMUL_MEM...)
|
||||
program = append(program, REX_MOV_R64R...)
|
||||
program = append(program, 0xc2+8*instr.Dst)
|
||||
case VM_ISMULH_MZ:
|
||||
program = append(program, REX_MOV_RR64...)
|
||||
program = append(program, 0xc0+instr.Dst)
|
||||
program = append(program, REX_MUL_M...)
|
||||
program = append(program, 0xae)
|
||||
program = binary.LittleEndian.AppendUint32(program, uint32(instr.Imm))
|
||||
program = append(program, REX_MOV_R64R...)
|
||||
program = append(program, 0xc2+8*instr.Dst)
|
||||
|
||||
case VM_INEG_R:
|
||||
program = append(program, REX_NEG...)
|
||||
program = append(program, 0xd8+instr.Dst)
|
||||
|
||||
case VM_IXOR_R:
|
||||
program = append(program, REX_XOR_RR...)
|
||||
program = append(program, 0xc0+8*instr.Dst+instr.Src)
|
||||
case VM_IXOR_I:
|
||||
program = append(program, REX_XOR_RI...)
|
||||
program = append(program, 0xf0+instr.Dst)
|
||||
program = binary.LittleEndian.AppendUint32(program, uint32(instr.Imm))
|
||||
|
||||
case VM_IXOR_M:
|
||||
program = genAddressReg(program, instr, true)
|
||||
program = append(program, REX_XOR_RM...)
|
||||
program = append(program, 0x04+8*instr.Dst)
|
||||
program = append(program, 0x06)
|
||||
case VM_IXOR_MZ:
|
||||
program = append(program, REX_XOR_RM...)
|
||||
program = append(program, 0x86+8*instr.Dst)
|
||||
program = binary.LittleEndian.AppendUint32(program, uint32(instr.Imm))
|
||||
|
||||
case VM_IROR_R:
|
||||
program = append(program, REX_MOV_RR...)
|
||||
program = append(program, 0xc8+instr.Src)
|
||||
program = append(program, REX_ROT_CL...)
|
||||
program = append(program, 0xc8+instr.Dst)
|
||||
case VM_IROR_I:
|
||||
program = append(program, REX_ROT_I8...)
|
||||
program = append(program, 0xc8+instr.Dst)
|
||||
program = append(program, byte(instr.Imm&63))
|
||||
|
||||
case VM_IROL_R:
|
||||
program = append(program, REX_MOV_RR...)
|
||||
program = append(program, 0xc8+instr.Src)
|
||||
program = append(program, REX_ROT_CL...)
|
||||
program = append(program, 0xc0+instr.Dst)
|
||||
case VM_IROL_I:
|
||||
program = append(program, REX_ROT_I8...)
|
||||
program = append(program, 0xc0+instr.Dst)
|
||||
program = append(program, byte(instr.Imm&63))
|
||||
|
||||
case VM_ISWAP_R:
|
||||
program = append(program, REX_XCHG...)
|
||||
program = append(program, 0xc0+instr.Src+8*instr.Dst)
|
||||
|
||||
case VM_FSWAP_RF:
|
||||
program = append(program, SHUFPD...)
|
||||
program = append(program, 0xc0+9*instr.Dst)
|
||||
program = append(program, 1)
|
||||
case VM_FSWAP_RE:
|
||||
program = append(program, SHUFPD...)
|
||||
program = append(program, 0xc0+9*(instr.Dst+RegistersCountFloat))
|
||||
program = append(program, 1)
|
||||
|
||||
case VM_FADD_R:
|
||||
program = append(program, REX_ADDPD...)
|
||||
program = append(program, 0xc0+instr.Src+8*instr.Dst)
|
||||
|
||||
case VM_FADD_M:
|
||||
program = genAddressReg(program, instr, true)
|
||||
program = append(program, REX_CVTDQ2PD_XMM12...)
|
||||
program = append(program, REX_ADDPD...)
|
||||
program = append(program, 0xc4+8*instr.Dst)
|
||||
|
||||
case VM_FSUB_R:
|
||||
program = append(program, REX_SUBPD...)
|
||||
program = append(program, 0xc0+instr.Src+8*instr.Dst)
|
||||
|
||||
case VM_FSUB_M:
|
||||
program = genAddressReg(program, instr, true)
|
||||
program = append(program, REX_CVTDQ2PD_XMM12...)
|
||||
program = append(program, REX_SUBPD...)
|
||||
program = append(program, 0xc4+8*instr.Dst)
|
||||
|
||||
case VM_FSCAL_R:
|
||||
program = append(program, REX_XORPS...)
|
||||
program = append(program, 0xc7+8*instr.Dst)
|
||||
|
||||
case VM_FMUL_R:
|
||||
program = append(program, REX_MULPD...)
|
||||
program = append(program, 0xe0+instr.Src+8*instr.Dst)
|
||||
|
||||
case VM_FDIV_M:
|
||||
program = genAddressReg(program, instr, true)
|
||||
program = append(program, REX_CVTDQ2PD_XMM12...)
|
||||
program = append(program, REX_ANDPS_XMM12...)
|
||||
program = append(program, REX_DIVPD...)
|
||||
program = append(program, 0xe4+8*instr.Dst)
|
||||
|
||||
case VM_FSQRT_R:
|
||||
program = append(program, SQRTPD...)
|
||||
program = append(program, 0xe4+9*instr.Dst)
|
||||
|
||||
case VM_CFROUND:
|
||||
program = append(program, REX_MOV_RR64...)
|
||||
program = append(program, 0xc0+instr.Src)
|
||||
rotate := byte((13 - instr.Imm) & 63)
|
||||
if rotate != 0 {
|
||||
program = append(program, ROL_RAX...)
|
||||
program = append(program, rotate)
|
||||
}
|
||||
program = append(program, AND_OR_MOV_LDMXCSR...)
|
||||
case VM_CBRANCH:
|
||||
reg := instr.Dst
|
||||
target := instr.jumpTarget() + 1
|
||||
|
||||
jmpOffset := instructionOffsets[target] - (codePos + 16)
|
||||
|
||||
if BranchesWithin32B {
|
||||
branchBegin := uint32(codePos + 7)
|
||||
branchEnd := branchBegin
|
||||
if jmpOffset >= -128 {
|
||||
branchEnd += 9
|
||||
} else {
|
||||
branchEnd += 13
|
||||
}
|
||||
// If the jump crosses or touches 32-byte boundary, align it
|
||||
if (branchBegin ^ branchEnd) >= 32 {
|
||||
alignmentSize := 32 - (branchBegin & 31)
|
||||
alignmentSize -= alignmentSize
|
||||
|
||||
program = append(program, JMP_ALIGN_PREFIX[alignmentSize]...)
|
||||
}
|
||||
}
|
||||
program = append(program, REX_ADD_I...)
|
||||
program = append(program, 0xc0+reg)
|
||||
program = binary.LittleEndian.AppendUint32(program, uint32(instr.Imm))
|
||||
|
||||
program = append(program, REX_TEST...)
|
||||
program = append(program, 0xc0+reg)
|
||||
program = binary.LittleEndian.AppendUint32(program, instr.MemMask)
|
||||
|
||||
if jmpOffset >= -128 {
|
||||
program = append(program, JZ_SHORT)
|
||||
program = append(program, byte(jmpOffset))
|
||||
} else {
|
||||
program = append(program, JZ...)
|
||||
program = binary.LittleEndian.AppendUint32(program, uint32(jmpOffset-4))
|
||||
}
|
||||
|
||||
case VM_ISTORE:
|
||||
//genAddressRegDst
|
||||
program = append(program, LEA_32...)
|
||||
program = append(program, 0x80+instr.Dst)
|
||||
if instr.Dst == RegisterNeedsSib {
|
||||
program = append(program, 0x24)
|
||||
}
|
||||
program = binary.LittleEndian.AppendUint32(program, uint32(instr.Imm))
|
||||
program = append(program, AND_EAX_I)
|
||||
program = binary.LittleEndian.AppendUint32(program, instr.MemMask)
|
||||
|
||||
program = append(program, REX_MOV_MR...)
|
||||
program = append(program, 0x04+8*instr.Src)
|
||||
program = append(program, 0x06)
|
||||
case VM_NOP:
|
||||
program = append(program, NOP1...)
|
||||
}
|
||||
|
||||
codePos += int32(len(program) - curLen)
|
||||
}
|
||||
program = append(program, RET)
|
||||
}
|
91
vm_bytecode_jit_amd64.s
Normal file
91
vm_bytecode_jit_amd64.s
Normal file
|
@ -0,0 +1,91 @@
|
|||
//go:build unix && amd64 && !disable_jit && !purego
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// func vm_run(rf *RegisterFile, pad *ScratchPad, eMask [2]uint64, jmp uintptr)
//
// Trampoline into JIT-generated RandomX program code. It loads the VM
// register file into the machine registers expected by the generated code
// (r0-r7 in R8-R15, f/e/a vector registers in X0-X11, constant masks in
// X13-X15, scratchpad pointer in SI), CALLs the generated code, then
// stores the integer and f/e vector registers back into the register file.
//
// Frame: $8 local bytes, 40 bytes of arguments:
//   rf+0(FP), pad+8(FP), eMask+16(FP) ([2]uint64, 16 bytes), jmp+32(FP).
//
// NOTE(review): R14/R15 are clobbered for the whole call; under Go's
// register ABI R14 holds the g pointer. This is presumably safe only
// because no Go code runs between here and the stores below — confirm
// against the generated code and preemption behavior.
TEXT ·vm_run(SB),$8-40

	// Load the register file pointer and pull it into cache.
	MOVQ rf+0(FP), AX

	PREFETCHNTA 0(AX)
	// Integer registers r0-r7 -> R8-R15.
	MOVQ (0*8)(AX), R8
	MOVQ (1*8)(AX), R9
	MOVQ (2*8)(AX), R10
	MOVQ (3*8)(AX), R11
	MOVQ (4*8)(AX), R12
	MOVQ (5*8)(AX), R13
	MOVQ (6*8)(AX), R14
	MOVQ (7*8)(AX), R15

	// Float group f0-f3 (each entry is two float64s = 16 bytes).
	VMOVUPD (8*8)(AX), X0
	VMOVUPD (10*8)(AX), X1
	VMOVUPD (12*8)(AX), X2
	VMOVUPD (14*8)(AX), X3
	// Float group e0-e3.
	VMOVUPD (16*8)(AX), X4
	VMOVUPD (18*8)(AX), X5
	VMOVUPD (20*8)(AX), X6
	VMOVUPD (22*8)(AX), X7
	// Float group a0-a3 (constants for the whole program run).
	VMOVUPD (24*8)(AX), X8
	VMOVUPD (26*8)(AX), X9
	VMOVUPD (28*8)(AX), X10
	VMOVUPD (30*8)(AX), X11

	//TODO: rest of init
	// NOTE(review): X12 is deliberately left unset here; it appears to be
	// used as a scratch register by the generated code (the emitter
	// converts loads via CVTDQ2PD into XMM12) — confirm.

	// Mantissa mask broadcast into both lanes of X13.
	// (AX is free to clobber here; rf is reloaded after the CALL.)
	//VMOVQ $0x00ffffffffffffff, $0x00ffffffffffffff, X13
	MOVQ $0x00ffffffffffffff, AX
	VMOVQ AX, X13
	VPBROADCASTQ X13, X13

	// Per-program exponent mask for the e group -> X14.
	// NOTE(review): VMOVDQU64 is an EVEX (AVX-512) encoding; a plain
	// VMOVDQU/VMOVUPD would load the same 16 bytes and only require AVX —
	// consider lowering the CPU requirement.
	VMOVDQU64 eMask+16(FP), X14

	// Scale mask broadcast into both lanes of X15 (used by FSCAL_R).
	//VMOVQ $0x80F0000000000000, $0x80F0000000000000, X15
	MOVQ $0x80F0000000000000, AX
	VMOVQ AX, X15
	VPBROADCASTQ X15, X15

	// Scratchpad base pointer -> SI.
	MOVQ pad+8(FP), SI

	// Address of the JIT-compiled program.
	MOVQ jmp+32(FP), AX

	// Run the generated code; it returns with RET.
	CALL AX


	// Store the machine registers back into the register file.
	MOVQ rf+0(FP), AX

	PREFETCHT0 0(AX)
	// r0-r7
	MOVQ R8, (0*8)(AX)
	MOVQ R9, (1*8)(AX)
	MOVQ R10, (2*8)(AX)
	MOVQ R11, (3*8)(AX)
	MOVQ R12, (4*8)(AX)
	MOVQ R13, (5*8)(AX)
	MOVQ R14, (6*8)(AX)
	MOVQ R15, (7*8)(AX)

	// f0-f3
	VMOVUPD X0, (8*8)(AX)
	VMOVUPD X1, (10*8)(AX)
	VMOVUPD X2, (12*8)(AX)
	VMOVUPD X3, (14*8)(AX)
	// e0-e3
	VMOVUPD X4, (16*8)(AX)
	VMOVUPD X5, (18*8)(AX)
	VMOVUPD X6, (20*8)(AX)
	VMOVUPD X7, (22*8)(AX)

	// a0-a3 are constant, no need to move

	RET
|
11
vm_bytecode_jit_generic.go
Normal file
11
vm_bytecode_jit_generic.go
Normal file
|
@ -0,0 +1,11 @@
|
|||
//go:build !unix || !amd64 || disable_jit || purego
|
||||
|
||||
package randomx
|
||||
|
||||
// generateCode is a no-op stub used on platforms without JIT support
// (non-unix or non-amd64 targets, or builds with the disable_jit or
// purego tags): no machine code is emitted, so execution falls back to
// the bytecode interpreter.
func (c *ByteCode) generateCode(program []byte) {

}
|
||||
|
||||
// Execute is a no-op stub used on platforms without JIT support: there
// is no generated machine code to run.
// NOTE(review): silently doing nothing here will mask accidental calls
// on unsupported platforms — verify that all callers gate on JIT
// availability before invoking this, or consider panicking instead.
func (f VMProgramFunc) Execute(rf *RegisterFile, pad *ScratchPad, eMask [2]uint64) {

}
|
|
@ -13,7 +13,7 @@ import (
|
|||
// It is the caller's responsibility to set and restore the mode to softfloat64.RoundingModeToNearest between full executions
|
||||
// Additionally, runtime.LockOSThread and defer runtime.UnlockOSThread is recommended to prevent other goroutines sharing these changes
|
||||
func (c *ByteCode) Execute(f *RegisterFile, pad *ScratchPad, eMask [2]uint64) {
|
||||
for pc := 0; pc < RANDOMX_PROGRAM_SIZE; pc++ {
|
||||
for pc := 0; pc < len(c); pc++ {
|
||||
i := &c[pc]
|
||||
switch i.Opcode {
|
||||
case VM_NOP: // we do nothing
|
||||
|
@ -111,8 +111,8 @@ func (c *ByteCode) Execute(f *RegisterFile, pad *ScratchPad, eMask [2]uint64) {
|
|||
SetRoundingMode(f, uint8(tmp))
|
||||
|
||||
case VM_CBRANCH:
|
||||
f.R[i.Src] += i.Imm
|
||||
if (f.R[i.Src] & uint64(i.MemMask)) == 0 {
|
||||
f.R[i.Dst] += i.Imm
|
||||
if (f.R[i.Dst] & uint64(i.MemMask)) == 0 {
|
||||
pc = i.jumpTarget()
|
||||
}
|
||||
case VM_ISTORE:
|
||||
|
|
|
@ -12,7 +12,7 @@ import (
|
|||
// It is the caller's responsibility to set and restore the mode to IEEE 754 roundTiesToEven between full executions
|
||||
// Additionally, runtime.LockOSThread and defer runtime.UnlockOSThread is recommended to prevent other goroutines sharing these changes
|
||||
func (c *ByteCode) Execute(f *RegisterFile, pad *ScratchPad, eMask [2]uint64) {
|
||||
for pc := 0; pc < RANDOMX_PROGRAM_SIZE; pc++ {
|
||||
for pc := 0; pc < len(c); pc++ {
|
||||
i := &c[pc]
|
||||
switch i.Opcode {
|
||||
case VM_NOP: // we do nothing
|
||||
|
@ -110,8 +110,8 @@ func (c *ByteCode) Execute(f *RegisterFile, pad *ScratchPad, eMask [2]uint64) {
|
|||
SetRoundingMode(f, uint8(tmp))
|
||||
|
||||
case VM_CBRANCH:
|
||||
f.R[i.Src] += i.Imm
|
||||
if (f.R[i.Src] & uint64(i.MemMask)) == 0 {
|
||||
f.R[i.Dst] += i.Imm
|
||||
if (f.R[i.Dst] & uint64(i.MemMask)) == 0 {
|
||||
pc = i.jumpTarget()
|
||||
}
|
||||
case VM_ISTORE:
|
||||
|
|
|
@ -70,7 +70,7 @@ func CompileProgramToByteCode(prog []byte) (bc ByteCode) {
|
|||
registerUsage[i] = -1
|
||||
}
|
||||
|
||||
for i := 0; i < RANDOMX_PROGRAM_SIZE; i++ {
|
||||
for i := 0; i < len(bc); i++ {
|
||||
instr := VM_Instruction(prog[i*8:])
|
||||
ibc := &bc[i]
|
||||
|
||||
|
@ -312,10 +312,12 @@ func CompileProgramToByteCode(prog []byte) (bc ByteCode) {
|
|||
|
||||
case 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238: //25 // CBRANCH and CFROUND are interchanged
|
||||
ibc.Opcode = VM_CBRANCH
|
||||
ibc.Src = instr.Dst() % RegistersCount
|
||||
//TODO:??? it's +1 on other
|
||||
ibc.Dst = instr.Dst() % RegistersCount
|
||||
|
||||
target := uint16(int16(registerUsage[ibc.Src]))
|
||||
ibc.Dst = uint8(target)
|
||||
target := uint16(int16(registerUsage[ibc.Dst]))
|
||||
// set target!
|
||||
ibc.Src = uint8(target)
|
||||
ibc.ImmB = uint8(target >> 8)
|
||||
|
||||
shift := uint64(instr.Mod()>>4) + CONDITIONOFFSET
|
||||
|
|
Loading…
Reference in a new issue