Version v3.0.0: support full dataset mode on 64-bit targets, modified API, optimized allocations, full VM run JIT on amd64, optimized AES asm
Some checks failed
continuous-integration/drone/push Build is failing
Some checks failed
continuous-integration/drone/push Build is failing
This commit is contained in:
parent
4903cd7407
commit
6606aaefcc
19
README.md
19
README.md
|
@ -14,17 +14,18 @@ This package implements RandomX without CGO, using only Golang code, native floa
|
|||
|
||||
All test cases pass properly.
|
||||
|
||||
Supports Full mode and Light mode.
|
||||
|
||||
For the C++ implementation and design of RandomX, see [github.com/tevador/RandomX](https://github.com/tevador/RandomX)
|
||||
|
||||
| Feature | 386 | amd64 | arm | arm64 | mips | mips64 | riscv64 | wasm |
|
||||
|:----------------------------:|:---:|:-----:|:---:|:-----:|:----:|:------:|:-------:|:----:|
|
||||
| purego | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| Hardware Float Operations | ✅ | ✅ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ |
|
||||
| Hardware AES Operations | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
||||
| Native Superscalar Execution | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| Superscalar JIT Execution | ❌ | ✅* | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
||||
| Native VM Execution | ✅ | ✅ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ |
|
||||
| VM JIT Execution | ❌ | ✅* | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
||||
| Feature | 386 | amd64 | arm | arm64 | mips | mips64 | riscv64 | wasm |
|
||||
|:---------------------:|:----------:|:--------------:|:------:|:----------:|:------:|:------:|:-------:|:------:|
|
||||
| purego | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| Full Mode | ❌ | ✅ | ❌ | ✅ | ❌ | ✅ | ✅ | ❌ |
|
||||
| Float Operations | hw | **hw** | soft | **hw** | soft | soft | soft | soft |
|
||||
| AES Operations | soft | **hw** | soft | soft | soft | soft | soft | soft |
|
||||
| Superscalar Execution | native | **native+jit** | native | native | native | native | native | native |
|
||||
| VM Execution | **native** | **native+jit** | soft | **native** | soft | soft | soft | soft |
|
||||
|
||||
|
||||
A pure Golang implementation is used on platforms without hard float support, and can also be selected manually via the `purego` build tag.
|
||||
|
|
28
aes/hash.go
28
aes/hash.go
|
@ -30,7 +30,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
package aes
|
||||
|
||||
import (
|
||||
"git.gammaspectra.live/P2Pool/go-randomx/v2/keys"
|
||||
"git.gammaspectra.live/P2Pool/go-randomx/v3/keys"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
|
@ -50,21 +50,7 @@ func HashAes1Rx4(input []byte, output *[64]byte) {
|
|||
if len(input)%64 != 0 {
|
||||
panic("unsupported")
|
||||
}
|
||||
|
||||
// states are copied
|
||||
states := keys.AesHash1R_State
|
||||
|
||||
for input_ptr := 0; input_ptr < len(input); input_ptr += 64 {
|
||||
in := (*[4][4]uint32)(unsafe.Pointer(unsafe.SliceData(input[input_ptr:])))
|
||||
|
||||
aesroundtrip_encdec(&states, in)
|
||||
}
|
||||
|
||||
aesroundtrip_encdec1(&states, &keys.AesHash1R_XKeys[0])
|
||||
|
||||
aesroundtrip_encdec1(&states, &keys.AesHash1R_XKeys[1])
|
||||
|
||||
copy(output[:], (*[64]byte)(unsafe.Pointer(&states))[:])
|
||||
hashAes1Rx4(input, output)
|
||||
}
|
||||
|
||||
// FillAes1Rx4
|
||||
|
@ -81,15 +67,7 @@ func FillAes1Rx4(state *[64]byte, output []byte) {
|
|||
if len(output)%len(state) != 0 {
|
||||
panic("unsupported")
|
||||
}
|
||||
|
||||
// Reference to state without copying
|
||||
states := (*[4][4]uint32)(unsafe.Pointer(state))
|
||||
|
||||
for outptr := 0; outptr < len(output); outptr += len(state) {
|
||||
aesroundtrip_decenc(states, &keys.AesGenerator1R_Keys)
|
||||
|
||||
copy(output[outptr:], state[:])
|
||||
}
|
||||
fillAes1Rx4(state, output)
|
||||
}
|
||||
|
||||
var fillAes4Rx4Keys0 = [4][4]uint32{
|
||||
|
|
50
aes/hash_amd64.go
Normal file
50
aes/hash_amd64.go
Normal file
|
@ -0,0 +1,50 @@
|
|||
//go:build amd64 && !purego
|
||||
|
||||
package aes
|
||||
|
||||
import (
|
||||
"git.gammaspectra.live/P2Pool/go-randomx/v3/asm"
|
||||
"git.gammaspectra.live/P2Pool/go-randomx/v3/keys"
|
||||
"golang.org/x/sys/cpu"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// supportsAES gates every call into the assembly AES routines. Those routines
// combine AES-NI instructions (AESENC/AESDEC) with VEX-encoded VMOVDQU loads
// and stores, so the CPU must offer both AES and AVX; when either is missing
// the software implementation is used instead of faulting on an unsupported
// instruction.
var supportsAES = cpu.X86.HasAES && cpu.X86.HasAVX
|
||||
|
||||
func fillAes1Rx4(state *[64]byte, output []byte) {
|
||||
// Reference to state without copying
|
||||
states := (*[4][4]uint32)(unsafe.Pointer(state))
|
||||
|
||||
if supportsAES {
|
||||
asm.FillAes1Rx4(states, &keys.AesGenerator1R_Keys, unsafe.SliceData(output), uint64(len(output)))
|
||||
return
|
||||
}
|
||||
|
||||
for outptr := 0; outptr < len(output); outptr += len(state) {
|
||||
aesroundtrip_decenc(states, &keys.AesGenerator1R_Keys)
|
||||
|
||||
copy(output[outptr:], state[:])
|
||||
}
|
||||
}
|
||||
|
||||
func hashAes1Rx4(input []byte, output *[64]byte) {
|
||||
if supportsAES {
|
||||
asm.HashAes1Rx4(&keys.AesHash1R_State, &keys.AesHash1R_XKeys, output, unsafe.SliceData(input), uint64(len(input)))
|
||||
return
|
||||
}
|
||||
|
||||
// states are copied
|
||||
states := keys.AesHash1R_State
|
||||
|
||||
for input_ptr := 0; input_ptr < len(input); input_ptr += 64 {
|
||||
in := (*[4][4]uint32)(unsafe.Pointer(unsafe.SliceData(input[input_ptr:])))
|
||||
|
||||
aesroundtrip_encdec(&states, in)
|
||||
}
|
||||
|
||||
aesroundtrip_encdec1(&states, &keys.AesHash1R_XKeys[0])
|
||||
|
||||
aesroundtrip_encdec1(&states, &keys.AesHash1R_XKeys[1])
|
||||
|
||||
copy(output[:], (*[64]byte)(unsafe.Pointer(&states))[:])
|
||||
}
|
36
aes/hash_generic.go
Normal file
36
aes/hash_generic.go
Normal file
|
@ -0,0 +1,36 @@
|
|||
//go:build !amd64 || purego
|
||||
|
||||
package aes
|
||||
|
||||
import (
|
||||
"git.gammaspectra.live/P2Pool/go-randomx/v3/keys"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
func fillAes1Rx4(state *[64]byte, output []byte) {
|
||||
// Reference to state without copying
|
||||
states := (*[4][4]uint32)(unsafe.Pointer(state))
|
||||
|
||||
for outptr := 0; outptr < len(output); outptr += len(state) {
|
||||
aesroundtrip_decenc(states, &keys.AesGenerator1R_Keys)
|
||||
|
||||
copy(output[outptr:], state[:])
|
||||
}
|
||||
}
|
||||
|
||||
func hashAes1Rx4(input []byte, output *[64]byte) {
|
||||
// states are copied
|
||||
states := keys.AesHash1R_State
|
||||
|
||||
for input_ptr := 0; input_ptr < len(input); input_ptr += 64 {
|
||||
in := (*[4][4]uint32)(unsafe.Pointer(unsafe.SliceData(input[input_ptr:])))
|
||||
|
||||
aesroundtrip_encdec(&states, in)
|
||||
}
|
||||
|
||||
aesroundtrip_encdec1(&states, &keys.AesHash1R_XKeys[0])
|
||||
|
||||
aesroundtrip_encdec1(&states, &keys.AesHash1R_XKeys[1])
|
||||
|
||||
copy(output[:], (*[64]byte)(unsafe.Pointer(&states))[:])
|
||||
}
|
|
@ -3,52 +3,12 @@
|
|||
package aes
|
||||
|
||||
import (
|
||||
_ "git.gammaspectra.live/P2Pool/go-randomx/v2/asm"
|
||||
"golang.org/x/sys/cpu"
|
||||
_ "unsafe"
|
||||
"git.gammaspectra.live/P2Pool/go-randomx/v3/asm"
|
||||
)
|
||||
|
||||
//go:noescape
|
||||
//go:linkname hard_aesdec git.gammaspectra.live/P2Pool/go-randomx/v2/asm.aesdec
|
||||
func hard_aesdec(state *[4]uint32, key *[4]uint32)
|
||||
|
||||
//go:noescape
|
||||
//go:linkname hard_aesenc git.gammaspectra.live/P2Pool/go-randomx/v2/asm.aesenc
|
||||
func hard_aesenc(state *[4]uint32, key *[4]uint32)
|
||||
|
||||
//go:noescape
|
||||
//go:linkname hard_aesroundtrip_decenc git.gammaspectra.live/P2Pool/go-randomx/v2/asm.aesroundtrip_decenc
|
||||
func hard_aesroundtrip_decenc(states *[4][4]uint32, keys *[4][4]uint32)
|
||||
|
||||
//go:noescape
|
||||
//go:linkname hard_aesroundtrip_encdec git.gammaspectra.live/P2Pool/go-randomx/v2/asm.aesroundtrip_encdec
|
||||
func hard_aesroundtrip_encdec(states *[4][4]uint32, keys *[4][4]uint32)
|
||||
|
||||
//go:noescape
|
||||
//go:linkname hard_aesroundtrip_encdec1 git.gammaspectra.live/P2Pool/go-randomx/v2/asm.aesroundtrip_encdec1
|
||||
func hard_aesroundtrip_encdec1(states *[4][4]uint32, key *[4]uint32)
|
||||
|
||||
var supportsAES = cpu.X86.HasAES
|
||||
|
||||
func aesenc(state *[4]uint32, key *[4]uint32) {
|
||||
if supportsAES {
|
||||
hard_aesenc(state, key)
|
||||
} else {
|
||||
soft_aesenc(state, key)
|
||||
}
|
||||
}
|
||||
|
||||
func aesdec(state *[4]uint32, key *[4]uint32) {
|
||||
if supportsAES {
|
||||
hard_aesdec(state, key)
|
||||
} else {
|
||||
soft_aesdec(state, key)
|
||||
}
|
||||
}
|
||||
|
||||
func aesroundtrip_decenc(states *[4][4]uint32, keys *[4][4]uint32) {
|
||||
if supportsAES {
|
||||
hard_aesroundtrip_decenc(states, keys)
|
||||
asm.AESRoundTrip_DecEnc(states, keys)
|
||||
} else {
|
||||
soft_aesdec(&states[0], &keys[0])
|
||||
soft_aesenc(&states[1], &keys[1])
|
||||
|
@ -59,7 +19,7 @@ func aesroundtrip_decenc(states *[4][4]uint32, keys *[4][4]uint32) {
|
|||
|
||||
func aesroundtrip_encdec(states *[4][4]uint32, keys *[4][4]uint32) {
|
||||
if supportsAES {
|
||||
hard_aesroundtrip_encdec(states, keys)
|
||||
asm.AESRoundTrip_EncDec(states, keys)
|
||||
} else {
|
||||
soft_aesenc(&states[0], &keys[0])
|
||||
soft_aesdec(&states[1], &keys[1])
|
||||
|
@ -70,7 +30,7 @@ func aesroundtrip_encdec(states *[4][4]uint32, keys *[4][4]uint32) {
|
|||
|
||||
func aesroundtrip_encdec1(states *[4][4]uint32, key *[4]uint32) {
|
||||
if supportsAES {
|
||||
hard_aesroundtrip_encdec1(states, key)
|
||||
asm.AESRoundTrip_EncDec1(states, key)
|
||||
} else {
|
||||
soft_aesenc(&states[0], key)
|
||||
soft_aesdec(&states[1], key)
|
||||
|
|
11
asm/aes.go
11
asm/aes.go
|
@ -1,11 +0,0 @@
|
|||
//go:build amd64 && !purego
|
||||
|
||||
package asm
|
||||
|
||||
func AESRoundEncrypt(state *[4]uint32, key *[4]uint32) {
|
||||
aesenc(state, key)
|
||||
}
|
||||
|
||||
func AESRoundDecrypt(state *[4]uint32, key *[4]uint32) {
|
||||
aesdec(state, key)
|
||||
}
|
|
@ -3,16 +3,16 @@
|
|||
package asm
|
||||
|
||||
//go:noescape
|
||||
func aesenc(state *[4]uint32, key *[4]uint32)
|
||||
func FillAes1Rx4(states *[4][4]uint32, keys *[4][4]uint32, output *byte, outputLen uint64)
|
||||
|
||||
//go:noescape
|
||||
func aesdec(state *[4]uint32, key *[4]uint32)
|
||||
func HashAes1Rx4(initialState *[4][4]uint32, xKeys *[2][4]uint32, output *[64]byte, input *byte, inputLen uint64)
|
||||
|
||||
//go:noescape
|
||||
func aesroundtrip_decenc(states *[4][4]uint32, keys *[4][4]uint32)
|
||||
func AESRoundTrip_DecEnc(states *[4][4]uint32, keys *[4][4]uint32)
|
||||
|
||||
//go:noescape
|
||||
func aesroundtrip_encdec(states *[4][4]uint32, keys *[4][4]uint32)
|
||||
func AESRoundTrip_EncDec(states *[4][4]uint32, keys *[4][4]uint32)
|
||||
|
||||
//go:noescape
|
||||
func aesroundtrip_encdec1(states *[4][4]uint32, key *[4]uint32)
|
||||
func AESRoundTrip_EncDec1(states *[4][4]uint32, key *[4]uint32)
|
||||
|
|
179
asm/aes_amd64.s
179
asm/aes_amd64.s
|
@ -2,92 +2,171 @@
|
|||
|
||||
#include "textflag.h"
|
||||
|
||||
TEXT ·aesenc(SB),NOSPLIT|NOFRAME,$0-16
|
||||
MOVQ state+0(FP), AX
|
||||
MOVQ key+8(FP), BX
|
||||
VMOVDQU32 0(AX), X0
|
||||
VMOVDQU32 0(BX), X1
|
||||
AESENC X1, X0
|
||||
VMOVDQU32 X0, 0(AX)
|
||||
// func FillAes1Rx4(states *[4][4]uint32, keys *[4][4]uint32, output *byte, outputLen uint64)
//
// Writes outputLen bytes to output, 64 bytes per iteration. Each iteration
// applies one AES round to the four 16-byte state lanes (AESDEC on lanes 0 and
// 2, AESENC on lanes 1 and 3, each lane with its own key) and stores the
// resulting state. The final state is written back to states.
//
// outputLen must be a multiple of 64: the counter is decremented by 64 and the
// loop only stops when it reaches exactly zero. A zero outputLen returns
// immediately without touching output or states.
TEXT ·FillAes1Rx4(SB),NOSPLIT|NOFRAME,$0-32
	MOVQ states+0(FP), AX
	MOVQ keys+8(FP), BX
	MOVQ output+16(FP), CX
	MOVQ outputLen+24(FP), DX

	// The loop below stores a block before testing the counter, so an empty
	// request has to be rejected up front.
	TESTQ DX, DX
	JEQ done

	// initial state: X0-X3
	VMOVDQU 0(AX), X0
	VMOVDQU 16(AX), X1
	VMOVDQU 32(AX), X2
	VMOVDQU 48(AX), X3

	// keys: X4-X7
	VMOVDQU 0(BX), X4
	VMOVDQU 16(BX), X5
	VMOVDQU 32(BX), X6
	VMOVDQU 48(BX), X7

loop:
	AESDEC X4, X0
	AESENC X5, X1
	AESDEC X6, X2
	AESENC X7, X3

	// store state onto output
	VMOVDQU X0, 0(CX)
	VMOVDQU X1, 16(CX)
	VMOVDQU X2, 32(CX)
	VMOVDQU X3, 48(CX)
	ADDQ $64, CX

	// outputLen -= 64, continue if not 0
	SUBQ $64, DX
	JNE loop

	// write the final state back to the caller
	VMOVDQU X0, 0(AX)
	VMOVDQU X1, 16(AX)
	VMOVDQU X2, 32(AX)
	VMOVDQU X3, 48(AX)

done:
	RET
|
||||
|
||||
TEXT ·aesdec(SB),NOSPLIT|NOFRAME,$0-16
|
||||
MOVQ state+0(FP), AX
|
||||
MOVQ key+8(FP), BX
|
||||
VMOVDQU32 0(AX), X0
|
||||
VMOVDQU32 0(BX), X1
|
||||
AESDEC X1, X0
|
||||
VMOVDQU32 X0, 0(AX)
|
||||
|
||||
// func HashAes1Rx4(initialState *[4][4]uint32, xKeys *[2][4]uint32, output *[64]byte, input *byte, inputLen uint64)
//
// Loads the four 16-byte lanes of initialState, then for every 64 bytes of
// input applies one AES round per lane using the input block as round keys
// (AESENC on lanes 0 and 2, AESDEC on lanes 1 and 3). Afterwards both xKeys
// are applied to all four lanes and the result is stored to output.
// initialState itself is only read.
//
// inputLen must be a multiple of 64: the counter is decremented by 64 and the
// loop only stops when it reaches exactly zero. A zero inputLen skips the
// absorb loop and goes straight to the two finishing rounds.
TEXT ·HashAes1Rx4(SB),NOSPLIT|NOFRAME,$0-40
	MOVQ initialState+0(FP), AX

	// initial state: X0-X3
	VMOVDQU 0(AX), X0
	VMOVDQU 16(AX), X1
	VMOVDQU 32(AX), X2
	VMOVDQU 48(AX), X3

	MOVQ xKeys+8(FP), AX
	MOVQ output+16(FP), BX
	MOVQ input+24(FP), CX
	MOVQ inputLen+32(FP), DX

	// The loop below reads a block before testing the counter, so with no
	// input it must not be entered at all.
	TESTQ DX, DX
	JEQ finalize

loop:
	// input as keys: X4-X7
	VMOVDQU 0(CX), X4
	VMOVDQU 16(CX), X5
	VMOVDQU 32(CX), X6
	VMOVDQU 48(CX), X7

	AESENC X4, X0
	AESDEC X5, X1
	AESENC X6, X2
	AESDEC X7, X3

	ADDQ $64, CX
	// inputLen -= 64, continue if not 0
	SUBQ $64, DX
	JNE loop

finalize:
	// do encdec1 with both keys!
	VMOVDQU 0(AX), X4
	VMOVDQU 16(AX), X5

	AESENC X4, X0
	AESDEC X4, X1
	AESENC X4, X2
	AESDEC X4, X3

	AESENC X5, X0
	AESDEC X5, X1
	AESENC X5, X2
	AESDEC X5, X3

	// offload into output
	VMOVDQU X0, 0(BX)
	VMOVDQU X1, 16(BX)
	VMOVDQU X2, 32(BX)
	VMOVDQU X3, 48(BX)
	RET
|
||||
|
||||
TEXT ·aesroundtrip_decenc(SB),NOSPLIT|NOFRAME,$0-16
|
||||
TEXT ·AESRoundTrip_DecEnc(SB),NOSPLIT|NOFRAME,$0-16
|
||||
MOVQ states+0(FP), AX
|
||||
MOVQ keys+8(FP), BX
|
||||
|
||||
VMOVDQU32 0(AX), X0
|
||||
VMOVDQU32 0(BX), X1
|
||||
VMOVDQU32 16(AX), X2
|
||||
VMOVDQU32 16(BX), X3
|
||||
VMOVDQU32 32(AX), X4
|
||||
VMOVDQU32 32(BX), X5
|
||||
VMOVDQU32 48(AX), X6
|
||||
VMOVDQU32 48(BX), X7
|
||||
VMOVDQU 0(AX), X0
|
||||
VMOVDQU 0(BX), X1
|
||||
VMOVDQU 16(AX), X2
|
||||
VMOVDQU 16(BX), X3
|
||||
VMOVDQU 32(AX), X4
|
||||
VMOVDQU 32(BX), X5
|
||||
VMOVDQU 48(AX), X6
|
||||
VMOVDQU 48(BX), X7
|
||||
|
||||
AESDEC X1, X0
|
||||
AESENC X3, X2
|
||||
AESDEC X5, X4
|
||||
AESENC X7, X6
|
||||
|
||||
VMOVDQU32 X0, 0(AX)
|
||||
VMOVDQU32 X2, 16(AX)
|
||||
VMOVDQU32 X4, 32(AX)
|
||||
VMOVDQU32 X6, 48(AX)
|
||||
VMOVDQU X0, 0(AX)
|
||||
VMOVDQU X2, 16(AX)
|
||||
VMOVDQU X4, 32(AX)
|
||||
VMOVDQU X6, 48(AX)
|
||||
RET
|
||||
|
||||
|
||||
TEXT ·aesroundtrip_encdec(SB),NOSPLIT|NOFRAME,$0-16
|
||||
TEXT ·AESRoundTrip_EncDec(SB),NOSPLIT|NOFRAME,$0-16
|
||||
MOVQ states+0(FP), AX
|
||||
MOVQ keys+8(FP), BX
|
||||
|
||||
VMOVDQU32 0(AX), X0
|
||||
VMOVDQU32 0(BX), X1
|
||||
VMOVDQU32 16(AX), X2
|
||||
VMOVDQU32 16(BX), X3
|
||||
VMOVDQU32 32(AX), X4
|
||||
VMOVDQU32 32(BX), X5
|
||||
VMOVDQU32 48(AX), X6
|
||||
VMOVDQU32 48(BX), X7
|
||||
VMOVDQU 0(AX), X0
|
||||
VMOVDQU 0(BX), X1
|
||||
VMOVDQU 16(AX), X2
|
||||
VMOVDQU 16(BX), X3
|
||||
VMOVDQU 32(AX), X4
|
||||
VMOVDQU 32(BX), X5
|
||||
VMOVDQU 48(AX), X6
|
||||
VMOVDQU 48(BX), X7
|
||||
|
||||
AESENC X1, X0
|
||||
AESDEC X3, X2
|
||||
AESENC X5, X4
|
||||
AESDEC X7, X6
|
||||
|
||||
VMOVDQU32 X0, 0(AX)
|
||||
VMOVDQU32 X2, 16(AX)
|
||||
VMOVDQU32 X4, 32(AX)
|
||||
VMOVDQU32 X6, 48(AX)
|
||||
VMOVDQU X0, 0(AX)
|
||||
VMOVDQU X2, 16(AX)
|
||||
VMOVDQU X4, 32(AX)
|
||||
VMOVDQU X6, 48(AX)
|
||||
RET
|
||||
|
||||
|
||||
TEXT ·aesroundtrip_encdec1(SB),NOSPLIT|NOFRAME,$0-16
|
||||
TEXT ·AESRoundTrip_EncDec1(SB),NOSPLIT|NOFRAME,$0-16
|
||||
MOVQ states+0(FP), AX
|
||||
MOVQ key+8(FP), BX
|
||||
|
||||
VMOVDQU32 0(BX), X0
|
||||
VMOVDQU32 0(AX), X1
|
||||
VMOVDQU32 16(AX), X2
|
||||
VMOVDQU32 32(AX), X3
|
||||
VMOVDQU32 48(AX), X4
|
||||
VMOVDQU 0(BX), X0
|
||||
VMOVDQU 0(AX), X1
|
||||
VMOVDQU 16(AX), X2
|
||||
VMOVDQU 32(AX), X3
|
||||
VMOVDQU 48(AX), X4
|
||||
|
||||
AESENC X0, X1
|
||||
AESDEC X0, X2
|
||||
AESENC X0, X3
|
||||
AESDEC X0, X4
|
||||
|
||||
VMOVDQU32 X1, 0(AX)
|
||||
VMOVDQU32 X2, 16(AX)
|
||||
VMOVDQU32 X3, 32(AX)
|
||||
VMOVDQU32 X4, 48(AX)
|
||||
VMOVDQU X1, 0(AX)
|
||||
VMOVDQU X2, 16(AX)
|
||||
VMOVDQU X3, 32(AX)
|
||||
VMOVDQU X4, 48(AX)
|
||||
RET
|
||||
|
||||
|
|
46
blake2/generator.go
Normal file
46
blake2/generator.go
Normal file
|
@ -0,0 +1,46 @@
|
|||
package blake2
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"golang.org/x/crypto/blake2b"
|
||||
)
|
||||
|
||||
type Generator struct {
|
||||
state [blake2b.Size]byte
|
||||
i int
|
||||
}
|
||||
|
||||
func New(seed []byte, nonce uint32) *Generator {
|
||||
var state [blake2b.Size]byte
|
||||
copy(state[:60], seed)
|
||||
binary.LittleEndian.PutUint32(state[60:], nonce)
|
||||
g := &Generator{
|
||||
i: len(state),
|
||||
state: state,
|
||||
}
|
||||
|
||||
return g
|
||||
}
|
||||
|
||||
func (g *Generator) GetUint32() (v uint32) {
|
||||
if (g.i + 4) > len(g.state) {
|
||||
g.reseed()
|
||||
}
|
||||
v = binary.LittleEndian.Uint32(g.state[g.i:])
|
||||
g.i += 4
|
||||
return v
|
||||
}
|
||||
|
||||
func (g *Generator) GetByte() (v byte) {
|
||||
if (g.i + 1) > len(g.state) {
|
||||
g.reseed()
|
||||
}
|
||||
v = g.state[g.i]
|
||||
g.i++
|
||||
return v
|
||||
}
|
||||
|
||||
func (g *Generator) reseed() {
|
||||
g.state = blake2b.Sum512(g.state[:])
|
||||
g.i = 0
|
||||
}
|
50
blake2b.go
50
blake2b.go
|
@ -1,50 +0,0 @@
|
|||
package randomx
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"golang.org/x/crypto/blake2b"
|
||||
)
|
||||
|
||||
type Blake2Generator struct {
|
||||
data [64]byte
|
||||
dataindex int
|
||||
allocRegIndex [8]int
|
||||
allocRegisters [8]Register
|
||||
}
|
||||
|
||||
func Init_Blake2Generator(key []byte, nonce uint32) *Blake2Generator {
|
||||
var b Blake2Generator
|
||||
b.dataindex = len(b.data)
|
||||
if len(key) > 60 {
|
||||
copy(b.data[:], key[0:60])
|
||||
} else {
|
||||
copy(b.data[:], key)
|
||||
}
|
||||
binary.LittleEndian.PutUint32(b.data[60:], nonce)
|
||||
|
||||
return &b
|
||||
}
|
||||
|
||||
func (b *Blake2Generator) checkdata(bytesNeeded int) {
|
||||
if b.dataindex+bytesNeeded > cap(b.data) {
|
||||
//blake2b(data, sizeof(data), data, sizeof(data), nullptr, 0);
|
||||
h := blake2b.Sum512(b.data[:])
|
||||
copy(b.data[:], h[:])
|
||||
b.dataindex = 0
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func (b *Blake2Generator) GetByte() byte {
|
||||
b.checkdata(1)
|
||||
ret := b.data[b.dataindex]
|
||||
b.dataindex++
|
||||
return ret
|
||||
}
|
||||
func (b *Blake2Generator) GetUint32() uint32 {
|
||||
b.checkdata(4)
|
||||
ret := binary.LittleEndian.Uint32(b.data[b.dataindex:])
|
||||
b.dataindex += 4
|
||||
|
||||
return ret
|
||||
}
|
63
cache.go
63
cache.go
|
@ -1,8 +1,9 @@
|
|||
package randomx
|
||||
|
||||
import (
|
||||
"git.gammaspectra.live/P2Pool/go-randomx/v2/argon2"
|
||||
"git.gammaspectra.live/P2Pool/go-randomx/v2/keys"
|
||||
"git.gammaspectra.live/P2Pool/go-randomx/v3/argon2"
|
||||
"git.gammaspectra.live/P2Pool/go-randomx/v3/blake2"
|
||||
"git.gammaspectra.live/P2Pool/go-randomx/v3/keys"
|
||||
"runtime"
|
||||
"slices"
|
||||
"unsafe"
|
||||
|
@ -15,7 +16,7 @@ func (m *MemoryBlock) GetLine(addr uint64) *RegisterLine {
|
|||
return (*RegisterLine)(unsafe.Pointer(unsafe.SliceData(m[addr : addr+8 : addr+8])))
|
||||
}
|
||||
|
||||
type Randomx_Cache struct {
|
||||
type Cache struct {
|
||||
Blocks []MemoryBlock
|
||||
|
||||
Programs [RANDOMX_PROGRAM_COUNT]SuperScalarProgram
|
||||
|
@ -25,36 +26,20 @@ type Randomx_Cache struct {
|
|||
Flags uint64
|
||||
}
|
||||
|
||||
func Randomx_alloc_cache(flags uint64) *Randomx_Cache {
|
||||
func NewCache(flags uint64) *Cache {
|
||||
if flags == RANDOMX_FLAG_DEFAULT {
|
||||
flags = RANDOMX_FLAG_JIT
|
||||
}
|
||||
return &Randomx_Cache{
|
||||
return &Cache{
|
||||
Flags: flags,
|
||||
}
|
||||
}
|
||||
|
||||
func (cache *Randomx_Cache) HasJIT() bool {
|
||||
func (cache *Cache) HasJIT() bool {
|
||||
return cache.Flags&RANDOMX_FLAG_JIT > 0 && cache.JitPrograms[0] != nil
|
||||
}
|
||||
|
||||
func (cache *Randomx_Cache) VM_Initialize() *VM {
|
||||
|
||||
vm := &VM{
|
||||
Dataset: &Randomx_DatasetLight{
|
||||
Cache: cache,
|
||||
},
|
||||
}
|
||||
if cache.HasJIT() {
|
||||
vm.JITProgram = mapProgram(nil, int(RandomXCodeSize))
|
||||
if cache.Flags&RANDOMX_FLAG_SECURE == 0 {
|
||||
mapProgramRWX(vm.JITProgram)
|
||||
}
|
||||
}
|
||||
return vm
|
||||
}
|
||||
|
||||
func (cache *Randomx_Cache) Close() error {
|
||||
func (cache *Cache) Close() error {
|
||||
for _, p := range cache.JitPrograms {
|
||||
if p != nil {
|
||||
err := p.Close()
|
||||
|
@ -66,10 +51,12 @@ func (cache *Randomx_Cache) Close() error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func (cache *Randomx_Cache) Init(key []byte) {
|
||||
// Lock due to external JIT madness
|
||||
runtime.LockOSThread()
|
||||
defer runtime.UnlockOSThread()
|
||||
func (cache *Cache) Init(key []byte) {
|
||||
if cache.Flags&RANDOMX_FLAG_JIT > 0 {
|
||||
// Lock due to external JIT madness
|
||||
runtime.LockOSThread()
|
||||
defer runtime.UnlockOSThread()
|
||||
}
|
||||
|
||||
kkey := slices.Clone(key)
|
||||
|
||||
|
@ -79,10 +66,11 @@ func (cache *Randomx_Cache) Init(key []byte) {
|
|||
|
||||
cache.Blocks = memoryBlocks
|
||||
|
||||
nonce := uint32(0) //uint32(len(key))
|
||||
gen := Init_Blake2Generator(key, nonce)
|
||||
const nonce uint32 = 0
|
||||
|
||||
gen := blake2.New(key, nonce)
|
||||
for i := 0; i < 8; i++ {
|
||||
cache.Programs[i] = Build_SuperScalar_Program(gen) // build a superscalar program
|
||||
cache.Programs[i] = BuildSuperScalarProgram(gen) // build a superscalar program
|
||||
if cache.Flags&RANDOMX_FLAG_JIT > 0 {
|
||||
cache.JitPrograms[i] = generateSuperscalarCode(cache.Programs[i])
|
||||
}
|
||||
|
@ -93,7 +81,7 @@ func (cache *Randomx_Cache) Init(key []byte) {
|
|||
const Mask = CacheSize/CacheLineSize - 1
|
||||
|
||||
// GetMixBlock fetch a 64 byte block in uint64 form
|
||||
func (cache *Randomx_Cache) GetMixBlock(addr uint64) *RegisterLine {
|
||||
func (cache *Cache) GetMixBlock(addr uint64) *RegisterLine {
|
||||
|
||||
addr = (addr & Mask) * CacheLineSize
|
||||
|
||||
|
@ -101,7 +89,7 @@ func (cache *Randomx_Cache) GetMixBlock(addr uint64) *RegisterLine {
|
|||
return cache.Blocks[block].GetLine(addr % 1024)
|
||||
}
|
||||
|
||||
func (cache *Randomx_Cache) InitDatasetItem(rl *RegisterLine, itemNumber uint64) {
|
||||
func (cache *Cache) InitDatasetItem(rl *RegisterLine, itemNumber uint64) {
|
||||
registerValue := itemNumber
|
||||
|
||||
rl[0] = (itemNumber + 1) * keys.SuperScalar_Constants[0]
|
||||
|
@ -129,7 +117,7 @@ func (cache *Randomx_Cache) InitDatasetItem(rl *RegisterLine, itemNumber uint64)
|
|||
}
|
||||
}
|
||||
|
||||
func (cache *Randomx_Cache) InitDatasetItemJIT(rl *RegisterLine, itemNumber uint64) {
|
||||
func (cache *Cache) InitDatasetItemJIT(rl *RegisterLine, itemNumber uint64) {
|
||||
registerValue := itemNumber
|
||||
|
||||
rl[0] = (itemNumber + 1) * keys.SuperScalar_Constants[0]
|
||||
|
@ -155,9 +143,12 @@ func (cache *Randomx_Cache) InitDatasetItemJIT(rl *RegisterLine, itemNumber uint
|
|||
}
|
||||
}
|
||||
|
||||
func (cache *Randomx_Cache) initDataset(dataset []RegisterLine, startItem, endItem uint64) {
|
||||
panic("todo")
|
||||
func (cache *Cache) InitDataset(dataset []RegisterLine, startItem, endItem uint64) {
|
||||
for itemNumber := startItem; itemNumber < endItem; itemNumber, dataset = itemNumber+1, dataset[1:] {
|
||||
cache.InitDatasetItem(&dataset[0], itemNumber)
|
||||
if cache.HasJIT() {
|
||||
cache.InitDatasetItemJIT(&dataset[0], itemNumber)
|
||||
} else {
|
||||
cache.InitDatasetItem(&dataset[0], itemNumber)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -29,7 +29,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
package randomx
|
||||
|
||||
import "git.gammaspectra.live/P2Pool/go-randomx/v2/argon2"
|
||||
import "git.gammaspectra.live/P2Pool/go-randomx/v3/argon2"
|
||||
|
||||
// see reference configuration.h
|
||||
// Cache size in KiB. Must be a power of 2.
|
||||
|
@ -81,7 +81,7 @@ const RANDOMX_JUMP_BITS = 8
|
|||
// Jump condition mask offset in bits. The sum of RANDOMX_JUMP_BITS and RANDOMX_JUMP_OFFSET must not exceed 16.
|
||||
const RANDOMX_JUMP_OFFSET = 8
|
||||
|
||||
const DATASETEXTRAITEMS = RANDOMX_DATASET_EXTRA_SIZE / RANDOMX_DATASET_ITEM_SIZE
|
||||
const DatasetExtraItems = RANDOMX_DATASET_EXTRA_SIZE / RANDOMX_DATASET_ITEM_SIZE
|
||||
|
||||
const SuperscalarMaxSize = 3*RANDOMX_SUPERSCALAR_LATENCY + 2
|
||||
const RANDOMX_DATASET_ITEM_SIZE uint64 = 64
|
||||
|
|
29
dataset.go
29
dataset.go
|
@ -1,8 +1,31 @@
|
|||
package randomx
|
||||
|
||||
type Randomx_Dataset interface {
|
||||
InitDataset(startItem, endItem uint64)
|
||||
ReadDataset(address uint64, r, cache *RegisterLine)
|
||||
import "sync"
|
||||
|
||||
type Dataset interface {
|
||||
InitDataset(startItem, itemCount uint64)
|
||||
ReadDataset(address uint64, r *RegisterLine)
|
||||
PrefetchDataset(address uint64)
|
||||
Flags() uint64
|
||||
Cache() *Cache
|
||||
Memory() *[DatasetItemCount]RegisterLine
|
||||
}
|
||||
|
||||
func InitDatasetParallel(dataset Dataset, n int) {
|
||||
n = max(1, n)
|
||||
|
||||
var wg sync.WaitGroup
|
||||
for i := uint64(1); i < uint64(n); i++ {
|
||||
a := (DatasetItemCount * i) / uint64(n)
|
||||
b := (DatasetItemCount * (i + 1)) / uint64(n)
|
||||
|
||||
wg.Add(1)
|
||||
go func(a, b uint64) {
|
||||
defer wg.Done()
|
||||
dataset.InitDataset(a, b-a)
|
||||
}(a, b)
|
||||
}
|
||||
|
||||
dataset.InitDataset(0, DatasetItemCount/uint64(n))
|
||||
wg.Wait()
|
||||
}
|
||||
|
|
52
dataset_full.go
Normal file
52
dataset_full.go
Normal file
|
@ -0,0 +1,52 @@
|
|||
//go:build amd64 || arm64 || arm64be || loong64 || mips64 || mips64le || ppc64 || ppc64le || riscv64 || s390x || sparc64
|
||||
|
||||
package randomx
|
||||
|
||||
const DatasetSize = RANDOMX_DATASET_BASE_SIZE + RANDOMX_DATASET_EXTRA_SIZE
|
||||
|
||||
const DatasetItemCount = DatasetSize / CacheLineSize
|
||||
|
||||
type DatasetFull struct {
|
||||
cache *Cache
|
||||
memory [DatasetItemCount]RegisterLine
|
||||
}
|
||||
|
||||
func NewFullDataset(cache *Cache) *DatasetFull {
|
||||
return &DatasetFull{
|
||||
cache: cache,
|
||||
}
|
||||
}
|
||||
|
||||
func (d *DatasetFull) PrefetchDataset(address uint64) {
|
||||
|
||||
}
|
||||
|
||||
func (d *DatasetFull) ReadDataset(address uint64, r *RegisterLine) {
|
||||
cache := &d.memory[address/CacheLineSize]
|
||||
|
||||
for i := range r {
|
||||
r[i] ^= cache[i]
|
||||
}
|
||||
}
|
||||
|
||||
func (d *DatasetFull) Cache() *Cache {
|
||||
return d.cache
|
||||
}
|
||||
|
||||
func (d *DatasetFull) Flags() uint64 {
|
||||
return d.cache.Flags
|
||||
}
|
||||
|
||||
func (d *DatasetFull) Memory() *[DatasetItemCount]RegisterLine {
|
||||
return &d.memory
|
||||
}
|
||||
|
||||
func (d *DatasetFull) InitDataset(startItem, itemCount uint64) {
|
||||
if startItem >= DatasetItemCount || itemCount > DatasetItemCount {
|
||||
panic("out of range")
|
||||
}
|
||||
if startItem+itemCount > DatasetItemCount {
|
||||
panic("out of range")
|
||||
}
|
||||
d.cache.InitDataset(d.memory[startItem:startItem+itemCount], startItem, startItem+itemCount)
|
||||
}
|
34
dataset_full_no64.go
Normal file
34
dataset_full_no64.go
Normal file
|
@ -0,0 +1,34 @@
|
|||
//go:build !(amd64 || arm64 || arm64be || loong64 || mips64 || mips64le || ppc64 || ppc64le || riscv64 || s390x || sparc64)
|
||||
|
||||
package randomx
|
||||
|
||||
const DatasetSize = RANDOMX_DATASET_BASE_SIZE + RANDOMX_DATASET_EXTRA_SIZE
|
||||
|
||||
const DatasetItemCount = DatasetSize / CacheLineSize
|
||||
|
||||
type DatasetFull struct {
|
||||
}
|
||||
|
||||
func NewFullDataset(cache *Cache) *DatasetFull {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *DatasetFull) PrefetchDataset(address uint64) {
|
||||
|
||||
}
|
||||
|
||||
func (d *DatasetFull) ReadDataset(address uint64, r *RegisterLine) {
|
||||
|
||||
}
|
||||
|
||||
func (d *DatasetFull) Cache() *Cache {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *DatasetFull) Flags() uint64 {
|
||||
return 0
|
||||
}
|
||||
|
||||
func (d *DatasetFull) InitDataset(startItem, itemCount uint64) {
|
||||
|
||||
}
|
|
@ -1,19 +1,25 @@
|
|||
package randomx
|
||||
|
||||
type Randomx_DatasetLight struct {
|
||||
Cache *Randomx_Cache
|
||||
Memory []uint64
|
||||
type DatasetLight struct {
|
||||
cache *Cache
|
||||
}
|
||||
|
||||
func (d *Randomx_DatasetLight) PrefetchDataset(address uint64) {
|
||||
func NewLightDataset(cache *Cache) *DatasetLight {
|
||||
return &DatasetLight{
|
||||
cache: cache,
|
||||
}
|
||||
}
|
||||
|
||||
func (d *DatasetLight) PrefetchDataset(address uint64) {
|
||||
|
||||
}
|
||||
|
||||
func (d *Randomx_DatasetLight) ReadDataset(address uint64, r, cache *RegisterLine) {
|
||||
if d.Cache.HasJIT() {
|
||||
d.Cache.InitDatasetItemJIT(cache, address/CacheLineSize)
|
||||
func (d *DatasetLight) ReadDataset(address uint64, r *RegisterLine) {
|
||||
var cache RegisterLine
|
||||
if d.cache.HasJIT() {
|
||||
d.cache.InitDatasetItemJIT(&cache, address/CacheLineSize)
|
||||
} else {
|
||||
d.Cache.InitDatasetItem(cache, address/CacheLineSize)
|
||||
d.cache.InitDatasetItem(&cache, address/CacheLineSize)
|
||||
}
|
||||
|
||||
for i := range r {
|
||||
|
@ -21,10 +27,18 @@ func (d *Randomx_DatasetLight) ReadDataset(address uint64, r, cache *RegisterLin
|
|||
}
|
||||
}
|
||||
|
||||
func (d *Randomx_DatasetLight) Flags() uint64 {
|
||||
return d.Cache.Flags
|
||||
func (d *DatasetLight) Flags() uint64 {
|
||||
return d.cache.Flags
|
||||
}
|
||||
|
||||
func (d *Randomx_DatasetLight) InitDataset(startItem, endItem uint64) {
|
||||
//d.Cache.initDataset(d.Cache.Programs)
|
||||
func (d *DatasetLight) Cache() *Cache {
|
||||
return d.cache
|
||||
}
|
||||
|
||||
func (d *DatasetLight) Memory() *[DatasetItemCount]RegisterLine {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *DatasetLight) InitDataset(startItem, itemCount uint64) {
|
||||
|
||||
}
|
||||
|
|
2
go.mod
2
go.mod
|
@ -1,4 +1,4 @@
|
|||
module git.gammaspectra.live/P2Pool/go-randomx/v2
|
||||
module git.gammaspectra.live/P2Pool/go-randomx/v3
|
||||
|
||||
go 1.21
|
||||
|
||||
|
|
74
jit_amd64.go
74
jit_amd64.go
|
@ -3,9 +3,8 @@
|
|||
package randomx
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"git.gammaspectra.live/P2Pool/go-randomx/v2/asm"
|
||||
"git.gammaspectra.live/P2Pool/go-randomx/v3/asm"
|
||||
)
|
||||
|
||||
/*
|
||||
|
@ -13,11 +12,11 @@ import (
|
|||
REGISTER ALLOCATION:
|
||||
|
||||
; rax -> temporary
|
||||
; rbx -> iteration counter "ic"
|
||||
; rbx -> todo: iteration counter "ic"
|
||||
; rcx -> temporary
|
||||
; rdx -> temporary
|
||||
; rsi -> scratchpad pointer
|
||||
; rdi -> (not used)
|
||||
; rdi -> todo: dataset pointer
|
||||
; rbp -> (do not use, it's used by Golang sampling) jump target //todo: memory registers "ma" (high 32 bits), "mx" (low 32 bits)
|
||||
; rsp -> stack pointer
|
||||
; r8 -> "r0"
|
||||
|
@ -128,7 +127,7 @@ var REX_MOV_MR = []byte{0x4c, 0x89}
|
|||
var REX_XOR_EAX = []byte{0x41, 0x33}
|
||||
var SUB_EBX = []byte{0x83, 0xEB, 0x01}
|
||||
var JNZ = []byte{0x0f, 0x85}
|
||||
var JMP = 0xe9
|
||||
var JMP byte = 0xe9
|
||||
|
||||
var REX_XOR_RAX_R64 = []byte{0x49, 0x33}
|
||||
var REX_XCHG = []byte{0x4d, 0x87}
|
||||
|
@ -157,6 +156,8 @@ var NOP6 = []byte{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00}
|
|||
var NOP7 = []byte{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00}
|
||||
var NOP8 = []byte{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}
|
||||
|
||||
var NOPX = [][]byte{NOP1, NOP2, NOP3, NOP4, NOP5, NOP6, NOP7, NOP8}
|
||||
|
||||
var JMP_ALIGN_PREFIX = [14][]byte{
|
||||
{},
|
||||
{0x2E},
|
||||
|
@ -263,66 +264,3 @@ var BranchesWithin32B = func() bool {
|
|||
}
|
||||
return false
|
||||
}()
|
||||
|
||||
/*
|
||||
;# callee-saved registers - Microsoft x64 calling convention
|
||||
push rbx
|
||||
push rbp
|
||||
push rdi
|
||||
push rsi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 80
|
||||
movdqu xmmword ptr [rsp+64], xmm6
|
||||
movdqu xmmword ptr [rsp+48], xmm7
|
||||
movdqu xmmword ptr [rsp+32], xmm8
|
||||
movdqu xmmword ptr [rsp+16], xmm9
|
||||
movdqu xmmword ptr [rsp+0], xmm10
|
||||
sub rsp, 80
|
||||
movdqu xmmword ptr [rsp+64], xmm11
|
||||
movdqu xmmword ptr [rsp+48], xmm12
|
||||
movdqu xmmword ptr [rsp+32], xmm13
|
||||
movdqu xmmword ptr [rsp+16], xmm14
|
||||
movdqu xmmword ptr [rsp+0], xmm15
|
||||
|
||||
;# function arguments
|
||||
push rcx ;# RegisterFile& registerFile
|
||||
mov rbp, qword ptr [rdx] ;# "mx", "ma"
|
||||
mov rdi, qword ptr [rdx+8] ;# uint8_t* dataset
|
||||
mov rsi, r8 ;# uint8_t* scratchpad
|
||||
mov rbx, r9 ;# loop counter
|
||||
|
||||
mov rax, rbp
|
||||
ror rbp, 32
|
||||
|
||||
;# zero integer registers
|
||||
xor r8, r8
|
||||
xor r9, r9
|
||||
xor r10, r10
|
||||
xor r11, r11
|
||||
xor r12, r12
|
||||
xor r13, r13
|
||||
xor r14, r14
|
||||
xor r15, r15
|
||||
|
||||
;# load constant registers
|
||||
lea rcx, [rcx+120]
|
||||
movapd xmm8, xmmword ptr [rcx+72]
|
||||
movapd xmm9, xmmword ptr [rcx+88]
|
||||
movapd xmm10, xmmword ptr [rcx+104]
|
||||
movapd xmm11, xmmword ptr [rcx+120]
|
||||
|
||||
movapd xmm13, xmmword ptr [mantissaMask]
|
||||
movapd xmm14, xmmword ptr [exp240]
|
||||
movapd xmm15, xmmword ptr [scaleMask]
|
||||
mov rdx, rax
|
||||
and eax, RANDOMX_SCRATCHPAD_MASK
|
||||
ror rdx, 32
|
||||
and edx, RANDOMX_SCRATCHPAD_MASK
|
||||
jmp rx_program_loop_begin
|
||||
*/
|
||||
var randomx_program_prologue = bytes.Repeat(NOP1, 64)
|
||||
|
||||
var randomx_program_loop_begin = bytes.Repeat(NOP1, 64)
|
||||
|
|
|
@ -1,6 +1,9 @@
|
|||
package randomx
|
||||
|
||||
import "math"
|
||||
import (
|
||||
"math"
|
||||
"math/bits"
|
||||
)
|
||||
|
||||
const (
|
||||
mantbits64 uint = 52
|
||||
|
@ -41,10 +44,36 @@ func StaticExponent(entropy uint64) uint64 {
|
|||
return exponent
|
||||
}
|
||||
|
||||
func EMask(entropy uint64) uint64 {
|
||||
func ExponentMask(entropy uint64) uint64 {
|
||||
return (entropy & mask22bit) | StaticExponent(entropy)
|
||||
}
|
||||
|
||||
// Xor returns the float64 whose IEEE 754 bit pattern is the bitwise XOR of
// the bit patterns of a and b.
func Xor(a, b float64) float64 {
	aBits := math.Float64bits(a)
	bBits := math.Float64bits(b)
	return math.Float64frombits(aBits ^ bBits)
}
|
||||
|
||||
// smulh returns the upper 64 bits of the signed 128-bit product a*b.
//
// The unsigned high word from bits.Mul64 is corrected for each negative
// operand by subtracting the other operand (wrapping int64 arithmetic).
func smulh(a, b int64) uint64 {
	unsignedHi, _ := bits.Mul64(uint64(a), uint64(b))
	hi := int64(unsignedHi)
	if a < 0 {
		hi -= b
	}
	if b < 0 {
		hi -= a
	}
	return uint64(hi)
}
|
||||
|
||||
// reciprocal calculates rcp = 2**x / divisor for the highest integer x such
// that rcp < 2**64.
//
// divisor must not be 0 or a power of 2: zero causes a division-by-zero
// panic, and for a power of two the shifted quotient overflows 64 bits.
func reciprocal(divisor uint32) uint64 {
	const p2exp63 = uint64(1) << 63

	d := uint64(divisor)
	q, r := p2exp63/d, p2exp63%d

	// Scale 2**63/d up by the bit length of the divisor. r < d < 2**32 and
	// shift <= 32, so r<<shift cannot overflow.
	shift := bits.Len32(divisor)
	scaledQuotient := q << shift
	scaledRemainder := (r << shift) / d

	return scaledQuotient + scaledRemainder
}
|
||||
|
||||
// signExtend2sCompl interprets x as a 32-bit two's-complement value and
// sign-extends it to 64 bits.
func signExtend2sCompl(x uint32) uint64 {
	signed := int32(x)
	return uint64(int64(signed))
}
|
139
randomx_test.go
139
randomx_test.go
|
@ -31,7 +31,9 @@ package randomx
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"runtime"
|
||||
"slices"
|
||||
)
|
||||
import "testing"
|
||||
|
||||
|
@ -47,9 +49,9 @@ var Tests = []struct {
|
|||
{[]byte("test key 001"), []byte("sed do eiusmod tempor incididunt ut labore et dolore magna aliqua"), "e9ff4503201c0c2cca26d285c93ae883f9b1d30c9eb240b820756f2d5a7905fc"}, // test d
|
||||
}
|
||||
|
||||
func Test_Randomx(t *testing.T) {
|
||||
func Test_RandomXLight(t *testing.T) {
|
||||
|
||||
c := Randomx_alloc_cache(0)
|
||||
c := NewCache(0)
|
||||
|
||||
for ix, tt := range Tests {
|
||||
|
||||
|
@ -62,7 +64,10 @@ func Test_Randomx(t *testing.T) {
|
|||
}
|
||||
}()
|
||||
|
||||
vm := c.VM_Initialize()
|
||||
dataset := NewLightDataset(c)
|
||||
dataset.InitDataset(0, DatasetItemCount)
|
||||
|
||||
vm := NewVM(dataset)
|
||||
defer vm.Close()
|
||||
|
||||
var output_hash [32]byte
|
||||
|
@ -74,57 +79,125 @@ func Test_Randomx(t *testing.T) {
|
|||
}
|
||||
})
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func Benchmark_RandomX(b *testing.B) {
|
||||
func Test_RandomXFull(t *testing.T) {
|
||||
if os.Getenv("CI") != "" {
|
||||
t.Skip("Skipping full mode in CI environment")
|
||||
}
|
||||
|
||||
c := NewCache(0)
|
||||
|
||||
for ix, tt := range Tests {
|
||||
|
||||
t.Run(string(tt.key)+"_____"+string(tt.input), func(t *testing.T) {
|
||||
c.Init(tt.key)
|
||||
defer func() {
|
||||
err := c.Close()
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
}()
|
||||
|
||||
dataset := NewFullDataset(c)
|
||||
if dataset == nil {
|
||||
t.Skip("Skipping full mode in 32-bit environment")
|
||||
}
|
||||
InitDatasetParallel(dataset, runtime.NumCPU())
|
||||
|
||||
vm := NewVM(dataset)
|
||||
defer vm.Close()
|
||||
|
||||
var output_hash [32]byte
|
||||
vm.CalculateHash(tt.input, &output_hash)
|
||||
|
||||
actual := fmt.Sprintf("%x", output_hash)
|
||||
if actual != tt.expected {
|
||||
t.Errorf("#%d Fib(%v): expected %s, actual %s", ix, tt.key, tt.expected, actual)
|
||||
}
|
||||
})
|
||||
|
||||
// cleanup 2GiB between runs
|
||||
runtime.GC()
|
||||
}
|
||||
}
|
||||
|
||||
var BenchmarkTest = Tests[0]
|
||||
var BenchmarkCache *Cache
|
||||
var BenchmarkDatasetLight *DatasetLight
|
||||
var BenchmarkDatasetFull *DatasetFull
|
||||
|
||||
func TestMain(m *testing.M) {
|
||||
if slices.Contains(os.Args, "-test.bench") {
|
||||
//init light and full dataset
|
||||
BenchmarkCache = NewCache(0)
|
||||
BenchmarkCache.Init(BenchmarkTest.key)
|
||||
BenchmarkDatasetLight = NewLightDataset(BenchmarkCache)
|
||||
BenchmarkDatasetLight.InitDataset(0, DatasetItemCount)
|
||||
BenchmarkDatasetFull = NewFullDataset(BenchmarkCache)
|
||||
InitDatasetParallel(BenchmarkDatasetFull, runtime.NumCPU())
|
||||
defer BenchmarkCache.Close()
|
||||
}
|
||||
os.Exit(m.Run())
|
||||
}
|
||||
|
||||
func Benchmark_RandomXLight(b *testing.B) {
|
||||
b.ReportAllocs()
|
||||
|
||||
tt := Tests[0]
|
||||
|
||||
c := Randomx_alloc_cache(0)
|
||||
|
||||
c.Init(tt.key)
|
||||
defer func() {
|
||||
err := c.Close()
|
||||
if err != nil {
|
||||
b.Error(err)
|
||||
}
|
||||
}()
|
||||
|
||||
vm := c.VM_Initialize()
|
||||
vm := NewVM(BenchmarkDatasetLight)
|
||||
defer vm.Close()
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
var output_hash [32]byte
|
||||
vm.CalculateHash(tt.input, &output_hash)
|
||||
vm.CalculateHash(BenchmarkTest.input, &output_hash)
|
||||
runtime.KeepAlive(output_hash)
|
||||
}
|
||||
}
|
||||
|
||||
func Benchmark_RandomXParallel(b *testing.B) {
|
||||
func Benchmark_RandomXFull(b *testing.B) {
|
||||
b.ReportAllocs()
|
||||
|
||||
tt := Tests[0]
|
||||
|
||||
c := Randomx_alloc_cache(0)
|
||||
|
||||
c.Init(tt.key)
|
||||
defer func() {
|
||||
err := c.Close()
|
||||
if err != nil {
|
||||
b.Error(err)
|
||||
}
|
||||
}()
|
||||
vm := NewVM(BenchmarkDatasetFull)
|
||||
defer vm.Close()
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
var output_hash [32]byte
|
||||
vm.CalculateHash(BenchmarkTest.input, &output_hash)
|
||||
runtime.KeepAlive(output_hash)
|
||||
}
|
||||
}
|
||||
|
||||
func Benchmark_RandomXLight_Parallel(b *testing.B) {
|
||||
b.ReportAllocs()
|
||||
b.ResetTimer()
|
||||
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
var output_hash [32]byte
|
||||
vm := c.VM_Initialize()
|
||||
|
||||
vm := NewVM(BenchmarkDatasetLight)
|
||||
defer vm.Close()
|
||||
|
||||
for pb.Next() {
|
||||
vm.CalculateHash(tt.input, &output_hash)
|
||||
vm.CalculateHash(BenchmarkTest.input, &output_hash)
|
||||
runtime.KeepAlive(output_hash)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func Benchmark_RandomXFull_Parallel(b *testing.B) {
|
||||
b.ReportAllocs()
|
||||
b.ResetTimer()
|
||||
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
var output_hash [32]byte
|
||||
|
||||
vm := NewVM(BenchmarkDatasetFull)
|
||||
defer vm.Close()
|
||||
|
||||
for pb.Next() {
|
||||
vm.CalculateHash(BenchmarkTest.input, &output_hash)
|
||||
runtime.KeepAlive(output_hash)
|
||||
}
|
||||
})
|
||||
|
|
|
@ -24,7 +24,3 @@ const RegisterFileSize = RegistersCount*8 + RegistersCountFloat*2*8*3
|
|||
func (rf *RegisterFile) Memory() *[RegisterFileSize]byte {
|
||||
return (*[RegisterFileSize]byte)(unsafe.Pointer(rf))
|
||||
}
|
||||
|
||||
type MemoryRegisters struct {
|
||||
mx, ma uint64
|
||||
}
|
||||
|
|
233
superscalar.go
233
superscalar.go
|
@ -29,7 +29,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
package randomx
|
||||
|
||||
import "math/bits"
|
||||
import (
|
||||
"git.gammaspectra.live/P2Pool/go-randomx/v3/blake2"
|
||||
"math/bits"
|
||||
)
|
||||
|
||||
type ExecutionPort byte
|
||||
|
||||
|
@ -201,7 +204,7 @@ var buffer3 = []int{4, 9, 3}
|
|||
var buffer4 = []int{4, 4, 4, 4}
|
||||
var buffer5 = []int{3, 3, 10}
|
||||
|
||||
var Decoder_To_Instruction_Length = [][]int{
|
||||
var decoderToInstructionSize = [][]int{
|
||||
buffer0,
|
||||
buffer1,
|
||||
buffer2,
|
||||
|
@ -258,7 +261,7 @@ func (d DecoderType) String() string {
|
|||
}
|
||||
}
|
||||
|
||||
func FetchNextDecoder(ins *Instruction, cycle int, mulcount int, gen *Blake2Generator) DecoderType {
|
||||
func FetchNextDecoder(ins *Instruction, cycle int, mulcount int, gen *blake2.Generator) DecoderType {
|
||||
|
||||
if ins.Opcode == S_IMULH_R || ins.Opcode == S_ISMULH_R {
|
||||
return Decoder3310
|
||||
|
@ -295,158 +298,6 @@ func FetchNextDecoder(ins *Instruction, cycle int, mulcount int, gen *Blake2Gene
|
|||
return Decoder484
|
||||
}
|
||||
|
||||
var slot3 = []*Instruction{&ISUB_R, &IXOR_R} // 3 length instruction will be filled with these
|
||||
var slot3L = []*Instruction{&ISUB_R, &IXOR_R, &IMULH_R, &ISMULH_R}
|
||||
|
||||
var slot4 = []*Instruction{&IROR_C, &IADD_RS}
|
||||
var slot7 = []*Instruction{&IXOR_C7, &IADD_C7}
|
||||
var slot8 = []*Instruction{&IXOR_C8, &IADD_C8}
|
||||
var slot9 = []*Instruction{&IXOR_C9, &IADD_C9}
|
||||
var slot10 = []*Instruction{&IMUL_RCP}
|
||||
|
||||
// SuperScalarInstruction superscalar program is built with superscalar instructions
|
||||
type SuperScalarInstruction struct {
|
||||
Opcode byte
|
||||
Dst int
|
||||
Src int
|
||||
Mod byte
|
||||
Imm32 uint32
|
||||
Imm64 uint64
|
||||
OpGroup int
|
||||
OpGroupPar int
|
||||
GroupParIsSource int
|
||||
ins *Instruction
|
||||
CanReuse bool
|
||||
}
|
||||
|
||||
func (sins *SuperScalarInstruction) FixSrcReg() {
|
||||
if sins.Src == 0xff {
|
||||
sins.Src = sins.Dst
|
||||
}
|
||||
|
||||
}
|
||||
func (sins *SuperScalarInstruction) Reset() {
|
||||
sins.Opcode = 99
|
||||
sins.Src = 0xff
|
||||
sins.Dst = 0xff
|
||||
sins.CanReuse = false
|
||||
sins.GroupParIsSource = 0
|
||||
}
|
||||
func create(sins *SuperScalarInstruction, ins *Instruction, gen *Blake2Generator) {
|
||||
sins.Reset()
|
||||
sins.ins = ins
|
||||
sins.OpGroupPar = -1
|
||||
sins.Opcode = ins.Opcode
|
||||
|
||||
switch ins.Opcode {
|
||||
case S_ISUB_R:
|
||||
sins.Mod = 0
|
||||
sins.Imm32 = 0
|
||||
sins.OpGroup = S_IADD_RS
|
||||
sins.GroupParIsSource = 1
|
||||
case S_IXOR_R:
|
||||
sins.Mod = 0
|
||||
sins.Imm32 = 0
|
||||
sins.OpGroup = S_IXOR_R
|
||||
sins.GroupParIsSource = 1
|
||||
case S_IADD_RS:
|
||||
sins.Mod = gen.GetByte()
|
||||
// set modshift on Imm32
|
||||
sins.Imm32 = uint32((sins.Mod >> 2) % 4) // bits 2-3
|
||||
//sins.Imm32 = 0
|
||||
sins.OpGroup = S_IADD_RS
|
||||
sins.GroupParIsSource = 1
|
||||
case S_IMUL_R:
|
||||
sins.Mod = 0
|
||||
sins.Imm32 = 0
|
||||
sins.OpGroup = S_IMUL_R
|
||||
sins.GroupParIsSource = 1
|
||||
case S_IROR_C:
|
||||
sins.Mod = 0
|
||||
|
||||
for sins.Imm32 = 0; sins.Imm32 == 0; {
|
||||
sins.Imm32 = uint32(gen.GetByte() & 63)
|
||||
}
|
||||
|
||||
sins.OpGroup = S_IROR_C
|
||||
sins.OpGroupPar = -1
|
||||
case S_IADD_C7, S_IADD_C8, S_IADD_C9:
|
||||
sins.Mod = 0
|
||||
sins.Imm32 = gen.GetUint32()
|
||||
sins.OpGroup = S_IADD_C7
|
||||
sins.OpGroupPar = -1
|
||||
case S_IXOR_C7, S_IXOR_C8, S_IXOR_C9:
|
||||
sins.Mod = 0
|
||||
sins.Imm32 = gen.GetUint32()
|
||||
sins.OpGroup = S_IXOR_C7
|
||||
sins.OpGroupPar = -1
|
||||
|
||||
case S_IMULH_R:
|
||||
sins.CanReuse = true
|
||||
sins.Mod = 0
|
||||
sins.Imm32 = 0
|
||||
sins.OpGroup = S_IMULH_R
|
||||
sins.OpGroupPar = int(gen.GetUint32())
|
||||
case S_ISMULH_R:
|
||||
sins.CanReuse = true
|
||||
sins.Mod = 0
|
||||
sins.Imm32 = 0
|
||||
sins.OpGroup = S_ISMULH_R
|
||||
sins.OpGroupPar = int(gen.GetUint32())
|
||||
|
||||
case S_IMUL_RCP:
|
||||
|
||||
sins.Mod = 0
|
||||
for {
|
||||
sins.Imm32 = gen.GetUint32()
|
||||
if (sins.Imm32&sins.Imm32 - 1) != 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
sins.Imm64 = randomx_reciprocal(sins.Imm32)
|
||||
|
||||
sins.OpGroup = S_IMUL_RCP
|
||||
|
||||
default:
|
||||
panic("should not occur")
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
func CreateSuperScalarInstruction(sins *SuperScalarInstruction, gen *Blake2Generator, instruction_len int, decoder_type int, islast, isfirst bool) {
|
||||
|
||||
switch instruction_len {
|
||||
case 3:
|
||||
if islast {
|
||||
create(sins, slot3L[gen.GetByte()&3], gen)
|
||||
} else {
|
||||
create(sins, slot3[gen.GetByte()&1], gen)
|
||||
}
|
||||
case 4:
|
||||
//if this is the 4-4-4-4 buffer, issue multiplications as the first 3 instructions
|
||||
if decoder_type == int(Decoder4444) && !islast {
|
||||
create(sins, &IMUL_R, gen)
|
||||
} else {
|
||||
create(sins, slot4[gen.GetByte()&1], gen)
|
||||
}
|
||||
case 7:
|
||||
create(sins, slot7[gen.GetByte()&1], gen)
|
||||
|
||||
case 8:
|
||||
create(sins, slot8[gen.GetByte()&1], gen)
|
||||
|
||||
case 9:
|
||||
create(sins, slot9[gen.GetByte()&1], gen)
|
||||
case 10:
|
||||
create(sins, slot10[0], gen)
|
||||
|
||||
default:
|
||||
panic("should not be possible")
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
type SuperScalarProgram []SuperScalarInstruction
|
||||
|
||||
func (p SuperScalarProgram) setAddressRegister(addressRegister int) {
|
||||
|
@ -460,7 +311,7 @@ func (p SuperScalarProgram) Program() []SuperScalarInstruction {
|
|||
return p[1:]
|
||||
}
|
||||
|
||||
func Build_SuperScalar_Program(gen *Blake2Generator) SuperScalarProgram {
|
||||
func BuildSuperScalarProgram(gen *blake2.Generator) SuperScalarProgram {
|
||||
cycle := 0
|
||||
depcycle := 0
|
||||
//retire_cycle := 0
|
||||
|
@ -474,12 +325,7 @@ func Build_SuperScalar_Program(gen *Blake2Generator) SuperScalarProgram {
|
|||
code_size := 0
|
||||
program := make(SuperScalarProgram, 1, 512)
|
||||
|
||||
preAllocatedRegisters := gen.allocRegIndex[:]
|
||||
|
||||
registers := gen.allocRegisters[:]
|
||||
for i := range registers {
|
||||
registers[i] = Register{}
|
||||
}
|
||||
var registers [8]Register
|
||||
|
||||
sins := &SuperScalarInstruction{}
|
||||
sins.ins = &Instruction{Opcode: S_NOP}
|
||||
|
@ -508,7 +354,7 @@ func Build_SuperScalar_Program(gen *Blake2Generator) SuperScalarProgram {
|
|||
if ports_saturated || program_size >= SuperscalarMaxSize {
|
||||
break
|
||||
}
|
||||
CreateSuperScalarInstruction(sins, gen, Decoder_To_Instruction_Length[int(decoder)][buffer_index], int(decoder), len(Decoder_To_Instruction_Length[decoder]) == (buffer_index+1), buffer_index == 0)
|
||||
CreateSuperScalarInstruction(sins, gen, decoderToInstructionSize[decoder][buffer_index], decoder, len(decoderToInstructionSize[decoder]) == (buffer_index+1), buffer_index == 0)
|
||||
macro_op_index = 0
|
||||
|
||||
}
|
||||
|
@ -529,7 +375,7 @@ func Build_SuperScalar_Program(gen *Blake2Generator) SuperScalarProgram {
|
|||
|
||||
if macro_op_index == sins.ins.SrcOP { // FIXME
|
||||
forward := 0
|
||||
for ; forward < LOOK_FORWARD_CYCLES && !sins.SelectSource(preAllocatedRegisters, scheduleCycle, registers, gen); forward++ {
|
||||
for ; forward < LOOK_FORWARD_CYCLES && !sins.SelectSource(scheduleCycle, ®isters, gen); forward++ {
|
||||
scheduleCycle++
|
||||
cycle++
|
||||
}
|
||||
|
@ -547,7 +393,7 @@ func Build_SuperScalar_Program(gen *Blake2Generator) SuperScalarProgram {
|
|||
|
||||
if macro_op_index == sins.ins.DstOP { // FIXME
|
||||
forward := 0
|
||||
for ; forward < LOOK_FORWARD_CYCLES && !sins.SelectDestination(preAllocatedRegisters, scheduleCycle, throwAwayCount > 0, registers, gen); forward++ {
|
||||
for ; forward < LOOK_FORWARD_CYCLES && !sins.SelectDestination(scheduleCycle, throwAwayCount > 0, ®isters, gen); forward++ {
|
||||
scheduleCycle++
|
||||
cycle++
|
||||
}
|
||||
|
@ -708,24 +554,24 @@ const RegisterNeedsDisplacement = 5
|
|||
// RegisterNeedsSib x86 r12 register
|
||||
const RegisterNeedsSib = 4
|
||||
|
||||
func (sins *SuperScalarInstruction) SelectSource(preAllocatedAvailableRegisters []int, cycle int, Registers []Register, gen *Blake2Generator) bool {
|
||||
available_registers := preAllocatedAvailableRegisters[:0]
|
||||
func (sins *SuperScalarInstruction) SelectSource(cycle int, registers *[8]Register, gen *blake2.Generator) bool {
|
||||
availableRegisters := make([]int, 0, 8)
|
||||
|
||||
for i := range Registers {
|
||||
if Registers[i].Latency <= cycle {
|
||||
available_registers = append(available_registers, i)
|
||||
for i := range registers {
|
||||
if registers[i].Latency <= cycle {
|
||||
availableRegisters = append(availableRegisters, i)
|
||||
}
|
||||
}
|
||||
|
||||
if len(available_registers) == 2 && sins.Opcode == S_IADD_RS {
|
||||
if available_registers[0] == RegisterNeedsDisplacement || available_registers[1] == RegisterNeedsDisplacement {
|
||||
if len(availableRegisters) == 2 && sins.Opcode == S_IADD_RS {
|
||||
if availableRegisters[0] == RegisterNeedsDisplacement || availableRegisters[1] == RegisterNeedsDisplacement {
|
||||
sins.Src = RegisterNeedsDisplacement
|
||||
sins.OpGroupPar = sins.Src
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
if selectRegister(available_registers, gen, &sins.Src) {
|
||||
if selectRegister(availableRegisters, gen, &sins.Src) {
|
||||
|
||||
if sins.GroupParIsSource == 0 {
|
||||
|
||||
|
@ -737,35 +583,35 @@ func (sins *SuperScalarInstruction) SelectSource(preAllocatedAvailableRegisters
|
|||
return false
|
||||
}
|
||||
|
||||
func (sins *SuperScalarInstruction) SelectDestination(preAllocatedAvailableRegisters []int, cycle int, allowChainedMul bool, Registers []Register, gen *Blake2Generator) bool {
|
||||
preAllocatedAvailableRegisters = preAllocatedAvailableRegisters[:0]
|
||||
func (sins *SuperScalarInstruction) SelectDestination(cycle int, allowChainedMul bool, Registers *[8]Register, gen *blake2.Generator) bool {
|
||||
var availableRegisters = make([]int, 0, 8)
|
||||
|
||||
for i := range Registers {
|
||||
if Registers[i].Latency <= cycle && (sins.CanReuse || i != sins.Src) &&
|
||||
(allowChainedMul || sins.OpGroup != S_IMUL_R || Registers[i].LastOpGroup != S_IMUL_R) &&
|
||||
(Registers[i].LastOpGroup != sins.OpGroup || Registers[i].LastOpPar != sins.OpGroupPar) &&
|
||||
(sins.Opcode != S_IADD_RS || i != RegisterNeedsDisplacement) {
|
||||
preAllocatedAvailableRegisters = append(preAllocatedAvailableRegisters, i)
|
||||
availableRegisters = append(availableRegisters, i)
|
||||
}
|
||||
}
|
||||
|
||||
return selectRegister(preAllocatedAvailableRegisters, gen, &sins.Dst)
|
||||
return selectRegister(availableRegisters, gen, &sins.Dst)
|
||||
}
|
||||
|
||||
func selectRegister(available_registers []int, gen *Blake2Generator, reg *int) bool {
|
||||
func selectRegister(availableRegisters []int, gen *blake2.Generator, reg *int) bool {
|
||||
index := 0
|
||||
if len(available_registers) == 0 {
|
||||
if len(availableRegisters) == 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
if len(available_registers) > 1 {
|
||||
if len(availableRegisters) > 1 {
|
||||
tmp := gen.GetUint32()
|
||||
|
||||
index = int(tmp % uint32(len(available_registers)))
|
||||
index = int(tmp % uint32(len(availableRegisters)))
|
||||
} else {
|
||||
index = 0
|
||||
}
|
||||
*reg = available_registers[index]
|
||||
*reg = availableRegisters[index]
|
||||
return true
|
||||
}
|
||||
|
||||
|
@ -799,26 +645,3 @@ func executeSuperscalar(p []SuperScalarInstruction, r *RegisterLine) {
|
|||
}
|
||||
|
||||
}
|
||||
|
||||
// smulh returns the upper 64 bits of the signed 128-bit product a*b.
//
// The unsigned high word from bits.Mul64 is corrected for each negative
// operand by subtracting the other operand (wrapping int64 arithmetic).
func smulh(a, b int64) uint64 {
	unsignedHi, _ := bits.Mul64(uint64(a), uint64(b))
	hi := int64(unsignedHi)
	if a < 0 {
		hi -= b
	}
	if b < 0 {
		hi -= a
	}
	return uint64(hi)
}
|
||||
|
||||
// randomx_reciprocal returns floor(2**(63+n) / divisor), where n is the bit
// length of divisor.
//
// A zero divisor causes a division-by-zero panic; for a power of two the
// shifted quotient overflows 64 bits.
func randomx_reciprocal(divisor uint32) uint64 {
	const p2exp63 = uint64(1) << 63

	d := uint64(divisor)
	q, r := p2exp63/d, p2exp63%d

	// r < d < 2**32 and shift <= 32, so r<<shift cannot overflow.
	shift := bits.Len32(divisor)
	scaledQuotient := q << shift
	scaledRemainder := (r << shift) / d

	return scaledQuotient + scaledRemainder
}
|
||||
|
||||
// signExtend2sCompl interprets x as a 32-bit two's-complement value and
// sign-extends it to 64 bits.
func signExtend2sCompl(x uint32) uint64 {
	signed := int32(x)
	return uint64(int64(signed))
}
|
||||
|
|
157
superscalar_instruction.go
Normal file
157
superscalar_instruction.go
Normal file
|
@ -0,0 +1,157 @@
|
|||
package randomx
|
||||
|
||||
import "git.gammaspectra.live/P2Pool/go-randomx/v3/blake2"
|
||||
|
||||
// SuperScalarInstruction superscalar program is built with superscalar instructions
|
||||
type SuperScalarInstruction struct {
|
||||
Opcode byte
|
||||
Dst int
|
||||
Src int
|
||||
Mod byte
|
||||
Imm32 uint32
|
||||
Imm64 uint64
|
||||
OpGroup int
|
||||
OpGroupPar int
|
||||
GroupParIsSource int
|
||||
ins *Instruction
|
||||
CanReuse bool
|
||||
}
|
||||
|
||||
func (sins *SuperScalarInstruction) FixSrcReg() {
|
||||
if sins.Src == 0xff {
|
||||
sins.Src = sins.Dst
|
||||
}
|
||||
|
||||
}
|
||||
func (sins *SuperScalarInstruction) Reset() {
|
||||
sins.Opcode = 99
|
||||
sins.Src = 0xff
|
||||
sins.Dst = 0xff
|
||||
sins.CanReuse = false
|
||||
sins.GroupParIsSource = 0
|
||||
}
|
||||
|
||||
func createSuperScalarInstruction(sins *SuperScalarInstruction, ins *Instruction, gen *blake2.Generator) {
|
||||
sins.Reset()
|
||||
sins.ins = ins
|
||||
sins.OpGroupPar = -1
|
||||
sins.Opcode = ins.Opcode
|
||||
|
||||
switch ins.Opcode {
|
||||
case S_ISUB_R:
|
||||
sins.Mod = 0
|
||||
sins.Imm32 = 0
|
||||
sins.OpGroup = S_IADD_RS
|
||||
sins.GroupParIsSource = 1
|
||||
case S_IXOR_R:
|
||||
sins.Mod = 0
|
||||
sins.Imm32 = 0
|
||||
sins.OpGroup = S_IXOR_R
|
||||
sins.GroupParIsSource = 1
|
||||
case S_IADD_RS:
|
||||
sins.Mod = gen.GetByte()
|
||||
// set modshift on Imm32
|
||||
sins.Imm32 = uint32((sins.Mod >> 2) % 4) // bits 2-3
|
||||
//sins.Imm32 = 0
|
||||
sins.OpGroup = S_IADD_RS
|
||||
sins.GroupParIsSource = 1
|
||||
case S_IMUL_R:
|
||||
sins.Mod = 0
|
||||
sins.Imm32 = 0
|
||||
sins.OpGroup = S_IMUL_R
|
||||
sins.GroupParIsSource = 1
|
||||
case S_IROR_C:
|
||||
sins.Mod = 0
|
||||
|
||||
for sins.Imm32 = 0; sins.Imm32 == 0; {
|
||||
sins.Imm32 = uint32(gen.GetByte() & 63)
|
||||
}
|
||||
|
||||
sins.OpGroup = S_IROR_C
|
||||
sins.OpGroupPar = -1
|
||||
case S_IADD_C7, S_IADD_C8, S_IADD_C9:
|
||||
sins.Mod = 0
|
||||
sins.Imm32 = gen.GetUint32()
|
||||
sins.OpGroup = S_IADD_C7
|
||||
sins.OpGroupPar = -1
|
||||
case S_IXOR_C7, S_IXOR_C8, S_IXOR_C9:
|
||||
sins.Mod = 0
|
||||
sins.Imm32 = gen.GetUint32()
|
||||
sins.OpGroup = S_IXOR_C7
|
||||
sins.OpGroupPar = -1
|
||||
|
||||
case S_IMULH_R:
|
||||
sins.CanReuse = true
|
||||
sins.Mod = 0
|
||||
sins.Imm32 = 0
|
||||
sins.OpGroup = S_IMULH_R
|
||||
sins.OpGroupPar = int(gen.GetUint32())
|
||||
case S_ISMULH_R:
|
||||
sins.CanReuse = true
|
||||
sins.Mod = 0
|
||||
sins.Imm32 = 0
|
||||
sins.OpGroup = S_ISMULH_R
|
||||
sins.OpGroupPar = int(gen.GetUint32())
|
||||
|
||||
case S_IMUL_RCP:
|
||||
|
||||
sins.Mod = 0
|
||||
for {
|
||||
sins.Imm32 = gen.GetUint32()
|
||||
if (sins.Imm32&sins.Imm32 - 1) != 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
sins.Imm64 = reciprocal(sins.Imm32)
|
||||
|
||||
sins.OpGroup = S_IMUL_RCP
|
||||
|
||||
default:
|
||||
panic("should not occur")
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
var slot3 = []*Instruction{&ISUB_R, &IXOR_R} // 3 length instruction will be filled with these
|
||||
var slot3L = []*Instruction{&ISUB_R, &IXOR_R, &IMULH_R, &ISMULH_R}
|
||||
|
||||
var slot4 = []*Instruction{&IROR_C, &IADD_RS}
|
||||
var slot7 = []*Instruction{&IXOR_C7, &IADD_C7}
|
||||
var slot8 = []*Instruction{&IXOR_C8, &IADD_C8}
|
||||
var slot9 = []*Instruction{&IXOR_C9, &IADD_C9}
|
||||
var slot10 = []*Instruction{&IMUL_RCP}
|
||||
|
||||
func CreateSuperScalarInstruction(sins *SuperScalarInstruction, gen *blake2.Generator, instructionLen int, decoderType DecoderType, last, first bool) {
|
||||
|
||||
switch instructionLen {
|
||||
case 3:
|
||||
if last {
|
||||
createSuperScalarInstruction(sins, slot3L[gen.GetByte()&3], gen)
|
||||
} else {
|
||||
createSuperScalarInstruction(sins, slot3[gen.GetByte()&1], gen)
|
||||
}
|
||||
case 4:
|
||||
//if this is the 4-4-4-4 buffer, issue multiplications as the first 3 instructions
|
||||
if decoderType == Decoder4444 && !last {
|
||||
createSuperScalarInstruction(sins, &IMUL_R, gen)
|
||||
} else {
|
||||
createSuperScalarInstruction(sins, slot4[gen.GetByte()&1], gen)
|
||||
}
|
||||
case 7:
|
||||
createSuperScalarInstruction(sins, slot7[gen.GetByte()&1], gen)
|
||||
|
||||
case 8:
|
||||
createSuperScalarInstruction(sins, slot8[gen.GetByte()&1], gen)
|
||||
|
||||
case 9:
|
||||
createSuperScalarInstruction(sins, slot9[gen.GetByte()&1], gen)
|
||||
case 10:
|
||||
createSuperScalarInstruction(sins, slot10[0], gen)
|
||||
|
||||
default:
|
||||
panic("should not be possible")
|
||||
}
|
||||
|
||||
}
|
138
vm.go
138
vm.go
|
@ -30,7 +30,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
package randomx
|
||||
|
||||
import (
|
||||
"git.gammaspectra.live/P2Pool/go-randomx/v2/aes"
|
||||
"git.gammaspectra.live/P2Pool/go-randomx/v3/aes"
|
||||
"math"
|
||||
"runtime"
|
||||
"unsafe"
|
||||
|
@ -45,16 +45,30 @@ type REG struct {
|
|||
type VM struct {
|
||||
ScratchPad ScratchPad
|
||||
|
||||
Dataset Randomx_Dataset
|
||||
Dataset Dataset
|
||||
|
||||
JITProgram VMProgramFunc
|
||||
program ByteCode
|
||||
jitProgram VMProgramFunc
|
||||
}
|
||||
|
||||
// Run calculate hash based on input
|
||||
func NewVM(dataset Dataset) *VM {
|
||||
vm := &VM{
|
||||
Dataset: dataset,
|
||||
}
|
||||
if dataset.Cache().HasJIT() {
|
||||
vm.jitProgram = mapProgram(nil, int(RandomXCodeSize))
|
||||
if dataset.Flags()&RANDOMX_FLAG_SECURE == 0 {
|
||||
mapProgramRWX(vm.jitProgram)
|
||||
}
|
||||
}
|
||||
return vm
|
||||
}
|
||||
|
||||
// run calculate hash based on input. Not thread safe.
|
||||
// Warning: Underlying callers will run float64 SetRoundingMode directly
|
||||
// It is the caller's responsibility to set and restore the mode to IEEE 754 roundTiesToEven between full executions
|
||||
// Additionally, runtime.LockOSThread and defer runtime.UnlockOSThread is recommended to prevent other goroutines sharing these changes
|
||||
func (vm *VM) Run(inputHash [64]byte, roundingMode uint8) (reg RegisterFile) {
|
||||
func (vm *VM) run(inputHash [64]byte, roundingMode uint8) (reg RegisterFile) {
|
||||
|
||||
reg.FPRC = roundingMode
|
||||
|
||||
|
@ -64,49 +78,64 @@ func (vm *VM) Run(inputHash [64]byte, roundingMode uint8) (reg RegisterFile) {
|
|||
|
||||
entropy := (*[16]uint64)(unsafe.Pointer(&buffer))
|
||||
|
||||
prog := buffer[len(entropy)*8:]
|
||||
|
||||
// do more initialization before we run
|
||||
|
||||
for i := range entropy[:8] {
|
||||
reg.A[i/2][i%2] = SmallPositiveFloatBits(entropy[i])
|
||||
}
|
||||
|
||||
var mem MemoryRegisters
|
||||
// memory registers
|
||||
var ma, mx uint32
|
||||
|
||||
mem.ma = entropy[8] & CacheLineAlignMask
|
||||
mem.mx = entropy[10]
|
||||
ma = uint32(entropy[8] & CacheLineAlignMask)
|
||||
mx = uint32(entropy[10])
|
||||
|
||||
addressRegisters := entropy[12]
|
||||
|
||||
var readReg [4]uint64
|
||||
|
||||
for i := range readReg {
|
||||
readReg[i] = uint64(i*2) + (addressRegisters & 1)
|
||||
addressRegisters >>= 1
|
||||
}
|
||||
|
||||
datasetOffset := (entropy[13] % (DATASETEXTRAITEMS + 1)) * CacheLineSize
|
||||
datasetOffset := (entropy[13] % (DatasetExtraItems + 1)) * CacheLineSize
|
||||
|
||||
eMask := [2]uint64{EMask(entropy[14]), EMask(entropy[15])}
|
||||
eMask := [2]uint64{ExponentMask(entropy[14]), ExponentMask(entropy[15])}
|
||||
|
||||
byteCode := CompileProgramToByteCode(prog)
|
||||
prog := buffer[len(entropy)*8:]
|
||||
CompileProgramToByteCode(prog, &vm.program)
|
||||
|
||||
spAddr0 := mem.mx
|
||||
spAddr1 := mem.ma
|
||||
datasetMemory := vm.Dataset.Memory()
|
||||
|
||||
var rlCache RegisterLine
|
||||
var jitProgram VMProgramFunc
|
||||
|
||||
if vm.JITProgram != nil {
|
||||
if vm.Dataset.Flags()&RANDOMX_FLAG_SECURE > 0 {
|
||||
mapProgramRW(vm.JITProgram)
|
||||
byteCode.generateCode(vm.JITProgram)
|
||||
mapProgramRX(vm.JITProgram)
|
||||
if vm.jitProgram != nil {
|
||||
if datasetMemory == nil {
|
||||
if vm.Dataset.Flags()&RANDOMX_FLAG_SECURE > 0 {
|
||||
mapProgramRW(vm.jitProgram)
|
||||
jitProgram = vm.program.generateCode(vm.jitProgram, nil)
|
||||
mapProgramRX(vm.jitProgram)
|
||||
} else {
|
||||
jitProgram = vm.program.generateCode(vm.jitProgram, nil)
|
||||
}
|
||||
} else {
|
||||
byteCode.generateCode(vm.JITProgram)
|
||||
// full mode and we have JIT
|
||||
if vm.Dataset.Flags()&RANDOMX_FLAG_SECURE > 0 {
|
||||
mapProgramRW(vm.jitProgram)
|
||||
jitProgram = vm.program.generateCode(vm.jitProgram, &readReg)
|
||||
mapProgramRX(vm.jitProgram)
|
||||
} else {
|
||||
jitProgram = vm.program.generateCode(vm.jitProgram, &readReg)
|
||||
}
|
||||
|
||||
vm.jitProgram.ExecuteFull(®, &vm.ScratchPad, &datasetMemory[datasetOffset/CacheLineSize], RANDOMX_PROGRAM_ITERATIONS, ma, mx, eMask)
|
||||
return reg
|
||||
}
|
||||
}
|
||||
|
||||
spAddr0 := uint64(mx)
|
||||
spAddr1 := uint64(ma)
|
||||
|
||||
for ic := 0; ic < RANDOMX_PROGRAM_ITERATIONS; ic++ {
|
||||
spMix := reg.R[readReg[0]] ^ reg.R[readReg[1]]
|
||||
|
||||
|
@ -131,22 +160,23 @@ func (vm *VM) Run(inputHash [64]byte, roundingMode uint8) (reg RegisterFile) {
|
|||
reg.E[i][HIGH] = MaskRegisterExponentMantissa(reg.E[i][HIGH], eMask[HIGH])
|
||||
}
|
||||
|
||||
// Run the actual bytecode
|
||||
if vm.JITProgram != nil {
|
||||
vm.JITProgram.Execute(®, &vm.ScratchPad, eMask)
|
||||
// run the actual bytecode
|
||||
if jitProgram != nil {
|
||||
// light mode
|
||||
jitProgram.Execute(®, &vm.ScratchPad, eMask)
|
||||
} else {
|
||||
byteCode.Execute(®, &vm.ScratchPad, eMask)
|
||||
vm.program.Execute(®, &vm.ScratchPad, eMask)
|
||||
}
|
||||
|
||||
mem.mx ^= reg.R[readReg[2]] ^ reg.R[readReg[3]]
|
||||
mem.mx &= CacheLineAlignMask
|
||||
mx ^= uint32(reg.R[readReg[2]] ^ reg.R[readReg[3]])
|
||||
mx &= uint32(CacheLineAlignMask)
|
||||
|
||||
vm.Dataset.PrefetchDataset(datasetOffset + mem.mx)
|
||||
// execute diffuser superscalar program to get dataset 64 bytes
|
||||
vm.Dataset.ReadDataset(datasetOffset+mem.ma, ®.R, &rlCache)
|
||||
vm.Dataset.PrefetchDataset(datasetOffset + uint64(mx))
|
||||
// execute / load output from diffuser superscalar program to get dataset 64 bytes
|
||||
vm.Dataset.ReadDataset(datasetOffset+uint64(ma), ®.R)
|
||||
|
||||
// swap the elements
|
||||
mem.mx, mem.ma = mem.ma, mem.mx
|
||||
mx, ma = ma, mx
|
||||
|
||||
for i := uint64(0); i < RegistersCount; i++ {
|
||||
vm.ScratchPad.Store64(uint32(spAddr1+8*i), reg.R[i])
|
||||
|
@ -165,17 +195,17 @@ func (vm *VM) Run(inputHash [64]byte, roundingMode uint8) (reg RegisterFile) {
|
|||
|
||||
}
|
||||
|
||||
runtime.KeepAlive(buffer)
|
||||
|
||||
return reg
|
||||
|
||||
}
|
||||
|
||||
func (vm *VM) InitScratchpad(seed *[64]byte) {
|
||||
func (vm *VM) initScratchpad(seed *[64]byte) {
|
||||
vm.ScratchPad.Init(seed)
|
||||
}
|
||||
|
||||
func (vm *VM) RunLoops(tempHash [64]byte) RegisterFile {
|
||||
hash512, _ := blake2b.New512(nil)
|
||||
|
||||
func (vm *VM) runLoops(tempHash [64]byte) RegisterFile {
|
||||
if lockThreadDueToRoundingMode {
|
||||
// Lock thread due to rounding mode flags
|
||||
runtime.LockOSThread()
|
||||
|
@ -185,20 +215,16 @@ func (vm *VM) RunLoops(tempHash [64]byte) RegisterFile {
|
|||
roundingMode := uint8(0)
|
||||
|
||||
for chain := 0; chain < RANDOMX_PROGRAM_COUNT-1; chain++ {
|
||||
reg := vm.Run(tempHash, roundingMode)
|
||||
reg := vm.run(tempHash, roundingMode)
|
||||
roundingMode = reg.FPRC
|
||||
|
||||
hash512.Reset()
|
||||
|
||||
// write R, F, E, A registers
|
||||
hash512.Write(reg.Memory()[:])
|
||||
tempHash = blake2b.Sum512(reg.Memory()[:])
|
||||
runtime.KeepAlive(reg)
|
||||
|
||||
hash512.Sum(tempHash[:0])
|
||||
}
|
||||
|
||||
// final loop executes here
|
||||
reg := vm.Run(tempHash, roundingMode)
|
||||
reg := vm.run(tempHash, roundingMode)
|
||||
// always force a restore
|
||||
reg.FPRC = 0xff
|
||||
|
||||
|
@ -208,33 +234,29 @@ func (vm *VM) RunLoops(tempHash [64]byte) RegisterFile {
|
|||
return reg
|
||||
}
|
||||
|
||||
// CalculateHash Not thread safe.
|
||||
func (vm *VM) CalculateHash(input []byte, output *[32]byte) {
|
||||
tempHash := blake2b.Sum512(input)
|
||||
|
||||
vm.InitScratchpad(&tempHash)
|
||||
vm.initScratchpad(&tempHash)
|
||||
|
||||
reg := vm.RunLoops(tempHash)
|
||||
reg := vm.runLoops(tempHash)
|
||||
|
||||
// now hash the scratch pad as it will act as register A
|
||||
aes.HashAes1Rx4(vm.ScratchPad[:], &tempHash)
|
||||
|
||||
hash256, _ := blake2b.New256(nil)
|
||||
regMem := reg.Memory()
|
||||
// write hash onto register A
|
||||
copy(regMem[RegisterFileSize-RegistersCountFloat*2*8:], tempHash[:])
|
||||
|
||||
hash256.Reset()
|
||||
|
||||
// write R, F, E registers
|
||||
hash256.Write(reg.Memory()[:RegisterFileSize-RegistersCountFloat*2*8])
|
||||
// write R, F, E, A registers
|
||||
*output = blake2b.Sum256(regMem[:])
|
||||
runtime.KeepAlive(reg)
|
||||
|
||||
// write register A
|
||||
hash256.Write(tempHash[:])
|
||||
|
||||
hash256.Sum(output[:0])
|
||||
}
|
||||
|
||||
func (vm *VM) Close() error {
|
||||
if vm.JITProgram != nil {
|
||||
return vm.JITProgram.Close()
|
||||
if vm.jitProgram != nil {
|
||||
return vm.jitProgram.Close()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -11,6 +11,114 @@ import (
|
|||
//go:noescape
|
||||
func vm_run(rf *RegisterFile, pad *ScratchPad, eMask [2]uint64, jmp uintptr)
|
||||
|
||||
//go:noescape
|
||||
func vm_run_full(rf *RegisterFile, pad *ScratchPad, dataset *RegisterLine, iterations, memoryRegisters uint64, eMask [2]uint64, jmp uintptr)
|
||||
|
||||
/*
|
||||
#define RANDOMX_DATASET_BASE_SIZE 2147483648
|
||||
#define RANDOMX_DATASET_BASE_MASK (RANDOMX_DATASET_BASE_SIZE-64)
|
||||
|
||||
mov ecx, ebp ;# ecx = ma
|
||||
;#and ecx, RANDOMX_DATASET_BASE_MASK
|
||||
and ecx, 2147483584
|
||||
xor r8, qword ptr [rdi+rcx]
|
||||
ror rbp, 32 ;# swap "ma" and "mx"
|
||||
xor rbp, rax ;# modify "mx"
|
||||
mov edx, ebp ;# edx = mx
|
||||
;#and edx, RANDOMX_DATASET_BASE_MASK
|
||||
and edx, 2147483584
|
||||
prefetchnta byte ptr [rdi+rdx]
|
||||
xor r9, qword ptr [rdi+rcx+8]
|
||||
xor r10, qword ptr [rdi+rcx+16]
|
||||
xor r11, qword ptr [rdi+rcx+24]
|
||||
xor r12, qword ptr [rdi+rcx+32]
|
||||
xor r13, qword ptr [rdi+rcx+40]
|
||||
xor r14, qword ptr [rdi+rcx+48]
|
||||
xor r15, qword ptr [rdi+rcx+56]
|
||||
*/
|
||||
var programReadDataset = []byte{0x89, 0xE9, 0x81, 0xE1, 0xC0, 0xFF, 0xFF, 0x7F, 0x4C, 0x33, 0x04, 0x0F, 0x48, 0xC1, 0xCD, 0x20, 0x48, 0x31, 0xC5, 0x89, 0xEA, 0x81, 0xE2, 0xC0, 0xFF, 0xFF, 0x7F, 0x0F, 0x18, 0x04, 0x17, 0x4C, 0x33, 0x4C, 0x0F, 0x08, 0x4C, 0x33, 0x54, 0x0F, 0x10, 0x4C, 0x33, 0x5C, 0x0F, 0x18, 0x4C, 0x33, 0x64, 0x0F, 0x20, 0x4C, 0x33, 0x6C, 0x0F, 0x28, 0x4C, 0x33, 0x74, 0x0F, 0x30, 0x4C, 0x33, 0x7C, 0x0F, 0x38}
|
||||
|
||||
/*
|
||||
lea rcx, [rsi+rax]
|
||||
push rcx
|
||||
xor r8, qword ptr [rcx+0]
|
||||
xor r9, qword ptr [rcx+8]
|
||||
xor r10, qword ptr [rcx+16]
|
||||
xor r11, qword ptr [rcx+24]
|
||||
xor r12, qword ptr [rcx+32]
|
||||
xor r13, qword ptr [rcx+40]
|
||||
xor r14, qword ptr [rcx+48]
|
||||
xor r15, qword ptr [rcx+56]
|
||||
lea rcx, [rsi+rdx]
|
||||
push rcx
|
||||
cvtdq2pd xmm0, qword ptr [rcx+0]
|
||||
cvtdq2pd xmm1, qword ptr [rcx+8]
|
||||
cvtdq2pd xmm2, qword ptr [rcx+16]
|
||||
cvtdq2pd xmm3, qword ptr [rcx+24]
|
||||
cvtdq2pd xmm4, qword ptr [rcx+32]
|
||||
cvtdq2pd xmm5, qword ptr [rcx+40]
|
||||
cvtdq2pd xmm6, qword ptr [rcx+48]
|
||||
cvtdq2pd xmm7, qword ptr [rcx+56]
|
||||
andps xmm4, xmm13
|
||||
andps xmm5, xmm13
|
||||
andps xmm6, xmm13
|
||||
andps xmm7, xmm13
|
||||
orps xmm4, xmm14
|
||||
orps xmm5, xmm14
|
||||
orps xmm6, xmm14
|
||||
orps xmm7, xmm14
|
||||
*/
|
||||
var programLoopLoad = []byte{0x48, 0x8D, 0x0C, 0x06, 0x51, 0x4C, 0x33, 0x01, 0x4C, 0x33, 0x49, 0x08, 0x4C, 0x33, 0x51, 0x10, 0x4C, 0x33, 0x59, 0x18, 0x4C, 0x33, 0x61, 0x20, 0x4C, 0x33, 0x69, 0x28, 0x4C, 0x33, 0x71, 0x30, 0x4C, 0x33, 0x79, 0x38, 0x48, 0x8D, 0x0C, 0x16, 0x51, 0xF3, 0x0F, 0xE6, 0x01, 0xF3, 0x0F, 0xE6, 0x49, 0x08, 0xF3, 0x0F, 0xE6, 0x51, 0x10, 0xF3, 0x0F, 0xE6, 0x59, 0x18, 0xF3, 0x0F, 0xE6, 0x61, 0x20, 0xF3, 0x0F, 0xE6, 0x69, 0x28, 0xF3, 0x0F, 0xE6, 0x71, 0x30, 0xF3, 0x0F, 0xE6, 0x79, 0x38, 0x41, 0x0F, 0x54, 0xE5, 0x41, 0x0F, 0x54, 0xED, 0x41, 0x0F, 0x54, 0xF5, 0x41, 0x0F, 0x54, 0xFD, 0x41, 0x0F, 0x56, 0xE6, 0x41, 0x0F, 0x56, 0xEE, 0x41, 0x0F, 0x56, 0xF6, 0x41, 0x0F, 0x56, 0xFE}
|
||||
|
||||
/*
|
||||
pop rcx
|
||||
mov qword ptr [rcx+0], r8
|
||||
mov qword ptr [rcx+8], r9
|
||||
mov qword ptr [rcx+16], r10
|
||||
mov qword ptr [rcx+24], r11
|
||||
mov qword ptr [rcx+32], r12
|
||||
mov qword ptr [rcx+40], r13
|
||||
mov qword ptr [rcx+48], r14
|
||||
mov qword ptr [rcx+56], r15
|
||||
pop rcx
|
||||
xorpd xmm0, xmm4
|
||||
xorpd xmm1, xmm5
|
||||
xorpd xmm2, xmm6
|
||||
xorpd xmm3, xmm7
|
||||
|
||||
movupd xmmword ptr [rcx+0], xmm0
|
||||
movupd xmmword ptr [rcx+16], xmm1
|
||||
movupd xmmword ptr [rcx+32], xmm2
|
||||
movupd xmmword ptr [rcx+48], xmm3
|
||||
;#movapd xmmword ptr [rcx+0], xmm0
|
||||
;#movapd xmmword ptr [rcx+16], xmm1
|
||||
;#movapd xmmword ptr [rcx+32], xmm2
|
||||
;#movapd xmmword ptr [rcx+48], xmm3
|
||||
*/
|
||||
//var programLoopStore = []byte{0x59, 0x4C, 0x89, 0x01, 0x4C, 0x89, 0x49, 0x08, 0x4C, 0x89, 0x51, 0x10, 0x4C, 0x89, 0x59, 0x18, 0x4C, 0x89, 0x61, 0x20, 0x4C, 0x89, 0x69, 0x28, 0x4C, 0x89, 0x71, 0x30, 0x4C, 0x89, 0x79, 0x38, 0x59, 0x66, 0x0F, 0x57, 0xC4, 0x66, 0x0F, 0x57, 0xCD, 0x66, 0x0F, 0x57, 0xD6, 0x66, 0x0F, 0x57, 0xDF, 0x66, 0x0F, 0x29, 0x01, 0x66, 0x0F, 0x29, 0x49, 0x10, 0x66, 0x0F, 0x29, 0x51, 0x20, 0x66, 0x0F, 0x29, 0x59, 0x30}
|
||||
var programLoopStore = []byte{0x59, 0x4C, 0x89, 0x01, 0x4C, 0x89, 0x49, 0x08, 0x4C, 0x89, 0x51, 0x10, 0x4C, 0x89, 0x59, 0x18, 0x4C, 0x89, 0x61, 0x20, 0x4C, 0x89, 0x69, 0x28, 0x4C, 0x89, 0x71, 0x30, 0x4C, 0x89, 0x79, 0x38, 0x59, 0x66, 0x0F, 0x57, 0xC4, 0x66, 0x0F, 0x57, 0xCD, 0x66, 0x0F, 0x57, 0xD6, 0x66, 0x0F, 0x57, 0xDF, 0x66, 0x0F, 0x11, 0x01, 0x66, 0x0F, 0x11, 0x49, 0x10, 0x66, 0x0F, 0x11, 0x51, 0x20, 0x66, 0x0F, 0x11, 0x59, 0x30}
|
||||
|
||||
/*
|
||||
#define RANDOMX_SCRATCHPAD_L3 2097152
|
||||
#define RANDOMX_SCRATCHPAD_MASK (RANDOMX_SCRATCHPAD_L3-64)
|
||||
mov rdx, rax
|
||||
;#and eax, RANDOMX_SCRATCHPAD_MASK
|
||||
and eax, 2097088
|
||||
ror rdx, 32
|
||||
;#and edx, RANDOMX_SCRATCHPAD_MASK
|
||||
and edx, 2097088
|
||||
*/
|
||||
var programCalculateSpAddrs = []byte{0x48, 0x89, 0xC2, 0x25, 0xC0, 0xFF, 0x1F, 0x00, 0x48, 0xC1, 0xCA, 0x20, 0x81, 0xE2, 0xC0, 0xFF, 0x1F, 0x00}
|
||||
|
||||
func (f VMProgramFunc) ExecuteFull(rf *RegisterFile, pad *ScratchPad, dataset *RegisterLine, iterations uint64, ma, mx uint32, eMask [2]uint64) {
|
||||
if f == nil {
|
||||
panic("program is nil")
|
||||
}
|
||||
|
||||
jmpPtr := uintptr(unsafe.Pointer(unsafe.SliceData(f)))
|
||||
vm_run_full(rf, pad, dataset, iterations, (uint64(ma)<<32)|uint64(mx), eMask, jmpPtr)
|
||||
}
|
||||
|
||||
func (f VMProgramFunc) Execute(rf *RegisterFile, pad *ScratchPad, eMask [2]uint64) {
|
||||
if f == nil {
|
||||
panic("program is nil")
|
||||
|
@ -20,15 +128,22 @@ func (f VMProgramFunc) Execute(rf *RegisterFile, pad *ScratchPad, eMask [2]uint6
|
|||
vm_run(rf, pad, eMask, jmpPtr)
|
||||
}
|
||||
|
||||
func (c *ByteCode) generateCode(program []byte) {
|
||||
func (c *ByteCode) generateCode(program []byte, readReg *[4]uint64) []byte {
|
||||
program = program[:0]
|
||||
|
||||
isFullMode := readReg != nil
|
||||
|
||||
if isFullMode {
|
||||
|
||||
program = append(program, programCalculateSpAddrs...)
|
||||
// prologue
|
||||
program = append(program, programLoopLoad...)
|
||||
}
|
||||
|
||||
var instructionOffsets [RANDOMX_PROGRAM_SIZE]int32
|
||||
var codePos int32
|
||||
|
||||
for ix := range c {
|
||||
instructionOffsets[ix] = codePos
|
||||
curLen := len(program)
|
||||
instructionOffsets[ix] = int32(len(program))
|
||||
|
||||
instr := &c[ix]
|
||||
switch instr.Opcode {
|
||||
|
@ -254,10 +369,10 @@ func (c *ByteCode) generateCode(program []byte) {
|
|||
reg := instr.Dst
|
||||
target := instr.jumpTarget() + 1
|
||||
|
||||
jmpOffset := instructionOffsets[target] - (codePos + 16)
|
||||
jmpOffset := instructionOffsets[target] - (int32(len(program)) + 16)
|
||||
|
||||
if BranchesWithin32B {
|
||||
branchBegin := uint32(codePos + 7)
|
||||
branchBegin := uint32(int32(len(program)) + 7)
|
||||
branchEnd := branchBegin
|
||||
if jmpOffset >= -128 {
|
||||
branchEnd += 9
|
||||
|
@ -305,8 +420,51 @@ func (c *ByteCode) generateCode(program []byte) {
|
|||
case VM_NOP:
|
||||
program = append(program, NOP1...)
|
||||
}
|
||||
|
||||
codePos += int32(len(program) - curLen)
|
||||
}
|
||||
|
||||
if isFullMode {
|
||||
// end of prologue
|
||||
program = append(program, REX_MOV_RR...)
|
||||
program = append(program, 0xc0+byte(readReg[2]))
|
||||
program = append(program, REX_XOR_EAX...)
|
||||
program = append(program, 0xc0+byte(readReg[3]))
|
||||
|
||||
// read dataset
|
||||
|
||||
program = append(program, programReadDataset...)
|
||||
|
||||
// epilogue
|
||||
program = append(program, REX_MOV_RR64...)
|
||||
program = append(program, 0xc0+byte(readReg[0]))
|
||||
program = append(program, REX_XOR_RAX_R64...)
|
||||
program = append(program, 0xc0+byte(readReg[1]))
|
||||
//todo: prefetch scratchpad
|
||||
|
||||
program = append(program, programLoopStore...)
|
||||
|
||||
if BranchesWithin32B {
|
||||
branchBegin := uint32(len(program))
|
||||
branchEnd := branchBegin + 9
|
||||
|
||||
// If the jump crosses or touches 32-byte boundary, align it
|
||||
if (branchBegin ^ branchEnd) >= 32 {
|
||||
alignmentSize := 32 - (branchBegin & 31)
|
||||
if alignmentSize > 8 {
|
||||
program = append(program, NOPX[alignmentSize-9][:alignmentSize-8]...)
|
||||
alignmentSize = 8
|
||||
}
|
||||
program = append(program, NOPX[alignmentSize-1][:alignmentSize]...)
|
||||
}
|
||||
}
|
||||
|
||||
program = append(program, SUB_EBX...)
|
||||
program = append(program, JNZ...)
|
||||
program = binary.LittleEndian.AppendUint32(program, uint32(-len(program)-4))
|
||||
//exit otherwise
|
||||
|
||||
}
|
||||
|
||||
program = append(program, RET)
|
||||
|
||||
return program
|
||||
}
|
||||
|
|
|
@ -34,8 +34,6 @@ TEXT ·vm_run(SB),$8-40
|
|||
VMOVUPD (28*8)(AX), X10
|
||||
VMOVUPD (30*8)(AX), X11
|
||||
|
||||
//TODO: rest of init
|
||||
|
||||
// mantissa mask
|
||||
//VMOVQ $0x00ffffffffffffff, $0x00ffffffffffffff, X13
|
||||
MOVQ $0x00ffffffffffffff, AX
|
||||
|
@ -89,3 +87,107 @@ TEXT ·vm_run(SB),$8-40
|
|||
// a0-a3 are constant, no need to move
|
||||
|
||||
RET
|
||||
|
||||
|
||||
#define RANDOMX_SCRATCHPAD_L3 2097152
|
||||
#define RANDOMX_SCRATCHPAD_MASK (RANDOMX_SCRATCHPAD_L3-64)
|
||||
|
||||
TEXT ·vm_run_full(SB),$32-64
|
||||
|
||||
// move register file to registers
|
||||
MOVQ rf+0(FP), AX
|
||||
|
||||
PREFETCHNTA 0(AX)
|
||||
// r0-r7
|
||||
MOVQ (0*8)(AX), R8
|
||||
MOVQ (1*8)(AX), R9
|
||||
MOVQ (2*8)(AX), R10
|
||||
MOVQ (3*8)(AX), R11
|
||||
MOVQ (4*8)(AX), R12
|
||||
MOVQ (5*8)(AX), R13
|
||||
MOVQ (6*8)(AX), R14
|
||||
MOVQ (7*8)(AX), R15
|
||||
|
||||
// f0-f3
|
||||
VMOVUPD (8*8)(AX), X0
|
||||
VMOVUPD (10*8)(AX), X1
|
||||
VMOVUPD (12*8)(AX), X2
|
||||
VMOVUPD (14*8)(AX), X3
|
||||
// e0-e3
|
||||
VMOVUPD (16*8)(AX), X4
|
||||
VMOVUPD (18*8)(AX), X5
|
||||
VMOVUPD (20*8)(AX), X6
|
||||
VMOVUPD (22*8)(AX), X7
|
||||
// load constants a0-a3
|
||||
VMOVUPD (24*8)(AX), X8
|
||||
VMOVUPD (26*8)(AX), X9
|
||||
VMOVUPD (28*8)(AX), X10
|
||||
VMOVUPD (30*8)(AX), X11
|
||||
|
||||
//TODO: rest of init
|
||||
|
||||
// mantissa mask
|
||||
//VMOVQ $0x00ffffffffffffff, $0x00ffffffffffffff, X13
|
||||
MOVQ $0x00ffffffffffffff, AX
|
||||
VMOVQ AX, X13
|
||||
VPBROADCASTQ X13, X13
|
||||
|
||||
// eMask
|
||||
VMOVDQU64 eMask+40(FP), X14
|
||||
|
||||
// scale mask
|
||||
//VMOVQ $0x80F0000000000000, $0x80F0000000000000, X15
|
||||
MOVQ $0x80F0000000000000, AX
|
||||
VMOVQ AX, X15
|
||||
VPBROADCASTQ X15, X15
|
||||
|
||||
// scratchpad pointer on rsi
|
||||
MOVQ pad+8(FP), SI
|
||||
// dataset pointer on rdi
|
||||
MOVQ dataset+16(FP), DI
|
||||
// iterations on rbx
|
||||
MOVQ iterations+24(FP), BX
|
||||
// ma and mx on rbp TODO: change this
|
||||
MOVQ memoryRegisters+32(FP), BP
|
||||
|
||||
// do ma/mx calcs
|
||||
MOVQ BP, AX
|
||||
RORQ $32, BP
|
||||
|
||||
//AX = spAddr0
|
||||
//DX = spAddr1
|
||||
|
||||
// JIT location
|
||||
MOVQ jmp+56(FP), CX
|
||||
// jump to JIT code
|
||||
// this handles readReg[0-3] and dataset reading, load, stores
|
||||
CALL CX
|
||||
|
||||
// move registers back to register file
|
||||
MOVQ rf+0(FP), AX
|
||||
|
||||
PREFETCHT0 0(AX)
|
||||
// r0-r7
|
||||
MOVQ R8, (0*8)(AX)
|
||||
MOVQ R9, (1*8)(AX)
|
||||
MOVQ R10, (2*8)(AX)
|
||||
MOVQ R11, (3*8)(AX)
|
||||
MOVQ R12, (4*8)(AX)
|
||||
MOVQ R13, (5*8)(AX)
|
||||
MOVQ R14, (6*8)(AX)
|
||||
MOVQ R15, (7*8)(AX)
|
||||
|
||||
// f0-f3
|
||||
VMOVUPD X0, (8*8)(AX)
|
||||
VMOVUPD X1, (10*8)(AX)
|
||||
VMOVUPD X2, (12*8)(AX)
|
||||
VMOVUPD X3, (14*8)(AX)
|
||||
// e0-e3
|
||||
VMOVUPD X4, (16*8)(AX)
|
||||
VMOVUPD X5, (18*8)(AX)
|
||||
VMOVUPD X6, (20*8)(AX)
|
||||
VMOVUPD X7, (22*8)(AX)
|
||||
|
||||
// a0-a3 are constant, no need to move
|
||||
|
||||
RET
|
||||
|
|
|
@ -2,10 +2,13 @@
|
|||
|
||||
package randomx
|
||||
|
||||
func (c *ByteCode) generateCode(program []byte) {
|
||||
|
||||
func (c *ByteCode) generateCode(program []byte) []byte {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (f VMProgramFunc) Execute(rf *RegisterFile, pad *ScratchPad, eMask [2]uint64) {
|
||||
|
||||
}
|
||||
func (f VMProgramFunc) ExecuteFull(rf *RegisterFile, pad *ScratchPad, dataset *[DatasetItemCount]RegisterLine, iterations, memoryRegisters uint64, eMask [2]uint64) {
|
||||
|
||||
}
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
package randomx
|
||||
|
||||
import (
|
||||
"git.gammaspectra.live/P2Pool/go-randomx/v2/asm"
|
||||
"git.gammaspectra.live/P2Pool/go-randomx/v3/asm"
|
||||
"math"
|
||||
"math/bits"
|
||||
)
|
||||
|
|
|
@ -30,7 +30,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
package randomx
|
||||
|
||||
import (
|
||||
"git.gammaspectra.live/P2Pool/go-randomx/v2/aes"
|
||||
"git.gammaspectra.live/P2Pool/go-randomx/v3/aes"
|
||||
"unsafe"
|
||||
)
|
||||
import "encoding/binary"
|
||||
|
@ -63,7 +63,7 @@ func (ins VM_Instruction) Opcode() byte {
|
|||
|
||||
// CompileProgramToByteCode this will interpret single vm instruction into executable opcodes
|
||||
// reference https://github.com/tevador/RandomX/blob/master/doc/specs.md#52-integer-instructions
|
||||
func CompileProgramToByteCode(prog []byte) (bc ByteCode) {
|
||||
func CompileProgramToByteCode(prog []byte, bc *ByteCode) {
|
||||
|
||||
var registerUsage [RegistersCount]int
|
||||
for i := range registerUsage {
|
||||
|
@ -194,7 +194,7 @@ func CompileProgramToByteCode(prog []byte) (bc ByteCode) {
|
|||
divisor := instr.IMM()
|
||||
if !isZeroOrPowerOf2(divisor) {
|
||||
ibc.Opcode = VM_IMUL_I
|
||||
ibc.Imm = randomx_reciprocal(divisor)
|
||||
ibc.Imm = reciprocal(divisor)
|
||||
registerUsage[dst] = i
|
||||
} else {
|
||||
ibc.Opcode = VM_NOP
|
||||
|
@ -355,9 +355,6 @@ func CompileProgramToByteCode(prog []byte) (bc ByteCode) {
|
|||
|
||||
}
|
||||
}
|
||||
|
||||
return bc
|
||||
|
||||
}
|
||||
|
||||
type ScratchPad [ScratchpadSize]byte
|
||||
|
|
Loading…
Reference in a new issue