Version v3.0.0: support full dataset mode on 64-bit targets, modify API, optimize allocations, run the full VM loop under JIT on amd64, optimize AES asm
Some checks failed
continuous-integration/drone/push Build is failing

This commit is contained in:
DataHoarder 2024-04-20 21:17:33 +02:00
parent 4903cd7407
commit b50481f1de
Signed by: DataHoarder
SSH key fingerprint: SHA256:OLTRf6Fl87G52SiR7sWLGNzlJt4WOX+tfI2yxo0z7xk
29 changed files with 1138 additions and 637 deletions

View file

@ -14,17 +14,18 @@ This package implements RandomX without CGO, using only Golang code, native floa
All test cases pass properly.
Supports Full mode and Light mode.
For the C++ implementation and design of RandomX, see [github.com/tevador/RandomX](https://github.com/tevador/RandomX)
| Feature | 386 | amd64 | arm | arm64 | mips | mips64 | riscv64 | wasm |
|:----------------------------:|:---:|:-----:|:---:|:-----:|:----:|:------:|:-------:|:----:|
| purego | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| Hardware Float Operations | ✅ | ✅ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ |
| Hardware AES Operations | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
| Native Superscalar Execution | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| Superscalar JIT Execution | ❌ | ✅* | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
| Native VM Execution | ✅ | ✅ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ |
| VM JIT Execution | ❌ | ✅* | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
| Feature | 386 | amd64 | arm | arm64 | mips | mips64 | riscv64 | wasm |
|:---------------------:|:----------:|:--------------:|:------:|:----------:|:------:|:------:|:-------:|:------:|
| purego | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| Full Mode | ❌ | ✅ | ❌ | ✅ | ❌ | ✅ | ✅ | ❌ |
| Float Operations | hw | **hw** | soft | **hw** | soft | soft | soft | soft |
| AES Operations | soft | **hw** | soft | soft | soft | soft | soft | soft |
| Superscalar Execution | native | **native+jit** | native | native | native | native | native | native |
| VM Execution | **native** | **native+jit** | soft | **native** | soft | soft | soft | soft |
A pure Golang implementation is used on platforms without hardware float support, and can also be selected manually via the `purego` build tag.
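For orientation, a minimal usage sketch of the reworked v3 API, pieced together from the test code in this commit (`NewCache`, `NewLightDataset`/`NewFullDataset`, `InitDatasetParallel`, `NewVM`, `CalculateHash`); the key/input pair and expected hash are test vector d from the test file:

```go
package main

import (
	"fmt"

	randomx "git.gammaspectra.live/P2Pool/go-randomx/v3"
)

func main() {
	// The tests pass 0 (default flags), which enables the JIT where supported.
	cache := randomx.NewCache(0)
	defer cache.Close()
	cache.Init([]byte("test key 001"))

	// Light mode: dataset items are derived from the cache on demand.
	dataset := randomx.NewLightDataset(cache)
	dataset.InitDataset(0, randomx.DatasetItemCount)

	// Full mode (64-bit targets only) would instead be:
	//   full := randomx.NewFullDataset(cache)
	//   randomx.InitDatasetParallel(full, runtime.NumCPU())

	vm := randomx.NewVM(dataset)
	defer vm.Close()

	var hash [32]byte
	vm.CalculateHash([]byte("sed do eiusmod tempor incididunt ut labore et dolore magna aliqua"), &hash)
	// expected: e9ff4503201c0c2cca26d285c93ae883f9b1d30c9eb240b820756f2d5a7905fc
	fmt.Printf("%x\n", hash)
}
```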

View file

@ -30,7 +30,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package aes
import (
"git.gammaspectra.live/P2Pool/go-randomx/v2/keys"
"git.gammaspectra.live/P2Pool/go-randomx/v3/keys"
"unsafe"
)
@ -50,21 +50,7 @@ func HashAes1Rx4(input []byte, output *[64]byte) {
if len(input)%64 != 0 {
panic("unsupported")
}
// states are copied
states := keys.AesHash1R_State
for input_ptr := 0; input_ptr < len(input); input_ptr += 64 {
in := (*[4][4]uint32)(unsafe.Pointer(unsafe.SliceData(input[input_ptr:])))
aesroundtrip_encdec(&states, in)
}
aesroundtrip_encdec1(&states, &keys.AesHash1R_XKeys[0])
aesroundtrip_encdec1(&states, &keys.AesHash1R_XKeys[1])
copy(output[:], (*[64]byte)(unsafe.Pointer(&states))[:])
hashAes1Rx4(input, output)
}
// FillAes1Rx4
@ -81,15 +67,7 @@ func FillAes1Rx4(state *[64]byte, output []byte) {
if len(output)%len(state) != 0 {
panic("unsupported")
}
// Reference to state without copying
states := (*[4][4]uint32)(unsafe.Pointer(state))
for outptr := 0; outptr < len(output); outptr += len(state) {
aesroundtrip_decenc(states, &keys.AesGenerator1R_Keys)
copy(output[outptr:], state[:])
}
fillAes1Rx4(state, output)
}
var fillAes4Rx4Keys0 = [4][4]uint32{

aes/hash_amd64.go (new file, 50 lines)
View file

@ -0,0 +1,50 @@
//go:build amd64 && !purego
package aes
import (
"git.gammaspectra.live/P2Pool/go-randomx/v3/asm"
"git.gammaspectra.live/P2Pool/go-randomx/v3/keys"
"golang.org/x/sys/cpu"
"unsafe"
)
var supportsAES = cpu.X86.HasAES
func fillAes1Rx4(state *[64]byte, output []byte) {
// Reference to state without copying
states := (*[4][4]uint32)(unsafe.Pointer(state))
if supportsAES {
asm.FillAes1Rx4(states, &keys.AesGenerator1R_Keys, unsafe.SliceData(output), uint64(len(output)))
return
}
for outptr := 0; outptr < len(output); outptr += len(state) {
aesroundtrip_decenc(states, &keys.AesGenerator1R_Keys)
copy(output[outptr:], state[:])
}
}
func hashAes1Rx4(input []byte, output *[64]byte) {
if supportsAES {
asm.HashAes1Rx4(&keys.AesHash1R_State, &keys.AesHash1R_XKeys, output, unsafe.SliceData(input), uint64(len(input)))
return
}
// states are copied
states := keys.AesHash1R_State
for input_ptr := 0; input_ptr < len(input); input_ptr += 64 {
in := (*[4][4]uint32)(unsafe.Pointer(unsafe.SliceData(input[input_ptr:])))
aesroundtrip_encdec(&states, in)
}
aesroundtrip_encdec1(&states, &keys.AesHash1R_XKeys[0])
aesroundtrip_encdec1(&states, &keys.AesHash1R_XKeys[1])
copy(output[:], (*[64]byte)(unsafe.Pointer(&states))[:])
}

aes/hash_generic.go (new file, 36 lines)
View file

@ -0,0 +1,36 @@
//go:build !amd64 || purego
package aes
import (
"git.gammaspectra.live/P2Pool/go-randomx/v3/keys"
"unsafe"
)
func fillAes1Rx4(state *[64]byte, output []byte) {
// Reference to state without copying
states := (*[4][4]uint32)(unsafe.Pointer(state))
for outptr := 0; outptr < len(output); outptr += len(state) {
aesroundtrip_decenc(states, &keys.AesGenerator1R_Keys)
copy(output[outptr:], state[:])
}
}
func hashAes1Rx4(input []byte, output *[64]byte) {
// states are copied
states := keys.AesHash1R_State
for input_ptr := 0; input_ptr < len(input); input_ptr += 64 {
in := (*[4][4]uint32)(unsafe.Pointer(unsafe.SliceData(input[input_ptr:])))
aesroundtrip_encdec(&states, in)
}
aesroundtrip_encdec1(&states, &keys.AesHash1R_XKeys[0])
aesroundtrip_encdec1(&states, &keys.AesHash1R_XKeys[1])
copy(output[:], (*[64]byte)(unsafe.Pointer(&states))[:])
}

View file

@ -3,52 +3,12 @@
package aes
import (
_ "git.gammaspectra.live/P2Pool/go-randomx/v2/asm"
"golang.org/x/sys/cpu"
_ "unsafe"
"git.gammaspectra.live/P2Pool/go-randomx/v3/asm"
)
//go:noescape
//go:linkname hard_aesdec git.gammaspectra.live/P2Pool/go-randomx/v2/asm.aesdec
func hard_aesdec(state *[4]uint32, key *[4]uint32)
//go:noescape
//go:linkname hard_aesenc git.gammaspectra.live/P2Pool/go-randomx/v2/asm.aesenc
func hard_aesenc(state *[4]uint32, key *[4]uint32)
//go:noescape
//go:linkname hard_aesroundtrip_decenc git.gammaspectra.live/P2Pool/go-randomx/v2/asm.aesroundtrip_decenc
func hard_aesroundtrip_decenc(states *[4][4]uint32, keys *[4][4]uint32)
//go:noescape
//go:linkname hard_aesroundtrip_encdec git.gammaspectra.live/P2Pool/go-randomx/v2/asm.aesroundtrip_encdec
func hard_aesroundtrip_encdec(states *[4][4]uint32, keys *[4][4]uint32)
//go:noescape
//go:linkname hard_aesroundtrip_encdec1 git.gammaspectra.live/P2Pool/go-randomx/v2/asm.aesroundtrip_encdec1
func hard_aesroundtrip_encdec1(states *[4][4]uint32, key *[4]uint32)
var supportsAES = cpu.X86.HasAES
func aesenc(state *[4]uint32, key *[4]uint32) {
if supportsAES {
hard_aesenc(state, key)
} else {
soft_aesenc(state, key)
}
}
func aesdec(state *[4]uint32, key *[4]uint32) {
if supportsAES {
hard_aesdec(state, key)
} else {
soft_aesdec(state, key)
}
}
func aesroundtrip_decenc(states *[4][4]uint32, keys *[4][4]uint32) {
if supportsAES {
hard_aesroundtrip_decenc(states, keys)
asm.AESRoundTrip_DecEnc(states, keys)
} else {
soft_aesdec(&states[0], &keys[0])
soft_aesenc(&states[1], &keys[1])
@ -59,7 +19,7 @@ func aesroundtrip_decenc(states *[4][4]uint32, keys *[4][4]uint32) {
func aesroundtrip_encdec(states *[4][4]uint32, keys *[4][4]uint32) {
if supportsAES {
hard_aesroundtrip_encdec(states, keys)
asm.AESRoundTrip_EncDec(states, keys)
} else {
soft_aesenc(&states[0], &keys[0])
soft_aesdec(&states[1], &keys[1])
@ -70,7 +30,7 @@ func aesroundtrip_encdec(states *[4][4]uint32, keys *[4][4]uint32) {
func aesroundtrip_encdec1(states *[4][4]uint32, key *[4]uint32) {
if supportsAES {
hard_aesroundtrip_encdec1(states, key)
asm.AESRoundTrip_EncDec1(states, key)
} else {
soft_aesenc(&states[0], key)
soft_aesdec(&states[1], key)

View file

@ -1,11 +0,0 @@
//go:build amd64 && !purego
package asm
func AESRoundEncrypt(state *[4]uint32, key *[4]uint32) {
aesenc(state, key)
}
func AESRoundDecrypt(state *[4]uint32, key *[4]uint32) {
aesdec(state, key)
}

View file

@ -3,16 +3,16 @@
package asm
//go:noescape
func aesenc(state *[4]uint32, key *[4]uint32)
func FillAes1Rx4(states *[4][4]uint32, keys *[4][4]uint32, output *byte, outputLen uint64)
//go:noescape
func aesdec(state *[4]uint32, key *[4]uint32)
func HashAes1Rx4(initialState *[4][4]uint32, xKeys *[2][4]uint32, output *[64]byte, input *byte, inputLen uint64)
//go:noescape
func aesroundtrip_decenc(states *[4][4]uint32, keys *[4][4]uint32)
func AESRoundTrip_DecEnc(states *[4][4]uint32, keys *[4][4]uint32)
//go:noescape
func aesroundtrip_encdec(states *[4][4]uint32, keys *[4][4]uint32)
func AESRoundTrip_EncDec(states *[4][4]uint32, keys *[4][4]uint32)
//go:noescape
func aesroundtrip_encdec1(states *[4][4]uint32, key *[4]uint32)
func AESRoundTrip_EncDec1(states *[4][4]uint32, key *[4]uint32)

View file

@ -2,92 +2,171 @@
#include "textflag.h"
TEXT ·aesenc(SB),NOSPLIT|NOFRAME,$0-16
MOVQ state+0(FP), AX
MOVQ key+8(FP), BX
VMOVDQU32 0(AX), X0
VMOVDQU32 0(BX), X1
AESENC X1, X0
VMOVDQU32 X0, 0(AX)
TEXT ·FillAes1Rx4(SB),NOSPLIT|NOFRAME,$0-32
MOVQ states+0(FP), AX
MOVQ keys+8(FP), BX
MOVQ output+16(FP), CX
MOVQ outputLen+24(FP), DX
// initial state
VMOVDQU 0(AX), X0
VMOVDQU 16(AX), X1
VMOVDQU 32(AX), X2
VMOVDQU 48(AX), X3
// keys: X4-X7
VMOVDQU 0(BX), X4
VMOVDQU 16(BX), X5
VMOVDQU 32(BX), X6
VMOVDQU 48(BX), X7
loop:
AESDEC X4, X0
AESENC X5, X1
AESDEC X6, X2
AESENC X7, X3
// store state onto output
VMOVDQU X0, 0(CX)
VMOVDQU X1, 16(CX)
VMOVDQU X2, 32(CX)
VMOVDQU X3, 48(CX)
ADDQ $64, CX
// outputLen -= 64, continue if not 0
SUBQ $64, DX
JNE loop
// offload initial state
VMOVDQU X0, 0(AX)
VMOVDQU X1, 16(AX)
VMOVDQU X2, 32(AX)
VMOVDQU X3, 48(AX)
RET
TEXT ·aesdec(SB),NOSPLIT|NOFRAME,$0-16
MOVQ state+0(FP), AX
MOVQ key+8(FP), BX
VMOVDQU32 0(AX), X0
VMOVDQU32 0(BX), X1
AESDEC X1, X0
VMOVDQU32 X0, 0(AX)
TEXT ·HashAes1Rx4(SB),NOSPLIT|NOFRAME,$0-40
MOVQ initialState+0(FP), AX
// initial state
VMOVDQU 0(AX), X0
VMOVDQU 16(AX), X1
VMOVDQU 32(AX), X2
VMOVDQU 48(AX), X3
MOVQ xKeys+8(FP), AX
MOVQ output+16(FP), BX
MOVQ input+24(FP), CX
MOVQ inputLen+32(FP), DX
loop:
// input as keys: X4-X7
VMOVDQU 0(CX), X4
VMOVDQU 16(CX), X5
VMOVDQU 32(CX), X6
VMOVDQU 48(CX), X7
AESENC X4, X0
AESDEC X5, X1
AESENC X6, X2
AESDEC X7, X3
ADDQ $64, CX
// inputLen -= 64, continue if not 0
SUBQ $64, DX
JNE loop
// do encdec1 with both keys!
VMOVDQU 0(AX), X4
VMOVDQU 16(AX), X5
AESENC X4, X0
AESDEC X4, X1
AESENC X4, X2
AESDEC X4, X3
AESENC X5, X0
AESDEC X5, X1
AESENC X5, X2
AESDEC X5, X3
// offload into output
VMOVDQU X0, 0(BX)
VMOVDQU X1, 16(BX)
VMOVDQU X2, 32(BX)
VMOVDQU X3, 48(BX)
RET
TEXT ·aesroundtrip_decenc(SB),NOSPLIT|NOFRAME,$0-16
TEXT ·AESRoundTrip_DecEnc(SB),NOSPLIT|NOFRAME,$0-16
MOVQ states+0(FP), AX
MOVQ keys+8(FP), BX
VMOVDQU32 0(AX), X0
VMOVDQU32 0(BX), X1
VMOVDQU32 16(AX), X2
VMOVDQU32 16(BX), X3
VMOVDQU32 32(AX), X4
VMOVDQU32 32(BX), X5
VMOVDQU32 48(AX), X6
VMOVDQU32 48(BX), X7
VMOVDQU 0(AX), X0
VMOVDQU 0(BX), X1
VMOVDQU 16(AX), X2
VMOVDQU 16(BX), X3
VMOVDQU 32(AX), X4
VMOVDQU 32(BX), X5
VMOVDQU 48(AX), X6
VMOVDQU 48(BX), X7
AESDEC X1, X0
AESENC X3, X2
AESDEC X5, X4
AESENC X7, X6
VMOVDQU32 X0, 0(AX)
VMOVDQU32 X2, 16(AX)
VMOVDQU32 X4, 32(AX)
VMOVDQU32 X6, 48(AX)
VMOVDQU X0, 0(AX)
VMOVDQU X2, 16(AX)
VMOVDQU X4, 32(AX)
VMOVDQU X6, 48(AX)
RET
TEXT ·aesroundtrip_encdec(SB),NOSPLIT|NOFRAME,$0-16
TEXT ·AESRoundTrip_EncDec(SB),NOSPLIT|NOFRAME,$0-16
MOVQ states+0(FP), AX
MOVQ keys+8(FP), BX
VMOVDQU32 0(AX), X0
VMOVDQU32 0(BX), X1
VMOVDQU32 16(AX), X2
VMOVDQU32 16(BX), X3
VMOVDQU32 32(AX), X4
VMOVDQU32 32(BX), X5
VMOVDQU32 48(AX), X6
VMOVDQU32 48(BX), X7
VMOVDQU 0(AX), X0
VMOVDQU 0(BX), X1
VMOVDQU 16(AX), X2
VMOVDQU 16(BX), X3
VMOVDQU 32(AX), X4
VMOVDQU 32(BX), X5
VMOVDQU 48(AX), X6
VMOVDQU 48(BX), X7
AESENC X1, X0
AESDEC X3, X2
AESENC X5, X4
AESDEC X7, X6
VMOVDQU32 X0, 0(AX)
VMOVDQU32 X2, 16(AX)
VMOVDQU32 X4, 32(AX)
VMOVDQU32 X6, 48(AX)
VMOVDQU X0, 0(AX)
VMOVDQU X2, 16(AX)
VMOVDQU X4, 32(AX)
VMOVDQU X6, 48(AX)
RET
TEXT ·aesroundtrip_encdec1(SB),NOSPLIT|NOFRAME,$0-16
TEXT ·AESRoundTrip_EncDec1(SB),NOSPLIT|NOFRAME,$0-16
MOVQ states+0(FP), AX
MOVQ key+8(FP), BX
VMOVDQU32 0(BX), X0
VMOVDQU32 0(AX), X1
VMOVDQU32 16(AX), X2
VMOVDQU32 32(AX), X3
VMOVDQU32 48(AX), X4
VMOVDQU 0(BX), X0
VMOVDQU 0(AX), X1
VMOVDQU 16(AX), X2
VMOVDQU 32(AX), X3
VMOVDQU 48(AX), X4
AESENC X0, X1
AESDEC X0, X2
AESENC X0, X3
AESDEC X0, X4
VMOVDQU32 X1, 0(AX)
VMOVDQU32 X2, 16(AX)
VMOVDQU32 X3, 32(AX)
VMOVDQU32 X4, 48(AX)
VMOVDQU X1, 0(AX)
VMOVDQU X2, 16(AX)
VMOVDQU X3, 32(AX)
VMOVDQU X4, 48(AX)
RET

blake2/generator.go (new file, 46 lines)
View file

@ -0,0 +1,46 @@
package blake2
import (
"encoding/binary"
"golang.org/x/crypto/blake2b"
)
type Generator struct {
state [blake2b.Size]byte
i int
}
func New(seed []byte, nonce uint32) *Generator {
var state [blake2b.Size]byte
copy(state[:60], seed)
binary.LittleEndian.PutUint32(state[60:], nonce)
g := &Generator{
i: len(state),
state: state,
}
return g
}
func (g *Generator) GetUint32() (v uint32) {
if (g.i + 4) > len(g.state) {
g.reseed()
}
v = binary.LittleEndian.Uint32(g.state[g.i:])
g.i += 4
return v
}
func (g *Generator) GetByte() (v byte) {
if (g.i + 1) > len(g.state) {
g.reseed()
}
v = g.state[g.i]
g.i++
return v
}
func (g *Generator) reseed() {
g.state = blake2b.Sum512(g.state[:])
g.i = 0
}
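A rough caller-side sketch (not part of the commit) of how the cache keying code consumes this generator; the seed value is illustrative:

// the generator yields a deterministic byte/uint32 stream seeded with BLAKE2b
gen := blake2.New([]byte("test key 001"), 0)
b := gen.GetByte()   // next pseudo-random byte
v := gen.GetUint32() // next little-endian uint32
// once all 64 state bytes have been read, reseed() rehashes the state
// with blake2b.Sum512 and reading restarts at offset 0
_, _ = b, v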

View file

@ -1,50 +0,0 @@
package randomx
import (
"encoding/binary"
"golang.org/x/crypto/blake2b"
)
type Blake2Generator struct {
data [64]byte
dataindex int
allocRegIndex [8]int
allocRegisters [8]Register
}
func Init_Blake2Generator(key []byte, nonce uint32) *Blake2Generator {
var b Blake2Generator
b.dataindex = len(b.data)
if len(key) > 60 {
copy(b.data[:], key[0:60])
} else {
copy(b.data[:], key)
}
binary.LittleEndian.PutUint32(b.data[60:], nonce)
return &b
}
func (b *Blake2Generator) checkdata(bytesNeeded int) {
if b.dataindex+bytesNeeded > cap(b.data) {
//blake2b(data, sizeof(data), data, sizeof(data), nullptr, 0);
h := blake2b.Sum512(b.data[:])
copy(b.data[:], h[:])
b.dataindex = 0
}
}
func (b *Blake2Generator) GetByte() byte {
b.checkdata(1)
ret := b.data[b.dataindex]
b.dataindex++
return ret
}
func (b *Blake2Generator) GetUint32() uint32 {
b.checkdata(4)
ret := binary.LittleEndian.Uint32(b.data[b.dataindex:])
b.dataindex += 4
return ret
}

View file

@ -1,8 +1,9 @@
package randomx
import (
"git.gammaspectra.live/P2Pool/go-randomx/v2/argon2"
"git.gammaspectra.live/P2Pool/go-randomx/v2/keys"
"git.gammaspectra.live/P2Pool/go-randomx/v3/argon2"
"git.gammaspectra.live/P2Pool/go-randomx/v3/blake2"
"git.gammaspectra.live/P2Pool/go-randomx/v3/keys"
"runtime"
"slices"
"unsafe"
@ -15,7 +16,7 @@ func (m *MemoryBlock) GetLine(addr uint64) *RegisterLine {
return (*RegisterLine)(unsafe.Pointer(unsafe.SliceData(m[addr : addr+8 : addr+8])))
}
type Randomx_Cache struct {
type Cache struct {
Blocks []MemoryBlock
Programs [RANDOMX_PROGRAM_COUNT]SuperScalarProgram
@ -25,36 +26,20 @@ type Randomx_Cache struct {
Flags uint64
}
func Randomx_alloc_cache(flags uint64) *Randomx_Cache {
func NewCache(flags uint64) *Cache {
if flags == RANDOMX_FLAG_DEFAULT {
flags = RANDOMX_FLAG_JIT
}
return &Randomx_Cache{
return &Cache{
Flags: flags,
}
}
func (cache *Randomx_Cache) HasJIT() bool {
func (cache *Cache) HasJIT() bool {
return cache.Flags&RANDOMX_FLAG_JIT > 0 && cache.JitPrograms[0] != nil
}
func (cache *Randomx_Cache) VM_Initialize() *VM {
vm := &VM{
Dataset: &Randomx_DatasetLight{
Cache: cache,
},
}
if cache.HasJIT() {
vm.JITProgram = mapProgram(nil, int(RandomXCodeSize))
if cache.Flags&RANDOMX_FLAG_SECURE == 0 {
mapProgramRWX(vm.JITProgram)
}
}
return vm
}
func (cache *Randomx_Cache) Close() error {
func (cache *Cache) Close() error {
for _, p := range cache.JitPrograms {
if p != nil {
err := p.Close()
@ -66,10 +51,12 @@ func (cache *Randomx_Cache) Close() error {
return nil
}
func (cache *Randomx_Cache) Init(key []byte) {
// Lock due to external JIT madness
runtime.LockOSThread()
defer runtime.UnlockOSThread()
func (cache *Cache) Init(key []byte) {
if cache.Flags&RANDOMX_FLAG_JIT > 0 {
// Lock due to external JIT madness
runtime.LockOSThread()
defer runtime.UnlockOSThread()
}
kkey := slices.Clone(key)
@ -79,10 +66,11 @@ func (cache *Randomx_Cache) Init(key []byte) {
cache.Blocks = memoryBlocks
nonce := uint32(0) //uint32(len(key))
gen := Init_Blake2Generator(key, nonce)
const nonce uint32 = 0
gen := blake2.New(key, nonce)
for i := 0; i < 8; i++ {
cache.Programs[i] = Build_SuperScalar_Program(gen) // build a superscalar program
cache.Programs[i] = BuildSuperScalarProgram(gen) // build a superscalar program
if cache.Flags&RANDOMX_FLAG_JIT > 0 {
cache.JitPrograms[i] = generateSuperscalarCode(cache.Programs[i])
}
@ -93,7 +81,7 @@ func (cache *Randomx_Cache) Init(key []byte) {
const Mask = CacheSize/CacheLineSize - 1
// GetMixBlock fetch a 64 byte block in uint64 form
func (cache *Randomx_Cache) GetMixBlock(addr uint64) *RegisterLine {
func (cache *Cache) GetMixBlock(addr uint64) *RegisterLine {
addr = (addr & Mask) * CacheLineSize
@ -101,7 +89,7 @@ func (cache *Randomx_Cache) GetMixBlock(addr uint64) *RegisterLine {
return cache.Blocks[block].GetLine(addr % 1024)
}
func (cache *Randomx_Cache) InitDatasetItem(rl *RegisterLine, itemNumber uint64) {
func (cache *Cache) InitDatasetItem(rl *RegisterLine, itemNumber uint64) {
registerValue := itemNumber
rl[0] = (itemNumber + 1) * keys.SuperScalar_Constants[0]
@ -129,7 +117,7 @@ func (cache *Randomx_Cache) InitDatasetItem(rl *RegisterLine, itemNumber uint64)
}
}
func (cache *Randomx_Cache) InitDatasetItemJIT(rl *RegisterLine, itemNumber uint64) {
func (cache *Cache) InitDatasetItemJIT(rl *RegisterLine, itemNumber uint64) {
registerValue := itemNumber
rl[0] = (itemNumber + 1) * keys.SuperScalar_Constants[0]
@ -155,9 +143,12 @@ func (cache *Randomx_Cache) InitDatasetItemJIT(rl *RegisterLine, itemNumber uint
}
}
func (cache *Randomx_Cache) initDataset(dataset []RegisterLine, startItem, endItem uint64) {
panic("todo")
func (cache *Cache) InitDataset(dataset []RegisterLine, startItem, endItem uint64) {
for itemNumber := startItem; itemNumber < endItem; itemNumber, dataset = itemNumber+1, dataset[1:] {
cache.InitDatasetItem(&dataset[0], itemNumber)
if cache.HasJIT() {
cache.InitDatasetItemJIT(&dataset[0], itemNumber)
} else {
cache.InitDatasetItem(&dataset[0], itemNumber)
}
}
}

View file

@ -29,7 +29,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package randomx
import "git.gammaspectra.live/P2Pool/go-randomx/v2/argon2"
import "git.gammaspectra.live/P2Pool/go-randomx/v3/argon2"
// see reference configuration.h
// Cache size in KiB. Must be a power of 2.
@ -81,7 +81,7 @@ const RANDOMX_JUMP_BITS = 8
// Jump condition mask offset in bits. The sum of RANDOMX_JUMP_BITS and RANDOMX_JUMP_OFFSET must not exceed 16.
const RANDOMX_JUMP_OFFSET = 8
const DATASETEXTRAITEMS = RANDOMX_DATASET_EXTRA_SIZE / RANDOMX_DATASET_ITEM_SIZE
const DatasetExtraItems = RANDOMX_DATASET_EXTRA_SIZE / RANDOMX_DATASET_ITEM_SIZE
const SuperscalarMaxSize = 3*RANDOMX_SUPERSCALAR_LATENCY + 2
const RANDOMX_DATASET_ITEM_SIZE uint64 = 64

View file

@ -1,8 +1,31 @@
package randomx
type Randomx_Dataset interface {
InitDataset(startItem, endItem uint64)
ReadDataset(address uint64, r, cache *RegisterLine)
import "sync"
type Dataset interface {
InitDataset(startItem, itemCount uint64)
ReadDataset(address uint64, r *RegisterLine)
PrefetchDataset(address uint64)
Flags() uint64
Cache() *Cache
Memory() *[DatasetItemCount]RegisterLine
}
func InitDatasetParallel(dataset Dataset, n int) {
n = max(1, n)
var wg sync.WaitGroup
for i := uint64(1); i < uint64(n); i++ {
a := (DatasetItemCount * i) / uint64(n)
b := (DatasetItemCount * (i + 1)) / uint64(n)
wg.Add(1)
go func(a, b uint64) {
defer wg.Done()
dataset.InitDataset(a, b-a)
}(a, b)
}
dataset.InitDataset(0, DatasetItemCount/uint64(n))
wg.Wait()
}
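A short sketch of how the tests in this commit drive this helper to fill the full dataset on all CPUs (assumes `runtime` is imported and `key` holds the cache key):

cache := NewCache(RANDOMX_FLAG_DEFAULT)
cache.Init(key)
if dataset := NewFullDataset(cache); dataset != nil { // nil on targets without full-mode support
	InitDatasetParallel(dataset, runtime.NumCPU())
}

The calling goroutine initializes the first slice of items itself while the other n-1 workers fill the remaining contiguous ranges, so passing n = 1 simply runs single-threaded.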

dataset_full.go (new file, 52 lines)
View file

@ -0,0 +1,52 @@
//go:build amd64 || arm64 || arm64be || loong64 || mips64 || mips64le || ppc64 || ppc64le || riscv64 || s390x || sparc64
package randomx
const DatasetSize = RANDOMX_DATASET_BASE_SIZE + RANDOMX_DATASET_EXTRA_SIZE
const DatasetItemCount = DatasetSize / CacheLineSize
type DatasetFull struct {
cache *Cache
memory [DatasetItemCount]RegisterLine
}
func NewFullDataset(cache *Cache) *DatasetFull {
return &DatasetFull{
cache: cache,
}
}
func (d *DatasetFull) PrefetchDataset(address uint64) {
}
func (d *DatasetFull) ReadDataset(address uint64, r *RegisterLine) {
cache := &d.memory[address/CacheLineSize]
for i := range r {
r[i] ^= cache[i]
}
}
func (d *DatasetFull) Cache() *Cache {
return d.cache
}
func (d *DatasetFull) Flags() uint64 {
return d.cache.Flags
}
func (d *DatasetFull) Memory() *[DatasetItemCount]RegisterLine {
return &d.memory
}
func (d *DatasetFull) InitDataset(startItem, itemCount uint64) {
if startItem >= DatasetItemCount || itemCount > DatasetItemCount {
panic("out of range")
}
if startItem+itemCount > DatasetItemCount {
panic("out of range")
}
d.cache.InitDataset(d.memory[startItem:startItem+itemCount], startItem, startItem+itemCount)
}

dataset_full_no64.go (new file, 34 lines)
View file

@ -0,0 +1,34 @@
//go:build !(amd64 || arm64 || arm64be || loong64 || mips64 || mips64le || ppc64 || ppc64le || riscv64 || s390x || sparc64)
package randomx
const DatasetSize = RANDOMX_DATASET_BASE_SIZE + RANDOMX_DATASET_EXTRA_SIZE
const DatasetItemCount = DatasetSize / CacheLineSize
type DatasetFull struct {
}
func NewFullDataset(cache *Cache) *DatasetFull {
return nil
}
func (d *DatasetFull) PrefetchDataset(address uint64) {
}
func (d *DatasetFull) ReadDataset(address uint64, r *RegisterLine) {
}
func (d *DatasetFull) Cache() *Cache {
return nil
}
func (d *DatasetFull) Flags() uint64 {
return 0
}
func (d *DatasetFull) InitDataset(startItem, itemCount uint64) {
}

View file

@ -1,19 +1,25 @@
package randomx
type Randomx_DatasetLight struct {
Cache *Randomx_Cache
Memory []uint64
type DatasetLight struct {
cache *Cache
}
func (d *Randomx_DatasetLight) PrefetchDataset(address uint64) {
func NewLightDataset(cache *Cache) *DatasetLight {
return &DatasetLight{
cache: cache,
}
}
func (d *DatasetLight) PrefetchDataset(address uint64) {
}
func (d *Randomx_DatasetLight) ReadDataset(address uint64, r, cache *RegisterLine) {
if d.Cache.HasJIT() {
d.Cache.InitDatasetItemJIT(cache, address/CacheLineSize)
func (d *DatasetLight) ReadDataset(address uint64, r *RegisterLine) {
var cache RegisterLine
if d.cache.HasJIT() {
d.cache.InitDatasetItemJIT(&cache, address/CacheLineSize)
} else {
d.Cache.InitDatasetItem(cache, address/CacheLineSize)
d.cache.InitDatasetItem(&cache, address/CacheLineSize)
}
for i := range r {
@ -21,10 +27,18 @@ func (d *Randomx_DatasetLight) ReadDataset(address uint64, r, cache *RegisterLin
}
}
func (d *Randomx_DatasetLight) Flags() uint64 {
return d.Cache.Flags
func (d *DatasetLight) Flags() uint64 {
return d.cache.Flags
}
func (d *Randomx_DatasetLight) InitDataset(startItem, endItem uint64) {
//d.Cache.initDataset(d.Cache.Programs)
func (d *DatasetLight) Cache() *Cache {
return d.cache
}
func (d *DatasetLight) Memory() *[DatasetItemCount]RegisterLine {
return nil
}
func (d *DatasetLight) InitDataset(startItem, itemCount uint64) {
}

go.mod (2 lines changed)
View file

@ -1,4 +1,4 @@
module git.gammaspectra.live/P2Pool/go-randomx/v2
module git.gammaspectra.live/P2Pool/go-randomx/v3
go 1.21

View file

@ -3,9 +3,8 @@
package randomx
import (
"bytes"
"encoding/binary"
"git.gammaspectra.live/P2Pool/go-randomx/v2/asm"
"git.gammaspectra.live/P2Pool/go-randomx/v3/asm"
)
/*
@ -13,11 +12,11 @@ import (
REGISTER ALLOCATION:
; rax -> temporary
; rbx -> iteration counter "ic"
; rbx -> todo: iteration counter "ic"
; rcx -> temporary
; rdx -> temporary
; rsi -> scratchpad pointer
; rdi -> (not used)
; rdi -> todo: dataset pointer
; rbp -> (do not use, it's used by Golang sampling) jump target //todo: memory registers "ma" (high 32 bits), "mx" (low 32 bits)
; rsp -> stack pointer
; r8 -> "r0"
@ -128,7 +127,7 @@ var REX_MOV_MR = []byte{0x4c, 0x89}
var REX_XOR_EAX = []byte{0x41, 0x33}
var SUB_EBX = []byte{0x83, 0xEB, 0x01}
var JNZ = []byte{0x0f, 0x85}
var JMP = 0xe9
var JMP byte = 0xe9
var REX_XOR_RAX_R64 = []byte{0x49, 0x33}
var REX_XCHG = []byte{0x4d, 0x87}
@ -157,6 +156,8 @@ var NOP6 = []byte{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00}
var NOP7 = []byte{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00}
var NOP8 = []byte{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}
var NOPX = [][]byte{NOP1, NOP2, NOP3, NOP4, NOP5, NOP6, NOP7, NOP8}
var JMP_ALIGN_PREFIX = [14][]byte{
{},
{0x2E},
@ -263,66 +264,3 @@ var BranchesWithin32B = func() bool {
}
return false
}()
/*
;# callee-saved registers - Microsoft x64 calling convention
push rbx
push rbp
push rdi
push rsi
push r12
push r13
push r14
push r15
sub rsp, 80
movdqu xmmword ptr [rsp+64], xmm6
movdqu xmmword ptr [rsp+48], xmm7
movdqu xmmword ptr [rsp+32], xmm8
movdqu xmmword ptr [rsp+16], xmm9
movdqu xmmword ptr [rsp+0], xmm10
sub rsp, 80
movdqu xmmword ptr [rsp+64], xmm11
movdqu xmmword ptr [rsp+48], xmm12
movdqu xmmword ptr [rsp+32], xmm13
movdqu xmmword ptr [rsp+16], xmm14
movdqu xmmword ptr [rsp+0], xmm15
;# function arguments
push rcx ;# RegisterFile& registerFile
mov rbp, qword ptr [rdx] ;# "mx", "ma"
mov rdi, qword ptr [rdx+8] ;# uint8_t* dataset
mov rsi, r8 ;# uint8_t* scratchpad
mov rbx, r9 ;# loop counter
mov rax, rbp
ror rbp, 32
;# zero integer registers
xor r8, r8
xor r9, r9
xor r10, r10
xor r11, r11
xor r12, r12
xor r13, r13
xor r14, r14
xor r15, r15
;# load constant registers
lea rcx, [rcx+120]
movapd xmm8, xmmword ptr [rcx+72]
movapd xmm9, xmmword ptr [rcx+88]
movapd xmm10, xmmword ptr [rcx+104]
movapd xmm11, xmmword ptr [rcx+120]
movapd xmm13, xmmword ptr [mantissaMask]
movapd xmm14, xmmword ptr [exp240]
movapd xmm15, xmmword ptr [scaleMask]
mov rdx, rax
and eax, RANDOMX_SCRATCHPAD_MASK
ror rdx, 32
and edx, RANDOMX_SCRATCHPAD_MASK
jmp rx_program_loop_begin
*/
var randomx_program_prologue = bytes.Repeat(NOP1, 64)
var randomx_program_loop_begin = bytes.Repeat(NOP1, 64)

View file

@ -1,6 +1,9 @@
package randomx
import "math"
import (
"math"
"math/bits"
)
const (
mantbits64 uint = 52
@ -41,10 +44,36 @@ func StaticExponent(entropy uint64) uint64 {
return exponent
}
func EMask(entropy uint64) uint64 {
func ExponentMask(entropy uint64) uint64 {
return (entropy & mask22bit) | StaticExponent(entropy)
}
func Xor(a, b float64) float64 {
return math.Float64frombits(math.Float64bits(a) ^ math.Float64bits(b))
}
func smulh(a, b int64) uint64 {
hi_, _ := bits.Mul64(uint64(a), uint64(b))
t1 := (a >> 63) & b
t2 := (b >> 63) & a
return uint64(int64(hi_) - t1 - t2)
}
// reciprocal
// Calculates rcp = 2**x / divisor for highest integer x such that rcp < 2**64.
// divisor must not be 0 or a power of 2
func reciprocal(divisor uint32) uint64 {
const p2exp63 = uint64(1) << 63
quotient := p2exp63 / uint64(divisor)
remainder := p2exp63 % uint64(divisor)
shift := bits.Len32(divisor)
return (quotient << shift) + ((remainder << shift) / uint64(divisor))
}
func signExtend2sCompl(x uint32) uint64 {
return uint64(int64(int32(x)))
}
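Hypothetical sanity checks for the two helpers above (not in the commit), handy when porting:

// smulh(-1, -1) is the high 64 bits of (-1)*(-1) = 1, i.e. 0
// reciprocal(3): the highest x with 2**x/3 < 2**64 is x = 65,
// and floor(2**65/3) = 12297829382473034410
if smulh(-1, -1) != 0 || reciprocal(3) != 12297829382473034410 {
	panic("unexpected helper result")
}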

View file

@ -31,7 +31,9 @@ package randomx
import (
"fmt"
"os"
"runtime"
"slices"
)
import "testing"
@ -47,9 +49,9 @@ var Tests = []struct {
{[]byte("test key 001"), []byte("sed do eiusmod tempor incididunt ut labore et dolore magna aliqua"), "e9ff4503201c0c2cca26d285c93ae883f9b1d30c9eb240b820756f2d5a7905fc"}, // test d
}
func Test_Randomx(t *testing.T) {
func Test_RandomXLight(t *testing.T) {
c := Randomx_alloc_cache(0)
c := NewCache(0)
for ix, tt := range Tests {
@ -62,7 +64,10 @@ func Test_Randomx(t *testing.T) {
}
}()
vm := c.VM_Initialize()
dataset := NewLightDataset(c)
dataset.InitDataset(0, DatasetItemCount)
vm := NewVM(dataset)
defer vm.Close()
var output_hash [32]byte
@ -74,57 +79,125 @@ func Test_Randomx(t *testing.T) {
}
})
}
}
func Benchmark_RandomX(b *testing.B) {
func Test_RandomXFull(t *testing.T) {
if os.Getenv("CI") != "" {
t.Skip("Skipping full mode in CI environment")
}
c := NewCache(0)
for ix, tt := range Tests {
t.Run(string(tt.key)+"_____"+string(tt.input), func(t *testing.T) {
c.Init(tt.key)
defer func() {
err := c.Close()
if err != nil {
t.Error(err)
}
}()
dataset := NewFullDataset(c)
if dataset == nil {
t.Skip("Skipping full mode in 32-bit environment")
}
InitDatasetParallel(dataset, runtime.NumCPU())
vm := NewVM(dataset)
defer vm.Close()
var output_hash [32]byte
vm.CalculateHash(tt.input, &output_hash)
actual := fmt.Sprintf("%x", output_hash)
if actual != tt.expected {
t.Errorf("#%d Fib(%v): expected %s, actual %s", ix, tt.key, tt.expected, actual)
}
})
// cleanup 2GiB between runs
runtime.GC()
}
}
var BenchmarkTest = Tests[0]
var BenchmarkCache *Cache
var BenchmarkDatasetLight *DatasetLight
var BenchmarkDatasetFull *DatasetFull
func TestMain(m *testing.M) {
if slices.Contains(os.Args, "-test.bench") {
//init light and full dataset
BenchmarkCache = NewCache(0)
BenchmarkCache.Init(BenchmarkTest.key)
BenchmarkDatasetLight = NewLightDataset(BenchmarkCache)
BenchmarkDatasetLight.InitDataset(0, DatasetItemCount)
BenchmarkDatasetFull = NewFullDataset(BenchmarkCache)
InitDatasetParallel(BenchmarkDatasetFull, runtime.NumCPU())
defer BenchmarkCache.Close()
}
os.Exit(m.Run())
}
func Benchmark_RandomXLight(b *testing.B) {
b.ReportAllocs()
tt := Tests[0]
c := Randomx_alloc_cache(0)
c.Init(tt.key)
defer func() {
err := c.Close()
if err != nil {
b.Error(err)
}
}()
vm := c.VM_Initialize()
vm := NewVM(BenchmarkDatasetLight)
defer vm.Close()
b.ResetTimer()
for i := 0; i < b.N; i++ {
var output_hash [32]byte
vm.CalculateHash(tt.input, &output_hash)
vm.CalculateHash(BenchmarkTest.input, &output_hash)
runtime.KeepAlive(output_hash)
}
}
func Benchmark_RandomXParallel(b *testing.B) {
func Benchmark_RandomXFull(b *testing.B) {
b.ReportAllocs()
tt := Tests[0]
c := Randomx_alloc_cache(0)
c.Init(tt.key)
defer func() {
err := c.Close()
if err != nil {
b.Error(err)
}
}()
vm := NewVM(BenchmarkDatasetFull)
defer vm.Close()
b.ResetTimer()
for i := 0; i < b.N; i++ {
var output_hash [32]byte
vm.CalculateHash(BenchmarkTest.input, &output_hash)
runtime.KeepAlive(output_hash)
}
}
func Benchmark_RandomXLight_Parallel(b *testing.B) {
b.ReportAllocs()
b.ResetTimer()
b.RunParallel(func(pb *testing.PB) {
var output_hash [32]byte
vm := c.VM_Initialize()
vm := NewVM(BenchmarkDatasetLight)
defer vm.Close()
for pb.Next() {
vm.CalculateHash(tt.input, &output_hash)
vm.CalculateHash(BenchmarkTest.input, &output_hash)
runtime.KeepAlive(output_hash)
}
})
}
func Benchmark_RandomXFull_Parallel(b *testing.B) {
b.ReportAllocs()
b.ResetTimer()
b.RunParallel(func(pb *testing.PB) {
var output_hash [32]byte
vm := NewVM(BenchmarkDatasetFull)
defer vm.Close()
for pb.Next() {
vm.CalculateHash(BenchmarkTest.input, &output_hash)
runtime.KeepAlive(output_hash)
}
})

View file

@ -24,7 +24,3 @@ const RegisterFileSize = RegistersCount*8 + RegistersCountFloat*2*8*3
func (rf *RegisterFile) Memory() *[RegisterFileSize]byte {
return (*[RegisterFileSize]byte)(unsafe.Pointer(rf))
}
type MemoryRegisters struct {
mx, ma uint64
}

View file

@ -29,7 +29,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package randomx
import "math/bits"
import (
"git.gammaspectra.live/P2Pool/go-randomx/v3/blake2"
"math/bits"
)
type ExecutionPort byte
@ -201,7 +204,7 @@ var buffer3 = []int{4, 9, 3}
var buffer4 = []int{4, 4, 4, 4}
var buffer5 = []int{3, 3, 10}
var Decoder_To_Instruction_Length = [][]int{
var decoderToInstructionSize = [][]int{
buffer0,
buffer1,
buffer2,
@ -258,7 +261,7 @@ func (d DecoderType) String() string {
}
}
func FetchNextDecoder(ins *Instruction, cycle int, mulcount int, gen *Blake2Generator) DecoderType {
func FetchNextDecoder(ins *Instruction, cycle int, mulcount int, gen *blake2.Generator) DecoderType {
if ins.Opcode == S_IMULH_R || ins.Opcode == S_ISMULH_R {
return Decoder3310
@ -295,158 +298,6 @@ func FetchNextDecoder(ins *Instruction, cycle int, mulcount int, gen *Blake2Gene
return Decoder484
}
var slot3 = []*Instruction{&ISUB_R, &IXOR_R} // 3 length instruction will be filled with these
var slot3L = []*Instruction{&ISUB_R, &IXOR_R, &IMULH_R, &ISMULH_R}
var slot4 = []*Instruction{&IROR_C, &IADD_RS}
var slot7 = []*Instruction{&IXOR_C7, &IADD_C7}
var slot8 = []*Instruction{&IXOR_C8, &IADD_C8}
var slot9 = []*Instruction{&IXOR_C9, &IADD_C9}
var slot10 = []*Instruction{&IMUL_RCP}
// SuperScalarInstruction superscalar program is built with superscalar instructions
type SuperScalarInstruction struct {
Opcode byte
Dst int
Src int
Mod byte
Imm32 uint32
Imm64 uint64
OpGroup int
OpGroupPar int
GroupParIsSource int
ins *Instruction
CanReuse bool
}
func (sins *SuperScalarInstruction) FixSrcReg() {
if sins.Src == 0xff {
sins.Src = sins.Dst
}
}
func (sins *SuperScalarInstruction) Reset() {
sins.Opcode = 99
sins.Src = 0xff
sins.Dst = 0xff
sins.CanReuse = false
sins.GroupParIsSource = 0
}
func create(sins *SuperScalarInstruction, ins *Instruction, gen *Blake2Generator) {
sins.Reset()
sins.ins = ins
sins.OpGroupPar = -1
sins.Opcode = ins.Opcode
switch ins.Opcode {
case S_ISUB_R:
sins.Mod = 0
sins.Imm32 = 0
sins.OpGroup = S_IADD_RS
sins.GroupParIsSource = 1
case S_IXOR_R:
sins.Mod = 0
sins.Imm32 = 0
sins.OpGroup = S_IXOR_R
sins.GroupParIsSource = 1
case S_IADD_RS:
sins.Mod = gen.GetByte()
// set modshift on Imm32
sins.Imm32 = uint32((sins.Mod >> 2) % 4) // bits 2-3
//sins.Imm32 = 0
sins.OpGroup = S_IADD_RS
sins.GroupParIsSource = 1
case S_IMUL_R:
sins.Mod = 0
sins.Imm32 = 0
sins.OpGroup = S_IMUL_R
sins.GroupParIsSource = 1
case S_IROR_C:
sins.Mod = 0
for sins.Imm32 = 0; sins.Imm32 == 0; {
sins.Imm32 = uint32(gen.GetByte() & 63)
}
sins.OpGroup = S_IROR_C
sins.OpGroupPar = -1
case S_IADD_C7, S_IADD_C8, S_IADD_C9:
sins.Mod = 0
sins.Imm32 = gen.GetUint32()
sins.OpGroup = S_IADD_C7
sins.OpGroupPar = -1
case S_IXOR_C7, S_IXOR_C8, S_IXOR_C9:
sins.Mod = 0
sins.Imm32 = gen.GetUint32()
sins.OpGroup = S_IXOR_C7
sins.OpGroupPar = -1
case S_IMULH_R:
sins.CanReuse = true
sins.Mod = 0
sins.Imm32 = 0
sins.OpGroup = S_IMULH_R
sins.OpGroupPar = int(gen.GetUint32())
case S_ISMULH_R:
sins.CanReuse = true
sins.Mod = 0
sins.Imm32 = 0
sins.OpGroup = S_ISMULH_R
sins.OpGroupPar = int(gen.GetUint32())
case S_IMUL_RCP:
sins.Mod = 0
for {
sins.Imm32 = gen.GetUint32()
if (sins.Imm32&sins.Imm32 - 1) != 0 {
break
}
}
sins.Imm64 = randomx_reciprocal(sins.Imm32)
sins.OpGroup = S_IMUL_RCP
default:
panic("should not occur")
}
}
func CreateSuperScalarInstruction(sins *SuperScalarInstruction, gen *Blake2Generator, instruction_len int, decoder_type int, islast, isfirst bool) {
switch instruction_len {
case 3:
if islast {
create(sins, slot3L[gen.GetByte()&3], gen)
} else {
create(sins, slot3[gen.GetByte()&1], gen)
}
case 4:
//if this is the 4-4-4-4 buffer, issue multiplications as the first 3 instructions
if decoder_type == int(Decoder4444) && !islast {
create(sins, &IMUL_R, gen)
} else {
create(sins, slot4[gen.GetByte()&1], gen)
}
case 7:
create(sins, slot7[gen.GetByte()&1], gen)
case 8:
create(sins, slot8[gen.GetByte()&1], gen)
case 9:
create(sins, slot9[gen.GetByte()&1], gen)
case 10:
create(sins, slot10[0], gen)
default:
panic("should not be possible")
}
}
type SuperScalarProgram []SuperScalarInstruction
func (p SuperScalarProgram) setAddressRegister(addressRegister int) {
@ -460,7 +311,7 @@ func (p SuperScalarProgram) Program() []SuperScalarInstruction {
return p[1:]
}
func Build_SuperScalar_Program(gen *Blake2Generator) SuperScalarProgram {
func BuildSuperScalarProgram(gen *blake2.Generator) SuperScalarProgram {
cycle := 0
depcycle := 0
//retire_cycle := 0
@ -474,12 +325,7 @@ func Build_SuperScalar_Program(gen *Blake2Generator) SuperScalarProgram {
code_size := 0
program := make(SuperScalarProgram, 1, 512)
preAllocatedRegisters := gen.allocRegIndex[:]
registers := gen.allocRegisters[:]
for i := range registers {
registers[i] = Register{}
}
var registers [8]Register
sins := &SuperScalarInstruction{}
sins.ins = &Instruction{Opcode: S_NOP}
@ -508,7 +354,7 @@ func Build_SuperScalar_Program(gen *Blake2Generator) SuperScalarProgram {
if ports_saturated || program_size >= SuperscalarMaxSize {
break
}
CreateSuperScalarInstruction(sins, gen, Decoder_To_Instruction_Length[int(decoder)][buffer_index], int(decoder), len(Decoder_To_Instruction_Length[decoder]) == (buffer_index+1), buffer_index == 0)
CreateSuperScalarInstruction(sins, gen, decoderToInstructionSize[decoder][buffer_index], decoder, len(decoderToInstructionSize[decoder]) == (buffer_index+1), buffer_index == 0)
macro_op_index = 0
}
@ -529,7 +375,7 @@ func Build_SuperScalar_Program(gen *Blake2Generator) SuperScalarProgram {
if macro_op_index == sins.ins.SrcOP { // FIXME
forward := 0
for ; forward < LOOK_FORWARD_CYCLES && !sins.SelectSource(preAllocatedRegisters, scheduleCycle, registers, gen); forward++ {
for ; forward < LOOK_FORWARD_CYCLES && !sins.SelectSource(scheduleCycle, &registers, gen); forward++ {
scheduleCycle++
cycle++
}
@ -547,7 +393,7 @@ func Build_SuperScalar_Program(gen *Blake2Generator) SuperScalarProgram {
if macro_op_index == sins.ins.DstOP { // FIXME
forward := 0
for ; forward < LOOK_FORWARD_CYCLES && !sins.SelectDestination(preAllocatedRegisters, scheduleCycle, throwAwayCount > 0, registers, gen); forward++ {
for ; forward < LOOK_FORWARD_CYCLES && !sins.SelectDestination(scheduleCycle, throwAwayCount > 0, &registers, gen); forward++ {
scheduleCycle++
cycle++
}
@ -708,24 +554,24 @@ const RegisterNeedsDisplacement = 5
// RegisterNeedsSib x86 r12 register
const RegisterNeedsSib = 4
func (sins *SuperScalarInstruction) SelectSource(preAllocatedAvailableRegisters []int, cycle int, Registers []Register, gen *Blake2Generator) bool {
available_registers := preAllocatedAvailableRegisters[:0]
func (sins *SuperScalarInstruction) SelectSource(cycle int, registers *[8]Register, gen *blake2.Generator) bool {
availableRegisters := make([]int, 0, 8)
for i := range Registers {
if Registers[i].Latency <= cycle {
available_registers = append(available_registers, i)
for i := range registers {
if registers[i].Latency <= cycle {
availableRegisters = append(availableRegisters, i)
}
}
if len(available_registers) == 2 && sins.Opcode == S_IADD_RS {
if available_registers[0] == RegisterNeedsDisplacement || available_registers[1] == RegisterNeedsDisplacement {
if len(availableRegisters) == 2 && sins.Opcode == S_IADD_RS {
if availableRegisters[0] == RegisterNeedsDisplacement || availableRegisters[1] == RegisterNeedsDisplacement {
sins.Src = RegisterNeedsDisplacement
sins.OpGroupPar = sins.Src
return true
}
}
if selectRegister(available_registers, gen, &sins.Src) {
if selectRegister(availableRegisters, gen, &sins.Src) {
if sins.GroupParIsSource == 0 {
@ -737,35 +583,35 @@ func (sins *SuperScalarInstruction) SelectSource(preAllocatedAvailableRegisters
return false
}
func (sins *SuperScalarInstruction) SelectDestination(preAllocatedAvailableRegisters []int, cycle int, allowChainedMul bool, Registers []Register, gen *Blake2Generator) bool {
preAllocatedAvailableRegisters = preAllocatedAvailableRegisters[:0]
func (sins *SuperScalarInstruction) SelectDestination(cycle int, allowChainedMul bool, Registers *[8]Register, gen *blake2.Generator) bool {
var availableRegisters = make([]int, 0, 8)
for i := range Registers {
if Registers[i].Latency <= cycle && (sins.CanReuse || i != sins.Src) &&
(allowChainedMul || sins.OpGroup != S_IMUL_R || Registers[i].LastOpGroup != S_IMUL_R) &&
(Registers[i].LastOpGroup != sins.OpGroup || Registers[i].LastOpPar != sins.OpGroupPar) &&
(sins.Opcode != S_IADD_RS || i != RegisterNeedsDisplacement) {
preAllocatedAvailableRegisters = append(preAllocatedAvailableRegisters, i)
availableRegisters = append(availableRegisters, i)
}
}
return selectRegister(preAllocatedAvailableRegisters, gen, &sins.Dst)
return selectRegister(availableRegisters, gen, &sins.Dst)
}
func selectRegister(available_registers []int, gen *Blake2Generator, reg *int) bool {
func selectRegister(availableRegisters []int, gen *blake2.Generator, reg *int) bool {
index := 0
if len(available_registers) == 0 {
if len(availableRegisters) == 0 {
return false
}
if len(available_registers) > 1 {
if len(availableRegisters) > 1 {
tmp := gen.GetUint32()
index = int(tmp % uint32(len(available_registers)))
index = int(tmp % uint32(len(availableRegisters)))
} else {
index = 0
}
*reg = available_registers[index]
*reg = availableRegisters[index]
return true
}
@ -799,26 +645,3 @@ func executeSuperscalar(p []SuperScalarInstruction, r *RegisterLine) {
}
}
func smulh(a, b int64) uint64 {
hi_, _ := bits.Mul64(uint64(a), uint64(b))
t1 := (a >> 63) & b
t2 := (b >> 63) & a
return uint64(int64(hi_) - t1 - t2)
}
func randomx_reciprocal(divisor uint32) uint64 {
const p2exp63 = uint64(1) << 63
quotient := p2exp63 / uint64(divisor)
remainder := p2exp63 % uint64(divisor)
shift := bits.Len32(divisor)
return (quotient << shift) + ((remainder << shift) / uint64(divisor))
}
func signExtend2sCompl(x uint32) uint64 {
return uint64(int64(int32(x)))
}

superscalar_instruction.go (new file, 157 lines)
View file

@ -0,0 +1,157 @@
package randomx
import "git.gammaspectra.live/P2Pool/go-randomx/v3/blake2"
// SuperScalarInstruction superscalar program is built with superscalar instructions
type SuperScalarInstruction struct {
Opcode byte
Dst int
Src int
Mod byte
Imm32 uint32
Imm64 uint64
OpGroup int
OpGroupPar int
GroupParIsSource int
ins *Instruction
CanReuse bool
}
func (sins *SuperScalarInstruction) FixSrcReg() {
if sins.Src == 0xff {
sins.Src = sins.Dst
}
}
func (sins *SuperScalarInstruction) Reset() {
sins.Opcode = 99
sins.Src = 0xff
sins.Dst = 0xff
sins.CanReuse = false
sins.GroupParIsSource = 0
}
func createSuperScalarInstruction(sins *SuperScalarInstruction, ins *Instruction, gen *blake2.Generator) {
sins.Reset()
sins.ins = ins
sins.OpGroupPar = -1
sins.Opcode = ins.Opcode
switch ins.Opcode {
case S_ISUB_R:
sins.Mod = 0
sins.Imm32 = 0
sins.OpGroup = S_IADD_RS
sins.GroupParIsSource = 1
case S_IXOR_R:
sins.Mod = 0
sins.Imm32 = 0
sins.OpGroup = S_IXOR_R
sins.GroupParIsSource = 1
case S_IADD_RS:
sins.Mod = gen.GetByte()
// set modshift on Imm32
sins.Imm32 = uint32((sins.Mod >> 2) % 4) // bits 2-3
//sins.Imm32 = 0
sins.OpGroup = S_IADD_RS
sins.GroupParIsSource = 1
case S_IMUL_R:
sins.Mod = 0
sins.Imm32 = 0
sins.OpGroup = S_IMUL_R
sins.GroupParIsSource = 1
case S_IROR_C:
sins.Mod = 0
for sins.Imm32 = 0; sins.Imm32 == 0; {
sins.Imm32 = uint32(gen.GetByte() & 63)
}
sins.OpGroup = S_IROR_C
sins.OpGroupPar = -1
case S_IADD_C7, S_IADD_C8, S_IADD_C9:
sins.Mod = 0
sins.Imm32 = gen.GetUint32()
sins.OpGroup = S_IADD_C7
sins.OpGroupPar = -1
case S_IXOR_C7, S_IXOR_C8, S_IXOR_C9:
sins.Mod = 0
sins.Imm32 = gen.GetUint32()
sins.OpGroup = S_IXOR_C7
sins.OpGroupPar = -1
case S_IMULH_R:
sins.CanReuse = true
sins.Mod = 0
sins.Imm32 = 0
sins.OpGroup = S_IMULH_R
sins.OpGroupPar = int(gen.GetUint32())
case S_ISMULH_R:
sins.CanReuse = true
sins.Mod = 0
sins.Imm32 = 0
sins.OpGroup = S_ISMULH_R
sins.OpGroupPar = int(gen.GetUint32())
case S_IMUL_RCP:
sins.Mod = 0
for {
sins.Imm32 = gen.GetUint32()
if (sins.Imm32&sins.Imm32 - 1) != 0 {
break
}
}
sins.Imm64 = reciprocal(sins.Imm32)
sins.OpGroup = S_IMUL_RCP
default:
panic("should not occur")
}
}
var slot3 = []*Instruction{&ISUB_R, &IXOR_R} // 3 length instruction will be filled with these
var slot3L = []*Instruction{&ISUB_R, &IXOR_R, &IMULH_R, &ISMULH_R}
var slot4 = []*Instruction{&IROR_C, &IADD_RS}
var slot7 = []*Instruction{&IXOR_C7, &IADD_C7}
var slot8 = []*Instruction{&IXOR_C8, &IADD_C8}
var slot9 = []*Instruction{&IXOR_C9, &IADD_C9}
var slot10 = []*Instruction{&IMUL_RCP}
func CreateSuperScalarInstruction(sins *SuperScalarInstruction, gen *blake2.Generator, instructionLen int, decoderType DecoderType, last, first bool) {
switch instructionLen {
case 3:
if last {
createSuperScalarInstruction(sins, slot3L[gen.GetByte()&3], gen)
} else {
createSuperScalarInstruction(sins, slot3[gen.GetByte()&1], gen)
}
case 4:
//if this is the 4-4-4-4 buffer, issue multiplications as the first 3 instructions
if decoderType == Decoder4444 && !last {
createSuperScalarInstruction(sins, &IMUL_R, gen)
} else {
createSuperScalarInstruction(sins, slot4[gen.GetByte()&1], gen)
}
case 7:
createSuperScalarInstruction(sins, slot7[gen.GetByte()&1], gen)
case 8:
createSuperScalarInstruction(sins, slot8[gen.GetByte()&1], gen)
case 9:
createSuperScalarInstruction(sins, slot9[gen.GetByte()&1], gen)
case 10:
createSuperScalarInstruction(sins, slot10[0], gen)
default:
panic("should not be possible")
}
}

vm.go (138 lines changed)
View file

@ -30,7 +30,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package randomx
import (
"git.gammaspectra.live/P2Pool/go-randomx/v2/aes"
"git.gammaspectra.live/P2Pool/go-randomx/v3/aes"
"math"
"runtime"
"unsafe"
@ -45,16 +45,30 @@ type REG struct {
type VM struct {
ScratchPad ScratchPad
Dataset Randomx_Dataset
Dataset Dataset
JITProgram VMProgramFunc
program ByteCode
jitProgram VMProgramFunc
}
// Run calculate hash based on input
func NewVM(dataset Dataset) *VM {
vm := &VM{
Dataset: dataset,
}
if dataset.Cache().HasJIT() {
vm.jitProgram = mapProgram(nil, int(RandomXCodeSize))
if dataset.Flags()&RANDOMX_FLAG_SECURE == 0 {
mapProgramRWX(vm.jitProgram)
}
}
return vm
}
// run calculate hash based on input. Not thread safe.
// Warning: Underlying callers will run float64 SetRoundingMode directly
// It is the caller's responsibility to set and restore the mode to IEEE 754 roundTiesToEven between full executions
// Additionally, runtime.LockOSThread and defer runtime.UnlockOSThread is recommended to prevent other goroutines sharing these changes
func (vm *VM) Run(inputHash [64]byte, roundingMode uint8) (reg RegisterFile) {
func (vm *VM) run(inputHash [64]byte, roundingMode uint8) (reg RegisterFile) {
reg.FPRC = roundingMode
@ -64,49 +78,64 @@ func (vm *VM) Run(inputHash [64]byte, roundingMode uint8) (reg RegisterFile) {
entropy := (*[16]uint64)(unsafe.Pointer(&buffer))
prog := buffer[len(entropy)*8:]
// do more initialization before we run
for i := range entropy[:8] {
reg.A[i/2][i%2] = SmallPositiveFloatBits(entropy[i])
}
var mem MemoryRegisters
// memory registers
var ma, mx uint32
mem.ma = entropy[8] & CacheLineAlignMask
mem.mx = entropy[10]
ma = uint32(entropy[8] & CacheLineAlignMask)
mx = uint32(entropy[10])
addressRegisters := entropy[12]
var readReg [4]uint64
for i := range readReg {
readReg[i] = uint64(i*2) + (addressRegisters & 1)
addressRegisters >>= 1
}
datasetOffset := (entropy[13] % (DATASETEXTRAITEMS + 1)) * CacheLineSize
datasetOffset := (entropy[13] % (DatasetExtraItems + 1)) * CacheLineSize
eMask := [2]uint64{EMask(entropy[14]), EMask(entropy[15])}
eMask := [2]uint64{ExponentMask(entropy[14]), ExponentMask(entropy[15])}
byteCode := CompileProgramToByteCode(prog)
prog := buffer[len(entropy)*8:]
CompileProgramToByteCode(prog, &vm.program)
spAddr0 := mem.mx
spAddr1 := mem.ma
datasetMemory := vm.Dataset.Memory()
var rlCache RegisterLine
var jitProgram VMProgramFunc
if vm.JITProgram != nil {
if vm.Dataset.Flags()&RANDOMX_FLAG_SECURE > 0 {
mapProgramRW(vm.JITProgram)
byteCode.generateCode(vm.JITProgram)
mapProgramRX(vm.JITProgram)
if vm.jitProgram != nil {
if datasetMemory == nil {
if vm.Dataset.Flags()&RANDOMX_FLAG_SECURE > 0 {
mapProgramRW(vm.jitProgram)
jitProgram = vm.program.generateCode(vm.jitProgram, nil)
mapProgramRX(vm.jitProgram)
} else {
jitProgram = vm.program.generateCode(vm.jitProgram, nil)
}
} else {
byteCode.generateCode(vm.JITProgram)
// full mode and we have JIT
if vm.Dataset.Flags()&RANDOMX_FLAG_SECURE > 0 {
mapProgramRW(vm.jitProgram)
jitProgram = vm.program.generateCode(vm.jitProgram, &readReg)
mapProgramRX(vm.jitProgram)
} else {
jitProgram = vm.program.generateCode(vm.jitProgram, &readReg)
}
vm.jitProgram.ExecuteFull(&reg, &vm.ScratchPad, &datasetMemory[datasetOffset/CacheLineSize], RANDOMX_PROGRAM_ITERATIONS, ma, mx, eMask)
return reg
}
}
spAddr0 := uint64(mx)
spAddr1 := uint64(ma)
for ic := 0; ic < RANDOMX_PROGRAM_ITERATIONS; ic++ {
spMix := reg.R[readReg[0]] ^ reg.R[readReg[1]]
@ -131,22 +160,23 @@ func (vm *VM) Run(inputHash [64]byte, roundingMode uint8) (reg RegisterFile) {
reg.E[i][HIGH] = MaskRegisterExponentMantissa(reg.E[i][HIGH], eMask[HIGH])
}
// Run the actual bytecode
if vm.JITProgram != nil {
vm.JITProgram.Execute(&reg, &vm.ScratchPad, eMask)
// run the actual bytecode
if jitProgram != nil {
// light mode
jitProgram.Execute(&reg, &vm.ScratchPad, eMask)
} else {
byteCode.Execute(&reg, &vm.ScratchPad, eMask)
vm.program.Execute(&reg, &vm.ScratchPad, eMask)
}
mem.mx ^= reg.R[readReg[2]] ^ reg.R[readReg[3]]
mem.mx &= CacheLineAlignMask
mx ^= uint32(reg.R[readReg[2]] ^ reg.R[readReg[3]])
mx &= uint32(CacheLineAlignMask)
vm.Dataset.PrefetchDataset(datasetOffset + mem.mx)
// execute diffuser superscalar program to get dataset 64 bytes
vm.Dataset.ReadDataset(datasetOffset+mem.ma, &reg.R, &rlCache)
vm.Dataset.PrefetchDataset(datasetOffset + uint64(mx))
// execute / load output from diffuser superscalar program to get dataset 64 bytes
vm.Dataset.ReadDataset(datasetOffset+uint64(ma), &reg.R)
// swap the elements
mem.mx, mem.ma = mem.ma, mem.mx
mx, ma = ma, mx
for i := uint64(0); i < RegistersCount; i++ {
vm.ScratchPad.Store64(uint32(spAddr1+8*i), reg.R[i])
@ -165,17 +195,17 @@ func (vm *VM) Run(inputHash [64]byte, roundingMode uint8) (reg RegisterFile) {
}
runtime.KeepAlive(buffer)
return reg
}
func (vm *VM) InitScratchpad(seed *[64]byte) {
func (vm *VM) initScratchpad(seed *[64]byte) {
vm.ScratchPad.Init(seed)
}
func (vm *VM) RunLoops(tempHash [64]byte) RegisterFile {
hash512, _ := blake2b.New512(nil)
func (vm *VM) runLoops(tempHash [64]byte) RegisterFile {
if lockThreadDueToRoundingMode {
// Lock thread due to rounding mode flags
runtime.LockOSThread()
@ -185,20 +215,16 @@ func (vm *VM) RunLoops(tempHash [64]byte) RegisterFile {
roundingMode := uint8(0)
for chain := 0; chain < RANDOMX_PROGRAM_COUNT-1; chain++ {
reg := vm.Run(tempHash, roundingMode)
reg := vm.run(tempHash, roundingMode)
roundingMode = reg.FPRC
hash512.Reset()
// write R, F, E, A registers
hash512.Write(reg.Memory()[:])
tempHash = blake2b.Sum512(reg.Memory()[:])
runtime.KeepAlive(reg)
hash512.Sum(tempHash[:0])
}
// final loop executes here
reg := vm.Run(tempHash, roundingMode)
reg := vm.run(tempHash, roundingMode)
// always force a restore
reg.FPRC = 0xff
@ -208,33 +234,29 @@ func (vm *VM) RunLoops(tempHash [64]byte) RegisterFile {
return reg
}
// CalculateHash Not thread safe.
func (vm *VM) CalculateHash(input []byte, output *[32]byte) {
tempHash := blake2b.Sum512(input)
vm.InitScratchpad(&tempHash)
vm.initScratchpad(&tempHash)
reg := vm.RunLoops(tempHash)
reg := vm.runLoops(tempHash)
// now hash the scratch pad as it will act as register A
aes.HashAes1Rx4(vm.ScratchPad[:], &tempHash)
hash256, _ := blake2b.New256(nil)
regMem := reg.Memory()
// write hash onto register A
copy(regMem[RegisterFileSize-RegistersCountFloat*2*8:], tempHash[:])
hash256.Reset()
// write R, F, E registers
hash256.Write(reg.Memory()[:RegisterFileSize-RegistersCountFloat*2*8])
// write R, F, E, A registers
*output = blake2b.Sum256(regMem[:])
runtime.KeepAlive(reg)
// write register A
hash256.Write(tempHash[:])
hash256.Sum(output[:0])
}
func (vm *VM) Close() error {
if vm.JITProgram != nil {
return vm.JITProgram.Close()
if vm.jitProgram != nil {
return vm.jitProgram.Close()
}
return nil
}

View file

@ -11,6 +11,114 @@ import (
//go:noescape
func vm_run(rf *RegisterFile, pad *ScratchPad, eMask [2]uint64, jmp uintptr)
//go:noescape
func vm_run_full(rf *RegisterFile, pad *ScratchPad, dataset *RegisterLine, iterations, memoryRegisters uint64, eMask [2]uint64, jmp uintptr)
/*
#define RANDOMX_DATASET_BASE_SIZE 2147483648
#define RANDOMX_DATASET_BASE_MASK (RANDOMX_DATASET_BASE_SIZE-64)
mov ecx, ebp ;# ecx = ma
;#and ecx, RANDOMX_DATASET_BASE_MASK
and ecx, 2147483584
xor r8, qword ptr [rdi+rcx]
ror rbp, 32 ;# swap "ma" and "mx"
xor rbp, rax ;# modify "mx"
mov edx, ebp ;# edx = mx
;#and edx, RANDOMX_DATASET_BASE_MASK
and edx, 2147483584
prefetchnta byte ptr [rdi+rdx]
xor r9, qword ptr [rdi+rcx+8]
xor r10, qword ptr [rdi+rcx+16]
xor r11, qword ptr [rdi+rcx+24]
xor r12, qword ptr [rdi+rcx+32]
xor r13, qword ptr [rdi+rcx+40]
xor r14, qword ptr [rdi+rcx+48]
xor r15, qword ptr [rdi+rcx+56]
*/
var programReadDataset = []byte{0x89, 0xE9, 0x81, 0xE1, 0xC0, 0xFF, 0xFF, 0x7F, 0x4C, 0x33, 0x04, 0x0F, 0x48, 0xC1, 0xCD, 0x20, 0x48, 0x31, 0xC5, 0x89, 0xEA, 0x81, 0xE2, 0xC0, 0xFF, 0xFF, 0x7F, 0x0F, 0x18, 0x04, 0x17, 0x4C, 0x33, 0x4C, 0x0F, 0x08, 0x4C, 0x33, 0x54, 0x0F, 0x10, 0x4C, 0x33, 0x5C, 0x0F, 0x18, 0x4C, 0x33, 0x64, 0x0F, 0x20, 0x4C, 0x33, 0x6C, 0x0F, 0x28, 0x4C, 0x33, 0x74, 0x0F, 0x30, 0x4C, 0x33, 0x7C, 0x0F, 0x38}
/*
lea rcx, [rsi+rax]
push rcx
xor r8, qword ptr [rcx+0]
xor r9, qword ptr [rcx+8]
xor r10, qword ptr [rcx+16]
xor r11, qword ptr [rcx+24]
xor r12, qword ptr [rcx+32]
xor r13, qword ptr [rcx+40]
xor r14, qword ptr [rcx+48]
xor r15, qword ptr [rcx+56]
lea rcx, [rsi+rdx]
push rcx
cvtdq2pd xmm0, qword ptr [rcx+0]
cvtdq2pd xmm1, qword ptr [rcx+8]
cvtdq2pd xmm2, qword ptr [rcx+16]
cvtdq2pd xmm3, qword ptr [rcx+24]
cvtdq2pd xmm4, qword ptr [rcx+32]
cvtdq2pd xmm5, qword ptr [rcx+40]
cvtdq2pd xmm6, qword ptr [rcx+48]
cvtdq2pd xmm7, qword ptr [rcx+56]
andps xmm4, xmm13
andps xmm5, xmm13
andps xmm6, xmm13
andps xmm7, xmm13
orps xmm4, xmm14
orps xmm5, xmm14
orps xmm6, xmm14
orps xmm7, xmm14
*/
var programLoopLoad = []byte{0x48, 0x8D, 0x0C, 0x06, 0x51, 0x4C, 0x33, 0x01, 0x4C, 0x33, 0x49, 0x08, 0x4C, 0x33, 0x51, 0x10, 0x4C, 0x33, 0x59, 0x18, 0x4C, 0x33, 0x61, 0x20, 0x4C, 0x33, 0x69, 0x28, 0x4C, 0x33, 0x71, 0x30, 0x4C, 0x33, 0x79, 0x38, 0x48, 0x8D, 0x0C, 0x16, 0x51, 0xF3, 0x0F, 0xE6, 0x01, 0xF3, 0x0F, 0xE6, 0x49, 0x08, 0xF3, 0x0F, 0xE6, 0x51, 0x10, 0xF3, 0x0F, 0xE6, 0x59, 0x18, 0xF3, 0x0F, 0xE6, 0x61, 0x20, 0xF3, 0x0F, 0xE6, 0x69, 0x28, 0xF3, 0x0F, 0xE6, 0x71, 0x30, 0xF3, 0x0F, 0xE6, 0x79, 0x38, 0x41, 0x0F, 0x54, 0xE5, 0x41, 0x0F, 0x54, 0xED, 0x41, 0x0F, 0x54, 0xF5, 0x41, 0x0F, 0x54, 0xFD, 0x41, 0x0F, 0x56, 0xE6, 0x41, 0x0F, 0x56, 0xEE, 0x41, 0x0F, 0x56, 0xF6, 0x41, 0x0F, 0x56, 0xFE}
/*
pop rcx
mov qword ptr [rcx+0], r8
mov qword ptr [rcx+8], r9
mov qword ptr [rcx+16], r10
mov qword ptr [rcx+24], r11
mov qword ptr [rcx+32], r12
mov qword ptr [rcx+40], r13
mov qword ptr [rcx+48], r14
mov qword ptr [rcx+56], r15
pop rcx
xorpd xmm0, xmm4
xorpd xmm1, xmm5
xorpd xmm2, xmm6
xorpd xmm3, xmm7
movupd xmmword ptr [rcx+0], xmm0
movupd xmmword ptr [rcx+16], xmm1
movupd xmmword ptr [rcx+32], xmm2
movupd xmmword ptr [rcx+48], xmm3
;#movapd xmmword ptr [rcx+0], xmm0
;#movapd xmmword ptr [rcx+16], xmm1
;#movapd xmmword ptr [rcx+32], xmm2
;#movapd xmmword ptr [rcx+48], xmm3
*/
//var programLoopStore = []byte{0x59, 0x4C, 0x89, 0x01, 0x4C, 0x89, 0x49, 0x08, 0x4C, 0x89, 0x51, 0x10, 0x4C, 0x89, 0x59, 0x18, 0x4C, 0x89, 0x61, 0x20, 0x4C, 0x89, 0x69, 0x28, 0x4C, 0x89, 0x71, 0x30, 0x4C, 0x89, 0x79, 0x38, 0x59, 0x66, 0x0F, 0x57, 0xC4, 0x66, 0x0F, 0x57, 0xCD, 0x66, 0x0F, 0x57, 0xD6, 0x66, 0x0F, 0x57, 0xDF, 0x66, 0x0F, 0x29, 0x01, 0x66, 0x0F, 0x29, 0x49, 0x10, 0x66, 0x0F, 0x29, 0x51, 0x20, 0x66, 0x0F, 0x29, 0x59, 0x30}
var programLoopStore = []byte{0x59, 0x4C, 0x89, 0x01, 0x4C, 0x89, 0x49, 0x08, 0x4C, 0x89, 0x51, 0x10, 0x4C, 0x89, 0x59, 0x18, 0x4C, 0x89, 0x61, 0x20, 0x4C, 0x89, 0x69, 0x28, 0x4C, 0x89, 0x71, 0x30, 0x4C, 0x89, 0x79, 0x38, 0x59, 0x66, 0x0F, 0x57, 0xC4, 0x66, 0x0F, 0x57, 0xCD, 0x66, 0x0F, 0x57, 0xD6, 0x66, 0x0F, 0x57, 0xDF, 0x66, 0x0F, 0x11, 0x01, 0x66, 0x0F, 0x11, 0x49, 0x10, 0x66, 0x0F, 0x11, 0x51, 0x20, 0x66, 0x0F, 0x11, 0x59, 0x30}
/*
#define RANDOMX_SCRATCHPAD_L3 2097152
#define RANDOMX_SCRATCHPAD_MASK (RANDOMX_SCRATCHPAD_L3-64)
mov rdx, rax
;#and eax, RANDOMX_SCRATCHPAD_MASK
and eax, 2097088
ror rdx, 32
;#and edx, RANDOMX_SCRATCHPAD_MASK
and edx, 2097088
*/
var programCalculateSpAddrs = []byte{0x48, 0x89, 0xC2, 0x25, 0xC0, 0xFF, 0x1F, 0x00, 0x48, 0xC1, 0xCA, 0x20, 0x81, 0xE2, 0xC0, 0xFF, 0x1F, 0x00}
func (f VMProgramFunc) ExecuteFull(rf *RegisterFile, pad *ScratchPad, dataset *RegisterLine, iterations uint64, ma, mx uint32, eMask [2]uint64) {
if f == nil {
panic("program is nil")
}
jmpPtr := uintptr(unsafe.Pointer(unsafe.SliceData(f)))
vm_run_full(rf, pad, dataset, iterations, (uint64(ma)<<32)|uint64(mx), eMask, jmpPtr)
}
func (f VMProgramFunc) Execute(rf *RegisterFile, pad *ScratchPad, eMask [2]uint64) {
if f == nil {
panic("program is nil")
@ -20,15 +128,22 @@ func (f VMProgramFunc) Execute(rf *RegisterFile, pad *ScratchPad, eMask [2]uint6
vm_run(rf, pad, eMask, jmpPtr)
}
func (c *ByteCode) generateCode(program []byte) {
func (c *ByteCode) generateCode(program []byte, readReg *[4]uint64) []byte {
program = program[:0]
isFullMode := readReg != nil
if isFullMode {
program = append(program, programCalculateSpAddrs...)
// prologue
program = append(program, programLoopLoad...)
}
var instructionOffsets [RANDOMX_PROGRAM_SIZE]int32
var codePos int32
for ix := range c {
instructionOffsets[ix] = codePos
curLen := len(program)
instructionOffsets[ix] = int32(len(program))
instr := &c[ix]
switch instr.Opcode {
@ -254,10 +369,10 @@ func (c *ByteCode) generateCode(program []byte) {
reg := instr.Dst
target := instr.jumpTarget() + 1
jmpOffset := instructionOffsets[target] - (codePos + 16)
jmpOffset := instructionOffsets[target] - (int32(len(program)) + 16)
if BranchesWithin32B {
branchBegin := uint32(codePos + 7)
branchBegin := uint32(int32(len(program)) + 7)
branchEnd := branchBegin
if jmpOffset >= -128 {
branchEnd += 9
@ -305,8 +420,51 @@ func (c *ByteCode) generateCode(program []byte) {
case VM_NOP:
program = append(program, NOP1...)
}
codePos += int32(len(program) - curLen)
}
if isFullMode {
// end of prologue
program = append(program, REX_MOV_RR...)
program = append(program, 0xc0+byte(readReg[2]))
program = append(program, REX_XOR_EAX...)
program = append(program, 0xc0+byte(readReg[3]))
// read dataset
program = append(program, programReadDataset...)
// epilogue
program = append(program, REX_MOV_RR64...)
program = append(program, 0xc0+byte(readReg[0]))
program = append(program, REX_XOR_RAX_R64...)
program = append(program, 0xc0+byte(readReg[1]))
//todo: prefetch scratchpad
program = append(program, programLoopStore...)
if BranchesWithin32B {
branchBegin := uint32(len(program))
branchEnd := branchBegin + 9
// If the jump crosses or touches 32-byte boundary, align it
if (branchBegin ^ branchEnd) >= 32 {
alignmentSize := 32 - (branchBegin & 31)
if alignmentSize > 8 {
program = append(program, NOPX[alignmentSize-9][:alignmentSize-8]...)
alignmentSize = 8
}
program = append(program, NOPX[alignmentSize-1][:alignmentSize]...)
}
}
program = append(program, SUB_EBX...)
program = append(program, JNZ...)
program = binary.LittleEndian.AppendUint32(program, uint32(-len(program)-4))
//exit otherwise
}
program = append(program, RET)
return program
}

View file

@ -34,8 +34,6 @@ TEXT ·vm_run(SB),$8-40
VMOVUPD (28*8)(AX), X10
VMOVUPD (30*8)(AX), X11
//TODO: rest of init
// mantissa mask
//VMOVQ $0x00ffffffffffffff, $0x00ffffffffffffff, X13
MOVQ $0x00ffffffffffffff, AX
@ -89,3 +87,107 @@ TEXT ·vm_run(SB),$8-40
// a0-a3 are constant, no need to move
RET
#define RANDOMX_SCRATCHPAD_L3 2097152
#define RANDOMX_SCRATCHPAD_MASK (RANDOMX_SCRATCHPAD_L3-64)
TEXT ·vm_run_full(SB),$32-64
// move register file to registers
MOVQ rf+0(FP), AX
PREFETCHNTA 0(AX)
// r0-r7
MOVQ (0*8)(AX), R8
MOVQ (1*8)(AX), R9
MOVQ (2*8)(AX), R10
MOVQ (3*8)(AX), R11
MOVQ (4*8)(AX), R12
MOVQ (5*8)(AX), R13
MOVQ (6*8)(AX), R14
MOVQ (7*8)(AX), R15
// f0-f3
VMOVUPD (8*8)(AX), X0
VMOVUPD (10*8)(AX), X1
VMOVUPD (12*8)(AX), X2
VMOVUPD (14*8)(AX), X3
// e0-e3
VMOVUPD (16*8)(AX), X4
VMOVUPD (18*8)(AX), X5
VMOVUPD (20*8)(AX), X6
VMOVUPD (22*8)(AX), X7
// load constants a0-a3
VMOVUPD (24*8)(AX), X8
VMOVUPD (26*8)(AX), X9
VMOVUPD (28*8)(AX), X10
VMOVUPD (30*8)(AX), X11
//TODO: rest of init
// mantissa mask
//VMOVQ $0x00ffffffffffffff, $0x00ffffffffffffff, X13
MOVQ $0x00ffffffffffffff, AX
VMOVQ AX, X13
VPBROADCASTQ X13, X13
// eMask
VMOVDQU64 eMask+40(FP), X14
// scale mask
//VMOVQ $0x80F0000000000000, $0x80F0000000000000, X15
MOVQ $0x80F0000000000000, AX
VMOVQ AX, X15
VPBROADCASTQ X15, X15
// scratchpad pointer on rsi
MOVQ pad+8(FP), SI
// dataset pointer on rdi
MOVQ dataset+16(FP), DI
// iterations on rbx
MOVQ iterations+24(FP), BX
// ma and mx on rbp TODO: change this
MOVQ memoryRegisters+32(FP), BP
// do ma/mx calcs
MOVQ BP, AX
RORQ $32, BP
//AX = spAddr0
//DX = spAddr1
// JIT location
MOVQ jmp+56(FP), CX
// jump to JIT code
// this handles readReg[0-3] and dataset reading, load, stores
CALL CX
// move register file back to registers
MOVQ rf+0(FP), AX
PREFETCHT0 0(AX)
// r0-r7
MOVQ R8, (0*8)(AX)
MOVQ R9, (1*8)(AX)
MOVQ R10, (2*8)(AX)
MOVQ R11, (3*8)(AX)
MOVQ R12, (4*8)(AX)
MOVQ R13, (5*8)(AX)
MOVQ R14, (6*8)(AX)
MOVQ R15, (7*8)(AX)
// f0-f3
VMOVUPD X0, (8*8)(AX)
VMOVUPD X1, (10*8)(AX)
VMOVUPD X2, (12*8)(AX)
VMOVUPD X3, (14*8)(AX)
// e0-e3
VMOVUPD X4, (16*8)(AX)
VMOVUPD X5, (18*8)(AX)
VMOVUPD X6, (20*8)(AX)
VMOVUPD X7, (22*8)(AX)
// a0-a3 are constant, no need to move
RET

View file

@ -2,10 +2,13 @@
package randomx
func (c *ByteCode) generateCode(program []byte) {
func (c *ByteCode) generateCode(program []byte, readReg *[4]uint64) []byte {
return nil
}
func (f VMProgramFunc) Execute(rf *RegisterFile, pad *ScratchPad, eMask [2]uint64) {
}
func (f VMProgramFunc) ExecuteFull(rf *RegisterFile, pad *ScratchPad, dataset *RegisterLine, iterations uint64, ma, mx uint32, eMask [2]uint64) {
}

View file

@ -3,7 +3,7 @@
package randomx
import (
"git.gammaspectra.live/P2Pool/go-randomx/v2/asm"
"git.gammaspectra.live/P2Pool/go-randomx/v3/asm"
"math"
"math/bits"
)

View file

@ -30,7 +30,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package randomx
import (
"git.gammaspectra.live/P2Pool/go-randomx/v2/aes"
"git.gammaspectra.live/P2Pool/go-randomx/v3/aes"
"unsafe"
)
import "encoding/binary"
@ -63,7 +63,7 @@ func (ins VM_Instruction) Opcode() byte {
// CompileProgramToByteCode this will interpret single vm instruction into executable opcodes
// reference https://github.com/tevador/RandomX/blob/master/doc/specs.md#52-integer-instructions
func CompileProgramToByteCode(prog []byte) (bc ByteCode) {
func CompileProgramToByteCode(prog []byte, bc *ByteCode) {
var registerUsage [RegistersCount]int
for i := range registerUsage {
@ -194,7 +194,7 @@ func CompileProgramToByteCode(prog []byte) (bc ByteCode) {
divisor := instr.IMM()
if !isZeroOrPowerOf2(divisor) {
ibc.Opcode = VM_IMUL_I
ibc.Imm = randomx_reciprocal(divisor)
ibc.Imm = reciprocal(divisor)
registerUsage[dst] = i
} else {
ibc.Opcode = VM_NOP
@ -355,9 +355,6 @@ func CompileProgramToByteCode(prog []byte) (bc ByteCode) {
}
}
return bc
}
type ScratchPad [ScratchpadSize]byte