Compare commits
25 commits
Author | SHA1 | Date | |
---|---|---|---|
DataHoarder | b0265950b6 | ||
DataHoarder | c41d6c8080 | ||
DataHoarder | 9aa3631f37 | ||
DataHoarder | 9826b7beb4 | ||
DataHoarder | acfff4a4ad | ||
DataHoarder | a458a18f07 | ||
DataHoarder | cceea5b0ba | ||
DataHoarder | 8b063bde61 | ||
DataHoarder | c50cbc56b5 | ||
DataHoarder | 1d83de4880 | ||
DataHoarder | 25b7fc4cc0 | ||
DataHoarder | 3f70ec75be | ||
DataHoarder | 55d6161f6e | ||
DataHoarder | 36f1a90a20 | ||
DataHoarder | 4903cd7407 | ||
DataHoarder | d20dd880ce | ||
DataHoarder | d72726b0fe | ||
DataHoarder | 34cfab4176 | ||
DataHoarder | a71d8f6a2e | ||
DataHoarder | 14a10f544f | ||
DataHoarder | ef069318b9 | ||
DataHoarder | 80f473de54 | ||
DataHoarder | fe253fb825 | ||
DataHoarder | 699ce02f2d | ||
DataHoarder | b35751462b |
96
.drone.yml
96
.drone.yml
|
@ -24,7 +24,7 @@ steps:
|
|||
commands:
|
||||
- apk update
|
||||
- apk add --no-cache git
|
||||
- go test -p 1 -failfast -timeout 20m -cover -gcflags=-d=checkptr -v .
|
||||
- go test -p 1 -failfast -timeout 20m -cover -gcflags=-d=checkptr -short -v .
|
||||
---
|
||||
kind: pipeline
|
||||
type: docker
|
||||
|
@ -51,7 +51,7 @@ steps:
|
|||
commands:
|
||||
- apk update
|
||||
- apk add --no-cache git
|
||||
- go test -tags disable_jit -p 1 -failfast -timeout 20m -cover -gcflags=-d=checkptr -v .
|
||||
- go test -tags disable_jit -p 1 -failfast -timeout 20m -cover -gcflags=-d=checkptr -short -v .
|
||||
---
|
||||
kind: pipeline
|
||||
type: docker
|
||||
|
@ -78,7 +78,7 @@ steps:
|
|||
commands:
|
||||
- apk update
|
||||
- apk add --no-cache git
|
||||
- go test -tags purego -p 1 -failfast -timeout 20m -cover -gcflags=-d=checkptr -v .
|
||||
- go test -tags purego -p 1 -failfast -timeout 20m -cover -gcflags=-d=checkptr -short -v .
|
||||
---
|
||||
kind: pipeline
|
||||
type: docker
|
||||
|
@ -105,7 +105,7 @@ steps:
|
|||
commands:
|
||||
- apk update
|
||||
- apk add --no-cache git
|
||||
- go test -p 1 -failfast -timeout 20m -cover -gcflags=-d=checkptr -v .
|
||||
- go test -p 1 -failfast -timeout 20m -cover -gcflags=-d=checkptr -short -v .
|
||||
---
|
||||
kind: pipeline
|
||||
type: docker
|
||||
|
@ -132,7 +132,7 @@ steps:
|
|||
commands:
|
||||
- apk update
|
||||
- apk add --no-cache git
|
||||
- go test -tags purego -p 1 -failfast -timeout 20m -cover -gcflags=-d=checkptr -v .
|
||||
- go test -tags purego -p 1 -failfast -timeout 20m -cover -gcflags=-d=checkptr -short -v .
|
||||
---
|
||||
kind: pipeline
|
||||
type: docker
|
||||
|
@ -158,7 +158,7 @@ steps:
|
|||
commands:
|
||||
- apk update
|
||||
- apk add --no-cache git
|
||||
- go test -p 1 -failfast -timeout 20m -cover -gcflags=-d=checkptr -v .
|
||||
- go test -p 1 -failfast -timeout 20m -cover -gcflags=-d=checkptr -short -v .
|
||||
---
|
||||
kind: pipeline
|
||||
type: docker
|
||||
|
@ -184,5 +184,87 @@ steps:
|
|||
commands:
|
||||
- apk update
|
||||
- apk add --no-cache git
|
||||
- go test -tags purego -p 1 -failfast -timeout 20m -cover -gcflags=-d=checkptr -v .
|
||||
- go test -tags purego -p 1 -failfast -timeout 20m -cover -gcflags=-d=checkptr -short -v .
|
||||
-
|
||||
---
|
||||
kind: pipeline
|
||||
type: docker
|
||||
name: go-arm-asm
|
||||
platform:
|
||||
os: linux
|
||||
arch: arm64
|
||||
|
||||
environment:
|
||||
GOPROXY: direct
|
||||
GOARCH: arm
|
||||
GOARM: 7
|
||||
GOOS: linux
|
||||
GOTRACEBACK: 2
|
||||
GOEXPERIMENT: "cgocheck2,newinliner"
|
||||
CGO_ENABLED: "0"
|
||||
|
||||
workspace:
|
||||
path: /drone/src
|
||||
|
||||
steps:
|
||||
- name: test
|
||||
image: golang:1.22-alpine3.19
|
||||
commands:
|
||||
- apk update
|
||||
- apk add --no-cache git
|
||||
- go test -p 1 -failfast -timeout 20m -cover -gcflags=-d=checkptr -short -v .
|
||||
---
|
||||
kind: pipeline
|
||||
type: docker
|
||||
name: go-arm-purego
|
||||
platform:
|
||||
os: linux
|
||||
arch: arm64
|
||||
|
||||
environment:
|
||||
GOPROXY: direct
|
||||
GOARCH: arm
|
||||
GOARM: 7
|
||||
GOOS: linux
|
||||
GOTRACEBACK: 2
|
||||
GOEXPERIMENT: "cgocheck2,newinliner"
|
||||
CGO_ENABLED: "0"
|
||||
|
||||
workspace:
|
||||
path: /drone/src
|
||||
|
||||
steps:
|
||||
- name: test
|
||||
image: golang:1.22-alpine3.19
|
||||
commands:
|
||||
- apk update
|
||||
- apk add --no-cache git
|
||||
- go test -tags purego -p 1 -failfast -timeout 20m -cover -gcflags=-d=checkptr -short -v .
|
||||
---
|
||||
kind: pipeline
|
||||
type: docker
|
||||
name: go-wasm-purego
|
||||
platform:
|
||||
os: linux
|
||||
arch: arm64
|
||||
|
||||
environment:
|
||||
GOPROXY: direct
|
||||
GOARCH: wasm
|
||||
GOOS: wasip1
|
||||
GOTRACEBACK: 2
|
||||
GOEXPERIMENT: "cgocheck2,newinliner"
|
||||
CGO_ENABLED: "0"
|
||||
|
||||
workspace:
|
||||
path: /drone/src
|
||||
|
||||
steps:
|
||||
- name: test
|
||||
image: golang:1.22-alpine3.19
|
||||
commands:
|
||||
- apk update
|
||||
- apk add --no-cache git bash
|
||||
- apk add --no-cache wasmtime --repository=https://dl-cdn.alpinelinux.org/alpine/edge/testing
|
||||
- PATH=$PATH:$(go env GOROOT)/misc/wasm go test -tags purego -p 1 -failfast -timeout 20m -cover -gcflags=-d=checkptr -short -v .
|
||||
...
|
43
README.md
43
README.md
|
@ -1,28 +1,41 @@
|
|||
# RandomX (Golang Implementation)
|
||||
RandomX is a proof-of-work (PoW) algorithm that is optimized for general-purpose CPUs.
|
||||
RandomX uses random code execution (hence the name) together with several memory-hard techniques to minimize the efficiency advantage of specialized hardware.
|
||||
|
||||
---
|
||||
|
||||
Fork from [git.dero.io/DERO_Foundation/RandomX](https://git.dero.io/DERO_Foundation/RandomX). Also related, their [Analysis of RandomX writeup](https://medium.com/deroproject/analysis-of-randomx-dde9dfe9bbc6).
|
||||
|
||||
Original code failed RandomX testcases and was implemented using big.Float.
|
||||
|
||||
This package implements RandomX without CGO, using only Golang code, pure float64 ops and two small assembly sections to implement CFROUND modes, with optional soft float implementation.
|
||||
---
|
||||
|
||||
This package implements RandomX without CGO, using only Go code, native float64 operations and some assembly, with an optional soft float _purego_ implementation.
|
||||
|
||||
All test cases pass properly.
|
||||
|
||||
Uses minimal Go assembly due to having to set rounding mode natively. Native hard float can be added with supporting rounding mode under _asm_.
|
||||
Supports Full mode and Light mode.
|
||||
|
||||
JIT is supported on a few platforms but can be hard-disabled via the `disable_jit` build flag, or at runtime.
|
||||
For the C++ implementation and design of RandomX, see [github.com/tevador/RandomX](https://github.com/tevador/RandomX)
|
||||
|
||||
A pure Golang implementation can be used on platforms without hard float support or via the `purego` build flag manually.
|
||||
| Feature | 386 | amd64 | arm | arm64 | mips | mips64 | riscv64 | wasm |
|
||||
|:---------------------:|:-----------:|:------------:|:-----------:|:-----------:|:-----------:|:-----------:|:-----------:|:-----------:|
|
||||
| purego | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| Full Mode | ❌ | ✅ | ❌ | ✅ | ❌ | ✅ | ✅ | ❌ |
|
||||
| Float Operations | **hw** | **hw** | **hw** | **hw** | soft | soft | soft | soft |
|
||||
| AES Operations | soft | **hw** | soft | soft | soft | soft | soft | soft |
|
||||
| Superscalar Execution | interpreter | **compiler** | interpreter | interpreter | interpreter | interpreter | interpreter | interpreter |
|
||||
| VM Execution | interpreter | **compiler** | interpreter | interpreter | soft | soft | soft | soft |
|
||||
|
||||
| Platform | Supported | Hard Float | SuperScalar JIT | Notes |
|
||||
|:-----------:|:---------:|:----------:|:---------------:|:----------------:|
|
||||
| **386** | ✅ | ✅ | ❌ | |
|
||||
| **amd64** | ✅ | ✅ | ✅* | JIT only on Unix |
|
||||
| **arm** | ✅* | ❌ | ❌ | |
|
||||
| **arm64** | ✅ | ✅ | ❌ | |
|
||||
| **mips** | ✅* | ❌ | ❌ | |
|
||||
| **mips64** | ✅* | ❌ | ❌ | |
|
||||
| **riscv64** | ✅* | ❌ | ❌ | |
|
||||
| **wasm** | ✅* | ❌ | ❌ | |
|
||||
|
||||
\* These platforms only support software floating point (the `purego` implementation) and will not be performant.
|
||||
A pure Golang implementation can be used on platforms without hard float support or via the `purego` build tag manually.
|
||||
|
||||
[TinyGo](https://github.com/tinygo-org/tinygo) is supported under the `purego` build tag.
|
||||
|
||||
Any platform without hard float support, or any build where it is enabled manually, will use soft float via [softfloat64](https://git.gammaspectra.live/P2Pool/softfloat64). This will be very slow.
|
||||
|
||||
Full mode is unsupported and NOT recommended on 32-bit systems, although depending on the system it might still be able to run. When cleaning up a dataset you might want to call `runtime.GC()` manually to free its memory.
|
||||
|
||||
Native hard float can be added for a platform by implementing its rounding mode support under _asm_.
|
||||
|
||||
JIT is only supported on Unix systems (Linux, \*BSD, macOS). It can be hard-disabled via the `disable_jit` build tag, or disabled at runtime.
|
142
aes/hash.go
142
aes/hash.go
|
@ -1,142 +0,0 @@
|
|||
/*
|
||||
Copyright (c) 2019 DERO Foundation. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
3. Neither the name of the copyright holder nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software without
|
||||
specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
package aes
|
||||
|
||||
import (
|
||||
"git.gammaspectra.live/P2Pool/go-randomx/v2/keys"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// HashAes1Rx4
|
||||
//
|
||||
// Calculate a 512-bit hash of 'input' using 4 lanes of AES.
|
||||
// The input is treated as a set of round keys for the encryption
|
||||
// of the initial state.
|
||||
//
|
||||
// 'inputSize' must be a multiple of 64.
|
||||
//
|
||||
// For a 2 MiB input, this has the same security as 32768-round
|
||||
// AES encryption.
|
||||
//
|
||||
// Hashing throughput: >20 GiB/s per CPU core with hardware AES
|
||||
func HashAes1Rx4(input []byte, output *[64]byte) {
|
||||
if len(input)%64 != 0 {
|
||||
panic("unsupported")
|
||||
}
|
||||
|
||||
// states are copied
|
||||
states := keys.AesHash1R_State
|
||||
|
||||
for input_ptr := 0; input_ptr < len(input); input_ptr += 64 {
|
||||
in := (*[4][4]uint32)(unsafe.Pointer(unsafe.SliceData(input[input_ptr:])))
|
||||
|
||||
soft_aesenc(&states[0], &in[0])
|
||||
soft_aesdec(&states[1], &in[1])
|
||||
soft_aesenc(&states[2], &in[2])
|
||||
soft_aesdec(&states[3], &in[3])
|
||||
}
|
||||
|
||||
soft_aesenc(&states[0], &keys.AesHash1R_XKeys[0])
|
||||
soft_aesdec(&states[1], &keys.AesHash1R_XKeys[0])
|
||||
soft_aesenc(&states[2], &keys.AesHash1R_XKeys[0])
|
||||
soft_aesdec(&states[3], &keys.AesHash1R_XKeys[0])
|
||||
|
||||
soft_aesenc(&states[0], &keys.AesHash1R_XKeys[1])
|
||||
soft_aesdec(&states[1], &keys.AesHash1R_XKeys[1])
|
||||
soft_aesenc(&states[2], &keys.AesHash1R_XKeys[1])
|
||||
soft_aesdec(&states[3], &keys.AesHash1R_XKeys[1])
|
||||
|
||||
copy(output[:], (*[64]byte)(unsafe.Pointer(&states))[:])
|
||||
}
|
||||
|
||||
// FillAes1Rx4
|
||||
//
|
||||
// Fill 'output' with pseudorandom data based on 512-bit 'state'.
|
||||
// The state is encrypted using a single AES round per 16 bytes of output
|
||||
// in 4 lanes.
|
||||
//
|
||||
// 'output' size must be a multiple of 64.
|
||||
//
|
||||
// The modified state is written back to 'state' to allow multiple
|
||||
// calls to this function.
|
||||
func FillAes1Rx4(state *[64]byte, output []byte) {
|
||||
if len(output)%len(state) != 0 {
|
||||
panic("unsupported")
|
||||
}
|
||||
|
||||
// Reference to state without copying
|
||||
states := (*[4][4]uint32)(unsafe.Pointer(state))
|
||||
|
||||
for outptr := 0; outptr < len(output); outptr += len(state) {
|
||||
soft_aesdec(&states[0], &keys.AesGenerator1R_Keys[0])
|
||||
soft_aesenc(&states[1], &keys.AesGenerator1R_Keys[1])
|
||||
soft_aesdec(&states[2], &keys.AesGenerator1R_Keys[2])
|
||||
soft_aesenc(&states[3], &keys.AesGenerator1R_Keys[3])
|
||||
|
||||
copy(output[outptr:], state[:])
|
||||
}
|
||||
}
|
||||
|
||||
// FillAes4Rx4 used to generate final program
|
||||
func FillAes4Rx4(state [64]byte, output []byte) {
|
||||
if len(output)%len(state) != 0 {
|
||||
panic("unsupported")
|
||||
}
|
||||
|
||||
// state is copied on caller
|
||||
|
||||
// Copy state
|
||||
states := (*[4][4]uint32)(unsafe.Pointer(&state))
|
||||
|
||||
for outptr := 0; outptr < len(output); outptr += len(state) {
|
||||
soft_aesdec(&states[0], &keys.AesGenerator4R_Keys[0])
|
||||
soft_aesenc(&states[1], &keys.AesGenerator4R_Keys[0])
|
||||
soft_aesdec(&states[2], &keys.AesGenerator4R_Keys[4])
|
||||
soft_aesenc(&states[3], &keys.AesGenerator4R_Keys[4])
|
||||
|
||||
soft_aesdec(&states[0], &keys.AesGenerator4R_Keys[1])
|
||||
soft_aesenc(&states[1], &keys.AesGenerator4R_Keys[1])
|
||||
soft_aesdec(&states[2], &keys.AesGenerator4R_Keys[5])
|
||||
soft_aesenc(&states[3], &keys.AesGenerator4R_Keys[5])
|
||||
|
||||
soft_aesdec(&states[0], &keys.AesGenerator4R_Keys[2])
|
||||
soft_aesenc(&states[1], &keys.AesGenerator4R_Keys[2])
|
||||
soft_aesdec(&states[2], &keys.AesGenerator4R_Keys[6])
|
||||
soft_aesenc(&states[3], &keys.AesGenerator4R_Keys[6])
|
||||
|
||||
soft_aesdec(&states[0], &keys.AesGenerator4R_Keys[3])
|
||||
soft_aesenc(&states[1], &keys.AesGenerator4R_Keys[3])
|
||||
soft_aesdec(&states[2], &keys.AesGenerator4R_Keys[7])
|
||||
soft_aesenc(&states[3], &keys.AesGenerator4R_Keys[7])
|
||||
|
||||
copy(output[outptr:], state[:])
|
||||
}
|
||||
|
||||
}
|
|
@ -1,44 +0,0 @@
|
|||
package argon2
|
||||
|
||||
import "golang.org/x/crypto/blake2b"
|
||||
|
||||
import (
|
||||
_ "golang.org/x/crypto/argon2"
|
||||
_ "unsafe"
|
||||
)
|
||||
|
||||
const BlockSize uint32 = 1024
|
||||
|
||||
type Block [BlockSize / 8]uint64
|
||||
|
||||
const syncPoints = 4
|
||||
|
||||
//go:linkname initHash golang.org/x/crypto/argon2.initHash
|
||||
func initHash(password, salt, key, data []byte, time, memory, threads, keyLen uint32, mode int) [blake2b.Size + 8]byte
|
||||
|
||||
//go:linkname initBlocks golang.org/x/crypto/argon2.initBlocks
|
||||
func initBlocks(h0 *[blake2b.Size + 8]byte, memory, threads uint32) []Block
|
||||
|
||||
//go:linkname processBlocks golang.org/x/crypto/argon2.processBlocks
|
||||
func processBlocks(B []Block, time, memory, threads uint32, mode int)
|
||||
|
||||
// BuildBlocks From golang.org/x/crypto/argon2.deriveKey without last deriveKey call
|
||||
func BuildBlocks(password, salt, secret, data []byte, time, memory uint32, threads uint8, keyLen uint32) []Block {
|
||||
if time < 1 {
|
||||
panic("argon2: number of rounds too small")
|
||||
}
|
||||
if threads < 1 {
|
||||
panic("argon2: parallelism degree too low")
|
||||
}
|
||||
const mode = 0 /* argon2d */
|
||||
h0 := initHash(password, salt, secret, data, time, memory, uint32(threads), keyLen, mode)
|
||||
|
||||
memory = memory / (syncPoints * uint32(threads)) * (syncPoints * uint32(threads))
|
||||
if memory < 2*syncPoints*uint32(threads) {
|
||||
memory = 2 * syncPoints * uint32(threads)
|
||||
}
|
||||
B := initBlocks(&h0, memory, uint32(threads))
|
||||
processBlocks(B, time, memory, uint32(threads), mode)
|
||||
|
||||
return B
|
||||
}
|
|
@ -1,20 +0,0 @@
|
|||
//go:build 386 && !purego
|
||||
|
||||
package asm
|
||||
|
||||
// stmxcsr reads the MXCSR control and status register.
|
||||
//
|
||||
//go:noescape
|
||||
func stmxcsr(addr *uint32)
|
||||
|
||||
// ldmxcsr writes to the MXCSR control and status register.
|
||||
//
|
||||
//go:noescape
|
||||
func ldmxcsr(addr *uint32)
|
||||
|
||||
func setRoundingMode(mode uint8) {
|
||||
var csr uint32
|
||||
stmxcsr(&csr)
|
||||
csr = (csr & (^uint32(0x6000))) | ((uint32(mode) & 3) << 13)
|
||||
ldmxcsr(&csr)
|
||||
}
|
|
@ -1,15 +0,0 @@
|
|||
//go:build 386 && !purego
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// stmxcsr reads the MXCSR control and status register.
// The register value is stored into the uint32 pointed to by addr.
// Frame $0-4: no local frame, one 4-byte pointer argument.
|
||||
TEXT ·stmxcsr(SB),NOSPLIT|NOFRAME,$0-4
|
||||
MOVL addr+0(FP), SI // SI = addr
|
||||
STMXCSR (SI) // *addr = MXCSR
|
||||
RET
|
||||
|
||||
// ldmxcsr writes to the MXCSR control and status register.
// The new register value is loaded from the uint32 pointed to by addr.
// Frame $0-4: no local frame, one 4-byte pointer argument.
|
||||
TEXT ·ldmxcsr(SB),NOSPLIT|NOFRAME,$0-4
|
||||
MOVL addr+0(FP), SI // SI = addr
|
||||
LDMXCSR (SI) // MXCSR = *addr
|
||||
RET
|
|
@ -1,20 +0,0 @@
|
|||
//go:build amd64 && !purego
|
||||
|
||||
package asm
|
||||
|
||||
// stmxcsr reads the MXCSR control and status register.
|
||||
//
|
||||
//go:noescape
|
||||
func stmxcsr(addr *uint32)
|
||||
|
||||
// ldmxcsr writes to the MXCSR control and status register.
|
||||
//
|
||||
//go:noescape
|
||||
func ldmxcsr(addr *uint32)
|
||||
|
||||
func setRoundingMode(mode uint8) {
|
||||
var csr uint32
|
||||
stmxcsr(&csr)
|
||||
csr = (csr & (^uint32(0x6000))) | ((uint32(mode) & 3) << 13)
|
||||
ldmxcsr(&csr)
|
||||
}
|
|
@ -1,15 +0,0 @@
|
|||
//go:build amd64 && !purego
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// stmxcsr reads the MXCSR control and status register.
// The register value is stored into the uint32 pointed to by addr.
// Frame $0-8: no local frame, one 8-byte pointer argument.
|
||||
TEXT ·stmxcsr(SB),NOSPLIT|NOFRAME,$0-8
|
||||
MOVQ addr+0(FP), SI // SI = addr
|
||||
STMXCSR (SI) // *addr = MXCSR
|
||||
RET
|
||||
|
||||
// ldmxcsr writes to the MXCSR control and status register.
// The new register value is loaded from the uint32 pointed to by addr.
// Frame $0-8: no local frame, one 8-byte pointer argument.
|
||||
TEXT ·ldmxcsr(SB),NOSPLIT|NOFRAME,$0-8
|
||||
MOVQ addr+0(FP), SI // SI = addr
|
||||
LDMXCSR (SI) // MXCSR = *addr
|
||||
RET
|
50
blake2b.go
50
blake2b.go
|
@ -1,50 +0,0 @@
|
|||
package randomx
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"golang.org/x/crypto/blake2b"
|
||||
)
|
||||
|
||||
// Blake2Generator is a byte stream generator: it hands out bytes from a
// 64-byte buffer and re-hashes the buffer with BLAKE2b-512 whenever a read
// would run past its end.
type Blake2Generator struct {
|
||||
// data is the current 64-byte buffer (seed first, then successive hashes).
data [64]byte
|
||||
// dataindex is the offset of the next unread byte in data.
dataindex int
|
||||
// NOTE(review): not used by the generator methods in this file; presumably
// scratch space for register allocation by callers — confirm.
allocRegIndex [8]int
|
||||
// NOTE(review): not used by the generator methods in this file — confirm usage.
allocRegisters [8]Register
|
||||
}
|
||||
|
||||
func Init_Blake2Generator(key []byte, nonce uint32) *Blake2Generator {
|
||||
var b Blake2Generator
|
||||
b.dataindex = len(b.data)
|
||||
if len(key) > 60 {
|
||||
copy(b.data[:], key[0:60])
|
||||
} else {
|
||||
copy(b.data[:], key)
|
||||
}
|
||||
binary.LittleEndian.PutUint32(b.data[60:], nonce)
|
||||
|
||||
return &b
|
||||
}
|
||||
|
||||
func (b *Blake2Generator) checkdata(bytesNeeded int) {
|
||||
if b.dataindex+bytesNeeded > cap(b.data) {
|
||||
//blake2b(data, sizeof(data), data, sizeof(data), nullptr, 0);
|
||||
h := blake2b.Sum512(b.data[:])
|
||||
copy(b.data[:], h[:])
|
||||
b.dataindex = 0
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func (b *Blake2Generator) GetByte() byte {
|
||||
b.checkdata(1)
|
||||
ret := b.data[b.dataindex]
|
||||
b.dataindex++
|
||||
return ret
|
||||
}
|
||||
func (b *Blake2Generator) GetUint32() uint32 {
|
||||
b.checkdata(4)
|
||||
ret := binary.LittleEndian.Uint32(b.data[b.dataindex:])
|
||||
b.dataindex += 4
|
||||
|
||||
return ret
|
||||
}
|
220
cache.go
220
cache.go
|
@ -1,55 +1,83 @@
|
|||
package randomx
|
||||
|
||||
import (
|
||||
"git.gammaspectra.live/P2Pool/go-randomx/v2/argon2"
|
||||
"git.gammaspectra.live/P2Pool/go-randomx/v2/keys"
|
||||
"errors"
|
||||
"git.gammaspectra.live/P2Pool/go-randomx/v3/internal/argon2"
|
||||
"git.gammaspectra.live/P2Pool/go-randomx/v3/internal/blake2"
|
||||
"git.gammaspectra.live/P2Pool/go-randomx/v3/internal/keys"
|
||||
"git.gammaspectra.live/P2Pool/go-randomx/v3/internal/memory"
|
||||
"runtime"
|
||||
"slices"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
type MemoryBlock [128]uint64
|
||||
type MemoryBlock [argon2.BlockSize / 8]uint64
|
||||
|
||||
func (m *MemoryBlock) GetLine(addr uint64) *RegisterLine {
|
||||
addr >>= 3
|
||||
//[addr : addr+8 : addr+8]
|
||||
return (*RegisterLine)(unsafe.Add(unsafe.Pointer(m), addr*8))
|
||||
return (*RegisterLine)(unsafe.Pointer(unsafe.SliceData(m[addr : addr+8 : addr+8])))
|
||||
}
|
||||
|
||||
type Randomx_Cache struct {
|
||||
Blocks []MemoryBlock
|
||||
type Cache struct {
|
||||
blocks *[RANDOMX_ARGON_MEMORY]MemoryBlock
|
||||
|
||||
Programs [RANDOMX_PROGRAM_COUNT]SuperScalarProgram
|
||||
programs [RANDOMX_PROGRAM_COUNT]SuperScalarProgram
|
||||
|
||||
JitPrograms [RANDOMX_PROGRAM_COUNT]ProgramFunc
|
||||
jitPrograms [RANDOMX_PROGRAM_COUNT]SuperScalarProgramFunc
|
||||
|
||||
Flags uint64
|
||||
flags Flags
|
||||
}
|
||||
|
||||
func Randomx_alloc_cache(flags uint64) *Randomx_Cache {
|
||||
if flags == RANDOMX_FLAG_DEFAULT {
|
||||
flags = RANDOMX_FLAG_JIT
|
||||
}
|
||||
return &Randomx_Cache{
|
||||
Flags: flags,
|
||||
// NewCache Creates a randomx_cache structure and allocates memory for RandomX Cache.
|
||||
// *
|
||||
// * @param flags is any combination of these 2 flags (each flag can be set or not set):
|
||||
// * RANDOMX_FLAG_LARGE_PAGES - allocate memory in large pages
|
||||
// * RANDOMX_FLAG_JIT - create cache structure with JIT compilation support; this makes
|
||||
// * subsequent Dataset initialization faster
|
||||
// * Optionally, one of these two flags may be selected:
|
||||
// * RANDOMX_FLAG_ARGON2_SSSE3 - optimized Argon2 for CPUs with the SSSE3 instruction set
|
||||
// * makes subsequent cache initialization faster
|
||||
// * RANDOMX_FLAG_ARGON2_AVX2 - optimized Argon2 for CPUs with the AVX2 instruction set
|
||||
// * makes subsequent cache initialization faster
|
||||
// *
|
||||
// * @return Pointer to an allocated randomx_cache structure.
|
||||
// * Returns NULL if:
|
||||
// * (1) memory allocation fails
|
||||
// * (2) the RANDOMX_FLAG_JIT is set and JIT compilation is not supported on the current platform
|
||||
// * (3) an invalid or unsupported RANDOMX_FLAG_ARGON2 value is set
|
||||
// */
|
||||
func NewCache(flags Flags) (c *Cache, err error) {
|
||||
|
||||
var blocks *[RANDOMX_ARGON_MEMORY]MemoryBlock
|
||||
|
||||
if flags.Has(RANDOMX_FLAG_LARGE_PAGES) {
|
||||
if largePageAllocator == nil {
|
||||
return nil, errors.New("huge pages not supported")
|
||||
}
|
||||
blocks, err = memory.Allocate[[RANDOMX_ARGON_MEMORY]MemoryBlock](largePageAllocator)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
} else {
|
||||
blocks, err = memory.Allocate[[RANDOMX_ARGON_MEMORY]MemoryBlock](cacheLineAlignedAllocator)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return &Cache{
|
||||
flags: flags,
|
||||
blocks: blocks,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (cache *Randomx_Cache) HasJIT() bool {
|
||||
return cache.Flags&RANDOMX_FLAG_JIT > 0 && cache.JitPrograms[0] != nil
|
||||
func (c *Cache) hasInitializedJIT() bool {
|
||||
return c.flags.HasJIT() && c.jitPrograms[0] != nil
|
||||
}
|
||||
|
||||
func (cache *Randomx_Cache) VM_Initialize() *VM {
|
||||
|
||||
return &VM{
|
||||
Dataset: &Randomx_DatasetLight{
|
||||
Cache: cache,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func (cache *Randomx_Cache) Close() error {
|
||||
for _, p := range cache.JitPrograms {
|
||||
// Close Releases all memory occupied by the Cache structure.
|
||||
func (c *Cache) Close() error {
|
||||
for _, p := range c.jitPrograms {
|
||||
if p != nil {
|
||||
err := p.Close()
|
||||
if err != nil {
|
||||
|
@ -57,45 +85,63 @@ func (cache *Randomx_Cache) Close() error {
|
|||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
|
||||
if c.flags.Has(RANDOMX_FLAG_LARGE_PAGES) {
|
||||
return memory.Free(largePageAllocator, c.blocks)
|
||||
} else {
|
||||
return memory.Free(cacheLineAlignedAllocator, c.blocks)
|
||||
}
|
||||
}
|
||||
|
||||
func (cache *Randomx_Cache) Init(key []byte) {
|
||||
// Lock due to external JIT madness
|
||||
runtime.LockOSThread()
|
||||
defer runtime.UnlockOSThread()
|
||||
// Init Initializes the cache memory and SuperscalarHash using the provided key value.
|
||||
// NOTE: the key is not cached yet (see the TODO below), so calling Init again with the same key value regenerates everything.
|
||||
func (c *Cache) Init(key []byte) {
|
||||
//TODO: cache key and do not regenerate
|
||||
|
||||
kkey := slices.Clone(key)
|
||||
argonBlocks := unsafe.Slice((*argon2.Block)(unsafe.Pointer(c.blocks)), len(c.blocks))
|
||||
|
||||
argonBlocks := argon2.BuildBlocks(kkey, []byte(RANDOMX_ARGON_SALT), []byte{}, []byte{}, RANDOMX_ARGON_ITERATIONS, RANDOMX_ARGON_MEMORY, RANDOMX_ARGON_LANES, 0)
|
||||
argon2.BuildBlocks(argonBlocks, key, []byte(RANDOMX_ARGON_SALT), RANDOMX_ARGON_ITERATIONS, RANDOMX_ARGON_MEMORY, RANDOMX_ARGON_LANES)
|
||||
|
||||
memoryBlocks := unsafe.Slice((*MemoryBlock)(unsafe.Pointer(unsafe.SliceData(argonBlocks))), int(unsafe.Sizeof(argon2.Block{}))/int(unsafe.Sizeof(MemoryBlock{}))*len(argonBlocks))
|
||||
const nonce uint32 = 0
|
||||
|
||||
cache.Blocks = memoryBlocks
|
||||
gen := blake2.New(key, nonce)
|
||||
for i := range c.programs {
|
||||
// build a superscalar program
|
||||
prog := BuildSuperScalarProgram(gen)
|
||||
|
||||
nonce := uint32(0) //uint32(len(key))
|
||||
gen := Init_Blake2Generator(key, nonce)
|
||||
for i := 0; i < 8; i++ {
|
||||
cache.Programs[i] = Build_SuperScalar_Program(gen) // build a superscalar program
|
||||
if cache.Flags&RANDOMX_FLAG_JIT > 0 {
|
||||
cache.JitPrograms[i] = generateSuperscalarCode(cache.Programs[i])
|
||||
if c.flags.HasJIT() {
|
||||
c.jitPrograms[i] = generateSuperscalarCode(prog)
|
||||
// fallback if can't compile program
|
||||
if c.jitPrograms[i] == nil {
|
||||
c.programs[i] = prog
|
||||
} else if err := memory.PageReadExecute(c.jitPrograms[i]); err != nil {
|
||||
c.programs[i] = prog
|
||||
} else {
|
||||
c.programs[i] = SuperScalarProgram{prog[0]}
|
||||
}
|
||||
} else {
|
||||
c.programs[i] = prog
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// GetMixBlock fetch a 64 byte block in uint64 form
|
||||
func (cache *Randomx_Cache) GetMixBlock(addr uint64) *RegisterLine {
|
||||
const Mask = CacheSize/CacheLineSize - 1
|
||||
|
||||
mask := CacheSize/CacheLineSize - 1
|
||||
// getMixBlock fetch a 64 byte block in uint64 form
|
||||
func (c *Cache) getMixBlock(addr uint64) *RegisterLine {
|
||||
|
||||
addr = (addr & mask) * CacheLineSize
|
||||
addr = (addr & Mask) * CacheLineSize
|
||||
|
||||
block := addr / 1024
|
||||
return cache.Blocks[block].GetLine(addr % 1024)
|
||||
return c.blocks[block].GetLine(addr % 1024)
|
||||
}
|
||||
|
||||
func (cache *Randomx_Cache) InitDatasetItem(rl *RegisterLine, itemNumber uint64) {
|
||||
func (c *Cache) GetMemory() *[RANDOMX_ARGON_MEMORY]MemoryBlock {
|
||||
return c.blocks
|
||||
}
|
||||
|
||||
func (c *Cache) initDataset(rl *RegisterLine, itemNumber uint64) {
|
||||
registerValue := itemNumber
|
||||
|
||||
rl[0] = (itemNumber + 1) * keys.SuperScalar_Constants[0]
|
||||
|
@ -107,51 +153,45 @@ func (cache *Randomx_Cache) InitDatasetItem(rl *RegisterLine, itemNumber uint64)
|
|||
rl[6] = rl[0] ^ keys.SuperScalar_Constants[6]
|
||||
rl[7] = rl[0] ^ keys.SuperScalar_Constants[7]
|
||||
|
||||
for i := 0; i < RANDOMX_CACHE_ACCESSES; i++ {
|
||||
mix := cache.GetMixBlock(registerValue)
|
||||
|
||||
program := cache.Programs[i]
|
||||
|
||||
executeSuperscalar(program.Program(), rl)
|
||||
|
||||
for q := range rl {
|
||||
rl[q] ^= mix[q]
|
||||
if c.hasInitializedJIT() {
|
||||
if c.flags.HasJIT() {
|
||||
// Lock due to external JIT madness
|
||||
runtime.LockOSThread()
|
||||
defer runtime.UnlockOSThread()
|
||||
}
|
||||
|
||||
registerValue = rl[program.AddressRegister()]
|
||||
for i := 0; i < RANDOMX_CACHE_ACCESSES; i++ {
|
||||
mix := c.getMixBlock(registerValue)
|
||||
|
||||
c.jitPrograms[i].Execute(uintptr(unsafe.Pointer(rl)))
|
||||
|
||||
for q := range rl {
|
||||
rl[q] ^= mix[q]
|
||||
}
|
||||
|
||||
registerValue = rl[c.programs[i].AddressRegister()]
|
||||
|
||||
}
|
||||
} else {
|
||||
for i := 0; i < RANDOMX_CACHE_ACCESSES; i++ {
|
||||
mix := c.getMixBlock(registerValue)
|
||||
|
||||
program := c.programs[i]
|
||||
|
||||
executeSuperscalar(program.Program(), rl)
|
||||
|
||||
for q := range rl {
|
||||
rl[q] ^= mix[q]
|
||||
}
|
||||
|
||||
registerValue = rl[program.AddressRegister()]
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (cache *Randomx_Cache) InitDatasetItemJIT(rl *RegisterLine, itemNumber uint64) {
|
||||
registerValue := itemNumber
|
||||
|
||||
rl[0] = (itemNumber + 1) * keys.SuperScalar_Constants[0]
|
||||
rl[1] = rl[0] ^ keys.SuperScalar_Constants[1]
|
||||
rl[2] = rl[0] ^ keys.SuperScalar_Constants[2]
|
||||
rl[3] = rl[0] ^ keys.SuperScalar_Constants[3]
|
||||
rl[4] = rl[0] ^ keys.SuperScalar_Constants[4]
|
||||
rl[5] = rl[0] ^ keys.SuperScalar_Constants[5]
|
||||
rl[6] = rl[0] ^ keys.SuperScalar_Constants[6]
|
||||
rl[7] = rl[0] ^ keys.SuperScalar_Constants[7]
|
||||
|
||||
for i := 0; i < RANDOMX_CACHE_ACCESSES; i++ {
|
||||
mix := cache.GetMixBlock(registerValue)
|
||||
|
||||
cache.JitPrograms[i].Execute(rl)
|
||||
|
||||
for q := range rl {
|
||||
rl[q] ^= mix[q]
|
||||
}
|
||||
|
||||
registerValue = rl[cache.Programs[i].AddressRegister()]
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
func (cache *Randomx_Cache) initDataset(dataset []RegisterLine, startItem, endItem uint64) {
|
||||
panic("todo")
|
||||
func (c *Cache) datasetInit(dataset []RegisterLine, startItem, endItem uint64) {
|
||||
for itemNumber := startItem; itemNumber < endItem; itemNumber, dataset = itemNumber+1, dataset[1:] {
|
||||
cache.InitDatasetItem(&dataset[0], itemNumber)
|
||||
c.initDataset(&dataset[0], itemNumber)
|
||||
}
|
||||
}
|
||||
|
|
101
cache_test.go
Normal file
101
cache_test.go
Normal file
|
@ -0,0 +1,101 @@
|
|||
package randomx
|
||||
|
||||
import "testing"
|
||||
|
||||
func Test_Cache_Init(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
cache, err := NewCache(GetFlags())
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer cache.Close()
|
||||
cache.Init(Tests[1].key)
|
||||
|
||||
memory := cache.GetMemory()
|
||||
|
||||
var tests = []struct {
|
||||
index int
|
||||
value uint64
|
||||
}{
|
||||
{0, 0x191e0e1d23c02186},
|
||||
{1568413, 0xf1b62fe6210bf8b1},
|
||||
{33554431, 0x1f47f056d05cd99b},
|
||||
}
|
||||
|
||||
for i, tt := range tests {
|
||||
if memory[tt.index/128][tt.index%128] != tt.value {
|
||||
t.Errorf("i=%d, index=%d", i, tt.index)
|
||||
t.Errorf("expected=%016x, actual=%016x", tt.value, memory[tt.index/128][tt.index%128])
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func Test_Cache_InitDataset(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
var tests = []struct {
|
||||
index int
|
||||
value uint64
|
||||
}{
|
||||
{0, 0x680588a85ae222db},
|
||||
{10000000, 0x7943a1f6186ffb72},
|
||||
{20000000, 0x9035244d718095e1},
|
||||
{30000000, 0x145a5091f7853099},
|
||||
}
|
||||
|
||||
t.Run("interpreter", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
flags := GetFlags()
|
||||
flags &^= RANDOMX_FLAG_JIT
|
||||
|
||||
cache, err := NewCache(flags)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer cache.Close()
|
||||
cache.Init(Tests[1].key)
|
||||
|
||||
var datasetItem RegisterLine
|
||||
|
||||
for i, tt := range tests {
|
||||
cache.initDataset(&datasetItem, uint64(tt.index))
|
||||
if datasetItem[0] != tt.value {
|
||||
t.Errorf("i=%d, index=%d", i, tt.index)
|
||||
t.Errorf("expected=%016x, actual=%016x", tt.value, datasetItem[0])
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("compiler", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
flags := GetFlags()
|
||||
flags |= RANDOMX_FLAG_JIT
|
||||
if !flags.HasJIT() {
|
||||
t.Skip("not supported on this platform")
|
||||
}
|
||||
|
||||
cache, err := NewCache(flags)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer cache.Close()
|
||||
cache.Init(Tests[1].key)
|
||||
if !cache.hasInitializedJIT() {
|
||||
t.Skip("not supported on this platform")
|
||||
}
|
||||
|
||||
var datasetItem RegisterLine
|
||||
|
||||
for i, tt := range tests {
|
||||
cache.initDataset(&datasetItem, uint64(tt.index))
|
||||
if datasetItem[0] != tt.value {
|
||||
t.Errorf("i=%d, index=%d", i, tt.index)
|
||||
t.Errorf("expected=%016x, actual=%016x", tt.value, datasetItem[0])
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
15
commitment.go
Normal file
15
commitment.go
Normal file
|
@ -0,0 +1,15 @@
|
|||
package randomx
|
||||
|
||||
import "golang.org/x/crypto/blake2b"
|
||||
|
||||
// CalculateCommitment Calculate a RandomX commitment from a RandomX hash and its input.
|
||||
func CalculateCommitment(input []byte, hashIn, hashOut *[RANDOMX_HASH_SIZE]byte) {
|
||||
hasher, err := blake2b.New(RANDOMX_HASH_SIZE, nil)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
hasher.Write(input)
|
||||
hasher.Write(hashIn[:])
|
||||
hasher.Sum(hashOut[:0])
|
||||
}
|
41
commitment_test.go
Normal file
41
commitment_test.go
Normal file
|
@ -0,0 +1,41 @@
|
|||
package randomx
|
||||
|
||||
import (
|
||||
"encoding/hex"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func Test_CalculateCommitment(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
cache, err := NewCache(GetFlags())
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer cache.Close()
|
||||
|
||||
test := Tests[1]
|
||||
|
||||
cache.Init(test.key)
|
||||
|
||||
vm, err := NewVM(GetFlags(), cache, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer vm.Close()
|
||||
|
||||
var outputHash [RANDOMX_HASH_SIZE]byte
|
||||
|
||||
vm.CalculateHash(test.input, &outputHash)
|
||||
CalculateCommitment(test.input, &outputHash, &outputHash)
|
||||
|
||||
outputHex := hex.EncodeToString(outputHash[:])
|
||||
|
||||
expected := "d53ccf348b75291b7be76f0a7ac8208bbced734b912f6fca60539ab6f86be919"
|
||||
|
||||
if expected != outputHex {
|
||||
t.Errorf("key=%v, input=%v", test.key, test.input)
|
||||
t.Errorf("expected=%s, actual=%s", expected, outputHex)
|
||||
t.FailNow()
|
||||
}
|
||||
}
|
16
config.go
16
config.go
|
@ -29,7 +29,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
package randomx
|
||||
|
||||
import "git.gammaspectra.live/P2Pool/go-randomx/v2/argon2"
|
||||
import (
|
||||
"git.gammaspectra.live/P2Pool/go-randomx/v3/internal/argon2"
|
||||
"git.gammaspectra.live/P2Pool/go-randomx/v3/internal/memory"
|
||||
)
|
||||
|
||||
// see reference configuration.h
|
||||
// Cache size in KiB. Must be a power of 2.
|
||||
|
@ -81,7 +84,9 @@ const RANDOMX_JUMP_BITS = 8
|
|||
// Jump condition mask offset in bits. The sum of RANDOMX_JUMP_BITS and RANDOMX_JUMP_OFFSET must not exceed 16.
|
||||
const RANDOMX_JUMP_OFFSET = 8
|
||||
|
||||
const DATASETEXTRAITEMS = RANDOMX_DATASET_EXTRA_SIZE / RANDOMX_DATASET_ITEM_SIZE
|
||||
const RANDOMX_HASH_SIZE = 32
|
||||
|
||||
const DatasetExtraItems = RANDOMX_DATASET_EXTRA_SIZE / RANDOMX_DATASET_ITEM_SIZE
|
||||
|
||||
const SuperscalarMaxSize = 3*RANDOMX_SUPERSCALAR_LATENCY + 2
|
||||
const RANDOMX_DATASET_ITEM_SIZE uint64 = 64
|
||||
|
@ -106,9 +111,10 @@ const CONDITIONOFFSET = RANDOMX_JUMP_OFFSET
|
|||
const CONDITIONMASK = (1 << RANDOMX_JUMP_BITS) - 1
|
||||
const STOREL3CONDITION = 14
|
||||
|
||||
const RANDOMX_FLAG_DEFAULT = uint64(0)
|
||||
const RANDOMX_FLAG_JIT = uint64(1 << iota)
|
||||
|
||||
// isZeroOrPowerOf2 reports whether x has at most one bit set, i.e. whether it
// is zero or an exact power of two.
func isZeroOrPowerOf2(x uint32) bool {
	// x-1 flips the lowest set bit and everything below it, so the AND clears
	// that bit; nothing remains only when x had zero or one bit set.
	withoutLowestBit := x & (x - 1)
	return withoutLowestBit == 0
}
|
||||
|
||||
// largePageAllocator is the allocator used for RANDOMX_FLAG_LARGE_PAGES
// allocations. NewDataset treats a nil value as "huge pages not supported".
var largePageAllocator = memory.NewLargePageAllocator()

// pageAllocator is the allocator that SuperScalarProgramFunc and VMProgramFunc
// buffers are released to on Close.
var pageAllocator = memory.NewPageAllocator()

// cacheLineAlignedAllocator is constructed with CacheLineSize as its alignment
// argument; it is used for datasets that do not request large pages.
var cacheLineAlignedAllocator = memory.NewAlignedAllocator(CacheLineSize)
|
||||
|
|
112
dataset.go
112
dataset.go
|
@ -1,7 +1,111 @@
|
|||
package randomx
|
||||
|
||||
type Randomx_Dataset interface {
|
||||
InitDataset(startItem, endItem uint64)
|
||||
ReadDataset(address uint64, r, cache *RegisterLine)
|
||||
PrefetchDataset(address uint64)
|
||||
import (
|
||||
"errors"
|
||||
"git.gammaspectra.live/P2Pool/go-randomx/v3/internal/memory"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// DatasetSize is the total dataset size: base size plus the extra size.
const DatasetSize = RANDOMX_DATASET_BASE_SIZE + RANDOMX_DATASET_EXTRA_SIZE

// DatasetItemCount is the number of CacheLineSize-sized items in a dataset.
const DatasetItemCount = DatasetSize / CacheLineSize

// Dataset holds the memory of a full RandomX dataset.
type Dataset struct {
	// memory holds DatasetItemCount items, allocated by NewDataset.
	memory []RegisterLine
	// flags are the flags passed to NewDataset; Close consults them to pick
	// the allocator the memory is returned to.
	flags Flags
}
|
||||
|
||||
// NewDataset Creates a randomx_dataset structure and allocates memory for RandomX Dataset.
|
||||
// Only one flag is supported (can be set or not set): RANDOMX_FLAG_LARGE_PAGES - allocate memory in large pages
|
||||
// Returns nil if allocation fails
|
||||
func NewDataset(flags Flags) (result *Dataset, err error) {
|
||||
defer func() {
|
||||
//catch too large memory allocation or unable to allocate, for example on 32-bit targets or out of memory
|
||||
if r := recover(); r != nil {
|
||||
result = nil
|
||||
if e, ok := r.(error); ok && e != nil {
|
||||
err = e
|
||||
} else {
|
||||
err = errors.New("out of memory")
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
var alignedMemory []RegisterLine
|
||||
|
||||
if flags.Has(RANDOMX_FLAG_LARGE_PAGES) {
|
||||
if largePageAllocator == nil {
|
||||
return nil, errors.New("huge pages not supported")
|
||||
}
|
||||
alignedMemory, err = memory.AllocateSlice[RegisterLine](largePageAllocator, DatasetItemCount)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
} else {
|
||||
alignedMemory, err = memory.AllocateSlice[RegisterLine](cacheLineAlignedAllocator, DatasetItemCount)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return &Dataset{
|
||||
memory: alignedMemory,
|
||||
flags: flags,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// prefetchDataset currently does nothing; the body is intentionally empty.
func (d *Dataset) prefetchDataset(address uint64) {

}
|
||||
|
||||
func (d *Dataset) readDataset(address uint64, r *RegisterLine) {
|
||||
cache := &d.memory[address/CacheLineSize]
|
||||
|
||||
for i := range r {
|
||||
r[i] ^= cache[i]
|
||||
}
|
||||
}
|
||||
|
||||
// Memory returns the internal memory buffer of the dataset as a slice of
// items. The slice is not copied, so it shares storage with the dataset.
// NewDataset allocates it with DatasetItemCount items.
func (d *Dataset) Memory() []RegisterLine {
	return d.memory
}
|
||||
|
||||
func (d *Dataset) InitDataset(cache *Cache, startItem, itemCount uint64) {
|
||||
if startItem >= DatasetItemCount || itemCount > DatasetItemCount {
|
||||
panic("out of range")
|
||||
}
|
||||
if startItem+itemCount > DatasetItemCount {
|
||||
panic("out of range")
|
||||
}
|
||||
cache.datasetInit(d.memory[startItem:startItem+itemCount], startItem, startItem+itemCount)
|
||||
}
|
||||
|
||||
func (d *Dataset) Close() error {
|
||||
if d.flags.Has(RANDOMX_FLAG_LARGE_PAGES) {
|
||||
return memory.FreeSlice(largePageAllocator, d.memory)
|
||||
} else {
|
||||
return memory.FreeSlice(cacheLineAlignedAllocator, d.memory)
|
||||
}
|
||||
}
|
||||
|
||||
func (d *Dataset) InitDatasetParallel(cache *Cache, n int) {
|
||||
n = max(1, n)
|
||||
|
||||
var wg sync.WaitGroup
|
||||
for i := uint64(1); i < uint64(n); i++ {
|
||||
a := (DatasetItemCount * i) / uint64(n)
|
||||
b := (DatasetItemCount * (i + 1)) / uint64(n)
|
||||
|
||||
wg.Add(1)
|
||||
go func(a, b uint64) {
|
||||
defer wg.Done()
|
||||
d.InitDataset(cache, a, b-a)
|
||||
}(a, b)
|
||||
}
|
||||
|
||||
d.InitDataset(cache, 0, DatasetItemCount/uint64(n))
|
||||
wg.Wait()
|
||||
}
|
||||
|
|
|
@ -1,26 +0,0 @@
|
|||
package randomx
|
||||
|
||||
type Randomx_DatasetLight struct {
|
||||
Cache *Randomx_Cache
|
||||
Memory []uint64
|
||||
}
|
||||
|
||||
func (d *Randomx_DatasetLight) PrefetchDataset(address uint64) {
|
||||
|
||||
}
|
||||
|
||||
func (d *Randomx_DatasetLight) ReadDataset(address uint64, r, cache *RegisterLine) {
|
||||
if d.Cache.HasJIT() {
|
||||
d.Cache.InitDatasetItemJIT(cache, address/CacheLineSize)
|
||||
} else {
|
||||
d.Cache.InitDatasetItem(cache, address/CacheLineSize)
|
||||
}
|
||||
|
||||
for i := range r {
|
||||
r[i] ^= cache[i]
|
||||
}
|
||||
}
|
||||
|
||||
func (d *Randomx_DatasetLight) InitDataset(startItem, endItem uint64) {
|
||||
//d.Cache.initDataset(d.Cache.Programs)
|
||||
}
|
14
exec.go
14
exec.go
|
@ -1,3 +1,15 @@
|
|||
package randomx
|
||||
|
||||
type ProgramFunc []byte
|
||||
import "git.gammaspectra.live/P2Pool/go-randomx/v3/internal/memory"
|
||||
|
||||
type SuperScalarProgramFunc []byte
|
||||
|
||||
type VMProgramFunc []byte
|
||||
|
||||
// Close releases the byte buffer backing f through pageAllocator and returns
// the error reported by memory.FreeSlice, if any.
func (f SuperScalarProgramFunc) Close() error {
	return memory.FreeSlice(pageAllocator, f)
}
|
||||
|
||||
// Close releases the byte buffer backing f through pageAllocator and returns
// the error reported by memory.FreeSlice, if any.
func (f VMProgramFunc) Close() error {
	return memory.FreeSlice(pageAllocator, f)
}
|
||||
|
|
|
@ -1,11 +0,0 @@
|
|||
//go:build !unix || disable_jit || purego
|
||||
|
||||
package randomx
|
||||
|
||||
func (f ProgramFunc) Execute(rl *RegisterLine) {
|
||||
|
||||
}
|
||||
|
||||
func (f ProgramFunc) Close() error {
|
||||
return nil
|
||||
}
|
|
@ -1,50 +0,0 @@
|
|||
//go:build unix && !disable_jit && !purego
|
||||
|
||||
package randomx
|
||||
|
||||
import (
|
||||
"golang.org/x/sys/unix"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
func (f ProgramFunc) Execute(rl *RegisterLine) {
|
||||
if f == nil {
|
||||
panic("program is nil")
|
||||
}
|
||||
memoryPtr := &f
|
||||
fun := *(*func(rl *RegisterLine))(unsafe.Pointer(&memoryPtr))
|
||||
|
||||
fun(rl)
|
||||
}
|
||||
|
||||
func (f ProgramFunc) Close() error {
|
||||
return unix.Munmap(f)
|
||||
}
|
||||
|
||||
func mapProgram(program []byte) ProgramFunc {
|
||||
// Write only
|
||||
execFunc, err := unix.Mmap(-1, 0, len(program), unix.PROT_WRITE, unix.MAP_PRIVATE|unix.MAP_ANONYMOUS)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
// Introduce machine code into the memory region
|
||||
copy(execFunc, program)
|
||||
|
||||
// uphold W^X
|
||||
|
||||
// Read and Exec only
|
||||
err = unix.Mprotect(execFunc, unix.PROT_READ|unix.PROT_EXEC)
|
||||
if err != nil {
|
||||
defer func() {
|
||||
// unmap if we err
|
||||
err := unix.Munmap(execFunc)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}()
|
||||
panic(err)
|
||||
}
|
||||
|
||||
return execFunc
|
||||
}
|
68
flags.go
Normal file
68
flags.go
Normal file
|
@ -0,0 +1,68 @@
|
|||
package randomx
|
||||
|
||||
import (
|
||||
"git.gammaspectra.live/P2Pool/go-randomx/v3/internal/aes"
|
||||
"golang.org/x/sys/cpu"
|
||||
"runtime"
|
||||
)
|
||||
|
||||
// Flags is a bit set of RANDOMX_FLAG_* options.
type Flags uint64

// Has reports whether every bit set in flags is also set in f.
// An empty flags argument is always contained.
func (f Flags) Has(flags Flags) bool {
	// Bits requested but absent from f survive the AND NOT.
	missing := flags &^ f
	return missing == 0
}
|
||||
|
||||
func (f Flags) HasJIT() bool {
|
||||
return f.Has(RANDOMX_FLAG_JIT) && supportsJIT
|
||||
}
|
||||
|
||||
// RANDOMX_FLAG_DEFAULT is the empty flag set.
const RANDOMX_FLAG_DEFAULT Flags = 0

// Individual flag bits; each constant is the next power of two (1 << iota).
const (
	// RANDOMX_FLAG_LARGE_PAGES Select large page allocation for dataset
	RANDOMX_FLAG_LARGE_PAGES = Flags(1 << iota)
	// RANDOMX_FLAG_HARD_AES Selects between hardware or software AES
	RANDOMX_FLAG_HARD_AES
	// RANDOMX_FLAG_FULL_MEM Selects between full or light mode dataset
	RANDOMX_FLAG_FULL_MEM
	// RANDOMX_FLAG_JIT Enables JIT features
	RANDOMX_FLAG_JIT
	// RANDOMX_FLAG_SECURE Enables W^X for JIT code
	RANDOMX_FLAG_SECURE
	// RANDOMX_FLAG_ARGON2_SSSE3 is set by GetFlags when the CPU reports SSSE3.
	RANDOMX_FLAG_ARGON2_SSSE3
	// RANDOMX_FLAG_ARGON2_AVX2 is set by GetFlags when the CPU reports AVX2.
	RANDOMX_FLAG_ARGON2_AVX2
	// RANDOMX_FLAG_ARGON2 is the mask of both Argon2 flags above.
	RANDOMX_FLAG_ARGON2 = RANDOMX_FLAG_ARGON2_AVX2 | RANDOMX_FLAG_ARGON2_SSSE3
)
|
||||
|
||||
// GetFlags The recommended flags to be used on the current machine.
|
||||
// Does not include:
|
||||
// * RANDOMX_FLAG_LARGE_PAGES
|
||||
// * RANDOMX_FLAG_FULL_MEM
|
||||
// * RANDOMX_FLAG_SECURE
|
||||
// These flags must be added manually if desired.
|
||||
//
|
||||
// On OpenBSD RANDOMX_FLAG_SECURE is enabled by default in JIT mode as W^X is enforced by the OS.
|
||||
func GetFlags() (flags Flags) {
|
||||
flags = RANDOMX_FLAG_DEFAULT
|
||||
if runtime.GOARCH == "amd64" {
|
||||
flags |= RANDOMX_FLAG_JIT
|
||||
|
||||
if aes.HasHardAESImplementation && cpu.X86.HasAES {
|
||||
flags |= RANDOMX_FLAG_HARD_AES
|
||||
}
|
||||
|
||||
if cpu.X86.HasSSSE3 {
|
||||
flags |= RANDOMX_FLAG_ARGON2_SSSE3
|
||||
}
|
||||
|
||||
if cpu.X86.HasAVX2 {
|
||||
flags |= RANDOMX_FLAG_ARGON2_AVX2
|
||||
}
|
||||
}
|
||||
|
||||
if runtime.GOOS == "openbsd" || runtime.GOOS == "netbsd" || ((runtime.GOOS == "darwin" || runtime.GOOS == "ios") && runtime.GOARCH == "arm64") {
|
||||
flags |= RANDOMX_FLAG_SECURE
|
||||
}
|
||||
|
||||
return flags
|
||||
}
|
2
go.mod
2
go.mod
|
@ -1,4 +1,4 @@
|
|||
module git.gammaspectra.live/P2Pool/go-randomx/v2
|
||||
module git.gammaspectra.live/P2Pool/go-randomx/v3
|
||||
|
||||
go 1.21
|
||||
|
||||
|
|
69
internal/aes/hard_amd64.go
Normal file
69
internal/aes/hard_amd64.go
Normal file
|
@ -0,0 +1,69 @@
|
|||
//go:build amd64 && !purego
|
||||
|
||||
package aes
|
||||
|
||||
import (
|
||||
"git.gammaspectra.live/P2Pool/go-randomx/v3/internal/asm"
|
||||
"git.gammaspectra.live/P2Pool/go-randomx/v3/internal/keys"
|
||||
"golang.org/x/sys/cpu"
|
||||
"runtime"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
const HasHardAESImplementation = true
|
||||
|
||||
type hardAES struct {
|
||||
}
|
||||
|
||||
func NewHardAES() AES {
|
||||
if cpu.X86.HasAES {
|
||||
return hardAES{}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (aes hardAES) HashAes1Rx4(input []byte, output *[64]byte) {
|
||||
if len(input)%len(output) != 0 {
|
||||
panic("unsupported")
|
||||
}
|
||||
|
||||
asm.HashAes1Rx4(&keys.AesHash1R_State, &keys.AesHash1R_XKeys, output, unsafe.SliceData(input), uint64(len(input)))
|
||||
}
|
||||
|
||||
func (aes hardAES) FillAes1Rx4(state *[64]byte, output []byte) {
|
||||
if len(output)%len(state) != 0 {
|
||||
panic("unsupported")
|
||||
}
|
||||
|
||||
// Reference to state without copying
|
||||
states := (*[4][4]uint32)(unsafe.Pointer(state))
|
||||
asm.FillAes1Rx4(states, &keys.AesGenerator1R_Keys, unsafe.SliceData(output), uint64(len(output)))
|
||||
runtime.KeepAlive(state)
|
||||
}
|
||||
|
||||
func (aes hardAES) FillAes4Rx4(state [64]byte, output []byte) {
|
||||
if len(output)%len(state) != 0 {
|
||||
panic("unsupported")
|
||||
}
|
||||
|
||||
// state is copied on caller
|
||||
|
||||
// Copy state
|
||||
states := (*[4][4]uint32)(unsafe.Pointer(&state))
|
||||
|
||||
for outptr := 0; outptr < len(output); outptr += len(state) {
|
||||
asm.AESRoundTrip_DecEnc(states, &fillAes4Rx4Keys0)
|
||||
asm.AESRoundTrip_DecEnc(states, &fillAes4Rx4Keys1)
|
||||
asm.AESRoundTrip_DecEnc(states, &fillAes4Rx4Keys2)
|
||||
asm.AESRoundTrip_DecEnc(states, &fillAes4Rx4Keys3)
|
||||
|
||||
copy(output[outptr:], state[:])
|
||||
}
|
||||
}
|
||||
|
||||
// HashAndFillAes1Rx4 hashes scratchpad into output and then refills scratchpad
// from fillState. The hash must run first because the fill overwrites the
// scratchpad contents.
func (aes hardAES) HashAndFillAes1Rx4(scratchpad []byte, output *[64]byte, fillState *[64]byte) {
	//TODO
	// Currently two separate passes over the scratchpad.
	aes.HashAes1Rx4(scratchpad, output)
	aes.FillAes1Rx4(fillState, scratchpad)
}
|
9
internal/aes/hard_generic.go
Normal file
9
internal/aes/hard_generic.go
Normal file
|
@ -0,0 +1,9 @@
|
|||
//go:build !amd64 || purego
|
||||
|
||||
package aes
|
||||
|
||||
const HasHardAESImplementation = false
|
||||
|
||||
// NewHardAES always returns nil in this build (non-amd64 or purego), where
// HasHardAESImplementation is false.
func NewHardAES() AES {
	return nil
}
|
59
internal/aes/hash.go
Normal file
59
internal/aes/hash.go
Normal file
|
@ -0,0 +1,59 @@
|
|||
/*
|
||||
Copyright (c) 2019 DERO Foundation. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
3. Neither the name of the copyright holder nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software without
|
||||
specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
package aes
|
||||
|
||||
import (
|
||||
"git.gammaspectra.live/P2Pool/go-randomx/v3/internal/keys"
|
||||
)
|
||||
|
||||
// fillAes4Rx4Keys0..3 are the per-round key tables used by FillAes4Rx4.
// Table N holds one key per state lane: lanes 0 and 1 use
// AesGenerator4R_Keys[N], lanes 2 and 3 use AesGenerator4R_Keys[N+4].

// fillAes4Rx4Keys0 is the key table of the first round trip.
var fillAes4Rx4Keys0 = [4][4]uint32{
	keys.AesGenerator4R_Keys[0],
	keys.AesGenerator4R_Keys[0],
	keys.AesGenerator4R_Keys[4],
	keys.AesGenerator4R_Keys[4],
}

// fillAes4Rx4Keys1 is the key table of the second round trip.
var fillAes4Rx4Keys1 = [4][4]uint32{
	keys.AesGenerator4R_Keys[1],
	keys.AesGenerator4R_Keys[1],
	keys.AesGenerator4R_Keys[5],
	keys.AesGenerator4R_Keys[5],
}

// fillAes4Rx4Keys2 is the key table of the third round trip.
var fillAes4Rx4Keys2 = [4][4]uint32{
	keys.AesGenerator4R_Keys[2],
	keys.AesGenerator4R_Keys[2],
	keys.AesGenerator4R_Keys[6],
	keys.AesGenerator4R_Keys[6],
}

// fillAes4Rx4Keys3 is the key table of the fourth round trip.
var fillAes4Rx4Keys3 = [4][4]uint32{
	keys.AesGenerator4R_Keys[3],
	keys.AesGenerator4R_Keys[3],
	keys.AesGenerator4R_Keys[7],
	keys.AesGenerator4R_Keys[7],
}
|
38
internal/aes/impl.go
Normal file
38
internal/aes/impl.go
Normal file
|
@ -0,0 +1,38 @@
|
|||
package aes
|
||||
|
||||
// AES is the set of AES-based primitives needed by the hashing code. It is
// implemented by a software backend and, where available, a hardware backend.
type AES interface {

	// HashAes1Rx4
	//
	// Calculate a 512-bit hash of 'input' using 4 lanes of AES.
	// The input is treated as a set of round keys for the encryption
	// of the initial state.
	//
	// 'input' size must be a multiple of 64.
	//
	// For a 2 MiB input, this has the same security as 32768-round
	// AES encryption.
	//
	// Hashing throughput: >20 GiB/s per CPU core with hardware AES
	HashAes1Rx4(input []byte, output *[64]byte)

	// FillAes1Rx4
	//
	// Fill 'output' with pseudorandom data based on 512-bit 'state'.
	// The state is encrypted using a single AES round per 16 bytes of output
	// in 4 lanes.
	//
	// 'output' size must be a multiple of 64.
	//
	// The modified state is written back to 'state' to allow multiple
	// calls to this function.
	FillAes1Rx4(state *[64]byte, output []byte)

	// HashAndFillAes1Rx4 hashes 'scratchpad' into 'output' and refills
	// 'scratchpad' from 'fillState' in one call.
	HashAndFillAes1Rx4(scratchpad []byte, output *[64]byte, fillState *[64]byte)

	// FillAes4Rx4 used to generate final program
	//
	// 'state' is passed by value, so the caller's state is not modified.
	//
	// 'output' size must be a multiple of 64.
	FillAes4Rx4(state [64]byte, output []byte)
}
|
|
@ -29,3 +29,24 @@ func soft_aesdec(state *[4]uint32, key *[4]uint32) {
|
|||
state[2] = key[2] ^ td0[uint8(s2)] ^ td1[uint8(s1>>8)] ^ td2[uint8(s0>>16)] ^ td3[uint8(s3>>24)]
|
||||
state[3] = key[3] ^ td0[uint8(s3)] ^ td1[uint8(s2>>8)] ^ td2[uint8(s1>>16)] ^ td3[uint8(s0>>24)]
|
||||
}
|
||||
|
||||
// soft_aesroundtrip_decenc applies one software AES round to each of the four
// lanes, each with its own key: lanes 0 and 2 are decrypted, lanes 1 and 3
// are encrypted.
func soft_aesroundtrip_decenc(states *[4][4]uint32, keys *[4][4]uint32) {
	soft_aesdec(&states[0], &keys[0])
	soft_aesenc(&states[1], &keys[1])
	soft_aesdec(&states[2], &keys[2])
	soft_aesenc(&states[3], &keys[3])
}
|
||||
|
||||
// soft_aesroundtrip_encdec applies one software AES round to each of the four
// lanes, each with its own key: lanes 0 and 2 are encrypted, lanes 1 and 3
// are decrypted.
func soft_aesroundtrip_encdec(states *[4][4]uint32, keys *[4][4]uint32) {
	soft_aesenc(&states[0], &keys[0])
	soft_aesdec(&states[1], &keys[1])
	soft_aesenc(&states[2], &keys[2])
	soft_aesdec(&states[3], &keys[3])
}
|
||||
|
||||
// soft_aesroundtrip_encdec1 applies one software AES round to each of the four
// lanes using the same key for all of them: lanes 0 and 2 are encrypted,
// lanes 1 and 3 are decrypted.
func soft_aesroundtrip_encdec1(states *[4][4]uint32, key *[4]uint32) {
	soft_aesenc(&states[0], key)
	soft_aesdec(&states[1], key)
	soft_aesenc(&states[2], key)
	soft_aesdec(&states[3], key)
}
|
75
internal/aes/soft.go
Normal file
75
internal/aes/soft.go
Normal file
|
@ -0,0 +1,75 @@
|
|||
package aes
|
||||
|
||||
import (
|
||||
"git.gammaspectra.live/P2Pool/go-randomx/v3/internal/keys"
|
||||
"runtime"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
type softAES struct {
|
||||
}
|
||||
|
||||
// NewSoftAES returns the software AES implementation. Unlike NewHardAES it
// never returns nil.
func NewSoftAES() AES {
	return softAES{}
}
|
||||
|
||||
func (aes softAES) HashAes1Rx4(input []byte, output *[64]byte) {
|
||||
if len(input)%len(output) != 0 {
|
||||
panic("unsupported")
|
||||
}
|
||||
// states are copied
|
||||
states := (*[4][4]uint32)(unsafe.Pointer(output))
|
||||
*states = keys.AesHash1R_State
|
||||
|
||||
for input_ptr := 0; input_ptr < len(input); input_ptr += 64 {
|
||||
in := (*[4][4]uint32)(unsafe.Pointer(unsafe.SliceData(input[input_ptr:])))
|
||||
|
||||
soft_aesroundtrip_encdec(states, in)
|
||||
}
|
||||
|
||||
soft_aesroundtrip_encdec1(states, &keys.AesHash1R_XKeys[0])
|
||||
|
||||
soft_aesroundtrip_encdec1(states, &keys.AesHash1R_XKeys[1])
|
||||
|
||||
runtime.KeepAlive(output)
|
||||
}
|
||||
|
||||
func (aes softAES) FillAes1Rx4(state *[64]byte, output []byte) {
|
||||
if len(output)%len(state) != 0 {
|
||||
panic("unsupported")
|
||||
}
|
||||
// Reference to state without copying
|
||||
states := (*[4][4]uint32)(unsafe.Pointer(state))
|
||||
|
||||
for outptr := 0; outptr < len(output); outptr += len(state) {
|
||||
soft_aesroundtrip_decenc(states, &keys.AesGenerator1R_Keys)
|
||||
|
||||
copy(output[outptr:], state[:])
|
||||
}
|
||||
}
|
||||
|
||||
func (aes softAES) FillAes4Rx4(state [64]byte, output []byte) {
|
||||
if len(output)%len(state) != 0 {
|
||||
panic("unsupported")
|
||||
}
|
||||
|
||||
// state is copied on caller
|
||||
|
||||
// Copy state
|
||||
states := (*[4][4]uint32)(unsafe.Pointer(&state))
|
||||
|
||||
for outptr := 0; outptr < len(output); outptr += len(state) {
|
||||
soft_aesroundtrip_decenc(states, &fillAes4Rx4Keys0)
|
||||
soft_aesroundtrip_decenc(states, &fillAes4Rx4Keys1)
|
||||
soft_aesroundtrip_decenc(states, &fillAes4Rx4Keys2)
|
||||
soft_aesroundtrip_decenc(states, &fillAes4Rx4Keys3)
|
||||
|
||||
copy(output[outptr:], state[:])
|
||||
}
|
||||
}
|
||||
|
||||
// HashAndFillAes1Rx4 hashes scratchpad into output and then refills scratchpad
// from fillState. The hash must run first because the fill overwrites the
// scratchpad contents.
func (aes softAES) HashAndFillAes1Rx4(scratchpad []byte, output *[64]byte, fillState *[64]byte) {
	//TODO
	// Currently two separate passes over the scratchpad.
	aes.HashAes1Rx4(scratchpad, output)
	aes.FillAes1Rx4(fillState, scratchpad)
}
|
76
internal/argon2/argon2.go
Normal file
76
internal/argon2/argon2.go
Normal file
|
@ -0,0 +1,76 @@
|
|||
package argon2
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"golang.org/x/crypto/blake2b"
|
||||
)
|
||||
|
||||
import (
|
||||
_ "golang.org/x/crypto/argon2"
|
||||
_ "unsafe"
|
||||
)
|
||||
|
||||
const BlockSize uint32 = 1024
|
||||
|
||||
type Block [BlockSize / 8]uint64
|
||||
|
||||
const syncPoints = 4
|
||||
|
||||
// The three declarations below have no Go body: each is bound via go:linkname
// to the unexported function of the same name in golang.org/x/crypto/argon2.
// NOTE(review): the signatures must stay in sync with that package — confirm
// whenever the dependency is upgraded.

// initHash is linked to golang.org/x/crypto/argon2.initHash.
//
//go:linkname initHash golang.org/x/crypto/argon2.initHash
func initHash(password, salt, key, data []byte, time, memory, threads, keyLen uint32, mode int) [blake2b.Size + 8]byte

// processBlocks is linked to golang.org/x/crypto/argon2.processBlocks.
//
//go:linkname processBlocks golang.org/x/crypto/argon2.processBlocks
func processBlocks(B []Block, time, memory, threads uint32, mode int)

// blake2bHash is linked to golang.org/x/crypto/argon2.blake2bHash.
//
//go:linkname blake2bHash golang.org/x/crypto/argon2.blake2bHash
func blake2bHash(out []byte, in []byte)
|
||||
|
||||
// initBlocks From golang.org/x/crypto/argon2.initBlocks with external memory allocation
|
||||
func initBlocks(B []Block, h0 *[blake2b.Size + 8]byte, memory, threads uint32) {
|
||||
var block0 [1024]byte
|
||||
|
||||
clear(B)
|
||||
|
||||
for lane := uint32(0); lane < threads; lane++ {
|
||||
j := lane * (memory / threads)
|
||||
binary.LittleEndian.PutUint32(h0[blake2b.Size+4:], lane)
|
||||
|
||||
binary.LittleEndian.PutUint32(h0[blake2b.Size:], 0)
|
||||
blake2bHash(block0[:], h0[:])
|
||||
for i := range B[j+0] {
|
||||
B[j+0][i] = binary.LittleEndian.Uint64(block0[i*8:])
|
||||
}
|
||||
|
||||
binary.LittleEndian.PutUint32(h0[blake2b.Size:], 1)
|
||||
blake2bHash(block0[:], h0[:])
|
||||
for i := range B[j+1] {
|
||||
B[j+1][i] = binary.LittleEndian.Uint64(block0[i*8:])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// BuildBlocks From golang.org/x/crypto/argon2.deriveKey without last deriveKey call and external memory allocation
|
||||
func BuildBlocks(B []Block, password, salt []byte, time, memory uint32, threads uint8) {
|
||||
if time < 1 {
|
||||
panic("argon2: number of rounds too small")
|
||||
}
|
||||
if threads < 1 {
|
||||
panic("argon2: parallelism degree too low")
|
||||
}
|
||||
|
||||
if len(B) != int(memory) {
|
||||
panic("argon2: invalid block size")
|
||||
}
|
||||
|
||||
const mode = 0 /* argon2d */
|
||||
const keyLen = 0
|
||||
h0 := initHash(password, salt, nil, nil, time, memory, uint32(threads), keyLen, mode)
|
||||
|
||||
memory = memory / (syncPoints * uint32(threads)) * (syncPoints * uint32(threads))
|
||||
if memory < 2*syncPoints*uint32(threads) {
|
||||
memory = 2 * syncPoints * uint32(threads)
|
||||
}
|
||||
|
||||
initBlocks(B, &h0, memory, uint32(threads))
|
||||
processBlocks(B, time, memory, uint32(threads), mode)
|
||||
}
|
18
internal/asm/aes_amd64.go
Normal file
18
internal/asm/aes_amd64.go
Normal file
|
@ -0,0 +1,18 @@
|
|||
//go:build amd64 && !purego
|
||||
|
||||
package asm
|
||||
|
||||
// FillAes1Rx4 is implemented in assembly. It repeatedly applies one AES round
// per lane to states (decrypt lanes 0 and 2, encrypt lanes 1 and 3) and writes
// each resulting 64-byte state to output, advancing 64 bytes at a time until
// outputLen bytes are produced. The final state is stored back into states.
//
//go:noescape
func FillAes1Rx4(states *[4][4]uint32, keys *[4][4]uint32, output *byte, outputLen uint64)

// HashAes1Rx4 is implemented in assembly. Starting from initialState, it uses
// each 64-byte chunk of input as four round keys (encrypt lanes 0 and 2,
// decrypt lanes 1 and 3), then applies both xKeys to all lanes and stores the
// 64-byte result in output.
//
//go:noescape
func HashAes1Rx4(initialState *[4][4]uint32, xKeys *[2][4]uint32, output *[64]byte, input *byte, inputLen uint64)

// AESRoundTrip_DecEnc applies one AES round to each lane of states in place,
// with per-lane keys: decrypt lanes 0 and 2, encrypt lanes 1 and 3.
//
//go:noescape
func AESRoundTrip_DecEnc(states *[4][4]uint32, keys *[4][4]uint32)

// AESRoundTrip_EncDec applies one AES round to each lane of states in place,
// with per-lane keys: encrypt lanes 0 and 2, decrypt lanes 1 and 3.
//
//go:noescape
func AESRoundTrip_EncDec(states *[4][4]uint32, keys *[4][4]uint32)

// AESRoundTrip_EncDec1 applies one AES round to each lane of states in place,
// using key for all lanes: encrypt lanes 0 and 2, decrypt lanes 1 and 3.
//
//go:noescape
func AESRoundTrip_EncDec1(states *[4][4]uint32, key *[4]uint32)
|
172
internal/asm/aes_amd64.s
Normal file
172
internal/asm/aes_amd64.s
Normal file
|
@ -0,0 +1,172 @@
|
|||
//go:build amd64 && !purego
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// func FillAes1Rx4(states *[4][4]uint32, keys *[4][4]uint32, output *byte, outputLen uint64)
//
// Generates outputLen bytes (a multiple of 64) into output by repeatedly
// applying one AES round per lane to the state, then writes the final state
// back to states. A zero outputLen leaves both output and states untouched.
//
// Loads and stores use MOVOU (SSE2) rather than the VEX-encoded VMOVDQU, so
// the routine needs nothing beyond AES-NI and also runs on CPUs without AVX.
TEXT ·FillAes1Rx4(SB),NOSPLIT|NOFRAME,$0-32
	MOVQ states+0(FP), AX
	MOVQ keys+8(FP), BX
	MOVQ output+16(FP), CX
	MOVQ outputLen+24(FP), DX

	// nothing to generate: the loop below stores before it tests the counter
	TESTQ DX, DX
	JEQ done

	// initial state: X0-X3
	MOVOU 0(AX), X0
	MOVOU 16(AX), X1
	MOVOU 32(AX), X2
	MOVOU 48(AX), X3

	// keys: X4-X7
	MOVOU 0(BX), X4
	MOVOU 16(BX), X5
	MOVOU 32(BX), X6
	MOVOU 48(BX), X7

loop:
	AESDEC X4, X0
	AESENC X5, X1
	AESDEC X6, X2
	AESENC X7, X3

	// store state onto output
	MOVOU X0, 0(CX)
	MOVOU X1, 16(CX)
	MOVOU X2, 32(CX)
	MOVOU X3, 48(CX)
	ADDQ $64, CX

	// outputLen -= 64, continue if not 0
	SUBQ $64, DX
	JNE loop

	// write the advanced state back to the caller
	MOVOU X0, 0(AX)
	MOVOU X1, 16(AX)
	MOVOU X2, 32(AX)
	MOVOU X3, 48(AX)

done:
	RET
|
||||
|
||||
|
||||
// func HashAes1Rx4(initialState *[4][4]uint32, xKeys *[2][4]uint32, output *[64]byte, input *byte, inputLen uint64)
//
// Hashes inputLen bytes (a multiple of 64) of input: every 64-byte chunk is
// used as four round keys applied to the state, then both xKeys are applied to
// all lanes and the 64-byte result is stored in output. A zero inputLen skips
// straight to the finalization rounds.
//
// Loads and stores use MOVOU (SSE2) rather than the VEX-encoded VMOVDQU, so
// the routine needs nothing beyond AES-NI and also runs on CPUs without AVX.
TEXT ·HashAes1Rx4(SB),NOSPLIT|NOFRAME,$0-40
	MOVQ initialState+0(FP), AX

	// initial state: X0-X3
	MOVOU 0(AX), X0
	MOVOU 16(AX), X1
	MOVOU 32(AX), X2
	MOVOU 48(AX), X3

	MOVQ xKeys+8(FP), AX
	MOVQ output+16(FP), BX
	MOVQ input+24(FP), CX
	MOVQ inputLen+32(FP), DX

	// no input: the loop below loads before it tests the counter
	TESTQ DX, DX
	JEQ finalize

loop:
	// input as keys: X4-X7
	MOVOU 0(CX), X4
	MOVOU 16(CX), X5
	MOVOU 32(CX), X6
	MOVOU 48(CX), X7

	AESENC X4, X0
	AESDEC X5, X1
	AESENC X6, X2
	AESDEC X7, X3

	ADDQ $64, CX
	// inputLen -= 64, continue if not 0
	SUBQ $64, DX
	JNE loop

finalize:
	// do encdec1 with both keys!
	MOVOU 0(AX), X4
	MOVOU 16(AX), X5

	AESENC X4, X0
	AESDEC X4, X1
	AESENC X4, X2
	AESDEC X4, X3

	AESENC X5, X0
	AESDEC X5, X1
	AESENC X5, X2
	AESDEC X5, X3

	// store the result into output
	MOVOU X0, 0(BX)
	MOVOU X1, 16(BX)
	MOVOU X2, 32(BX)
	MOVOU X3, 48(BX)
	RET
|
||||
|
||||
// func AESRoundTrip_DecEnc(states *[4][4]uint32, keys *[4][4]uint32)
//
// Applies one AES round to each of the four lanes in place, each with its own
// key: lanes 0 and 2 are decrypted, lanes 1 and 3 are encrypted.
//
// Loads and stores use MOVOU (SSE2) rather than the VEX-encoded VMOVDQU, so
// the routine needs nothing beyond AES-NI and also runs on CPUs without AVX.
TEXT ·AESRoundTrip_DecEnc(SB),NOSPLIT|NOFRAME,$0-16
	MOVQ states+0(FP), AX
	MOVQ keys+8(FP), BX

	// lanes in X0/X2/X4/X6, matching keys in X1/X3/X5/X7
	MOVOU 0(AX), X0
	MOVOU 0(BX), X1
	MOVOU 16(AX), X2
	MOVOU 16(BX), X3
	MOVOU 32(AX), X4
	MOVOU 32(BX), X5
	MOVOU 48(AX), X6
	MOVOU 48(BX), X7

	AESDEC X1, X0
	AESENC X3, X2
	AESDEC X5, X4
	AESENC X7, X6

	MOVOU X0, 0(AX)
	MOVOU X2, 16(AX)
	MOVOU X4, 32(AX)
	MOVOU X6, 48(AX)
	RET
|
||||
|
||||
|
||||
// func AESRoundTrip_EncDec(states *[4][4]uint32, keys *[4][4]uint32)
//
// Applies one AES round to each of the four lanes in place, each with its own
// key: lanes 0 and 2 are encrypted, lanes 1 and 3 are decrypted.
//
// Loads and stores use MOVOU (SSE2) rather than the VEX-encoded VMOVDQU, so
// the routine needs nothing beyond AES-NI and also runs on CPUs without AVX.
TEXT ·AESRoundTrip_EncDec(SB),NOSPLIT|NOFRAME,$0-16
	MOVQ states+0(FP), AX
	MOVQ keys+8(FP), BX

	// lanes in X0/X2/X4/X6, matching keys in X1/X3/X5/X7
	MOVOU 0(AX), X0
	MOVOU 0(BX), X1
	MOVOU 16(AX), X2
	MOVOU 16(BX), X3
	MOVOU 32(AX), X4
	MOVOU 32(BX), X5
	MOVOU 48(AX), X6
	MOVOU 48(BX), X7

	AESENC X1, X0
	AESDEC X3, X2
	AESENC X5, X4
	AESDEC X7, X6

	MOVOU X0, 0(AX)
	MOVOU X2, 16(AX)
	MOVOU X4, 32(AX)
	MOVOU X6, 48(AX)
	RET
|
||||
|
||||
|
||||
// func AESRoundTrip_EncDec1(states *[4][4]uint32, key *[4]uint32)
//
// Applies one AES round to each of the four lanes in place using the same key
// for all of them: lanes 0 and 2 are encrypted, lanes 1 and 3 are decrypted.
//
// Loads and stores use MOVOU (SSE2) rather than the VEX-encoded VMOVDQU, so
// the routine needs nothing beyond AES-NI and also runs on CPUs without AVX.
TEXT ·AESRoundTrip_EncDec1(SB),NOSPLIT|NOFRAME,$0-16
	MOVQ states+0(FP), AX
	MOVQ key+8(FP), BX

	// shared key in X0, lanes in X1-X4
	MOVOU 0(BX), X0
	MOVOU 0(AX), X1
	MOVOU 16(AX), X2
	MOVOU 32(AX), X3
	MOVOU 48(AX), X4

	AESENC X0, X1
	AESDEC X0, X2
	AESENC X0, X3
	AESDEC X0, X4

	MOVOU X1, 0(AX)
	MOVOU X2, 16(AX)
	MOVOU X3, 32(AX)
	MOVOU X4, 48(AX)
	RET
|
||||
|
11
internal/asm/aes_noasm.go
Normal file
11
internal/asm/aes_noasm.go
Normal file
|
@ -0,0 +1,11 @@
|
|||
//go:build !amd64 || purego
|
||||
|
||||
package asm
|
||||
|
||||
// AESRoundEncrypt is a placeholder for builds without the amd64 assembly
// (non-amd64 or purego); it always panics with "not implemented".
func AESRoundEncrypt(state *[4]uint32, key *[4]uint32) {
	panic("not implemented")
}
|
||||
|
||||
// AESRoundDecrypt is a placeholder for builds without the amd64 assembly
// (non-amd64 or purego); it always panics with "not implemented".
func AESRoundDecrypt(state *[4]uint32, key *[4]uint32) {
	panic("not implemented")
}
|
5
internal/asm/cpuid_amd64.go
Normal file
5
internal/asm/cpuid_amd64.go
Normal file
|
@ -0,0 +1,5 @@
|
|||
//go:build amd64 && !purego
|
||||
|
||||
package asm
|
||||
|
||||
// Cpuid is implemented in assembly. It executes the CPUID instruction with
// EAX set to op and ECX set to zero, and returns the resulting EAX, EBX, ECX
// and EDX register values.
func Cpuid(op uint32) (eax, ebx, ecx, edx uint32)
|
15
internal/asm/cpuid_amd64.s
Normal file
15
internal/asm/cpuid_amd64.s
Normal file
|
@ -0,0 +1,15 @@
|
|||
//go:build amd64 && !purego
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// func Cpuid(op uint32) (eax, ebx, ecx, edx uint32)
//
// Executes CPUID for leaf op and returns the four result registers.
// The flag value 7 is the numeric form of NOPROF|DUPOK|NOSPLIT from textflag.h.
TEXT ·Cpuid(SB), 7, $0
	// clear CX first: ECX selects the sub-leaf for leaves that have one
	XORQ CX, CX
	MOVL op+0(FP), AX
	CPUID
	// results start at offset 8, after the 4-byte argument and its padding
	MOVL AX, eax+8(FP)
	MOVL BX, ebx+12(FP)
	MOVL CX, ecx+16(FP)
	MOVL DX, edx+20(FP)
	RET
|
||||
|
6
internal/asm/round_386.go
Normal file
6
internal/asm/round_386.go
Normal file
|
@ -0,0 +1,6 @@
|
|||
//go:build 386 && !purego
|
||||
|
||||
package asm
|
||||
|
||||
// setRoundingMode is implemented in assembly. It writes the low two bits of
// mode into bits 13-14 of the MXCSR register and leaves all other MXCSR bits
// unchanged.
//
//go:noescape
func setRoundingMode(mode uint8)
|
21
internal/asm/round_386.s
Normal file
21
internal/asm/round_386.s
Normal file
|
@ -0,0 +1,21 @@
|
|||
//go:build 386 && !purego
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// func setRoundingMode(mode uint8)
//
// Replaces the two rounding-mode bits (mask 0x6000) of MXCSR with the low two
// bits of mode. The argument is referenced by its Go parameter name so that
// `go vet` (asmdecl) accepts the declaration.
TEXT ·setRoundingMode(SB),NOSPLIT|NOFRAME,$4-1
	MOVB mode+0(FP), AX
	ANDL $3, AX       // keep only the two mode bits
	ROLL $13, AX      // move them to bits 13-14

	// get current MXCSR register
	PUSHL AX
	STMXCSR 0(SP)

	// put new rounding mode
	ANDL $~0x6000, 0(SP)
	ORL AX, 0(SP)

	// store new MXCSR register
	LDMXCSR 0(SP)
	POPL AX
	RET
|
6
internal/asm/round_amd64.go
Normal file
6
internal/asm/round_amd64.go
Normal file
|
@ -0,0 +1,6 @@
|
|||
//go:build amd64 && !purego
|
||||
|
||||
package asm
|
||||
|
||||
//go:noescape
|
||||
// setRoundingMode replaces the two rounding-mode bits (0x6000) of the MXCSR
// register with the low two bits of mode. Implemented in round_amd64.s.
func setRoundingMode(mode uint8)
|
21
internal/asm/round_amd64.s
Normal file
21
internal/asm/round_amd64.s
Normal file
|
@ -0,0 +1,21 @@
|
|||
//go:build amd64 && !purego
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// func setRoundingMode(mode uint8)
//
// Replaces the two rounding-mode bits (mask 0x6000) of MXCSR with the low two
// bits of mode. The argument is referenced by its Go parameter name so that
// `go vet` (asmdecl) accepts the declaration.
TEXT ·setRoundingMode(SB),NOSPLIT|NOFRAME,$8-1
	MOVB mode+0(FP), AX
	ANDQ $3, AX       // keep only the two mode bits
	ROLQ $13, AX      // move them to bits 13-14

	// get current MXCSR register
	PUSHQ AX
	STMXCSR 0(SP)

	// put new rounding mode
	ANDL $~0x6000, 0(SP)
	ORL AX, 0(SP)

	// store new MXCSR register
	LDMXCSR 0(SP)
	POPQ AX
	RET
|
23
internal/asm/round_arm.go
Normal file
23
internal/asm/round_arm.go
Normal file
|
@ -0,0 +1,23 @@
|
|||
//go:build (arm.6 || arm.7) && !purego
|
||||
|
||||
package asm
|
||||
|
||||
// getFPSCR returns the value of the FPSCR register.
|
||||
func getFPSCR() (value uint32)
|
||||
|
||||
// setFPSCR writes value to the FPSCR register.
|
||||
func setFPSCR(value uint32)
|
||||
|
||||
func setRoundingMode(mode uint8) {
|
||||
switch mode {
|
||||
// switch plus/minus infinity
|
||||
case 1:
|
||||
mode = 2
|
||||
case 2:
|
||||
mode = 1
|
||||
|
||||
}
|
||||
fpscr := getFPSCR()
|
||||
fpscr = (fpscr & (^uint32(0x0C00000))) | ((uint32(mode) & 3) << 22)
|
||||
setFPSCR(fpscr)
|
||||
}
|
13
internal/asm/round_arm.s
Normal file
13
internal/asm/round_arm.s
Normal file
|
@ -0,0 +1,13 @@
|
|||
//go:build (arm.6 || arm.7) && !purego
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// func getFPSCR() (value uint32)
//
// Reads FPSCR into R11 and returns it. The instruction is emitted as a raw
// WORD; its mnemonic is given in the trailing comment.
TEXT ·getFPSCR(SB),NOSPLIT,$0-4
	WORD $0xeef1ba10 // vmrs r11, fpscr
	MOVW R11, value+0(FP)
	RET
|
||||
|
||||
// func setFPSCR(value uint32)
//
// Loads value into R11 and writes it to FPSCR. The instruction is emitted as
// a raw WORD; its mnemonic is given in the trailing comment.
TEXT ·setFPSCR(SB),NOSPLIT,$0-4
	MOVW value+0(FP), R11
	WORD $0xeee1ba10 // vmsr fpscr, r11
	RET
|
|
@ -1,4 +1,4 @@
|
|||
//go:build (!arm64 && !amd64 && !386) || purego
|
||||
//go:build (!arm64 && !(arm.6 || arm.7) && !amd64 && !386) || purego
|
||||
|
||||
package asm
|
||||
|
46
internal/blake2/generator.go
Normal file
46
internal/blake2/generator.go
Normal file
|
@ -0,0 +1,46 @@
|
|||
package blake2
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"golang.org/x/crypto/blake2b"
|
||||
)
|
||||
|
||||
type Generator struct {
|
||||
state [blake2b.Size]byte
|
||||
i int
|
||||
}
|
||||
|
||||
func New(seed []byte, nonce uint32) *Generator {
|
||||
var state [blake2b.Size]byte
|
||||
copy(state[:60], seed)
|
||||
binary.LittleEndian.PutUint32(state[60:], nonce)
|
||||
g := &Generator{
|
||||
i: len(state),
|
||||
state: state,
|
||||
}
|
||||
|
||||
return g
|
||||
}
|
||||
|
||||
func (g *Generator) GetUint32() (v uint32) {
|
||||
if (g.i + 4) > len(g.state) {
|
||||
g.reseed()
|
||||
}
|
||||
v = binary.LittleEndian.Uint32(g.state[g.i:])
|
||||
g.i += 4
|
||||
return v
|
||||
}
|
||||
|
||||
func (g *Generator) GetByte() (v byte) {
|
||||
if (g.i + 1) > len(g.state) {
|
||||
g.reseed()
|
||||
}
|
||||
v = g.state[g.i]
|
||||
g.i++
|
||||
return v
|
||||
}
|
||||
|
||||
func (g *Generator) reseed() {
|
||||
g.state = blake2b.Sum512(g.state[:])
|
||||
g.i = 0
|
||||
}
|
32
internal/memory/aligned.go
Normal file
32
internal/memory/aligned.go
Normal file
|
@ -0,0 +1,32 @@
|
|||
package memory
|
||||
|
||||
import "unsafe"
|
||||
|
||||
type AlignedAllocator uint64
|
||||
|
||||
func NewAlignedAllocator(alignment uint64) Allocator {
|
||||
if !isZeroOrPowerOf2(alignment) {
|
||||
panic("alignment must be a power of 2")
|
||||
}
|
||||
return AlignedAllocator(alignment)
|
||||
}
|
||||
|
||||
func (a AlignedAllocator) AllocMemory(size uint64) ([]byte, error) {
|
||||
if a <= 4 {
|
||||
//slice allocations are 16-byte aligned, fast path
|
||||
return make([]byte, size, max(size, uint64(a))), nil
|
||||
}
|
||||
|
||||
memory := make([]byte, size+uint64(a))
|
||||
ptr := uintptr(unsafe.Pointer(unsafe.SliceData(memory)))
|
||||
align := uint64(a) - (uint64(ptr) & (uint64(a) - 1))
|
||||
if align == uint64(a) {
|
||||
return memory[:size:size], nil
|
||||
}
|
||||
return memory[align : align+size : align+size], nil
|
||||
}
|
||||
|
||||
func (a AlignedAllocator) FreeMemory(memory []byte) error {
|
||||
//let gc free
|
||||
return nil
|
||||
}
|
45
internal/memory/alloc.go
Normal file
45
internal/memory/alloc.go
Normal file
|
@ -0,0 +1,45 @@
|
|||
package memory
|
||||
|
||||
import (
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// Allocator abstracts a source of raw byte buffers.
type Allocator interface {
	// AllocMemory returns a buffer of size bytes or an error.
	AllocMemory(size uint64) ([]byte, error)
	// FreeMemory releases a buffer previously returned by AllocMemory.
	FreeMemory(memory []byte) error
}
|
||||
|
||||
func Allocate[T any](a Allocator) (*T, error) {
|
||||
var zeroType T
|
||||
|
||||
mem, err := a.AllocMemory(uint64(unsafe.Sizeof(zeroType)))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return (*T)(unsafe.Pointer(unsafe.SliceData(mem))), nil
|
||||
}
|
||||
|
||||
func Free[T any](a Allocator, v *T) error {
|
||||
var zeroType T
|
||||
return a.FreeMemory(unsafe.Slice((*byte)(unsafe.Pointer(v)), uint64(unsafe.Sizeof(zeroType))))
|
||||
}
|
||||
|
||||
func AllocateSlice[T any, T2 ~int | ~uint64 | ~uint32](a Allocator, size T2) ([]T, error) {
|
||||
var zeroType T
|
||||
|
||||
mem, err := a.AllocMemory(uint64(unsafe.Sizeof(zeroType)) * uint64(size))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return unsafe.Slice((*T)(unsafe.Pointer(unsafe.SliceData(mem))), size), nil
|
||||
}
|
||||
|
||||
func FreeSlice[T any](a Allocator, v []T) error {
|
||||
var zeroType T
|
||||
|
||||
return a.FreeMemory(unsafe.Slice((*byte)(unsafe.Pointer(unsafe.SliceData(v))), uint64(unsafe.Sizeof(zeroType))*uint64(len(v))))
|
||||
}
|
||||
|
||||
// isZeroOrPowerOf2 reports whether x has at most one bit set.
func isZeroOrPowerOf2(x uint64) bool {
	// x & -x isolates the lowest set bit; it equals x only when no other
	// bit is set (and trivially when x is zero).
	lowest := x & (^x + 1)
	return lowest == x
}
|
45
internal/memory/large_freebsd.go
Normal file
45
internal/memory/large_freebsd.go
Normal file
|
@ -0,0 +1,45 @@
|
|||
//go:build freebsd && !purego
|
||||
|
||||
package memory
|
||||
|
||||
import (
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
type LargePageAllocator struct {
|
||||
}
|
||||
|
||||
func NewLargePageAllocator() Allocator {
|
||||
return LargePageAllocator{}
|
||||
}
|
||||
|
||||
/*
|
||||
* Request specific alignment (n == log2 of the desired alignment).
|
||||
*
|
||||
* MAP_ALIGNED_SUPER requests optimal superpage alignment, but does
|
||||
* not enforce a specific alignment.
|
||||
*/
|
||||
//#define MAP_ALIGNED(n) ((n) << MAP_ALIGNMENT_SHIFT)
|
||||
//#define MAP_ALIGNMENT_SHIFT 24
|
||||
//#define MAP_ALIGNMENT_MASK MAP_ALIGNED(0xff)
|
||||
//#define MAP_ALIGNED_SUPER MAP_ALIGNED(1) /* align on a superpage */
|
||||
|
||||
const MAP_ALIGNED_SUPER = 1 << 24
|
||||
|
||||
func (a LargePageAllocator) AllocMemory(size uint64) ([]byte, error) {
|
||||
|
||||
memory, err := unix.Mmap(-1, 0, int(size), unix.PROT_READ|unix.PROT_WRITE, unix.MAP_PRIVATE|unix.MAP_ANONYMOUS|MAP_ALIGNED_SUPER)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return memory, nil
|
||||
}
|
||||
|
||||
func (a LargePageAllocator) FreeMemory(memory []byte) error {
|
||||
if memory == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
return unix.Munmap(memory)
|
||||
}
|
10
internal/memory/large_other.go
Normal file
10
internal/memory/large_other.go
Normal file
|
@ -0,0 +1,10 @@
|
|||
//go:build openbsd || netbsd || dragonfly || darwin || ios || !unix || purego
|
||||
|
||||
package memory
|
||||
|
||||
var LargePageNoMemoryErr error
|
||||
|
||||
// NewLargePageAllocator Not supported in platform
|
||||
func NewLargePageAllocator() Allocator {
|
||||
return nil
|
||||
}
|
31
internal/memory/large_unix.go
Normal file
31
internal/memory/large_unix.go
Normal file
|
@ -0,0 +1,31 @@
|
|||
//go:build unix && !(freebsd || openbsd || netbsd || dragonfly || darwin || ios) && !purego
|
||||
|
||||
package memory
|
||||
|
||||
import (
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
type LargePageAllocator struct {
|
||||
}
|
||||
|
||||
func NewLargePageAllocator() Allocator {
|
||||
return LargePageAllocator{}
|
||||
}
|
||||
|
||||
func (a LargePageAllocator) AllocMemory(size uint64) ([]byte, error) {
|
||||
memory, err := unix.Mmap(-1, 0, int(size), unix.PROT_READ|unix.PROT_WRITE, unix.MAP_PRIVATE|unix.MAP_ANONYMOUS|unix.MAP_HUGETLB|unix.MAP_POPULATE)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return memory, nil
|
||||
}
|
||||
|
||||
func (a LargePageAllocator) FreeMemory(memory []byte) error {
|
||||
if memory == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
return unix.Munmap(memory)
|
||||
}
|
22
internal/memory/pages_other.go
Normal file
22
internal/memory/pages_other.go
Normal file
|
@ -0,0 +1,22 @@
|
|||
//go:build !unix || purego
|
||||
|
||||
package memory
|
||||
|
||||
var PageNoMemoryErr error
|
||||
|
||||
func NewPageAllocator() Allocator {
|
||||
return nil
|
||||
}
|
||||
|
||||
// PageReadWrite is the unsupported-platform stub; it always panics.
func PageReadWrite(memory []byte) error {
	panic("not supported")
}
|
||||
|
||||
// PageReadExecute is the unsupported-platform stub; it always panics.
func PageReadExecute(memory []byte) error {
	panic("not supported")
}
|
||||
|
||||
// PageReadWriteExecute Insecure!
//
// This is the unsupported-platform stub; it always panics.
func PageReadWriteExecute(memory []byte) error {
	panic("not supported")
}
|
46
internal/memory/pages_unix.go
Normal file
46
internal/memory/pages_unix.go
Normal file
|
@ -0,0 +1,46 @@
|
|||
//go:build unix && !purego
|
||||
|
||||
package memory
|
||||
|
||||
import (
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
var PageNoMemoryErr = unix.ENOMEM
|
||||
|
||||
type PageAllocator struct {
|
||||
}
|
||||
|
||||
func NewPageAllocator() Allocator {
|
||||
return PageAllocator{}
|
||||
}
|
||||
|
||||
func (a PageAllocator) AllocMemory(size uint64) ([]byte, error) {
|
||||
memory, err := unix.Mmap(-1, 0, int(size), unix.PROT_READ|unix.PROT_WRITE, unix.MAP_PRIVATE|unix.MAP_ANONYMOUS)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return memory, nil
|
||||
}
|
||||
|
||||
func (a PageAllocator) FreeMemory(memory []byte) error {
|
||||
if memory == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
return unix.Munmap(memory)
|
||||
}
|
||||
|
||||
func PageReadWrite(memory []byte) error {
|
||||
return unix.Mprotect(memory, unix.PROT_READ|unix.PROT_WRITE)
|
||||
}
|
||||
|
||||
func PageReadExecute(memory []byte) error {
|
||||
return unix.Mprotect(memory, unix.PROT_READ|unix.PROT_EXEC)
|
||||
}
|
||||
|
||||
// PageReadWriteExecute Insecure!
|
||||
func PageReadWriteExecute(memory []byte) error {
|
||||
return unix.Mprotect(memory, unix.PROT_READ|unix.PROT_WRITE|unix.PROT_EXEC)
|
||||
}
|
268
jit_amd64.go
Normal file
268
jit_amd64.go
Normal file
|
@ -0,0 +1,268 @@
|
|||
//go:build unix && amd64 && !disable_jit && !purego
|
||||
|
||||
package randomx
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"git.gammaspectra.live/P2Pool/go-randomx/v3/internal/asm"
|
||||
)
|
||||
|
||||
const supportsJIT = true
|
||||
|
||||
/*
|
||||
|
||||
REGISTER ALLOCATION:
|
||||
|
||||
; rax -> temporary
|
||||
; rbx -> todo: iteration counter "ic"
|
||||
; rcx -> temporary
|
||||
; rdx -> temporary
|
||||
; rsi -> scratchpad pointer
|
||||
; rdi -> todo: dataset pointer
|
||||
; rbp -> (do not use, it's used by Golang sampling) jump target //todo: memory registers "ma" (high 32 bits), "mx" (low 32 bits)
|
||||
; rsp -> stack pointer
|
||||
; r8 -> "r0"
|
||||
; r9 -> "r1"
|
||||
; r10 -> "r2"
|
||||
; r11 -> "r3"
|
||||
; r12 -> "r4"
|
||||
; r13 -> "r5"
|
||||
; r14 -> "r6"
|
||||
; r15 -> "r7"
|
||||
; xmm0 -> "f0"
|
||||
; xmm1 -> "f1"
|
||||
; xmm2 -> "f2"
|
||||
; xmm3 -> "f3"
|
||||
; xmm4 -> "e0"
|
||||
; xmm5 -> "e1"
|
||||
; xmm6 -> "e2"
|
||||
; xmm7 -> "e3"
|
||||
; xmm8 -> "a0"
|
||||
; xmm9 -> "a1"
|
||||
; xmm10 -> "a2"
|
||||
; xmm11 -> "a3"
|
||||
; xmm12 -> temporary
|
||||
; xmm13 -> E 'and' mask = 0x00ffffffffffffff00ffffffffffffff
|
||||
; xmm14 -> E 'or' mask = 0x3*00000000******3*00000000******
|
||||
; xmm15 -> scale mask = 0x81f000000000000081f0000000000000
|
||||
|
||||
*/
|
||||
|
||||
const MaxRandomXInstrCodeSize = 32 //FDIV_M requires up to 32 bytes of x86 code
|
||||
const MaxSuperscalarInstrSize = 14 //IMUL_RCP requires 14 bytes of x86 code
|
||||
const SuperscalarProgramHeader = 128 //overhead per superscalar program
|
||||
const CodeAlign = 4096 //align code size to a multiple of 4 KiB
|
||||
const ReserveCodeSize = CodeAlign //function prologue/epilogue + reserve
|
||||
|
||||
// alignSize rounds pos up to the next multiple of align.
//
// pos == 0 is handled explicitly and yields 0 for every supported type: the
// general formula computes pos-1, which underflows for unsigned types and
// truncates to 0 for signed ones, so it would otherwise return inconsistent
// results (e.g. align for signed types). align must be non-zero; a zero
// align panics with a division by zero.
func alignSize[T ~uintptr | ~uint32 | ~uint64 | ~int64 | ~int32 | ~int](pos, align T) T {
	if pos == 0 {
		return 0
	}
	return ((pos-1)/align + 1) * align
}
|
||||
|
||||
var RandomXCodeSize = alignSize[uint64](ReserveCodeSize+MaxRandomXInstrCodeSize*RANDOMX_PROGRAM_SIZE, CodeAlign)
|
||||
var SuperscalarSize = alignSize[uint64](ReserveCodeSize+(SuperscalarProgramHeader+MaxSuperscalarInstrSize*SuperscalarMaxSize)*RANDOMX_CACHE_ACCESSES, CodeAlign)
|
||||
|
||||
var CodeSize = uint32(RandomXCodeSize + SuperscalarSize)
|
||||
|
||||
var superScalarHashOffset = int32(RandomXCodeSize)
|
||||
|
||||
var REX_ADD_RR = []byte{0x4d, 0x03}
|
||||
var REX_ADD_RM = []byte{0x4c, 0x03}
|
||||
var REX_SUB_RR = []byte{0x4d, 0x2b}
|
||||
var REX_SUB_RM = []byte{0x4c, 0x2b}
|
||||
var REX_MOV_RR = []byte{0x41, 0x8b}
|
||||
var REX_MOV_RR64 = []byte{0x49, 0x8b}
|
||||
var REX_MOV_R64R = []byte{0x4c, 0x8b}
|
||||
var REX_IMUL_RR = []byte{0x4d, 0x0f, 0xaf}
|
||||
var REX_IMUL_RRI = []byte{0x4d, 0x69}
|
||||
var REX_IMUL_RM = []byte{0x4c, 0x0f, 0xaf}
|
||||
var REX_MUL_R = []byte{0x49, 0xf7}
|
||||
var REX_MUL_M = []byte{0x48, 0xf7}
|
||||
var REX_81 = []byte{0x49, 0x81}
|
||||
var AND_EAX_I byte = 0x25
|
||||
|
||||
var MOV_EAX_I byte = 0xb8
|
||||
|
||||
var MOV_RAX_I = []byte{0x48, 0xb8}
|
||||
var MOV_RCX_I = []byte{0x48, 0xb9}
|
||||
var REX_LEA = []byte{0x4f, 0x8d}
|
||||
var REX_MUL_MEM = []byte{0x48, 0xf7, 0x24, 0x0e}
|
||||
var REX_IMUL_MEM = []byte{0x48, 0xf7, 0x2c, 0x0e}
|
||||
var REX_SHR_RAX = []byte{0x48, 0xc1, 0xe8}
|
||||
var RAX_ADD_SBB_1 = []byte{0x48, 0x83, 0xC0, 0x01, 0x48, 0x83, 0xD8, 0x00}
|
||||
var MUL_RCX = []byte{0x48, 0xf7, 0xe1}
|
||||
var REX_SHR_RDX = []byte{0x48, 0xc1, 0xea}
|
||||
var REX_SH = []byte{0x49, 0xc1}
|
||||
var MOV_RCX_RAX_SAR_RCX_63 = []byte{0x48, 0x89, 0xc1, 0x48, 0xc1, 0xf9, 0x3f}
|
||||
var AND_ECX_I = []byte{0x81, 0xe1}
|
||||
var ADD_RAX_RCX = []byte{0x48, 0x01, 0xC8}
|
||||
var SAR_RAX_I8 = []byte{0x48, 0xC1, 0xF8}
|
||||
var NEG_RAX = []byte{0x48, 0xF7, 0xD8}
|
||||
var ADD_R_RAX = []byte{0x4C, 0x03}
|
||||
var XOR_EAX_EAX = []byte{0x33, 0xC0}
|
||||
var ADD_RDX_R = []byte{0x4c, 0x01}
|
||||
var SUB_RDX_R = []byte{0x4c, 0x29}
|
||||
var SAR_RDX_I8 = []byte{0x48, 0xC1, 0xFA}
|
||||
var TEST_RDX_RDX = []byte{0x48, 0x85, 0xD2}
|
||||
var SETS_AL_ADD_RDX_RAX = []byte{0x0F, 0x98, 0xC0, 0x48, 0x03, 0xD0}
|
||||
var REX_NEG = []byte{0x49, 0xF7}
|
||||
var REX_XOR_RR = []byte{0x4D, 0x33}
|
||||
var REX_XOR_RI = []byte{0x49, 0x81}
|
||||
var REX_XOR_RM = []byte{0x4c, 0x33}
|
||||
var REX_ROT_CL = []byte{0x49, 0xd3}
|
||||
var REX_ROT_I8 = []byte{0x49, 0xc1}
|
||||
var SHUFPD = []byte{0x66, 0x0f, 0xc6}
|
||||
var REX_ADDPD = []byte{0x66, 0x41, 0x0f, 0x58}
|
||||
var REX_CVTDQ2PD_XMM12 = []byte{0xf3, 0x44, 0x0f, 0xe6, 0x24, 0x06}
|
||||
var REX_SUBPD = []byte{0x66, 0x41, 0x0f, 0x5c}
|
||||
var REX_XORPS = []byte{0x41, 0x0f, 0x57}
|
||||
var REX_MULPD = []byte{0x66, 0x41, 0x0f, 0x59}
|
||||
var REX_MAXPD = []byte{0x66, 0x41, 0x0f, 0x5f}
|
||||
var REX_DIVPD = []byte{0x66, 0x41, 0x0f, 0x5e}
|
||||
var SQRTPD = []byte{0x66, 0x0f, 0x51}
|
||||
var AND_OR_MOV_LDMXCSR = []byte{0x25, 0x00, 0x60, 0x00, 0x00, 0x0D, 0xC0, 0x9F, 0x00, 0x00, 0x50, 0x0F, 0xAE, 0x14, 0x24, 0x58}
|
||||
var ROL_RAX = []byte{0x48, 0xc1, 0xc0}
|
||||
var XOR_ECX_ECX = []byte{0x33, 0xC9}
|
||||
var REX_CMP_R32I = []byte{0x41, 0x81}
|
||||
var REX_CMP_M32I = []byte{0x81, 0x3c, 0x06}
|
||||
var MOVAPD = []byte{0x66, 0x0f, 0x29}
|
||||
var REX_MOV_MR = []byte{0x4c, 0x89}
|
||||
var REX_XOR_EAX = []byte{0x41, 0x33}
|
||||
var SUB_EBX = []byte{0x83, 0xEB, 0x01}
|
||||
var JNZ = []byte{0x0f, 0x85}
|
||||
var JMP byte = 0xe9
|
||||
|
||||
var REX_XOR_RAX_R64 = []byte{0x49, 0x33}
|
||||
var REX_XCHG = []byte{0x4d, 0x87}
|
||||
var REX_ANDPS_XMM12 = []byte{0x45, 0x0F, 0x54, 0xE5, 0x45, 0x0F, 0x56, 0xE6}
|
||||
var REX_PADD = []byte{0x66, 0x44, 0x0f}
|
||||
var PADD_OPCODES = []byte{0xfc, 0xfd, 0xfe, 0xd4}
|
||||
// NOTE(review): unlike the byte-typed JMP, JZ_SHORT and RET opcodes in this
// file, this declaration has no explicit type, so CALL is an int — confirm
// that is intended.
var CALL = 0xe8
|
||||
|
||||
var REX_ADD_I = []byte{0x49, 0x81}
|
||||
var REX_TEST = []byte{0x49, 0xF7}
|
||||
var JZ = []byte{0x0f, 0x84}
|
||||
var JZ_SHORT byte = 0x74
|
||||
|
||||
var RET byte = 0xc3
|
||||
|
||||
var LEA_32 = []byte{0x41, 0x8d}
|
||||
var MOVNTI = []byte{0x4c, 0x0f, 0xc3}
|
||||
var ADD_EBX_I = []byte{0x81, 0xc3}
|
||||
|
||||
var NOP1 = []byte{0x90}
|
||||
var NOP2 = []byte{0x66, 0x90}
|
||||
var NOP3 = []byte{0x66, 0x66, 0x90}
|
||||
var NOP4 = []byte{0x0F, 0x1F, 0x40, 0x00}
|
||||
var NOP5 = []byte{0x0F, 0x1F, 0x44, 0x00, 0x00}
|
||||
var NOP6 = []byte{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00}
|
||||
var NOP7 = []byte{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00}
|
||||
var NOP8 = []byte{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}
|
||||
|
||||
var NOPX = [][]byte{NOP1, NOP2, NOP3, NOP4, NOP5, NOP6, NOP7, NOP8}
|
||||
|
||||
var JMP_ALIGN_PREFIX = [14][]byte{
|
||||
{},
|
||||
{0x2E},
|
||||
{0x2E, 0x2E},
|
||||
{0x2E, 0x2E, 0x2E},
|
||||
{0x2E, 0x2E, 0x2E, 0x2E},
|
||||
{0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
|
||||
{0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
|
||||
{0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
|
||||
{0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
|
||||
{0x90, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
|
||||
{0x66, 0x90, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
|
||||
{0x66, 0x66, 0x90, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
|
||||
{0x0F, 0x1F, 0x40, 0x00, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
|
||||
{0x0F, 0x1F, 0x44, 0x00, 0x00, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
|
||||
}
|
||||
|
||||
// genSIB packs scale, index and base into one byte: scale in bits 6-7,
// index in bits 3-5 and base in bits 0-2. Inputs are not range-checked;
// the combined value is simply truncated to a byte.
func genSIB(scale, index, base int) byte {
	sib := scale << 6
	sib |= index << 3
	sib |= base
	return byte(sib)
}
|
||||
func genAddressReg(buf []byte, instr *ByteCodeInstruction, rax bool) []byte {
|
||||
buf = append(buf, LEA_32...)
|
||||
if rax {
|
||||
buf = append(buf, 0x80+instr.Src+0)
|
||||
} else {
|
||||
buf = append(buf, 0x80+instr.Src+8)
|
||||
}
|
||||
if instr.Src == RegisterNeedsSib {
|
||||
buf = append(buf, 0x24)
|
||||
}
|
||||
buf = binary.LittleEndian.AppendUint32(buf, uint32(instr.Imm))
|
||||
if rax {
|
||||
buf = append(buf, AND_EAX_I)
|
||||
} else {
|
||||
buf = append(buf, AND_ECX_I...)
|
||||
}
|
||||
buf = binary.LittleEndian.AppendUint32(buf, instr.MemMask)
|
||||
return buf
|
||||
}
|
||||
|
||||
// valAsString serialises values as little-endian bytes, one word after the
// other, and stops just before the first zero byte (C-string style). If no
// zero byte occurs, all 4*len(values) bytes are returned.
func valAsString(values ...uint32) []byte {
	out := make([]byte, 0, 4*len(values))
	for _, word := range values {
		for shift := 0; shift < 32; shift += 8 {
			b := byte(word >> shift)
			if b == 0 {
				return out
			}
			out = append(out, b)
		}
	}
	return out
}
|
||||
|
||||
func familyModel(maxFunctionId uint32) (family, model, stepping int) {
|
||||
if maxFunctionId < 0x1 {
|
||||
return 0, 0, 0
|
||||
}
|
||||
eax, _, _, _ := asm.Cpuid(1)
|
||||
// If BaseFamily[3:0] is less than Fh then ExtendedFamily[7:0] is reserved and Family is equal to BaseFamily[3:0].
|
||||
family = int((eax >> 8) & 0xf)
|
||||
extFam := family == 0x6 // Intel is 0x6, needs extended model.
|
||||
if family == 0xf {
|
||||
// Add ExtFamily
|
||||
family += int((eax >> 20) & 0xff)
|
||||
extFam = true
|
||||
}
|
||||
// If BaseFamily[3:0] is less than 0Fh then ExtendedModel[3:0] is reserved and Model is equal to BaseModel[3:0].
|
||||
model = int((eax >> 4) & 0xf)
|
||||
if extFam {
|
||||
// Add ExtModel
|
||||
model += int((eax >> 12) & 0xf0)
|
||||
}
|
||||
stepping = int(eax & 0xf)
|
||||
return family, model, stepping
|
||||
}
|
||||
|
||||
var BranchesWithin32B = func() bool {
|
||||
a, b, c, d := asm.Cpuid(0)
|
||||
v := string(valAsString(b, d, c))
|
||||
|
||||
if v == "GenuineIntel" {
|
||||
family, model, stepping := familyModel(a)
|
||||
|
||||
// Intel JCC erratum mitigation
|
||||
if family == 6 {
|
||||
// Affected CPU models and stepping numbers are taken from https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
|
||||
return ((model == 0x4E) && (stepping == 0x3)) ||
|
||||
((model == 0x55) && ((stepping == 0x4) || (stepping == 0x7))) ||
|
||||
((model == 0x5E) && (stepping == 0x3)) ||
|
||||
((model == 0x8E) && (stepping >= 0x9) && (stepping <= 0xC)) ||
|
||||
((model == 0x9E) && (stepping >= 0x9) && (stepping <= 0xD)) ||
|
||||
((model == 0xA6) && (stepping == 0x0)) ||
|
||||
((model == 0xAE) && (stepping == 0xA))
|
||||
}
|
||||
}
|
||||
return false
|
||||
}()
|
7
jit_generic.go
Normal file
7
jit_generic.go
Normal file
|
@ -0,0 +1,7 @@
|
|||
//go:build !unix || !amd64 || disable_jit || purego
|
||||
|
||||
package randomx
|
||||
|
||||
const supportsJIT = false
|
||||
|
||||
var RandomXCodeSize uint64 = 0
|
|
@ -1,6 +1,9 @@
|
|||
package randomx
|
||||
|
||||
import "math"
|
||||
import (
|
||||
"math"
|
||||
"math/bits"
|
||||
)
|
||||
|
||||
const (
|
||||
mantbits64 uint = 52
|
||||
|
@ -41,10 +44,36 @@ func StaticExponent(entropy uint64) uint64 {
|
|||
return exponent
|
||||
}
|
||||
|
||||
func EMask(entropy uint64) uint64 {
|
||||
func ExponentMask(entropy uint64) uint64 {
|
||||
return (entropy & mask22bit) | StaticExponent(entropy)
|
||||
}
|
||||
|
||||
// Xor returns the float64 whose IEEE-754 bit pattern is the bitwise XOR of
// the bit patterns of a and b.
func Xor(a, b float64) float64 {
	bitsA := math.Float64bits(a)
	bitsB := math.Float64bits(b)
	return math.Float64frombits(bitsA ^ bitsB)
}
|
||||
|
||||
// smulh returns the high 64 bits of the signed 128-bit product a*b, as a
// uint64 bit pattern.
//
// It starts from the unsigned high word and subtracts the other operand once
// for each negative operand, using wrapping arithmetic.
func smulh(a, b int64) uint64 {
	hi, _ := bits.Mul64(uint64(a), uint64(b))
	if a < 0 {
		hi -= uint64(b)
	}
	if b < 0 {
		hi -= uint64(a)
	}
	return hi
}
|
||||
|
||||
// reciprocal
// Calculates rcp = 2**x / divisor for highest integer x such that rcp < 2**64.
// divisor must not be 0 or a power of 2
//
// The division is split into quotient and remainder of 2**63 so that every
// intermediate value fits in 64 bits. A zero divisor panics with a division
// by zero.
func reciprocal(divisor uint32) uint64 {
	const p2exp63 = uint64(1) << 63

	d := uint64(divisor)
	shift := bits.Len32(divisor)
	q, r := p2exp63/d, p2exp63%d

	return q<<shift + (r<<shift)/d
}
|
||||
|
||||
// signExtend2sCompl treats x as a signed 32-bit two's-complement value and
// widens it to 64 bits, replicating the sign bit into the upper half.
func signExtend2sCompl(x uint32) uint64 {
	wide := uint64(x)
	if x&0x80000000 != 0 {
		wide |= 0xFFFFFFFF00000000
	}
	return wide
}
|
28
math_test.go
Normal file
28
math_test.go
Normal file
|
@ -0,0 +1,28 @@
|
|||
package randomx
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestReciprocal(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
var tests = []struct {
|
||||
a uint32
|
||||
b uint64
|
||||
}{
|
||||
{3, 12297829382473034410},
|
||||
{13, 11351842506898185609},
|
||||
{33, 17887751829051686415},
|
||||
{65537, 18446462603027742720},
|
||||
{15000001, 10316166306300415204},
|
||||
{3845182035, 10302264209224146340},
|
||||
{0xffffffff, 9223372039002259456},
|
||||
}
|
||||
|
||||
for i, tt := range tests {
|
||||
r := reciprocal(tt.a)
|
||||
if r != tt.b {
|
||||
t.Errorf("i=%d, a=%d", i, tt.a)
|
||||
t.Errorf("expected=%016x, actual=%016x", tt.b, r)
|
||||
}
|
||||
}
|
||||
}
|
382
randomx_test.go
382
randomx_test.go
|
@ -30,31 +30,95 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
package randomx
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"encoding/hex"
|
||||
"errors"
|
||||
"git.gammaspectra.live/P2Pool/go-randomx/v3/internal/aes"
|
||||
"git.gammaspectra.live/P2Pool/go-randomx/v3/internal/memory"
|
||||
"os"
|
||||
"runtime"
|
||||
"slices"
|
||||
"strings"
|
||||
"unsafe"
|
||||
)
|
||||
import "testing"
|
||||
|
||||
var Tests = []struct {
|
||||
key []byte // key
|
||||
input []byte // input
|
||||
expected string // expected result
|
||||
}{
|
||||
{[]byte("RandomX example key\x00"), []byte("RandomX example input\x00"), "8a48e5f9db45ab79d9080574c4d81954fe6ac63842214aff73c244b26330b7c9"},
|
||||
{[]byte("test key 000"), []byte("This is a test"), "639183aae1bf4c9a35884cb46b09cad9175f04efd7684e7262a0ac1c2f0b4e3f"}, // test a
|
||||
{[]byte("test key 000"), []byte("Lorem ipsum dolor sit amet"), "300a0adb47603dedb42228ccb2b211104f4da45af709cd7547cd049e9489c969"}, // test b
|
||||
{[]byte("test key 000"), []byte("sed do eiusmod tempor incididunt ut labore et dolore magna aliqua"), "c36d4ed4191e617309867ed66a443be4075014e2b061bcdaf9ce7b721d2b77a8"}, // test c
|
||||
{[]byte("test key 001"), []byte("sed do eiusmod tempor incididunt ut labore et dolore magna aliqua"), "e9ff4503201c0c2cca26d285c93ae883f9b1d30c9eb240b820756f2d5a7905fc"}, // test d
|
||||
type testdata struct {
|
||||
name string
|
||||
key []byte
|
||||
input []byte
|
||||
// expected result, in hex
|
||||
expected string
|
||||
}
|
||||
|
||||
func Test_Randomx(t *testing.T) {
|
||||
// mustHex decodes the hexadecimal string str and panics with the decoder's
// error if str is not valid hex. Intended for test fixtures.
func mustHex(str string) []byte {
	decoded, err := hex.DecodeString(str)
	if err == nil {
		return decoded
	}
	panic(err)
}
|
||||
|
||||
c := Randomx_alloc_cache(0)
|
||||
var Tests = []testdata{
|
||||
{"example", []byte("RandomX example key\x00"), []byte("RandomX example input\x00"), "8a48e5f9db45ab79d9080574c4d81954fe6ac63842214aff73c244b26330b7c9"},
|
||||
{"test_a", []byte("test key 000"), []byte("This is a test"), "639183aae1bf4c9a35884cb46b09cad9175f04efd7684e7262a0ac1c2f0b4e3f"},
|
||||
{"test_b", []byte("test key 000"), []byte("Lorem ipsum dolor sit amet"), "300a0adb47603dedb42228ccb2b211104f4da45af709cd7547cd049e9489c969"},
|
||||
{"test_c", []byte("test key 000"), []byte("sed do eiusmod tempor incididunt ut labore et dolore magna aliqua"), "c36d4ed4191e617309867ed66a443be4075014e2b061bcdaf9ce7b721d2b77a8"},
|
||||
{"test_d", []byte("test key 001"), []byte("sed do eiusmod tempor incididunt ut labore et dolore magna aliqua"), "e9ff4503201c0c2cca26d285c93ae883f9b1d30c9eb240b820756f2d5a7905fc"},
|
||||
{"test_e", []byte("test key 001"), mustHex("0b0b98bea7e805e0010a2126d287a2a0cc833d312cb786385a7c2f9de69d25537f584a9bc9977b00000000666fd8753bf61a8631f12984e3fd44f4014eca629276817b56f32e9b68bd82f416"), "c56414121acda1713c2f2a819d8ae38aed7c80c35c2a769298d34f03833cd5f1"},
|
||||
}
|
||||
|
||||
for ix, tt := range Tests {
|
||||
func testFlags(name string, flags Flags) (f Flags, skip bool) {
|
||||
flags |= GetFlags()
|
||||
flags &^= RANDOMX_FLAG_LARGE_PAGES
|
||||
|
||||
t.Run(string(tt.key)+"_____"+string(tt.input), func(t *testing.T) {
|
||||
c.Init(tt.key)
|
||||
nn := strings.Split(name, "/")
|
||||
switch nn[len(nn)-1] {
|
||||
case "interpreter":
|
||||
flags &^= RANDOMX_FLAG_JIT
|
||||
case "compiler":
|
||||
flags |= RANDOMX_FLAG_JIT
|
||||
if !flags.HasJIT() {
|
||||
return flags, true
|
||||
}
|
||||
|
||||
case "softaes":
|
||||
flags &^= RANDOMX_FLAG_HARD_AES
|
||||
case "hardaes":
|
||||
flags |= RANDOMX_FLAG_HARD_AES
|
||||
if aes.NewHardAES() == nil {
|
||||
return flags, true
|
||||
}
|
||||
case "largepages":
|
||||
flags |= RANDOMX_FLAG_LARGE_PAGES
|
||||
if largePageAllocator == nil {
|
||||
return flags, true
|
||||
}
|
||||
if unsafe.Sizeof(uint(0)) < 8 {
|
||||
//not 64-bit platforms
|
||||
return flags, true
|
||||
}
|
||||
}
|
||||
|
||||
return flags, false
|
||||
}
|
||||
|
||||
func Test_RandomXLight(t *testing.T) {
|
||||
t.Parallel()
|
||||
for _, n := range []string{"interpreter", "compiler", "softaes", "hardaes", "largepages"} {
|
||||
t.Run(n, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
tFlags, skip := testFlags(t.Name(), 0)
|
||||
if skip {
|
||||
t.Skip("not supported on this platform")
|
||||
}
|
||||
|
||||
c, err := NewCache(tFlags)
|
||||
if err != nil {
|
||||
if tFlags.Has(RANDOMX_FLAG_LARGE_PAGES) && errors.Is(err, memory.PageNoMemoryErr) {
|
||||
t.Skip("cannot allocate memory")
|
||||
}
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer func() {
|
||||
err := c.Close()
|
||||
if err != nil {
|
||||
|
@ -62,66 +126,282 @@ func Test_Randomx(t *testing.T) {
|
|||
}
|
||||
}()
|
||||
|
||||
vm := c.VM_Initialize()
|
||||
for _, test := range Tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
c.Init(test.key)
|
||||
|
||||
var output_hash [32]byte
|
||||
vm.CalculateHash(tt.input, &output_hash)
|
||||
vm, err := NewVM(tFlags, c, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer func() {
|
||||
err := vm.Close()
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
}()
|
||||
|
||||
actual := fmt.Sprintf("%x", output_hash)
|
||||
if actual != tt.expected {
|
||||
t.Errorf("#%d Fib(%v): expected %s, actual %s", ix, tt.key, tt.expected, actual)
|
||||
var outputHash [RANDOMX_HASH_SIZE]byte
|
||||
|
||||
vm.CalculateHash(test.input, &outputHash)
|
||||
|
||||
outputHex := hex.EncodeToString(outputHash[:])
|
||||
|
||||
if outputHex != test.expected {
|
||||
t.Errorf("key=%v, input=%v", test.key, test.input)
|
||||
t.Errorf("expected=%s, actual=%s", test.expected, outputHex)
|
||||
t.FailNow()
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func Test_RandomXBatch(t *testing.T) {
|
||||
t.Parallel()
|
||||
for _, n := range []string{"softaes", "hardaes"} {
|
||||
t.Run(n, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
tFlags, skip := testFlags(t.Name(), 0)
|
||||
if skip {
|
||||
t.Skip("not supported on this platform")
|
||||
}
|
||||
|
||||
c, err := NewCache(tFlags)
|
||||
if tFlags.Has(RANDOMX_FLAG_LARGE_PAGES) && errors.Is(err, memory.PageNoMemoryErr) {
|
||||
t.Skip("cannot allocate memory")
|
||||
}
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer func() {
|
||||
err := c.Close()
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
}()
|
||||
tests := Tests[1:4]
|
||||
|
||||
c.Init(tests[0].key)
|
||||
vm, err := NewVM(tFlags, c, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer func() {
|
||||
err := vm.Close()
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
}()
|
||||
|
||||
var outputHash [3][RANDOMX_HASH_SIZE]byte
|
||||
|
||||
vm.CalculateHashFirst(tests[0].input)
|
||||
vm.CalculateHashNext(tests[1].input, &outputHash[0])
|
||||
vm.CalculateHashNext(tests[2].input, &outputHash[1])
|
||||
vm.CalculateHashLast(&outputHash[2])
|
||||
|
||||
for i, test := range tests {
|
||||
outputHex := hex.EncodeToString(outputHash[i][:])
|
||||
|
||||
if outputHex != test.expected {
|
||||
t.Errorf("key=%v, input=%v", test.key, test.input)
|
||||
t.Errorf("expected=%s, actual=%s", test.expected, outputHex)
|
||||
t.FailNow()
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func Benchmark_RandomX(b *testing.B) {
|
||||
func Test_RandomXFull(t *testing.T) {
|
||||
if testing.Short() {
|
||||
t.Skip("Skipping full mode with -short")
|
||||
}
|
||||
|
||||
if os.Getenv("CI") != "" {
|
||||
t.Skip("Skipping full mode in CI environment")
|
||||
}
|
||||
|
||||
for _, n := range []string{"interpreter", "compiler", "softaes", "hardaes", "largepages"} {
|
||||
t.Run(n, func(t *testing.T) {
|
||||
|
||||
tFlags, skip := testFlags(t.Name(), RANDOMX_FLAG_FULL_MEM)
|
||||
if skip {
|
||||
t.Skip("not supported on this platform")
|
||||
}
|
||||
|
||||
c, err := NewCache(tFlags)
|
||||
if tFlags.Has(RANDOMX_FLAG_LARGE_PAGES) && errors.Is(err, memory.PageNoMemoryErr) {
|
||||
t.Skip("cannot allocate memory")
|
||||
}
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer func() {
|
||||
err := c.Close()
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
}()
|
||||
|
||||
dataset, err := NewDataset(tFlags)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer func() {
|
||||
err := dataset.Close()
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
}()
|
||||
|
||||
for _, test := range Tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
c.Init(test.key)
|
||||
dataset.InitDatasetParallel(c, runtime.NumCPU())
|
||||
|
||||
vm, err := NewVM(tFlags, nil, dataset)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer func() {
|
||||
err := vm.Close()
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
}()
|
||||
|
||||
var outputHash [RANDOMX_HASH_SIZE]byte
|
||||
|
||||
vm.CalculateHash(test.input, &outputHash)
|
||||
|
||||
outputHex := hex.EncodeToString(outputHash[:])
|
||||
|
||||
if outputHex != test.expected {
|
||||
t.Errorf("key=%v, input=%v", test.key, test.input)
|
||||
t.Errorf("expected=%s, actual=%s", test.expected, outputHex)
|
||||
t.FailNow()
|
||||
}
|
||||
})
|
||||
|
||||
// cleanup between runs
|
||||
runtime.GC()
|
||||
}
|
||||
|
||||
})
|
||||
|
||||
// cleanup 2 GiB between runs
|
||||
runtime.GC()
|
||||
}
|
||||
}
|
||||
|
||||
var BenchmarkTest = Tests[0]
|
||||
var BenchmarkCache *Cache
|
||||
var BenchmarkDataset *Dataset
|
||||
|
||||
var BenchmarkFlags = GetFlags()
|
||||
|
||||
func TestMain(m *testing.M) {
|
||||
if slices.Contains(os.Args, "-test.bench") {
|
||||
flags := GetFlags()
|
||||
flags |= RANDOMX_FLAG_FULL_MEM
|
||||
var err error
|
||||
//init light and full dataset
|
||||
BenchmarkCache, err = NewCache(flags | RANDOMX_FLAG_LARGE_PAGES)
|
||||
if err != nil {
|
||||
BenchmarkCache, err = NewCache(flags)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
defer BenchmarkCache.Close()
|
||||
BenchmarkCache.Init(BenchmarkTest.key)
|
||||
|
||||
BenchmarkDataset, err = NewDataset(flags | RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_LARGE_PAGES)
|
||||
if err != nil {
|
||||
BenchmarkDataset, err = NewDataset(flags | RANDOMX_FLAG_FULL_MEM)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
defer BenchmarkDataset.Close()
|
||||
BenchmarkDataset.InitDatasetParallel(BenchmarkCache, runtime.NumCPU())
|
||||
}
|
||||
os.Exit(m.Run())
|
||||
}
|
||||
|
||||
func Benchmark_RandomXLight(b *testing.B) {
|
||||
b.ReportAllocs()
|
||||
|
||||
tt := Tests[0]
|
||||
vm, err := NewVM(BenchmarkFlags, BenchmarkCache, nil)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
defer vm.Close()
|
||||
|
||||
c := Randomx_alloc_cache(0)
|
||||
|
||||
c.Init(tt.key)
|
||||
defer func() {
|
||||
err := c.Close()
|
||||
if err != nil {
|
||||
b.Error(err)
|
||||
}
|
||||
}()
|
||||
|
||||
vm := c.VM_Initialize()
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
var output_hash [32]byte
|
||||
vm.CalculateHash(tt.input, &output_hash)
|
||||
vm.CalculateHash(BenchmarkTest.input, &output_hash)
|
||||
runtime.KeepAlive(output_hash)
|
||||
}
|
||||
}
|
||||
|
||||
func Benchmark_RandomXParallel(b *testing.B) {
|
||||
func Benchmark_RandomXFull(b *testing.B) {
|
||||
b.ReportAllocs()
|
||||
|
||||
tt := Tests[0]
|
||||
|
||||
c := Randomx_alloc_cache(0)
|
||||
|
||||
c.Init(tt.key)
|
||||
defer func() {
|
||||
err := c.Close()
|
||||
if err != nil {
|
||||
b.Error(err)
|
||||
}
|
||||
}()
|
||||
vm, err := NewVM(BenchmarkFlags|RANDOMX_FLAG_FULL_MEM, nil, BenchmarkDataset)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
defer vm.Close()
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
var output_hash [32]byte
|
||||
vm.CalculateHash(BenchmarkTest.input, &output_hash)
|
||||
runtime.KeepAlive(output_hash)
|
||||
}
|
||||
}
|
||||
|
||||
func Benchmark_RandomXLight_Parallel(b *testing.B) {
|
||||
b.ReportAllocs()
|
||||
b.ResetTimer()
|
||||
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
var output_hash [32]byte
|
||||
vm := c.VM_Initialize()
|
||||
|
||||
vm, err := NewVM(BenchmarkFlags, BenchmarkCache, nil)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
defer vm.Close()
|
||||
|
||||
for pb.Next() {
|
||||
vm.CalculateHash(tt.input, &output_hash)
|
||||
vm.CalculateHash(BenchmarkTest.input, &output_hash)
|
||||
runtime.KeepAlive(output_hash)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func Benchmark_RandomXFull_Parallel(b *testing.B) {
|
||||
b.ReportAllocs()
|
||||
b.ResetTimer()
|
||||
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
var output_hash [32]byte
|
||||
|
||||
vm, err := NewVM(BenchmarkFlags|RANDOMX_FLAG_FULL_MEM, nil, BenchmarkDataset)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
defer vm.Close()
|
||||
|
||||
for pb.Next() {
|
||||
vm.CalculateHash(BenchmarkTest.input, &output_hash)
|
||||
runtime.KeepAlive(output_hash)
|
||||
}
|
||||
})
|
||||
|
|
11
register.go
11
register.go
|
@ -1,5 +1,7 @@
|
|||
package randomx
|
||||
|
||||
import "unsafe"
|
||||
|
||||
const RegistersCount = 8
|
||||
const RegistersCountFloat = 4
|
||||
|
||||
|
@ -17,6 +19,11 @@ type RegisterFile struct {
|
|||
FPRC uint8
|
||||
}
|
||||
|
||||
type MemoryRegisters struct {
|
||||
mx, ma uint64
|
||||
const RegisterFileSize = RegistersCount*8 + RegistersCountFloat*2*8*3
|
||||
|
||||
func (rf *RegisterFile) Memory() *[RegisterFileSize]byte {
|
||||
return (*[RegisterFileSize]byte)(unsafe.Pointer(rf))
|
||||
}
|
||||
func (rf *RegisterFile) Clear() {
|
||||
clear(rf.Memory()[:])
|
||||
}
|
||||
|
|
291
superscalar.go
291
superscalar.go
|
@ -29,7 +29,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
package randomx
|
||||
|
||||
import "math/bits"
|
||||
import (
|
||||
"git.gammaspectra.live/P2Pool/go-randomx/v3/internal/blake2"
|
||||
"math/bits"
|
||||
)
|
||||
|
||||
type ExecutionPort byte
|
||||
|
||||
|
@ -201,7 +204,7 @@ var buffer3 = []int{4, 9, 3}
|
|||
var buffer4 = []int{4, 4, 4, 4}
|
||||
var buffer5 = []int{3, 3, 10}
|
||||
|
||||
var Decoder_To_Instruction_Length = [][]int{
|
||||
var decoderToInstructionSize = [][]int{
|
||||
buffer0,
|
||||
buffer1,
|
||||
buffer2,
|
||||
|
@ -258,7 +261,7 @@ func (d DecoderType) String() string {
|
|||
}
|
||||
}
|
||||
|
||||
func FetchNextDecoder(ins *Instruction, cycle int, mulcount int, gen *Blake2Generator) DecoderType {
|
||||
func FetchNextDecoder(ins *Instruction, cycle int, mulcount int, gen *blake2.Generator) DecoderType {
|
||||
|
||||
if ins.Opcode == S_IMULH_R || ins.Opcode == S_ISMULH_R {
|
||||
return Decoder3310
|
||||
|
@ -295,172 +298,20 @@ func FetchNextDecoder(ins *Instruction, cycle int, mulcount int, gen *Blake2Gene
|
|||
return Decoder484
|
||||
}
|
||||
|
||||
var slot3 = []*Instruction{&ISUB_R, &IXOR_R} // 3 length instruction will be filled with these
|
||||
var slot3L = []*Instruction{&ISUB_R, &IXOR_R, &IMULH_R, &ISMULH_R}
|
||||
|
||||
var slot4 = []*Instruction{&IROR_C, &IADD_RS}
|
||||
var slot7 = []*Instruction{&IXOR_C7, &IADD_C7}
|
||||
var slot8 = []*Instruction{&IXOR_C8, &IADD_C8}
|
||||
var slot9 = []*Instruction{&IXOR_C9, &IADD_C9}
|
||||
var slot10 = []*Instruction{&IMUL_RCP}
|
||||
|
||||
// SuperScalarInstruction superscalar program is built with superscalar instructions
|
||||
type SuperScalarInstruction struct {
|
||||
Opcode byte
|
||||
Dst_Reg int
|
||||
Src_Reg int
|
||||
Mod byte
|
||||
Imm32 uint32
|
||||
Type int
|
||||
OpGroup int
|
||||
OpGroupPar int
|
||||
GroupParIsSource int
|
||||
ins *Instruction
|
||||
CanReuse bool
|
||||
}
|
||||
|
||||
func (sins *SuperScalarInstruction) FixSrcReg() {
|
||||
if sins.Src_Reg >= 0 {
|
||||
// do nothing
|
||||
} else {
|
||||
sins.Src_Reg = sins.Dst_Reg
|
||||
}
|
||||
|
||||
}
|
||||
func (sins *SuperScalarInstruction) Reset() {
|
||||
sins.Opcode = 99
|
||||
sins.Src_Reg = -1
|
||||
sins.Dst_Reg = -1
|
||||
sins.CanReuse = false
|
||||
sins.GroupParIsSource = 0
|
||||
}
|
||||
func create(sins *SuperScalarInstruction, ins *Instruction, gen *Blake2Generator) {
|
||||
sins.Reset()
|
||||
sins.ins = ins
|
||||
sins.OpGroupPar = -1
|
||||
sins.Opcode = ins.Opcode
|
||||
|
||||
switch ins.Opcode {
|
||||
case S_ISUB_R:
|
||||
sins.Mod = 0
|
||||
sins.Imm32 = 0
|
||||
sins.OpGroup = S_IADD_RS
|
||||
sins.GroupParIsSource = 1
|
||||
case S_IXOR_R:
|
||||
sins.Mod = 0
|
||||
sins.Imm32 = 0
|
||||
sins.OpGroup = S_IXOR_R
|
||||
sins.GroupParIsSource = 1
|
||||
case S_IADD_RS:
|
||||
sins.Mod = gen.GetByte()
|
||||
// set modshift on Imm32
|
||||
sins.Imm32 = uint32((sins.Mod >> 2) % 4) // bits 2-3
|
||||
//sins.Imm32 = 0
|
||||
sins.OpGroup = S_IADD_RS
|
||||
sins.GroupParIsSource = 1
|
||||
case S_IMUL_R:
|
||||
sins.Mod = 0
|
||||
sins.Imm32 = 0
|
||||
sins.OpGroup = S_IMUL_R
|
||||
sins.GroupParIsSource = 1
|
||||
case S_IROR_C:
|
||||
sins.Mod = 0
|
||||
|
||||
for sins.Imm32 = 0; sins.Imm32 == 0; {
|
||||
sins.Imm32 = uint32(gen.GetByte() & 63)
|
||||
}
|
||||
|
||||
sins.OpGroup = S_IROR_C
|
||||
sins.OpGroupPar = -1
|
||||
case S_IADD_C7, S_IADD_C8, S_IADD_C9:
|
||||
sins.Mod = 0
|
||||
sins.Imm32 = gen.GetUint32()
|
||||
sins.OpGroup = S_IADD_C7
|
||||
sins.OpGroupPar = -1
|
||||
case S_IXOR_C7, S_IXOR_C8, S_IXOR_C9:
|
||||
sins.Mod = 0
|
||||
sins.Imm32 = gen.GetUint32()
|
||||
sins.OpGroup = S_IXOR_C7
|
||||
sins.OpGroupPar = -1
|
||||
|
||||
case S_IMULH_R:
|
||||
sins.CanReuse = true
|
||||
sins.Mod = 0
|
||||
sins.Imm32 = 0
|
||||
sins.OpGroup = S_IMULH_R
|
||||
sins.OpGroupPar = int(gen.GetUint32())
|
||||
case S_ISMULH_R:
|
||||
sins.CanReuse = true
|
||||
sins.Mod = 0
|
||||
sins.Imm32 = 0
|
||||
sins.OpGroup = S_ISMULH_R
|
||||
sins.OpGroupPar = int(gen.GetUint32())
|
||||
|
||||
case S_IMUL_RCP:
|
||||
|
||||
sins.Mod = 0
|
||||
for {
|
||||
sins.Imm32 = gen.GetUint32()
|
||||
if (sins.Imm32&sins.Imm32 - 1) != 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
sins.OpGroup = S_IMUL_RCP
|
||||
|
||||
default:
|
||||
panic("should not occur")
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
func CreateSuperScalarInstruction(sins *SuperScalarInstruction, gen *Blake2Generator, instruction_len int, decoder_type int, islast, isfirst bool) {
|
||||
|
||||
switch instruction_len {
|
||||
case 3:
|
||||
if islast {
|
||||
create(sins, slot3L[gen.GetByte()&3], gen)
|
||||
} else {
|
||||
create(sins, slot3[gen.GetByte()&1], gen)
|
||||
}
|
||||
case 4:
|
||||
//if this is the 4-4-4-4 buffer, issue multiplications as the first 3 instructions
|
||||
if decoder_type == int(Decoder4444) && !islast {
|
||||
create(sins, &IMUL_R, gen)
|
||||
} else {
|
||||
create(sins, slot4[gen.GetByte()&1], gen)
|
||||
}
|
||||
case 7:
|
||||
create(sins, slot7[gen.GetByte()&1], gen)
|
||||
|
||||
case 8:
|
||||
create(sins, slot8[gen.GetByte()&1], gen)
|
||||
|
||||
case 9:
|
||||
create(sins, slot9[gen.GetByte()&1], gen)
|
||||
case 10:
|
||||
create(sins, slot10[0], gen)
|
||||
|
||||
default:
|
||||
panic("should not be possible")
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
type SuperScalarProgram []SuperScalarInstruction
|
||||
|
||||
func (p SuperScalarProgram) setAddressRegister(addressRegister int) {
|
||||
p[0].Dst_Reg = addressRegister
|
||||
func (p SuperScalarProgram) setAddressRegister(addressRegister uint8) {
|
||||
p[0].Dst = addressRegister
|
||||
}
|
||||
|
||||
func (p SuperScalarProgram) AddressRegister() int {
|
||||
return p[0].Dst_Reg
|
||||
func (p SuperScalarProgram) AddressRegister() uint8 {
|
||||
return p[0].Dst
|
||||
}
|
||||
func (p SuperScalarProgram) Program() []SuperScalarInstruction {
|
||||
return p[1:]
|
||||
}
|
||||
|
||||
func Build_SuperScalar_Program(gen *Blake2Generator) SuperScalarProgram {
|
||||
func BuildSuperScalarProgram(gen *blake2.Generator) SuperScalarProgram {
|
||||
cycle := 0
|
||||
depcycle := 0
|
||||
//retire_cycle := 0
|
||||
|
@ -474,12 +325,7 @@ func Build_SuperScalar_Program(gen *Blake2Generator) SuperScalarProgram {
|
|||
code_size := 0
|
||||
program := make(SuperScalarProgram, 1, 512)
|
||||
|
||||
preAllocatedRegisters := gen.allocRegIndex[:]
|
||||
|
||||
registers := gen.allocRegisters[:]
|
||||
for i := range registers {
|
||||
registers[i] = Register{}
|
||||
}
|
||||
var registers [8]Register
|
||||
|
||||
sins := &SuperScalarInstruction{}
|
||||
sins.ins = &Instruction{Opcode: S_NOP}
|
||||
|
@ -508,7 +354,7 @@ func Build_SuperScalar_Program(gen *Blake2Generator) SuperScalarProgram {
|
|||
if ports_saturated || program_size >= SuperscalarMaxSize {
|
||||
break
|
||||
}
|
||||
CreateSuperScalarInstruction(sins, gen, Decoder_To_Instruction_Length[int(decoder)][buffer_index], int(decoder), len(Decoder_To_Instruction_Length[decoder]) == (buffer_index+1), buffer_index == 0)
|
||||
CreateSuperScalarInstruction(sins, gen, decoderToInstructionSize[decoder][buffer_index], decoder, len(decoderToInstructionSize[decoder]) == (buffer_index+1), buffer_index == 0)
|
||||
macro_op_index = 0
|
||||
|
||||
}
|
||||
|
@ -529,7 +375,7 @@ func Build_SuperScalar_Program(gen *Blake2Generator) SuperScalarProgram {
|
|||
|
||||
if macro_op_index == sins.ins.SrcOP { // FIXME
|
||||
forward := 0
|
||||
for ; forward < LOOK_FORWARD_CYCLES && !sins.SelectSource(preAllocatedRegisters, scheduleCycle, registers, gen); forward++ {
|
||||
for ; forward < LOOK_FORWARD_CYCLES && !sins.SelectSource(scheduleCycle, ®isters, gen); forward++ {
|
||||
scheduleCycle++
|
||||
cycle++
|
||||
}
|
||||
|
@ -547,7 +393,7 @@ func Build_SuperScalar_Program(gen *Blake2Generator) SuperScalarProgram {
|
|||
|
||||
if macro_op_index == sins.ins.DstOP { // FIXME
|
||||
forward := 0
|
||||
for ; forward < LOOK_FORWARD_CYCLES && !sins.SelectDestination(preAllocatedRegisters, scheduleCycle, throwAwayCount > 0, registers, gen); forward++ {
|
||||
for ; forward < LOOK_FORWARD_CYCLES && !sins.SelectDestination(scheduleCycle, throwAwayCount > 0, ®isters, gen); forward++ {
|
||||
scheduleCycle++
|
||||
cycle++
|
||||
}
|
||||
|
@ -569,9 +415,9 @@ func Build_SuperScalar_Program(gen *Blake2Generator) SuperScalarProgram {
|
|||
depcycle = scheduleCycle + mop.GetLatency() // calculate when will the result be ready
|
||||
|
||||
if macro_op_index == sins.ins.ResultOP { // fix me
|
||||
registers[sins.Dst_Reg].Latency = depcycle
|
||||
registers[sins.Dst_Reg].LastOpGroup = sins.OpGroup
|
||||
registers[sins.Dst_Reg].LastOpPar = sins.OpGroupPar
|
||||
registers[sins.Dst].Latency = depcycle
|
||||
registers[sins.Dst].LastOpGroup = sins.OpGroup
|
||||
registers[sins.Dst].LastOpPar = sins.OpGroupPar
|
||||
|
||||
}
|
||||
|
||||
|
@ -609,12 +455,12 @@ func Build_SuperScalar_Program(gen *Blake2Generator) SuperScalarProgram {
|
|||
if i == 0 {
|
||||
continue
|
||||
}
|
||||
lastdst := asic_latencies[program[i].Dst_Reg] + 1
|
||||
lastdst := asic_latencies[program[i].Dst] + 1
|
||||
lastsrc := 0
|
||||
if program[i].Dst_Reg != program[i].Src_Reg {
|
||||
lastsrc = asic_latencies[program[i].Src_Reg] + 1
|
||||
if program[i].Dst != program[i].Src {
|
||||
lastsrc = asic_latencies[program[i].Src] + 1
|
||||
}
|
||||
asic_latencies[program[i].Dst_Reg] = max(lastdst, lastsrc)
|
||||
asic_latencies[program[i].Dst] = max(lastdst, lastsrc)
|
||||
}
|
||||
|
||||
asic_latency_max := 0
|
||||
|
@ -628,7 +474,7 @@ func Build_SuperScalar_Program(gen *Blake2Generator) SuperScalarProgram {
|
|||
}
|
||||
|
||||
// Set AddressRegister hack
|
||||
program.setAddressRegister(address_reg)
|
||||
program.setAddressRegister(uint8(address_reg))
|
||||
|
||||
return program
|
||||
}
|
||||
|
@ -702,122 +548,101 @@ type Register struct {
|
|||
//RegisterNeedsSib = 4; //x86 r12 register
|
||||
}
|
||||
|
||||
// RegisterNeedsDisplacement x86 r13 register
|
||||
const RegisterNeedsDisplacement = 5
|
||||
|
||||
// RegisterNeedsSib x86 r12 register
|
||||
const RegisterNeedsSib = 4
|
||||
|
||||
func (sins *SuperScalarInstruction) SelectSource(preAllocatedAvailableRegisters []int, cycle int, Registers []Register, gen *Blake2Generator) bool {
|
||||
available_registers := preAllocatedAvailableRegisters[:0]
|
||||
func (sins *SuperScalarInstruction) SelectSource(cycle int, registers *[8]Register, gen *blake2.Generator) bool {
|
||||
availableRegisters := make([]uint8, 0, 8)
|
||||
|
||||
for i := range Registers {
|
||||
if Registers[i].Latency <= cycle {
|
||||
available_registers = append(available_registers, i)
|
||||
for i := range registers {
|
||||
if registers[i].Latency <= cycle {
|
||||
availableRegisters = append(availableRegisters, uint8(i))
|
||||
}
|
||||
}
|
||||
|
||||
if len(available_registers) == 2 && sins.Opcode == S_IADD_RS {
|
||||
if available_registers[0] == RegisterNeedsDisplacement || available_registers[1] == RegisterNeedsDisplacement {
|
||||
sins.Src_Reg = RegisterNeedsDisplacement
|
||||
sins.OpGroupPar = sins.Src_Reg
|
||||
if len(availableRegisters) == 2 && sins.Opcode == S_IADD_RS {
|
||||
if availableRegisters[0] == RegisterNeedsDisplacement || availableRegisters[1] == RegisterNeedsDisplacement {
|
||||
sins.Src = RegisterNeedsDisplacement
|
||||
sins.OpGroupPar = int(sins.Src)
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
if selectRegister(available_registers, gen, &sins.Src_Reg) {
|
||||
if selectRegister(availableRegisters, gen, &sins.Src) {
|
||||
|
||||
if sins.GroupParIsSource == 0 {
|
||||
|
||||
} else {
|
||||
sins.OpGroupPar = sins.Src_Reg
|
||||
sins.OpGroupPar = int(sins.Src)
|
||||
}
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (sins *SuperScalarInstruction) SelectDestination(preAllocatedAvailableRegisters []int, cycle int, allowChainedMul bool, Registers []Register, gen *Blake2Generator) bool {
|
||||
preAllocatedAvailableRegisters = preAllocatedAvailableRegisters[:0]
|
||||
func (sins *SuperScalarInstruction) SelectDestination(cycle int, allowChainedMul bool, Registers *[8]Register, gen *blake2.Generator) bool {
|
||||
var availableRegisters = make([]uint8, 0, 8)
|
||||
|
||||
for i := range Registers {
|
||||
if Registers[i].Latency <= cycle && (sins.CanReuse || i != sins.Src_Reg) &&
|
||||
if Registers[i].Latency <= cycle && (sins.CanReuse || uint8(i) != sins.Src) &&
|
||||
(allowChainedMul || sins.OpGroup != S_IMUL_R || Registers[i].LastOpGroup != S_IMUL_R) &&
|
||||
(Registers[i].LastOpGroup != sins.OpGroup || Registers[i].LastOpPar != sins.OpGroupPar) &&
|
||||
(sins.Opcode != S_IADD_RS || i != RegisterNeedsDisplacement) {
|
||||
preAllocatedAvailableRegisters = append(preAllocatedAvailableRegisters, i)
|
||||
availableRegisters = append(availableRegisters, uint8(i))
|
||||
}
|
||||
}
|
||||
|
||||
return selectRegister(preAllocatedAvailableRegisters, gen, &sins.Dst_Reg)
|
||||
return selectRegister(availableRegisters, gen, &sins.Dst)
|
||||
}
|
||||
|
||||
func selectRegister(available_registers []int, gen *Blake2Generator, reg *int) bool {
|
||||
func selectRegister(availableRegisters []uint8, gen *blake2.Generator, reg *uint8) bool {
|
||||
index := 0
|
||||
if len(available_registers) == 0 {
|
||||
if len(availableRegisters) == 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
if len(available_registers) > 1 {
|
||||
if len(availableRegisters) > 1 {
|
||||
tmp := gen.GetUint32()
|
||||
|
||||
index = int(tmp % uint32(len(available_registers)))
|
||||
index = int(tmp % uint32(len(availableRegisters)))
|
||||
} else {
|
||||
index = 0
|
||||
}
|
||||
*reg = available_registers[index]
|
||||
*reg = availableRegisters[index]
|
||||
return true
|
||||
}
|
||||
|
||||
const Mask = CacheSize/CacheLineSize - 1
|
||||
|
||||
// executeSuperscalar execute the superscalar program
|
||||
func executeSuperscalar(p []SuperScalarInstruction, r *RegisterLine) {
|
||||
//TODO: produce around (14 * 8 * 8) = 896 different opcodes with hardcoded registers
|
||||
|
||||
for i := range p {
|
||||
ins := &p[i]
|
||||
switch ins.Opcode {
|
||||
case S_ISUB_R:
|
||||
r[ins.Dst_Reg] -= r[ins.Src_Reg]
|
||||
r[ins.Dst] -= r[ins.Src]
|
||||
case S_IXOR_R:
|
||||
r[ins.Dst_Reg] ^= r[ins.Src_Reg]
|
||||
r[ins.Dst] ^= r[ins.Src]
|
||||
case S_IADD_RS:
|
||||
r[ins.Dst_Reg] += r[ins.Src_Reg] << ins.Imm32
|
||||
r[ins.Dst] += r[ins.Src] << ins.Imm32
|
||||
case S_IMUL_R:
|
||||
r[ins.Dst_Reg] *= r[ins.Src_Reg]
|
||||
r[ins.Dst] *= r[ins.Src]
|
||||
case S_IROR_C:
|
||||
r[ins.Dst_Reg] = bits.RotateLeft64(r[ins.Dst_Reg], 0-int(ins.Imm32))
|
||||
r[ins.Dst] = bits.RotateLeft64(r[ins.Dst], 0-int(ins.Imm32))
|
||||
case S_IADD_C7, S_IADD_C8, S_IADD_C9:
|
||||
r[ins.Dst_Reg] += signExtend2sCompl(ins.Imm32)
|
||||
r[ins.Dst] += signExtend2sCompl(ins.Imm32)
|
||||
case S_IXOR_C7, S_IXOR_C8, S_IXOR_C9:
|
||||
r[ins.Dst_Reg] ^= signExtend2sCompl(ins.Imm32)
|
||||
r[ins.Dst] ^= signExtend2sCompl(ins.Imm32)
|
||||
case S_IMULH_R:
|
||||
r[ins.Dst_Reg], _ = bits.Mul64(r[ins.Dst_Reg], r[ins.Src_Reg])
|
||||
r[ins.Dst], _ = bits.Mul64(r[ins.Dst], r[ins.Src])
|
||||
case S_ISMULH_R:
|
||||
r[ins.Dst_Reg] = smulh(int64(r[ins.Dst_Reg]), int64(r[ins.Src_Reg]))
|
||||
r[ins.Dst] = smulh(int64(r[ins.Dst]), int64(r[ins.Src]))
|
||||
case S_IMUL_RCP:
|
||||
r[ins.Dst_Reg] *= randomx_reciprocal(ins.Imm32)
|
||||
r[ins.Dst] *= ins.Imm64
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// smulh returns the upper 64 bits of the signed 128-bit product a*b.
//
// The unsigned high word is computed first and then corrected: for each
// negative operand the other operand is subtracted (two's-complement
// adjustment from the unsigned to the signed high product).
func smulh(a, b int64) uint64 {
	unsignedHigh, _ := bits.Mul64(uint64(a), uint64(b))

	high := int64(unsignedHigh)
	if a < 0 {
		high -= b
	}
	if b < 0 {
		high -= a
	}
	return uint64(high)
}
|
||||
|
||||
// randomx_reciprocal computes floor(2^(63+n) / divisor) truncated to 64 bits,
// where n is the bit length of divisor. The division is split into the
// quotient and remainder of 2^63 so every intermediate fits in a uint64.
//
// A zero divisor causes a division-by-zero run-time panic.
func randomx_reciprocal(divisor uint32) uint64 {
	const twoPow63 = uint64(1) << 63

	d := uint64(divisor)
	width := uint32(bits.Len32(divisor))

	q := twoPow63 / d
	r := twoPow63 % d

	return q<<width + (r<<width)/d
}
|
||||
|
||||
// signExtend2sCompl interprets x as a two's-complement 32-bit value and
// sign-extends it to 64 bits.
func signExtend2sCompl(x uint32) uint64 {
	signed := int32(x)
	widened := int64(signed)
	return uint64(widened)
}
|
||||
|
|
|
@ -1,152 +0,0 @@
|
|||
//go:build unix && amd64 && !disable_jit && !purego
|
||||
|
||||
package randomx
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
)
|
||||
|
||||
var REX_SUB_RR = []byte{0x4d, 0x2b}
|
||||
var REX_MOV_RR64 = []byte{0x49, 0x8b}
|
||||
var REX_MOV_R64R = []byte{0x4c, 0x8b}
|
||||
var REX_IMUL_RR = []byte{0x4d, 0x0f, 0xaf}
|
||||
var REX_IMUL_RM = []byte{0x4c, 0x0f, 0xaf}
|
||||
var REX_MUL_R = []byte{0x49, 0xf7}
|
||||
var REX_81 = []byte{0x49, 0x81}
|
||||
|
||||
var MOV_RAX_I = []byte{0x48, 0xb8}
|
||||
var REX_LEA = []byte{0x4f, 0x8d}
|
||||
var REX_XOR_RR = []byte{0x4D, 0x33}
|
||||
var REX_XOR_RI = []byte{0x49, 0x81}
|
||||
var REX_ROT_I8 = []byte{0x49, 0xc1}
|
||||
|
||||
// genSIB packs scale (bits 6-7), index (bits 3-5) and base (bits 0-2) into a
// single SIB byte. The arguments are not range-checked; the combined value is
// truncated to 8 bits.
func genSIB(scale, index, base int) byte {
	sib := scale << 6
	sib |= index << 3
	sib |= base
	return byte(sib)
}
|
||||
|
||||
/*
|
||||
push rbp
|
||||
push rbx
|
||||
push rsi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
mov rbp,rsp
|
||||
sub rsp,(0x8*7)
|
||||
|
||||
mov rsi, rax; # register dataset
|
||||
|
||||
prefetchnta byte ptr [rsi]
|
||||
|
||||
mov r8, qword ptr [rsi+0]
|
||||
mov r9, qword ptr [rsi+8]
|
||||
mov r10, qword ptr [rsi+16]
|
||||
mov r11, qword ptr [rsi+24]
|
||||
mov r12, qword ptr [rsi+32]
|
||||
mov r13, qword ptr [rsi+40]
|
||||
mov r14, qword ptr [rsi+48]
|
||||
mov r15, qword ptr [rsi+56]
|
||||
*/
|
||||
var codeInitBlock = []byte{0x55, 0x53, 0x56, 0x41, 0x54, 0x41, 0x55, 0x41, 0x56, 0x41, 0x57, 0x48, 0x89, 0xE5, 0x48, 0x83, 0xEC, 0x38, 0x48, 0x89, 0xC6, 0x0F, 0x18, 0x06, 0x4C, 0x8B, 0x06, 0x4C, 0x8B, 0x4E, 0x08, 0x4C, 0x8B, 0x56, 0x10, 0x4C, 0x8B, 0x5E, 0x18, 0x4C, 0x8B, 0x66, 0x20, 0x4C, 0x8B, 0x6E, 0x28, 0x4C, 0x8B, 0x76, 0x30, 0x4C, 0x8B, 0x7E, 0x38}
|
||||
|
||||
/*
|
||||
prefetchw byte ptr [rsi]
|
||||
|
||||
mov qword ptr [rsi+0], r8
|
||||
mov qword ptr [rsi+8], r9
|
||||
mov qword ptr [rsi+16], r10
|
||||
mov qword ptr [rsi+24], r11
|
||||
mov qword ptr [rsi+32], r12
|
||||
mov qword ptr [rsi+40], r13
|
||||
mov qword ptr [rsi+48], r14
|
||||
mov qword ptr [rsi+56], r15
|
||||
|
||||
add rsp,(0x8*7)
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rsi
|
||||
pop rbx
|
||||
pop rbp
|
||||
ret
|
||||
*/
|
||||
var codeRetBlock = []byte{0x0F, 0x0D, 0x0E, 0x4C, 0x89, 0x06, 0x4C, 0x89, 0x4E, 0x08, 0x4C, 0x89, 0x56, 0x10, 0x4C, 0x89, 0x5E, 0x18, 0x4C, 0x89, 0x66, 0x20, 0x4C, 0x89, 0x6E, 0x28, 0x4C, 0x89, 0x76, 0x30, 0x4C, 0x89, 0x7E, 0x38, 0x48, 0x83, 0xC4, 0x38, 0x41, 0x5F, 0x41, 0x5E, 0x41, 0x5D, 0x41, 0x5C, 0x5E, 0x5B, 0x5D, 0xC3}
|
||||
|
||||
// generateSuperscalarCode
|
||||
func generateSuperscalarCode(scalarProgram SuperScalarProgram) ProgramFunc {
|
||||
|
||||
var program []byte
|
||||
|
||||
program = append(program, codeInitBlock...)
|
||||
|
||||
p := scalarProgram.Program()
|
||||
for i := range p {
|
||||
instr := &p[i]
|
||||
|
||||
dst := instr.Dst_Reg % RegistersCount
|
||||
src := instr.Src_Reg % RegistersCount
|
||||
|
||||
switch instr.Opcode {
|
||||
case S_ISUB_R:
|
||||
program = append(program, REX_SUB_RR...)
|
||||
program = append(program, byte(0xc0+8*dst+src))
|
||||
case S_IXOR_R:
|
||||
program = append(program, REX_XOR_RR...)
|
||||
program = append(program, byte(0xc0+8*dst+src))
|
||||
case S_IADD_RS:
|
||||
program = append(program, REX_LEA...)
|
||||
program = append(program,
|
||||
byte(0x04+8*dst),
|
||||
genSIB(int(instr.Imm32), src, dst),
|
||||
)
|
||||
case S_IMUL_R:
|
||||
program = append(program, REX_IMUL_RR...)
|
||||
program = append(program, byte(0xc0+8*dst+src))
|
||||
case S_IROR_C:
|
||||
program = append(program, REX_ROT_I8...)
|
||||
program = append(program,
|
||||
byte(0xc8+dst),
|
||||
byte(instr.Imm32&63),
|
||||
)
|
||||
|
||||
case S_IADD_C7, S_IADD_C8, S_IADD_C9:
|
||||
program = append(program, REX_81...)
|
||||
program = append(program, byte(0xc0+dst))
|
||||
program = binary.LittleEndian.AppendUint32(program, instr.Imm32)
|
||||
//TODO: align NOP on C8/C9
|
||||
case S_IXOR_C7, S_IXOR_C8, S_IXOR_C9:
|
||||
program = append(program, REX_XOR_RI...)
|
||||
program = append(program, byte(0xf0+dst))
|
||||
program = binary.LittleEndian.AppendUint32(program, instr.Imm32)
|
||||
//TODO: align NOP on C8/C9
|
||||
|
||||
case S_IMULH_R:
|
||||
program = append(program, REX_MOV_RR64...)
|
||||
program = append(program, byte(0xc0+dst))
|
||||
program = append(program, REX_MUL_R...)
|
||||
program = append(program, byte(0xe0+src))
|
||||
program = append(program, REX_MOV_R64R...)
|
||||
program = append(program, byte(0xc2+8*dst))
|
||||
case S_ISMULH_R:
|
||||
program = append(program, REX_MOV_RR64...)
|
||||
program = append(program, byte(0xc0+dst))
|
||||
program = append(program, REX_MUL_R...)
|
||||
program = append(program, byte(0xe8+src))
|
||||
program = append(program, REX_MOV_R64R...)
|
||||
program = append(program, byte(0xc2+8*dst))
|
||||
case S_IMUL_RCP:
|
||||
program = append(program, MOV_RAX_I...)
|
||||
program = binary.LittleEndian.AppendUint64(program, randomx_reciprocal(instr.Imm32))
|
||||
program = append(program, REX_IMUL_RM...)
|
||||
program = append(program, byte(0xc0+8*instr.Dst_Reg))
|
||||
default:
|
||||
panic("unreachable")
|
||||
}
|
||||
}
|
||||
|
||||
program = append(program, codeRetBlock...)
|
||||
|
||||
return mapProgram(program)
|
||||
}
|
157
superscalar_instruction.go
Normal file
157
superscalar_instruction.go
Normal file
|
@ -0,0 +1,157 @@
|
|||
package randomx
|
||||
|
||||
import "git.gammaspectra.live/P2Pool/go-randomx/v3/internal/blake2"
|
||||
|
||||
// SuperScalarInstruction superscalar program is built with superscalar instructions
|
||||
type SuperScalarInstruction struct {
|
||||
Opcode byte
|
||||
Dst uint8
|
||||
Src uint8
|
||||
Mod byte
|
||||
Imm32 uint32
|
||||
Imm64 uint64
|
||||
OpGroup int
|
||||
OpGroupPar int
|
||||
GroupParIsSource int
|
||||
ins *Instruction
|
||||
CanReuse bool
|
||||
}
|
||||
|
||||
func (sins *SuperScalarInstruction) FixSrcReg() {
|
||||
if sins.Src == 0xff {
|
||||
sins.Src = sins.Dst
|
||||
}
|
||||
|
||||
}
|
||||
func (sins *SuperScalarInstruction) Reset() {
|
||||
sins.Opcode = 99
|
||||
sins.Src = 0xff
|
||||
sins.Dst = 0xff
|
||||
sins.CanReuse = false
|
||||
sins.GroupParIsSource = 0
|
||||
}
|
||||
|
||||
func createSuperScalarInstruction(sins *SuperScalarInstruction, ins *Instruction, gen *blake2.Generator) {
|
||||
sins.Reset()
|
||||
sins.ins = ins
|
||||
sins.OpGroupPar = -1
|
||||
sins.Opcode = ins.Opcode
|
||||
|
||||
switch ins.Opcode {
|
||||
case S_ISUB_R:
|
||||
sins.Mod = 0
|
||||
sins.Imm32 = 0
|
||||
sins.OpGroup = S_IADD_RS
|
||||
sins.GroupParIsSource = 1
|
||||
case S_IXOR_R:
|
||||
sins.Mod = 0
|
||||
sins.Imm32 = 0
|
||||
sins.OpGroup = S_IXOR_R
|
||||
sins.GroupParIsSource = 1
|
||||
case S_IADD_RS:
|
||||
sins.Mod = gen.GetByte()
|
||||
// set modshift on Imm32
|
||||
sins.Imm32 = uint32((sins.Mod >> 2) % 4) // bits 2-3
|
||||
//sins.Imm32 = 0
|
||||
sins.OpGroup = S_IADD_RS
|
||||
sins.GroupParIsSource = 1
|
||||
case S_IMUL_R:
|
||||
sins.Mod = 0
|
||||
sins.Imm32 = 0
|
||||
sins.OpGroup = S_IMUL_R
|
||||
sins.GroupParIsSource = 1
|
||||
case S_IROR_C:
|
||||
sins.Mod = 0
|
||||
|
||||
for sins.Imm32 = 0; sins.Imm32 == 0; {
|
||||
sins.Imm32 = uint32(gen.GetByte() & 63)
|
||||
}
|
||||
|
||||
sins.OpGroup = S_IROR_C
|
||||
sins.OpGroupPar = -1
|
||||
case S_IADD_C7, S_IADD_C8, S_IADD_C9:
|
||||
sins.Mod = 0
|
||||
sins.Imm32 = gen.GetUint32()
|
||||
sins.OpGroup = S_IADD_C7
|
||||
sins.OpGroupPar = -1
|
||||
case S_IXOR_C7, S_IXOR_C8, S_IXOR_C9:
|
||||
sins.Mod = 0
|
||||
sins.Imm32 = gen.GetUint32()
|
||||
sins.OpGroup = S_IXOR_C7
|
||||
sins.OpGroupPar = -1
|
||||
|
||||
case S_IMULH_R:
|
||||
sins.CanReuse = true
|
||||
sins.Mod = 0
|
||||
sins.Imm32 = 0
|
||||
sins.OpGroup = S_IMULH_R
|
||||
sins.OpGroupPar = int(gen.GetUint32())
|
||||
case S_ISMULH_R:
|
||||
sins.CanReuse = true
|
||||
sins.Mod = 0
|
||||
sins.Imm32 = 0
|
||||
sins.OpGroup = S_ISMULH_R
|
||||
sins.OpGroupPar = int(gen.GetUint32())
|
||||
|
||||
case S_IMUL_RCP:
|
||||
|
||||
sins.Mod = 0
|
||||
for {
|
||||
sins.Imm32 = gen.GetUint32()
|
||||
if (sins.Imm32&sins.Imm32 - 1) != 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
sins.Imm64 = reciprocal(sins.Imm32)
|
||||
|
||||
sins.OpGroup = S_IMUL_RCP
|
||||
|
||||
default:
|
||||
panic("should not occur")
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
var slot3 = []*Instruction{&ISUB_R, &IXOR_R} // 3 length instruction will be filled with these
|
||||
var slot3L = []*Instruction{&ISUB_R, &IXOR_R, &IMULH_R, &ISMULH_R}
|
||||
|
||||
var slot4 = []*Instruction{&IROR_C, &IADD_RS}
|
||||
var slot7 = []*Instruction{&IXOR_C7, &IADD_C7}
|
||||
var slot8 = []*Instruction{&IXOR_C8, &IADD_C8}
|
||||
var slot9 = []*Instruction{&IXOR_C9, &IADD_C9}
|
||||
var slot10 = []*Instruction{&IMUL_RCP}
|
||||
|
||||
func CreateSuperScalarInstruction(sins *SuperScalarInstruction, gen *blake2.Generator, instructionLen int, decoderType DecoderType, last, first bool) {
|
||||
|
||||
switch instructionLen {
|
||||
case 3:
|
||||
if last {
|
||||
createSuperScalarInstruction(sins, slot3L[gen.GetByte()&3], gen)
|
||||
} else {
|
||||
createSuperScalarInstruction(sins, slot3[gen.GetByte()&1], gen)
|
||||
}
|
||||
case 4:
|
||||
//if this is the 4-4-4-4 buffer, issue multiplications as the first 3 instructions
|
||||
if decoderType == Decoder4444 && !last {
|
||||
createSuperScalarInstruction(sins, &IMUL_R, gen)
|
||||
} else {
|
||||
createSuperScalarInstruction(sins, slot4[gen.GetByte()&1], gen)
|
||||
}
|
||||
case 7:
|
||||
createSuperScalarInstruction(sins, slot7[gen.GetByte()&1], gen)
|
||||
|
||||
case 8:
|
||||
createSuperScalarInstruction(sins, slot8[gen.GetByte()&1], gen)
|
||||
|
||||
case 9:
|
||||
createSuperScalarInstruction(sins, slot9[gen.GetByte()&1], gen)
|
||||
case 10:
|
||||
createSuperScalarInstruction(sins, slot10[0], gen)
|
||||
|
||||
default:
|
||||
panic("should not be possible")
|
||||
}
|
||||
|
||||
}
|
101
superscalar_jit_amd64.go
Normal file
101
superscalar_jit_amd64.go
Normal file
|
@ -0,0 +1,101 @@
|
|||
//go:build unix && amd64 && !disable_jit && !purego
|
||||
|
||||
package randomx
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"git.gammaspectra.live/P2Pool/go-randomx/v3/internal/memory"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// superscalar_run is declared without a Go body. The amd64 assembly routine of
// the same name loads eight 64-bit words starting at address rf into R8-R15,
// calls the code at address jmp, and writes R8-R15 back to rf.
//
//go:noescape
|
||||
func superscalar_run(rf, jmp uintptr)
|
||||
|
||||
func (f SuperScalarProgramFunc) Execute(rf uintptr) {
|
||||
if f == nil {
|
||||
panic("program is nil")
|
||||
}
|
||||
|
||||
superscalar_run(rf, uintptr(unsafe.Pointer(unsafe.SliceData(f))))
|
||||
}
|
||||
|
||||
// generateSuperscalarCode
|
||||
func generateSuperscalarCode(scalarProgram SuperScalarProgram) SuperScalarProgramFunc {
|
||||
|
||||
var program []byte
|
||||
|
||||
p := scalarProgram.Program()
|
||||
for i := range p {
|
||||
instr := &p[i]
|
||||
|
||||
dst := instr.Dst % RegistersCount
|
||||
src := instr.Src % RegistersCount
|
||||
|
||||
switch instr.Opcode {
|
||||
case S_ISUB_R:
|
||||
program = append(program, REX_SUB_RR...)
|
||||
program = append(program, byte(0xc0+8*dst+src))
|
||||
case S_IXOR_R:
|
||||
program = append(program, REX_XOR_RR...)
|
||||
program = append(program, byte(0xc0+8*dst+src))
|
||||
case S_IADD_RS:
|
||||
program = append(program, REX_LEA...)
|
||||
program = append(program,
|
||||
byte(0x04+8*dst),
|
||||
genSIB(int(instr.Imm32), int(src), int(dst)),
|
||||
)
|
||||
case S_IMUL_R:
|
||||
program = append(program, REX_IMUL_RR...)
|
||||
program = append(program, byte(0xc0+8*dst+src))
|
||||
case S_IROR_C:
|
||||
program = append(program, REX_ROT_I8...)
|
||||
program = append(program,
|
||||
byte(0xc8+dst),
|
||||
byte(instr.Imm32&63),
|
||||
)
|
||||
|
||||
case S_IADD_C7, S_IADD_C8, S_IADD_C9:
|
||||
program = append(program, REX_81...)
|
||||
program = append(program, byte(0xc0+dst))
|
||||
program = binary.LittleEndian.AppendUint32(program, instr.Imm32)
|
||||
//TODO: align NOP on C8/C9
|
||||
case S_IXOR_C7, S_IXOR_C8, S_IXOR_C9:
|
||||
program = append(program, REX_XOR_RI...)
|
||||
program = append(program, byte(0xf0+dst))
|
||||
program = binary.LittleEndian.AppendUint32(program, instr.Imm32)
|
||||
//TODO: align NOP on C8/C9
|
||||
|
||||
case S_IMULH_R:
|
||||
program = append(program, REX_MOV_RR64...)
|
||||
program = append(program, byte(0xc0+dst))
|
||||
program = append(program, REX_MUL_R...)
|
||||
program = append(program, byte(0xe0+src))
|
||||
program = append(program, REX_MOV_R64R...)
|
||||
program = append(program, byte(0xc2+8*dst))
|
||||
case S_ISMULH_R:
|
||||
program = append(program, REX_MOV_RR64...)
|
||||
program = append(program, byte(0xc0+dst))
|
||||
program = append(program, REX_MUL_R...)
|
||||
program = append(program, byte(0xe8+src))
|
||||
program = append(program, REX_MOV_R64R...)
|
||||
program = append(program, byte(0xc2+8*dst))
|
||||
case S_IMUL_RCP:
|
||||
program = append(program, MOV_RAX_I...)
|
||||
program = binary.LittleEndian.AppendUint64(program, instr.Imm64)
|
||||
program = append(program, REX_IMUL_RM...)
|
||||
program = append(program, byte(0xc0+8*instr.Dst))
|
||||
default:
|
||||
panic("unreachable")
|
||||
}
|
||||
}
|
||||
|
||||
program = append(program, RET)
|
||||
|
||||
pagedMemory, err := memory.AllocateSlice[byte](pageAllocator, len(program))
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
copy(pagedMemory, program)
|
||||
|
||||
return pagedMemory
|
||||
}
|
42
superscalar_jit_amd64.s
Normal file
42
superscalar_jit_amd64.s
Normal file
|
@ -0,0 +1,42 @@
|
|||
//go:build unix && amd64 && !disable_jit && !purego
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// superscalar_run(rf, jmp uintptr)
//
// Loads the eight 64-bit words at rf into R8-R15, calls the code at jmp, then
// stores R8-R15 back to rf. SI holds rf across the call, so the called code
// must leave SI unchanged for the write-back to hit the right address.
// Frame size 0, 16 bytes of arguments (rf at 0(FP), jmp at 8(FP)).
TEXT ·superscalar_run(SB),$0-16
|
||||
|
||||
// SI = rf, base address of the register line
MOVQ rf+0(FP), SI
|
||||
|
||||
// non-temporal prefetch of the register line before it is read
PREFETCHNTA 0(SI)
|
||||
|
||||
// load the register line: eight consecutive quadwords into R8-R15
|
||||
MOVQ 0(SI), R8
|
||||
MOVQ 8(SI), R9
|
||||
MOVQ 16(SI), R10
|
||||
MOVQ 24(SI), R11
|
||||
MOVQ 32(SI), R12
|
||||
MOVQ 40(SI), R13
|
||||
MOVQ 48(SI), R14
|
||||
MOVQ 56(SI), R15
|
||||
|
||||
// AX = jmp, entry address of the generated code
MOVQ jmp+8(FP), AX
|
||||
// call the JIT code; it is expected to come back here with RET
|
||||
CALL AX
|
||||
|
||||
|
||||
// prefetchw BYTE PTR [rsi]
|
||||
// PREFETCHW 0(SI), emitted as raw opcode bytes 0F 0D /1 with ModRM 0x0E = [rsi]
|
||||
BYTE $0x0F
|
||||
BYTE $0x0D
|
||||
BYTE $0x0E
|
||||
|
||||
// store R8-R15 back into the register line
|
||||
MOVQ R8, 0(SI)
|
||||
MOVQ R9, 8(SI)
|
||||
MOVQ R10, 16(SI)
|
||||
MOVQ R11, 24(SI)
|
||||
MOVQ R12, 32(SI)
|
||||
MOVQ R13, 40(SI)
|
||||
MOVQ R14, 48(SI)
|
||||
MOVQ R15, 56(SI)
|
||||
|
||||
RET
|
|
@ -2,7 +2,11 @@
|
|||
|
||||
package randomx
|
||||
|
||||
// Execute does nothing in this file: the body is empty and rf is ignored.
// NOTE(review): presumably this is the placeholder for builds without the
// amd64 JIT; the build constraint of this file is outside the visible hunk.
func (f SuperScalarProgramFunc) Execute(rf uintptr) {
|
||||
|
||||
}
|
||||
|
||||
// generateSuperscalarCode
|
||||
func generateSuperscalarCode(scalarProgram SuperScalarProgram) ProgramFunc {
|
||||
func generateSuperscalarCode(scalarProgram SuperScalarProgram) SuperScalarProgramFunc {
|
||||
return nil
|
||||
}
|
||||
|
|
451
vm.go
451
vm.go
|
@ -30,84 +30,200 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
package randomx
|
||||
|
||||
import (
|
||||
"git.gammaspectra.live/P2Pool/go-randomx/v2/aes"
|
||||
"errors"
|
||||
"git.gammaspectra.live/P2Pool/go-randomx/v3/internal/aes"
|
||||
"git.gammaspectra.live/P2Pool/go-randomx/v3/internal/memory"
|
||||
"math"
|
||||
"runtime"
|
||||
"unsafe"
|
||||
)
|
||||
import "encoding/binary"
|
||||
import "golang.org/x/crypto/blake2b"
|
||||
|
||||
type REG struct {
|
||||
Hi uint64
|
||||
Lo uint64
|
||||
}
|
||||
|
||||
type VM struct {
|
||||
StateStart [64]byte
|
||||
ScratchPad ScratchPad
|
||||
pad *ScratchPad
|
||||
|
||||
ByteCode ByteCode
|
||||
|
||||
mem MemoryRegisters
|
||||
config Config // configuration
|
||||
datasetOffset uint64
|
||||
|
||||
Dataset Randomx_Dataset
|
||||
|
||||
Cache *Randomx_Cache // randomx cache
|
||||
|
||||
}
|
||||
|
||||
type Config struct {
|
||||
eMask [2]uint64
|
||||
readReg [4]uint64
|
||||
}
|
||||
|
||||
// Run calculate hash based on input
|
||||
// Warning: Underlying callers will run asm.SetRoundingMode directly
|
||||
// It is the caller's responsibility to set and restore the mode to softfloat64.RoundingModeToNearest between full executions
|
||||
// Additionally, runtime.LockOSThread and defer runtime.UnlockOSThread is recommended to prevent other goroutines sharing these changes
|
||||
func (vm *VM) Run(inputHash [64]byte, roundingMode uint8) (reg RegisterFile) {
|
||||
|
||||
reg.FPRC = roundingMode
|
||||
flags Flags
|
||||
|
||||
// buffer first 128 bytes are entropy below rest are program bytes
|
||||
var buffer [16*8 + RANDOMX_PROGRAM_SIZE*8]byte
|
||||
aes.FillAes4Rx4(inputHash, buffer[:])
|
||||
buffer [16*8 + RANDOMX_PROGRAM_SIZE*8]byte
|
||||
|
||||
entropy := (*[16]uint64)(unsafe.Pointer(&buffer))
|
||||
hashState [blake2b.Size]byte
|
||||
|
||||
prog := buffer[len(entropy)*8:]
|
||||
registerFile *RegisterFile
|
||||
|
||||
AES aes.AES
|
||||
|
||||
Cache *Cache
|
||||
Dataset *Dataset
|
||||
|
||||
program ByteCode
|
||||
jitProgram VMProgramFunc
|
||||
}
|
||||
|
||||
// NewVM Creates and initializes a RandomX virtual machine.
|
||||
// *
|
||||
// * @param flags is any combination of these 5 flags (each flag can be set or not set):
|
||||
// * RANDOMX_FLAG_LARGE_PAGES - allocate scratchpad memory in large pages
|
||||
// * RANDOMX_FLAG_HARD_AES - virtual machine will use hardware accelerated AES
|
||||
// * RANDOMX_FLAG_FULL_MEM - virtual machine will use the full dataset
|
||||
// * RANDOMX_FLAG_JIT - virtual machine will use a JIT compiler
|
||||
// * RANDOMX_FLAG_SECURE - when combined with RANDOMX_FLAG_JIT, the JIT pages are never
|
||||
// * writable and executable at the same time (W^X policy)
|
||||
// * The numeric values of the first 4 flags are ordered so that a higher value will provide
|
||||
// * faster hash calculation and a lower numeric value will provide higher portability.
|
||||
// * Using RANDOMX_FLAG_DEFAULT (all flags not set) works on all platforms, but is the slowest.
|
||||
// * @param cache is a pointer to an initialized randomx_cache structure. Can be
|
||||
// * NULL if RANDOMX_FLAG_FULL_MEM is set.
|
||||
// * @param dataset is a pointer to a randomx_dataset structure. Can be NULL
|
||||
// * if RANDOMX_FLAG_FULL_MEM is not set.
|
||||
// *
|
||||
// * @return Pointer to an initialized randomx_vm structure.
|
||||
// * Returns NULL if:
|
||||
// * (1) Scratchpad memory allocation fails.
|
||||
// * (2) The requested initialization flags are not supported on the current platform.
|
||||
// * (3) cache parameter is NULL and RANDOMX_FLAG_FULL_MEM is not set
|
||||
// * (4) dataset parameter is NULL and RANDOMX_FLAG_FULL_MEM is set
|
||||
// */
|
||||
func NewVM(flags Flags, cache *Cache, dataset *Dataset) (*VM, error) {
|
||||
if cache == nil && !flags.Has(RANDOMX_FLAG_FULL_MEM) {
|
||||
return nil, errors.New("nil cache in light mode")
|
||||
}
|
||||
if dataset == nil && flags.Has(RANDOMX_FLAG_FULL_MEM) {
|
||||
return nil, errors.New("nil dataset in full mode")
|
||||
}
|
||||
|
||||
pad, err := memory.Allocate[ScratchPad](cacheLineAlignedAllocator)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
registerFile, err := memory.Allocate[RegisterFile](cacheLineAlignedAllocator)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
_ = pad
|
||||
_ = registerFile
|
||||
|
||||
vm := &VM{
|
||||
Cache: cache,
|
||||
Dataset: dataset,
|
||||
flags: flags,
|
||||
pad: new(ScratchPad),
|
||||
registerFile: new(RegisterFile),
|
||||
}
|
||||
|
||||
if flags.Has(RANDOMX_FLAG_HARD_AES) {
|
||||
vm.AES = aes.NewHardAES()
|
||||
}
|
||||
// fallback
|
||||
if vm.AES == nil {
|
||||
vm.AES = aes.NewSoftAES()
|
||||
}
|
||||
|
||||
if flags.HasJIT() {
|
||||
vm.jitProgram, err = memory.AllocateSlice[byte](pageAllocator, int(RandomXCodeSize))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if !flags.Has(RANDOMX_FLAG_SECURE) {
|
||||
err = memory.PageReadWriteExecute(vm.jitProgram)
|
||||
if err != nil {
|
||||
vm.jitProgram.Close()
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return vm, nil
|
||||
}
|
||||
|
||||
// run calculate hash based on input. Not thread safe.
|
||||
// Warning: Underlying callers will run float64 SetRoundingMode directly
|
||||
// It is the caller's responsibility to set and restore the mode to IEEE 754 roundTiesToEven between full executions
|
||||
// Additionally, runtime.LockOSThread and defer runtime.UnlockOSThread is recommended to prevent other goroutines sharing these changes
|
||||
func (vm *VM) run() {
|
||||
|
||||
// buffer first 128 bytes are entropy below rest are program bytes
|
||||
vm.AES.FillAes4Rx4(vm.hashState, vm.buffer[:])
|
||||
|
||||
entropy := (*[16]uint64)(unsafe.Pointer(&vm.buffer))
|
||||
|
||||
// do more initialization before we run
|
||||
|
||||
reg := vm.registerFile
|
||||
reg.Clear()
|
||||
|
||||
// initialize constant registers
|
||||
for i := range entropy[:8] {
|
||||
reg.A[i/2][i%2] = SmallPositiveFloatBits(entropy[i])
|
||||
}
|
||||
|
||||
vm.mem.ma = entropy[8] & CacheLineAlignMask
|
||||
vm.mem.mx = entropy[10]
|
||||
// memory registers
|
||||
var ma, mx uint32
|
||||
|
||||
ma = uint32(entropy[8] & CacheLineAlignMask)
|
||||
mx = uint32(entropy[10])
|
||||
|
||||
addressRegisters := entropy[12]
|
||||
for i := range vm.config.readReg {
|
||||
vm.config.readReg[i] = uint64(i*2) + (addressRegisters & 1)
|
||||
|
||||
var readReg [4]uint64
|
||||
for i := range readReg {
|
||||
readReg[i] = uint64(i*2) + (addressRegisters & 1)
|
||||
addressRegisters >>= 1
|
||||
}
|
||||
|
||||
vm.datasetOffset = (entropy[13] % (DATASETEXTRAITEMS + 1)) * CacheLineSize
|
||||
vm.config.eMask[LOW] = EMask(entropy[14])
|
||||
vm.config.eMask[HIGH] = EMask(entropy[15])
|
||||
datasetOffset := (entropy[13] % (DatasetExtraItems + 1)) * CacheLineSize
|
||||
|
||||
vm.ByteCode = CompileProgramToByteCode(prog)
|
||||
eMask := [2]uint64{ExponentMask(entropy[14]), ExponentMask(entropy[15])}
|
||||
|
||||
spAddr0 := vm.mem.mx
|
||||
spAddr1 := vm.mem.ma
|
||||
prog := vm.buffer[len(entropy)*8:]
|
||||
CompileProgramToByteCode(prog, &vm.program)
|
||||
|
||||
var jitProgram VMProgramFunc
|
||||
|
||||
if vm.jitProgram != nil {
|
||||
if vm.Dataset == nil { //light mode
|
||||
if vm.flags.Has(RANDOMX_FLAG_SECURE) {
|
||||
err := memory.PageReadWrite(vm.jitProgram)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
jitProgram = vm.program.generateCode(vm.jitProgram, nil)
|
||||
err = memory.PageReadExecute(vm.jitProgram)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
} else {
|
||||
jitProgram = vm.program.generateCode(vm.jitProgram, nil)
|
||||
}
|
||||
} else {
|
||||
// full mode and we have JIT
|
||||
if vm.flags.Has(RANDOMX_FLAG_SECURE) {
|
||||
err := memory.PageReadWrite(vm.jitProgram)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
jitProgram = vm.program.generateCode(vm.jitProgram, &readReg)
|
||||
err = memory.PageReadExecute(vm.jitProgram)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
} else {
|
||||
jitProgram = vm.program.generateCode(vm.jitProgram, &readReg)
|
||||
}
|
||||
|
||||
vm.jitProgram.ExecuteFull(reg, vm.pad, &vm.Dataset.Memory()[datasetOffset/CacheLineSize], RANDOMX_PROGRAM_ITERATIONS, ma, mx, eMask)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
spAddr0 := uint64(mx)
|
||||
spAddr1 := uint64(ma)
|
||||
|
||||
var rlCache RegisterLine
|
||||
|
||||
for ic := 0; ic < RANDOMX_PROGRAM_ITERATIONS; ic++ {
|
||||
spMix := reg.R[vm.config.readReg[0]] ^ reg.R[vm.config.readReg[1]]
|
||||
spMix := reg.R[readReg[0]] ^ reg.R[readReg[1]]
|
||||
|
||||
spAddr0 ^= spMix
|
||||
spAddr0 &= ScratchpadL3Mask64
|
||||
|
@ -116,149 +232,182 @@ func (vm *VM) Run(inputHash [64]byte, roundingMode uint8) (reg RegisterFile) {
|
|||
|
||||
//TODO: optimize these loads!
|
||||
for i := uint64(0); i < RegistersCount; i++ {
|
||||
reg.R[i] ^= vm.ScratchPad.Load64(uint32(spAddr0 + 8*i))
|
||||
reg.R[i] ^= vm.pad.Load64(uint32(spAddr0 + 8*i))
|
||||
}
|
||||
|
||||
for i := uint64(0); i < RegistersCountFloat; i++ {
|
||||
reg.F[i] = vm.ScratchPad.Load32FA(uint32(spAddr1 + 8*i))
|
||||
reg.F[i] = vm.pad.Load32FA(uint32(spAddr1 + 8*i))
|
||||
}
|
||||
|
||||
for i := uint64(0); i < RegistersCountFloat; i++ {
|
||||
reg.E[i] = vm.ScratchPad.Load32FA(uint32(spAddr1 + 8*(i+RegistersCountFloat)))
|
||||
reg.E[i] = vm.pad.Load32FA(uint32(spAddr1 + 8*(i+RegistersCountFloat)))
|
||||
|
||||
reg.E[i][LOW] = MaskRegisterExponentMantissa(reg.E[i][LOW], vm.config.eMask[LOW])
|
||||
reg.E[i][HIGH] = MaskRegisterExponentMantissa(reg.E[i][HIGH], vm.config.eMask[HIGH])
|
||||
reg.E[i][LOW] = MaskRegisterExponentMantissa(reg.E[i][LOW], eMask[LOW])
|
||||
reg.E[i][HIGH] = MaskRegisterExponentMantissa(reg.E[i][HIGH], eMask[HIGH])
|
||||
}
|
||||
|
||||
// Run the actual bytecode
|
||||
vm.ByteCode.Execute(®, &vm.ScratchPad, vm.config.eMask)
|
||||
// run the actual bytecode
|
||||
if jitProgram != nil {
|
||||
// light mode
|
||||
jitProgram.Execute(reg, vm.pad, eMask)
|
||||
} else {
|
||||
vm.program.Execute(reg, vm.pad, eMask)
|
||||
}
|
||||
|
||||
vm.mem.mx ^= reg.R[vm.config.readReg[2]] ^ reg.R[vm.config.readReg[3]]
|
||||
vm.mem.mx &= CacheLineAlignMask
|
||||
mx ^= uint32(reg.R[readReg[2]] ^ reg.R[readReg[3]])
|
||||
mx &= uint32(CacheLineAlignMask)
|
||||
|
||||
vm.Dataset.PrefetchDataset(vm.datasetOffset + vm.mem.mx)
|
||||
// execute diffuser superscalar program to get dataset 64 bytes
|
||||
vm.Dataset.ReadDataset(vm.datasetOffset+vm.mem.ma, ®.R, &rlCache)
|
||||
if vm.Dataset != nil {
|
||||
// full mode
|
||||
vm.Dataset.prefetchDataset(datasetOffset + uint64(mx))
|
||||
// load output from superscalar program to get dataset 64 bytes
|
||||
vm.Dataset.readDataset(datasetOffset+uint64(ma), ®.R)
|
||||
} else {
|
||||
// light mode
|
||||
// execute output from superscalar program to get dataset 64 bytes
|
||||
vm.Cache.initDataset(&rlCache, (datasetOffset+uint64(ma))/CacheLineSize)
|
||||
for i := range reg.R {
|
||||
reg.R[i] ^= rlCache[i]
|
||||
}
|
||||
}
|
||||
|
||||
// swap the elements
|
||||
vm.mem.mx, vm.mem.ma = vm.mem.ma, vm.mem.mx
|
||||
mx, ma = ma, mx
|
||||
|
||||
for i := uint64(0); i < RegistersCount; i++ {
|
||||
vm.ScratchPad.Store64(uint32(spAddr1+8*i), reg.R[i])
|
||||
vm.pad.Store64(uint32(spAddr1+8*i), reg.R[i])
|
||||
}
|
||||
|
||||
for i := uint64(0); i < RegistersCountFloat; i++ {
|
||||
reg.F[i][LOW] = Xor(reg.F[i][LOW], reg.E[i][LOW])
|
||||
reg.F[i][HIGH] = Xor(reg.F[i][HIGH], reg.E[i][HIGH])
|
||||
|
||||
vm.ScratchPad.Store64(uint32(spAddr0+16*i), math.Float64bits(reg.F[i][LOW]))
|
||||
vm.ScratchPad.Store64(uint32(spAddr0+16*i+8), math.Float64bits(reg.F[i][HIGH]))
|
||||
vm.pad.Store64(uint32(spAddr0+16*i), math.Float64bits(reg.F[i][LOW]))
|
||||
vm.pad.Store64(uint32(spAddr0+16*i+8), math.Float64bits(reg.F[i][HIGH]))
|
||||
}
|
||||
|
||||
spAddr0 = 0
|
||||
spAddr1 = 0
|
||||
|
||||
}
|
||||
|
||||
return reg
|
||||
|
||||
}
|
||||
|
||||
func (vm *VM) InitScratchpad(seed *[64]byte) {
|
||||
vm.ScratchPad.Init(seed)
|
||||
func (vm *VM) initScratchpad(seed *[64]byte) {
|
||||
clear(vm.pad[:])
|
||||
vm.AES.FillAes1Rx4(seed, vm.pad[:])
|
||||
}
|
||||
|
||||
func (vm *VM) RunLoops(tempHash [64]byte) RegisterFile {
|
||||
func (vm *VM) runLoops() {
|
||||
if lockThreadDueToRoundingMode {
|
||||
// Lock thread due to rounding mode flags
|
||||
runtime.LockOSThread()
|
||||
defer runtime.UnlockOSThread()
|
||||
}
|
||||
|
||||
var buf [8]byte
|
||||
hash512, _ := blake2b.New512(nil)
|
||||
// always force a restore before startup
|
||||
ResetRoundingMode(vm.registerFile)
|
||||
|
||||
// Lock thread due to rounding mode flags
|
||||
runtime.LockOSThread()
|
||||
defer runtime.UnlockOSThread()
|
||||
|
||||
roundingMode := uint8(0)
|
||||
// restore rounding mode at the end
|
||||
defer ResetRoundingMode(vm.registerFile)
|
||||
|
||||
for chain := 0; chain < RANDOMX_PROGRAM_COUNT-1; chain++ {
|
||||
reg := vm.Run(tempHash, roundingMode)
|
||||
roundingMode = reg.FPRC
|
||||
vm.run()
|
||||
|
||||
hash512.Reset()
|
||||
for i := range reg.R {
|
||||
binary.LittleEndian.PutUint64(buf[:], reg.R[i])
|
||||
hash512.Write(buf[:])
|
||||
}
|
||||
for i := range reg.F {
|
||||
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(reg.F[i][LOW]))
|
||||
hash512.Write(buf[:])
|
||||
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(reg.F[i][HIGH]))
|
||||
hash512.Write(buf[:])
|
||||
}
|
||||
|
||||
for i := range reg.E {
|
||||
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(reg.E[i][LOW]))
|
||||
hash512.Write(buf[:])
|
||||
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(reg.E[i][HIGH]))
|
||||
hash512.Write(buf[:])
|
||||
}
|
||||
|
||||
for i := range reg.A {
|
||||
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(reg.A[i][LOW]))
|
||||
hash512.Write(buf[:])
|
||||
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(reg.A[i][HIGH]))
|
||||
hash512.Write(buf[:])
|
||||
}
|
||||
|
||||
hash512.Sum(tempHash[:0])
|
||||
// write R, F, E, A registers
|
||||
vm.hashState = blake2b.Sum512(vm.registerFile.Memory()[:])
|
||||
}
|
||||
|
||||
// final loop executes here
|
||||
reg := vm.Run(tempHash, roundingMode)
|
||||
roundingMode = reg.FPRC
|
||||
|
||||
//restore rounding mode
|
||||
vm.ByteCode.SetRoundingMode(®, 0)
|
||||
|
||||
return reg
|
||||
vm.run()
|
||||
}
|
||||
|
||||
func (vm *VM) CalculateHash(input []byte, output *[32]byte) {
|
||||
var buf [8]byte
|
||||
|
||||
tempHash := blake2b.Sum512(input)
|
||||
|
||||
vm.InitScratchpad(&tempHash)
|
||||
|
||||
reg := vm.RunLoops(tempHash)
|
||||
|
||||
// now hash the scratch pad and place into register a
|
||||
aes.HashAes1Rx4(vm.ScratchPad[:], &tempHash)
|
||||
|
||||
hash256, _ := blake2b.New256(nil)
|
||||
|
||||
hash256.Reset()
|
||||
|
||||
for i := range reg.R {
|
||||
binary.LittleEndian.PutUint64(buf[:], reg.R[i])
|
||||
hash256.Write(buf[:])
|
||||
// SetCache Reinitializes a virtual machine with a new Cache.
|
||||
// This function should be called anytime the Cache is reinitialized with a new key.
|
||||
// Does nothing if called with a Cache containing the same key value as already set.
|
||||
// VM must be initialized without RANDOMX_FLAG_FULL_MEM.
|
||||
func (vm *VM) SetCache(cache *Cache) {
|
||||
if vm.flags.Has(RANDOMX_FLAG_FULL_MEM) {
|
||||
panic("unsupported")
|
||||
}
|
||||
|
||||
for i := range reg.F {
|
||||
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(reg.F[i][LOW]))
|
||||
hash256.Write(buf[:])
|
||||
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(reg.F[i][HIGH]))
|
||||
hash256.Write(buf[:])
|
||||
}
|
||||
|
||||
for i := range reg.E {
|
||||
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(reg.E[i][LOW]))
|
||||
hash256.Write(buf[:])
|
||||
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(reg.E[i][HIGH]))
|
||||
hash256.Write(buf[:])
|
||||
}
|
||||
|
||||
// copy tempHash as it first copied to register and then hashed
|
||||
hash256.Write(tempHash[:])
|
||||
|
||||
hash256.Sum(output[:0])
|
||||
vm.Cache = cache
|
||||
//todo
|
||||
}
|
||||
|
||||
// SetDataset Reinitializes a virtual machine with a new Dataset.
|
||||
// VM must be initialized with RANDOMX_FLAG_FULL_MEM.
|
||||
func (vm *VM) SetDataset(dataset *Dataset) {
|
||||
if !vm.flags.Has(RANDOMX_FLAG_FULL_MEM) {
|
||||
panic("unsupported")
|
||||
}
|
||||
vm.Dataset = dataset
|
||||
}
|
||||
|
||||
// CalculateHash Calculates a RandomX hash value.
|
||||
func (vm *VM) CalculateHash(input []byte, output *[RANDOMX_HASH_SIZE]byte) {
|
||||
vm.hashState = blake2b.Sum512(input)
|
||||
|
||||
vm.initScratchpad(&vm.hashState)
|
||||
|
||||
vm.runLoops()
|
||||
|
||||
// now hash the scratch pad as it will act as register A
|
||||
vm.AES.HashAes1Rx4(vm.pad[:], &vm.hashState)
|
||||
|
||||
regMem := vm.registerFile.Memory()
|
||||
// write hash onto register A
|
||||
copy(regMem[RegisterFileSize-RegistersCountFloat*2*8:], vm.hashState[:])
|
||||
|
||||
// write R, F, E, A registers
|
||||
*output = blake2b.Sum256(regMem[:])
|
||||
}
|
||||
|
||||
// CalculateHashFirst will begin a hash calculation.
|
||||
func (vm *VM) CalculateHashFirst(input []byte) {
|
||||
vm.hashState = blake2b.Sum512(input)
|
||||
|
||||
vm.initScratchpad(&vm.hashState)
|
||||
}
|
||||
|
||||
// CalculateHashNext will output the hash value of the previous input and begin the calculation of the next hash.
|
||||
func (vm *VM) CalculateHashNext(nextInput []byte, output *[RANDOMX_HASH_SIZE]byte) {
|
||||
vm.runLoops()
|
||||
|
||||
// now hash the scratch pad as it will act as register A
|
||||
vm.AES.HashAes1Rx4(vm.pad[:], &vm.hashState)
|
||||
|
||||
// Finish current hash and fill the scratchpad for the next hash at the same time
|
||||
regMem := vm.registerFile.Memory()
|
||||
vm.hashState = blake2b.Sum512(nextInput)
|
||||
// write hash onto register A
|
||||
vm.AES.HashAndFillAes1Rx4(vm.pad[:], (*[64]byte)(unsafe.Pointer(unsafe.SliceData(regMem[RegisterFileSize-RegistersCountFloat*2*8:]))), &vm.hashState)
|
||||
runtime.KeepAlive(regMem)
|
||||
|
||||
// write R, F, E, A registers
|
||||
*output = blake2b.Sum256(regMem[:])
|
||||
}
|
||||
|
||||
// CalculateHashLast will output the hash value of the previous input.
|
||||
func (vm *VM) CalculateHashLast(output *[RANDOMX_HASH_SIZE]byte) {
|
||||
vm.runLoops()
|
||||
|
||||
// now hash the scratch pad as it will act as register A
|
||||
vm.AES.HashAes1Rx4(vm.pad[:], &vm.hashState)
|
||||
|
||||
regMem := vm.registerFile.Memory()
|
||||
// write hash onto register A
|
||||
copy(regMem[RegisterFileSize-RegistersCountFloat*2*8:], vm.hashState[:])
|
||||
|
||||
// write R, F, E, A registers
|
||||
*output = blake2b.Sum256(regMem[:])
|
||||
}
|
||||
|
||||
// Close Releases all memory occupied by the structure.
|
||||
func (vm *VM) Close() error {
|
||||
memory.Free(cacheLineAlignedAllocator, vm.pad)
|
||||
memory.Free(cacheLineAlignedAllocator, vm.registerFile)
|
||||
|
||||
if vm.jitProgram != nil {
|
||||
return vm.jitProgram.Close()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -6,6 +6,7 @@ type ByteCodeInstruction struct {
|
|||
Opcode ByteCodeInstructionOp
|
||||
MemMask uint32
|
||||
Imm uint64
|
||||
EMask uint64
|
||||
/*
|
||||
union {
|
||||
int_reg_t* idst;
|
||||
|
@ -30,7 +31,7 @@ type ByteCodeInstruction struct {
|
|||
}
|
||||
|
||||
func (i ByteCodeInstruction) jumpTarget() int {
|
||||
return int(int16((uint16(i.ImmB) << 8) | uint16(i.Dst)))
|
||||
return int(int16((uint16(i.ImmB) << 8) | uint16(i.Src)))
|
||||
}
|
||||
|
||||
func (i ByteCodeInstruction) getScratchpadAddress(ptr uint64) uint32 {
|
||||
|
|
466
vm_bytecode_jit_amd64.go
Normal file
466
vm_bytecode_jit_amd64.go
Normal file
|
@ -0,0 +1,466 @@
|
|||
//go:build unix && amd64 && !disable_jit && !purego
|
||||
|
||||
package randomx
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"math/bits"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// vm_run is declared without a Go body. VMProgramFunc.Execute calls it with
// jmp set to the address of the first byte of the generated program.
// NOTE(review): presumably implemented in an amd64 assembly file that loads rf,
// pad and eMask into machine registers and calls jmp - not visible here.
//
//go:noescape
|
||||
func vm_run(rf *RegisterFile, pad *ScratchPad, eMask [2]uint64, jmp uintptr)
|
||||
|
||||
// vm_run_full is declared without a Go body. VMProgramFunc.ExecuteFull calls it
// with memoryRegisters packed as ma in the upper 32 bits and mx in the lower 32
// bits, and jmp set to the address of the first byte of the generated program.
// NOTE(review): presumably the full-mode counterpart of vm_run that also drives
// the iteration loop and dataset reads - implementation not visible here.
//
//go:noescape
|
||||
func vm_run_full(rf *RegisterFile, pad *ScratchPad, dataset *RegisterLine, iterations, memoryRegisters uint64, eMask [2]uint64, jmp uintptr)
|
||||
|
||||
/*
|
||||
#define RANDOMX_DATASET_BASE_SIZE 2147483648
|
||||
#define RANDOMX_DATASET_BASE_MASK (RANDOMX_DATASET_BASE_SIZE-64)
|
||||
|
||||
mov ecx, ebp ;# ecx = ma
|
||||
;#and ecx, RANDOMX_DATASET_BASE_MASK
|
||||
and ecx, 2147483584
|
||||
xor r8, qword ptr [rdi+rcx]
|
||||
ror rbp, 32 ;# swap "ma" and "mx"
|
||||
xor rbp, rax ;# modify "mx"
|
||||
mov edx, ebp ;# edx = mx
|
||||
;#and edx, RANDOMX_DATASET_BASE_MASK
|
||||
and edx, 2147483584
|
||||
prefetchnta byte ptr [rdi+rdx]
|
||||
xor r9, qword ptr [rdi+rcx+8]
|
||||
xor r10, qword ptr [rdi+rcx+16]
|
||||
xor r11, qword ptr [rdi+rcx+24]
|
||||
xor r12, qword ptr [rdi+rcx+32]
|
||||
xor r13, qword ptr [rdi+rcx+40]
|
||||
xor r14, qword ptr [rdi+rcx+48]
|
||||
xor r15, qword ptr [rdi+rcx+56]
|
||||
*/
|
||||
// programReadDataset is the machine-code encoding of the assembly listing in
// the comment above: XOR a 64-byte dataset line at [rdi+ma] into r8-r15, swap
// ma/mx in rbp and prefetch the line at [rdi+mx].
var programReadDataset = []byte{0x89, 0xE9, 0x81, 0xE1, 0xC0, 0xFF, 0xFF, 0x7F, 0x4C, 0x33, 0x04, 0x0F, 0x48, 0xC1, 0xCD, 0x20, 0x48, 0x31, 0xC5, 0x89, 0xEA, 0x81, 0xE2, 0xC0, 0xFF, 0xFF, 0x7F, 0x0F, 0x18, 0x04, 0x17, 0x4C, 0x33, 0x4C, 0x0F, 0x08, 0x4C, 0x33, 0x54, 0x0F, 0x10, 0x4C, 0x33, 0x5C, 0x0F, 0x18, 0x4C, 0x33, 0x64, 0x0F, 0x20, 0x4C, 0x33, 0x6C, 0x0F, 0x28, 0x4C, 0x33, 0x74, 0x0F, 0x30, 0x4C, 0x33, 0x7C, 0x0F, 0x38}
|
||||
|
||||
/*
|
||||
lea rcx, [rsi+rax]
|
||||
push rcx
|
||||
xor r8, qword ptr [rcx+0]
|
||||
xor r9, qword ptr [rcx+8]
|
||||
xor r10, qword ptr [rcx+16]
|
||||
xor r11, qword ptr [rcx+24]
|
||||
xor r12, qword ptr [rcx+32]
|
||||
xor r13, qword ptr [rcx+40]
|
||||
xor r14, qword ptr [rcx+48]
|
||||
xor r15, qword ptr [rcx+56]
|
||||
lea rcx, [rsi+rdx]
|
||||
push rcx
|
||||
cvtdq2pd xmm0, qword ptr [rcx+0]
|
||||
cvtdq2pd xmm1, qword ptr [rcx+8]
|
||||
cvtdq2pd xmm2, qword ptr [rcx+16]
|
||||
cvtdq2pd xmm3, qword ptr [rcx+24]
|
||||
cvtdq2pd xmm4, qword ptr [rcx+32]
|
||||
cvtdq2pd xmm5, qword ptr [rcx+40]
|
||||
cvtdq2pd xmm6, qword ptr [rcx+48]
|
||||
cvtdq2pd xmm7, qword ptr [rcx+56]
|
||||
andps xmm4, xmm13
|
||||
andps xmm5, xmm13
|
||||
andps xmm6, xmm13
|
||||
andps xmm7, xmm13
|
||||
orps xmm4, xmm14
|
||||
orps xmm5, xmm14
|
||||
orps xmm6, xmm14
|
||||
orps xmm7, xmm14
|
||||
*/
|
||||
// programLoopLoad is the machine-code encoding of the assembly listing in the
// comment above: push both scratchpad addresses, XOR 64 bytes at [rsi+rax] into
// r8-r15, convert 64 bytes at [rsi+rdx] into xmm0-xmm7 and apply the xmm13 AND
// mask and xmm14 OR mask to xmm4-xmm7. generateCode emits it as the full-mode
// prologue.
var programLoopLoad = []byte{0x48, 0x8D, 0x0C, 0x06, 0x51, 0x4C, 0x33, 0x01, 0x4C, 0x33, 0x49, 0x08, 0x4C, 0x33, 0x51, 0x10, 0x4C, 0x33, 0x59, 0x18, 0x4C, 0x33, 0x61, 0x20, 0x4C, 0x33, 0x69, 0x28, 0x4C, 0x33, 0x71, 0x30, 0x4C, 0x33, 0x79, 0x38, 0x48, 0x8D, 0x0C, 0x16, 0x51, 0xF3, 0x0F, 0xE6, 0x01, 0xF3, 0x0F, 0xE6, 0x49, 0x08, 0xF3, 0x0F, 0xE6, 0x51, 0x10, 0xF3, 0x0F, 0xE6, 0x59, 0x18, 0xF3, 0x0F, 0xE6, 0x61, 0x20, 0xF3, 0x0F, 0xE6, 0x69, 0x28, 0xF3, 0x0F, 0xE6, 0x71, 0x30, 0xF3, 0x0F, 0xE6, 0x79, 0x38, 0x41, 0x0F, 0x54, 0xE5, 0x41, 0x0F, 0x54, 0xED, 0x41, 0x0F, 0x54, 0xF5, 0x41, 0x0F, 0x54, 0xFD, 0x41, 0x0F, 0x56, 0xE6, 0x41, 0x0F, 0x56, 0xEE, 0x41, 0x0F, 0x56, 0xF6, 0x41, 0x0F, 0x56, 0xFE}
|
||||
|
||||
/*
|
||||
pop rcx
|
||||
mov qword ptr [rcx+0], r8
|
||||
mov qword ptr [rcx+8], r9
|
||||
mov qword ptr [rcx+16], r10
|
||||
mov qword ptr [rcx+24], r11
|
||||
mov qword ptr [rcx+32], r12
|
||||
mov qword ptr [rcx+40], r13
|
||||
mov qword ptr [rcx+48], r14
|
||||
mov qword ptr [rcx+56], r15
|
||||
pop rcx
|
||||
xorpd xmm0, xmm4
|
||||
xorpd xmm1, xmm5
|
||||
xorpd xmm2, xmm6
|
||||
xorpd xmm3, xmm7
|
||||
|
||||
;# aligned mode
|
||||
movapd xmmword ptr [rcx+0], xmm0
|
||||
movapd xmmword ptr [rcx+16], xmm1
|
||||
movapd xmmword ptr [rcx+32], xmm2
|
||||
movapd xmmword ptr [rcx+48], xmm3
|
||||
*/
|
||||
// programLoopStoreAligned is the machine-code encoding of the assembly listing
// in the comment above: pop a scratchpad address and store r8-r15 to it, then
// pop the other address, XOR xmm4-xmm7 into xmm0-xmm3 and store xmm0-xmm3 with
// aligned moves (movapd), which require a 16-byte aligned destination.
var programLoopStoreAligned = []byte{0x59, 0x4C, 0x89, 0x01, 0x4C, 0x89, 0x49, 0x08, 0x4C, 0x89, 0x51, 0x10, 0x4C, 0x89, 0x59, 0x18, 0x4C, 0x89, 0x61, 0x20, 0x4C, 0x89, 0x69, 0x28, 0x4C, 0x89, 0x71, 0x30, 0x4C, 0x89, 0x79, 0x38, 0x59, 0x66, 0x0F, 0x57, 0xC4, 0x66, 0x0F, 0x57, 0xCD, 0x66, 0x0F, 0x57, 0xD6, 0x66, 0x0F, 0x57, 0xDF, 0x66, 0x0F, 0x29, 0x01, 0x66, 0x0F, 0x29, 0x49, 0x10, 0x66, 0x0F, 0x29, 0x51, 0x20, 0x66, 0x0F, 0x29, 0x59, 0x30}
|
||||
|
||||
/*
|
||||
#define RANDOMX_SCRATCHPAD_L3 2097152
|
||||
#define RANDOMX_SCRATCHPAD_MASK (RANDOMX_SCRATCHPAD_L3-64)
|
||||
mov rdx, rax
|
||||
;#and eax, RANDOMX_SCRATCHPAD_MASK
|
||||
and eax, 2097088
|
||||
ror rdx, 32
|
||||
;#and edx, RANDOMX_SCRATCHPAD_MASK
|
||||
and edx, 2097088
|
||||
*/
|
||||
// programCalculateSpAddrs is the machine-code encoding of the assembly listing
// in the comment above: split rax into two scratchpad offsets, eax (low half)
// and edx (high half), each masked with 0x1FFFC0 (2097088). generateCode emits
// it first in full mode.
var programCalculateSpAddrs = []byte{0x48, 0x89, 0xC2, 0x25, 0xC0, 0xFF, 0x1F, 0x00, 0x48, 0xC1, 0xCA, 0x20, 0x81, 0xE2, 0xC0, 0xFF, 0x1F, 0x00}
|
||||
|
||||
func (f VMProgramFunc) ExecuteFull(rf *RegisterFile, pad *ScratchPad, dataset *RegisterLine, iterations uint64, ma, mx uint32, eMask [2]uint64) {
|
||||
if f == nil {
|
||||
panic("program is nil")
|
||||
}
|
||||
|
||||
jmpPtr := uintptr(unsafe.Pointer(unsafe.SliceData(f)))
|
||||
vm_run_full(rf, pad, dataset, iterations, (uint64(ma)<<32)|uint64(mx), eMask, jmpPtr)
|
||||
}
|
||||
|
||||
func (f VMProgramFunc) Execute(rf *RegisterFile, pad *ScratchPad, eMask [2]uint64) {
|
||||
if f == nil {
|
||||
panic("program is nil")
|
||||
}
|
||||
|
||||
jmpPtr := uintptr(unsafe.Pointer(unsafe.SliceData(f)))
|
||||
vm_run(rf, pad, eMask, jmpPtr)
|
||||
}
|
||||
|
||||
func (c *ByteCode) generateCode(program []byte, readReg *[4]uint64) []byte {
|
||||
program = program[:0]
|
||||
|
||||
isFullMode := readReg != nil
|
||||
|
||||
if isFullMode {
|
||||
|
||||
program = append(program, programCalculateSpAddrs...)
|
||||
// prologue
|
||||
program = append(program, programLoopLoad...)
|
||||
}
|
||||
|
||||
var instructionOffsets [RANDOMX_PROGRAM_SIZE]int32
|
||||
|
||||
for ix := range c {
|
||||
instructionOffsets[ix] = int32(len(program))
|
||||
|
||||
instr := &c[ix]
|
||||
switch instr.Opcode {
|
||||
|
||||
case VM_IADD_RS:
|
||||
program = append(program, REX_LEA...)
|
||||
if instr.Dst == RegisterNeedsDisplacement {
|
||||
program = append(program, 0xac)
|
||||
} else {
|
||||
program = append(program, 0x04+8*instr.Dst)
|
||||
}
|
||||
program = append(program, genSIB(int(instr.ImmB), int(instr.Src), int(instr.Dst)))
|
||||
if instr.Dst == RegisterNeedsDisplacement {
|
||||
program = binary.LittleEndian.AppendUint32(program, uint32(instr.Imm))
|
||||
}
|
||||
|
||||
case VM_IADD_M:
|
||||
program = genAddressReg(program, instr, true)
|
||||
program = append(program, REX_ADD_RM...)
|
||||
program = append(program, 0x04+8*instr.Dst)
|
||||
program = append(program, 0x06)
|
||||
case VM_IADD_MZ:
|
||||
program = append(program, REX_ADD_RM...)
|
||||
program = append(program, 0x86+8*instr.Dst)
|
||||
program = binary.LittleEndian.AppendUint32(program, uint32(instr.Imm))
|
||||
|
||||
case VM_ISUB_R:
|
||||
program = append(program, REX_SUB_RR...)
|
||||
program = append(program, 0xc0+8*instr.Dst+instr.Src)
|
||||
case VM_ISUB_I:
|
||||
program = append(program, REX_81...)
|
||||
program = append(program, 0xe8+instr.Dst)
|
||||
program = binary.LittleEndian.AppendUint32(program, uint32(instr.Imm))
|
||||
|
||||
case VM_ISUB_M:
|
||||
program = genAddressReg(program, instr, true)
|
||||
program = append(program, REX_SUB_RM...)
|
||||
program = append(program, 0x04+8*instr.Dst)
|
||||
program = append(program, 0x06)
|
||||
case VM_ISUB_MZ:
|
||||
program = append(program, REX_SUB_RM...)
|
||||
program = append(program, 0x86+8*instr.Dst)
|
||||
program = binary.LittleEndian.AppendUint32(program, uint32(instr.Imm))
|
||||
|
||||
case VM_IMUL_R:
|
||||
program = append(program, REX_IMUL_RR...)
|
||||
program = append(program, 0xc0+8*instr.Dst+instr.Src)
|
||||
case VM_IMUL_I:
|
||||
// also handles imul_rcp, with 64-bit special
|
||||
if bits.Len64(instr.Imm) > 32 {
|
||||
program = append(program, MOV_RAX_I...)
|
||||
program = binary.LittleEndian.AppendUint64(program, instr.Imm)
|
||||
program = append(program, REX_IMUL_RM...)
|
||||
program = append(program, 0xc0+8*instr.Dst)
|
||||
} else {
|
||||
program = append(program, REX_IMUL_RRI...)
|
||||
program = append(program, 0xc0+9*instr.Dst)
|
||||
program = binary.LittleEndian.AppendUint32(program, uint32(instr.Imm))
|
||||
}
|
||||
|
||||
case VM_IMUL_M:
|
||||
program = genAddressReg(program, instr, true)
|
||||
program = append(program, REX_IMUL_RM...)
|
||||
program = append(program, 0x04+8*instr.Dst)
|
||||
program = append(program, 0x06)
|
||||
case VM_IMUL_MZ:
|
||||
program = append(program, REX_IMUL_RM...)
|
||||
program = append(program, 0x86+8*instr.Dst)
|
||||
program = binary.LittleEndian.AppendUint32(program, uint32(instr.Imm))
|
||||
|
||||
case VM_IMULH_R:
|
||||
program = append(program, REX_MOV_RR64...)
|
||||
program = append(program, 0xc0+instr.Dst)
|
||||
program = append(program, REX_MUL_R...)
|
||||
program = append(program, 0xe0+instr.Src)
|
||||
program = append(program, REX_MOV_R64R...)
|
||||
program = append(program, 0xc2+8*instr.Dst)
|
||||
|
||||
case VM_IMULH_M:
|
||||
program = genAddressReg(program, instr, false)
|
||||
program = append(program, REX_MOV_RR64...)
|
||||
program = append(program, 0xc0+instr.Dst)
|
||||
program = append(program, REX_MUL_MEM...)
|
||||
program = append(program, REX_MOV_R64R...)
|
||||
program = append(program, 0xc2+8*instr.Dst)
|
||||
case VM_IMULH_MZ:
|
||||
program = append(program, REX_MOV_RR64...)
|
||||
program = append(program, 0xc0+instr.Dst)
|
||||
program = append(program, REX_MUL_M...)
|
||||
program = append(program, 0xa6)
|
||||
program = binary.LittleEndian.AppendUint32(program, uint32(instr.Imm))
|
||||
program = append(program, REX_MOV_R64R...)
|
||||
program = append(program, 0xc2+8*instr.Dst)
|
||||
|
||||
case VM_ISMULH_R:
|
||||
program = append(program, REX_MOV_RR64...)
|
||||
program = append(program, 0xc0+instr.Dst)
|
||||
program = append(program, REX_MUL_R...)
|
||||
program = append(program, 0xe8+instr.Src)
|
||||
program = append(program, REX_MOV_R64R...)
|
||||
program = append(program, 0xc2+8*instr.Dst)
|
||||
|
||||
case VM_ISMULH_M:
|
||||
program = genAddressReg(program, instr, false)
|
||||
program = append(program, REX_MOV_RR64...)
|
||||
program = append(program, 0xc0+instr.Dst)
|
||||
program = append(program, REX_IMUL_MEM...)
|
||||
program = append(program, REX_MOV_R64R...)
|
||||
program = append(program, 0xc2+8*instr.Dst)
|
||||
case VM_ISMULH_MZ:
|
||||
program = append(program, REX_MOV_RR64...)
|
||||
program = append(program, 0xc0+instr.Dst)
|
||||
program = append(program, REX_MUL_M...)
|
||||
program = append(program, 0xae)
|
||||
program = binary.LittleEndian.AppendUint32(program, uint32(instr.Imm))
|
||||
program = append(program, REX_MOV_R64R...)
|
||||
program = append(program, 0xc2+8*instr.Dst)
|
||||
|
||||
case VM_INEG_R:
|
||||
program = append(program, REX_NEG...)
|
||||
program = append(program, 0xd8+instr.Dst)
|
||||
|
||||
case VM_IXOR_R:
|
||||
program = append(program, REX_XOR_RR...)
|
||||
program = append(program, 0xc0+8*instr.Dst+instr.Src)
|
||||
case VM_IXOR_I:
|
||||
program = append(program, REX_XOR_RI...)
|
||||
program = append(program, 0xf0+instr.Dst)
|
||||
program = binary.LittleEndian.AppendUint32(program, uint32(instr.Imm))
|
||||
|
||||
case VM_IXOR_M:
|
||||
program = genAddressReg(program, instr, true)
|
||||
program = append(program, REX_XOR_RM...)
|
||||
program = append(program, 0x04+8*instr.Dst)
|
||||
program = append(program, 0x06)
|
||||
case VM_IXOR_MZ:
|
||||
program = append(program, REX_XOR_RM...)
|
||||
program = append(program, 0x86+8*instr.Dst)
|
||||
program = binary.LittleEndian.AppendUint32(program, uint32(instr.Imm))
|
||||
|
||||
case VM_IROR_R:
|
||||
program = append(program, REX_MOV_RR...)
|
||||
program = append(program, 0xc8+instr.Src)
|
||||
program = append(program, REX_ROT_CL...)
|
||||
program = append(program, 0xc8+instr.Dst)
|
||||
case VM_IROR_I:
|
||||
program = append(program, REX_ROT_I8...)
|
||||
program = append(program, 0xc8+instr.Dst)
|
||||
program = append(program, byte(instr.Imm&63))
|
||||
|
||||
case VM_IROL_R:
|
||||
program = append(program, REX_MOV_RR...)
|
||||
program = append(program, 0xc8+instr.Src)
|
||||
program = append(program, REX_ROT_CL...)
|
||||
program = append(program, 0xc0+instr.Dst)
|
||||
case VM_IROL_I:
|
||||
program = append(program, REX_ROT_I8...)
|
||||
program = append(program, 0xc0+instr.Dst)
|
||||
program = append(program, byte(instr.Imm&63))
|
||||
|
||||
case VM_ISWAP_R:
|
||||
program = append(program, REX_XCHG...)
|
||||
program = append(program, 0xc0+instr.Src+8*instr.Dst)
|
||||
|
||||
case VM_FSWAP_RF:
|
||||
program = append(program, SHUFPD...)
|
||||
program = append(program, 0xc0+9*instr.Dst)
|
||||
program = append(program, 1)
|
||||
case VM_FSWAP_RE:
|
||||
program = append(program, SHUFPD...)
|
||||
program = append(program, 0xc0+9*(instr.Dst+RegistersCountFloat))
|
||||
program = append(program, 1)
|
||||
|
||||
case VM_FADD_R:
|
||||
program = append(program, REX_ADDPD...)
|
||||
program = append(program, 0xc0+instr.Src+8*instr.Dst)
|
||||
|
||||
case VM_FADD_M:
|
||||
program = genAddressReg(program, instr, true)
|
||||
program = append(program, REX_CVTDQ2PD_XMM12...)
|
||||
program = append(program, REX_ADDPD...)
|
||||
program = append(program, 0xc4+8*instr.Dst)
|
||||
|
||||
case VM_FSUB_R:
|
||||
program = append(program, REX_SUBPD...)
|
||||
program = append(program, 0xc0+instr.Src+8*instr.Dst)
|
||||
|
||||
case VM_FSUB_M:
|
||||
program = genAddressReg(program, instr, true)
|
||||
program = append(program, REX_CVTDQ2PD_XMM12...)
|
||||
program = append(program, REX_SUBPD...)
|
||||
program = append(program, 0xc4+8*instr.Dst)
|
||||
|
||||
case VM_FSCAL_R:
|
||||
program = append(program, REX_XORPS...)
|
||||
program = append(program, 0xc7+8*instr.Dst)
|
||||
|
||||
case VM_FMUL_R:
|
||||
program = append(program, REX_MULPD...)
|
||||
program = append(program, 0xe0+instr.Src+8*instr.Dst)
|
||||
|
||||
case VM_FDIV_M:
|
||||
program = genAddressReg(program, instr, true)
|
||||
program = append(program, REX_CVTDQ2PD_XMM12...)
|
||||
program = append(program, REX_ANDPS_XMM12...)
|
||||
program = append(program, REX_DIVPD...)
|
||||
program = append(program, 0xe4+8*instr.Dst)
|
||||
|
||||
case VM_FSQRT_R:
|
||||
program = append(program, SQRTPD...)
|
||||
program = append(program, 0xe4+9*instr.Dst)
|
||||
|
||||
case VM_CFROUND:
|
||||
program = append(program, REX_MOV_RR64...)
|
||||
program = append(program, 0xc0+instr.Src)
|
||||
rotate := byte((13 - instr.Imm) & 63)
|
||||
if rotate != 0 {
|
||||
program = append(program, ROL_RAX...)
|
||||
program = append(program, rotate)
|
||||
}
|
||||
program = append(program, AND_OR_MOV_LDMXCSR...)
|
||||
case VM_CBRANCH:
|
||||
reg := instr.Dst
|
||||
target := instr.jumpTarget() + 1
|
||||
|
||||
jmpOffset := instructionOffsets[target] - (int32(len(program)) + 16)
|
||||
|
||||
if BranchesWithin32B {
|
||||
branchBegin := uint32(int32(len(program)) + 7)
|
||||
branchEnd := branchBegin
|
||||
if jmpOffset >= -128 {
|
||||
branchEnd += 9
|
||||
} else {
|
||||
branchEnd += 13
|
||||
}
|
||||
// If the jump crosses or touches 32-byte boundary, align it
|
||||
if (branchBegin ^ branchEnd) >= 32 {
|
||||
alignmentSize := 32 - (branchBegin & 31)
|
||||
alignmentSize -= alignmentSize
|
||||
|
||||
program = append(program, JMP_ALIGN_PREFIX[alignmentSize]...)
|
||||
}
|
||||
}
|
||||
program = append(program, REX_ADD_I...)
|
||||
program = append(program, 0xc0+reg)
|
||||
program = binary.LittleEndian.AppendUint32(program, uint32(instr.Imm))
|
||||
|
||||
program = append(program, REX_TEST...)
|
||||
program = append(program, 0xc0+reg)
|
||||
program = binary.LittleEndian.AppendUint32(program, instr.MemMask)
|
||||
|
||||
if jmpOffset >= -128 {
|
||||
program = append(program, JZ_SHORT)
|
||||
program = append(program, byte(jmpOffset))
|
||||
} else {
|
||||
program = append(program, JZ...)
|
||||
program = binary.LittleEndian.AppendUint32(program, uint32(jmpOffset-4))
|
||||
}
|
||||
|
||||
case VM_ISTORE:
|
||||
//genAddressRegDst
|
||||
program = append(program, LEA_32...)
|
||||
program = append(program, 0x80+instr.Dst)
|
||||
if instr.Dst == RegisterNeedsSib {
|
||||
program = append(program, 0x24)
|
||||
}
|
||||
program = binary.LittleEndian.AppendUint32(program, uint32(instr.Imm))
|
||||
program = append(program, AND_EAX_I)
|
||||
program = binary.LittleEndian.AppendUint32(program, instr.MemMask)
|
||||
|
||||
program = append(program, REX_MOV_MR...)
|
||||
program = append(program, 0x04+8*instr.Src)
|
||||
program = append(program, 0x06)
|
||||
case VM_NOP:
|
||||
program = append(program, NOP1...)
|
||||
}
|
||||
}
|
||||
|
||||
if isFullMode {
|
||||
// end of prologue
|
||||
program = append(program, REX_MOV_RR...)
|
||||
program = append(program, 0xc0+byte(readReg[2]))
|
||||
program = append(program, REX_XOR_EAX...)
|
||||
program = append(program, 0xc0+byte(readReg[3]))
|
||||
|
||||
// read dataset
|
||||
|
||||
program = append(program, programReadDataset...)
|
||||
|
||||
// epilogue
|
||||
program = append(program, REX_MOV_RR64...)
|
||||
program = append(program, 0xc0+byte(readReg[0]))
|
||||
program = append(program, REX_XOR_RAX_R64...)
|
||||
program = append(program, 0xc0+byte(readReg[1]))
|
||||
//todo: prefetch scratchpad
|
||||
|
||||
program = append(program, programLoopStoreAligned...)
|
||||
|
||||
if BranchesWithin32B {
|
||||
branchBegin := uint32(len(program))
|
||||
branchEnd := branchBegin + 9
|
||||
|
||||
// If the jump crosses or touches 32-byte boundary, align it
|
||||
if (branchBegin ^ branchEnd) >= 32 {
|
||||
alignmentSize := 32 - (branchBegin & 31)
|
||||
if alignmentSize > 8 {
|
||||
program = append(program, NOPX[alignmentSize-9][:alignmentSize-8]...)
|
||||
alignmentSize = 8
|
||||
}
|
||||
program = append(program, NOPX[alignmentSize-1][:alignmentSize]...)
|
||||
}
|
||||
}
|
||||
|
||||
program = append(program, SUB_EBX...)
|
||||
program = append(program, JNZ...)
|
||||
program = binary.LittleEndian.AppendUint32(program, uint32(-len(program)-4))
|
||||
//exit otherwise
|
||||
|
||||
}
|
||||
|
||||
program = append(program, RET)
|
||||
|
||||
return program
|
||||
}
|
204
vm_bytecode_jit_amd64.s
Normal file
204
vm_bytecode_jit_amd64.s
Normal file
|
@ -0,0 +1,204 @@
|
|||
//go:build unix && amd64 && !disable_jit && !purego
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// vm_run loads the register file pointed to by rf into machine registers,
// calls the generated code at jmp and writes the integer, f and e registers
// back into the register file.
//
// Arguments (40 bytes): rf+0, pad+8, eMask+16 (16 bytes), jmp+32.
TEXT ·vm_run(SB),$8-40

	// load the register file into machine registers
	MOVQ rf+0(FP), AX

	PREFETCHNTA 0(AX)
	// r0-r7
	MOVQ (0*8)(AX), R8
	MOVQ (1*8)(AX), R9
	MOVQ (2*8)(AX), R10
	MOVQ (3*8)(AX), R11
	MOVQ (4*8)(AX), R12
	MOVQ (5*8)(AX), R13
	MOVQ (6*8)(AX), R14
	MOVQ (7*8)(AX), R15

	// f0-f3
	VMOVAPD (8*8)(AX), X0
	VMOVAPD (10*8)(AX), X1
	VMOVAPD (12*8)(AX), X2
	VMOVAPD (14*8)(AX), X3
	// e0-e3
	VMOVAPD (16*8)(AX), X4
	VMOVAPD (18*8)(AX), X5
	VMOVAPD (20*8)(AX), X6
	VMOVAPD (22*8)(AX), X7
	// a0-a3
	VMOVAPD (24*8)(AX), X8
	VMOVAPD (26*8)(AX), X9
	VMOVAPD (28*8)(AX), X10
	VMOVAPD (30*8)(AX), X11

	// mantissa mask
	//VMOVQ $0x00ffffffffffffff, $0x00ffffffffffffff, X13
	MOVQ $0x00ffffffffffffff, AX
	VMOVQ AX, X13
	VPBROADCASTQ X13, X13

	// eMask
	// VEX-encoded unaligned load: VMOVDQU64 is the EVEX (AVX-512) form and
	// would fault on CPUs that only provide AVX/AVX2.
	VMOVDQU eMask+16(FP), X14

	// scale mask
	//VMOVQ $0x80F0000000000000, $0x80F0000000000000, X15
	MOVQ $0x80F0000000000000, AX
	VMOVQ AX, X15
	VPBROADCASTQ X15, X15

	// scratchpad pointer
	MOVQ pad+8(FP), SI

	// JIT location
	MOVQ jmp+32(FP), AX

	// jump to JIT code
	CALL AX


	// store the machine registers back into the register file
	MOVQ rf+0(FP), AX

	// prefetchw BYTE PTR [rax]
	// PREFETCHW 0(AX)
	BYTE $0x0F
	BYTE $0x0D
	BYTE $0x08

	// r0-r7
	MOVQ R8, (0*8)(AX)
	MOVQ R9, (1*8)(AX)
	MOVQ R10, (2*8)(AX)
	MOVQ R11, (3*8)(AX)
	MOVQ R12, (4*8)(AX)
	MOVQ R13, (5*8)(AX)
	MOVQ R14, (6*8)(AX)
	MOVQ R15, (7*8)(AX)

	// f0-f3
	VMOVAPD X0, (8*8)(AX)
	VMOVAPD X1, (10*8)(AX)
	VMOVAPD X2, (12*8)(AX)
	VMOVAPD X3, (14*8)(AX)
	// e0-e3
	VMOVAPD X4, (16*8)(AX)
	VMOVAPD X5, (18*8)(AX)
	VMOVAPD X6, (20*8)(AX)
	VMOVAPD X7, (22*8)(AX)

	// a0-a3 are constant, no need to move

	RET
|
||||
|
||||
|
||||
#define RANDOMX_SCRATCHPAD_L3 2097152
|
||||
#define RANDOMX_SCRATCHPAD_MASK (RANDOMX_SCRATCHPAD_L3-64)
|
||||
|
||||
// vm_run_full loads the register file pointed to by rf into machine
// registers, sets up the scratchpad, dataset, iteration count and packed
// memory registers, calls the generated code at jmp and writes the integer,
// f and e registers back into the register file.
//
// Arguments (64 bytes): rf+0, pad+8, dataset+16, iterations+24,
// memoryRegisters+32, eMask+40 (16 bytes), jmp+56.
TEXT ·vm_run_full(SB),$32-64

	// load the register file into machine registers
	MOVQ rf+0(FP), AX

	PREFETCHNTA 0(AX)
	// r0-r7
	MOVQ (0*8)(AX), R8
	MOVQ (1*8)(AX), R9
	MOVQ (2*8)(AX), R10
	MOVQ (3*8)(AX), R11
	MOVQ (4*8)(AX), R12
	MOVQ (5*8)(AX), R13
	MOVQ (6*8)(AX), R14
	MOVQ (7*8)(AX), R15

	// f0-f3
	VMOVAPD (8*8)(AX), X0
	VMOVAPD (10*8)(AX), X1
	VMOVAPD (12*8)(AX), X2
	VMOVAPD (14*8)(AX), X3
	// e0-e3
	VMOVAPD (16*8)(AX), X4
	VMOVAPD (18*8)(AX), X5
	VMOVAPD (20*8)(AX), X6
	VMOVAPD (22*8)(AX), X7
	// load constants a0-a3
	VMOVAPD (24*8)(AX), X8
	VMOVAPD (26*8)(AX), X9
	VMOVAPD (28*8)(AX), X10
	VMOVAPD (30*8)(AX), X11

	//TODO: rest of init

	// mantissa mask
	//VMOVQ $0x00ffffffffffffff, $0x00ffffffffffffff, X13
	MOVQ $0x00ffffffffffffff, AX
	VMOVQ AX, X13
	VPBROADCASTQ X13, X13

	// eMask
	// VEX-encoded unaligned load: VMOVDQU64 is the EVEX (AVX-512) form and
	// would fault on CPUs that only provide AVX/AVX2.
	VMOVDQU eMask+40(FP), X14

	// scale mask
	//VMOVQ $0x80F0000000000000, $0x80F0000000000000, X15
	MOVQ $0x80F0000000000000, AX
	VMOVQ AX, X15
	VPBROADCASTQ X15, X15

	// scratchpad pointer on rsi
	MOVQ pad+8(FP), SI
	// dataset pointer on rdi
	MOVQ dataset+16(FP), DI
	// iterations on rbx
	MOVQ iterations+24(FP), BX
	// ma and mx on rbp TODO: change this
	MOVQ memoryRegisters+32(FP), BP

	// do ma/mx calcs
	// AX keeps the packed value as passed in; BP gets its two 32-bit halves swapped
	MOVQ BP, AX
	RORQ $32, BP

	// DX is not set here; the generated code begins with
	// programCalculateSpAddrs, which derives it (and masks eax) from rax

	// JIT location
	MOVQ jmp+56(FP), CX
	// jump to JIT code
	// this handles readReg[0-3] and dataset reading, load, stores
	CALL CX

	// store the machine registers back into the register file
	MOVQ rf+0(FP), AX


	// prefetchw BYTE PTR [rax]
	// PREFETCHW 0(AX)
	BYTE $0x0F
	BYTE $0x0D
	BYTE $0x08

	// r0-r7
	MOVQ R8, (0*8)(AX)
	MOVQ R9, (1*8)(AX)
	MOVQ R10, (2*8)(AX)
	MOVQ R11, (3*8)(AX)
	MOVQ R12, (4*8)(AX)
	MOVQ R13, (5*8)(AX)
	MOVQ R14, (6*8)(AX)
	MOVQ R15, (7*8)(AX)

	// f0-f3
	VMOVAPD X0, (8*8)(AX)
	VMOVAPD X1, (10*8)(AX)
	VMOVAPD X2, (12*8)(AX)
	VMOVAPD X3, (14*8)(AX)
	// e0-e3
	VMOVAPD X4, (16*8)(AX)
	VMOVAPD X5, (18*8)(AX)
	VMOVAPD X6, (20*8)(AX)
	VMOVAPD X7, (22*8)(AX)

	// a0-a3 are constant, no need to move

	RET
|
14
vm_bytecode_jit_generic.go
Normal file
14
vm_bytecode_jit_generic.go
Normal file
|
@ -0,0 +1,14 @@
|
|||
//go:build !unix || !amd64 || disable_jit || purego
|
||||
|
||||
package randomx
|
||||
|
||||
// generateCode is the stub used when this file's build constraint is active
// (non-unix, non-amd64, disable_jit or purego builds). It emits no machine
// code and always returns nil; program and readReg are ignored.
func (c *ByteCode) generateCode(program []byte, readReg *[4]uint64) []byte {
	return nil
}
|
||||
|
||||
// Execute is the stub used when this file's build constraint is active
// (non-unix, non-amd64, disable_jit or purego builds). It does nothing and
// leaves rf and pad untouched.
func (f VMProgramFunc) Execute(rf *RegisterFile, pad *ScratchPad, eMask [2]uint64) {

}
|
||||
// ExecuteFull is the stub used when this file's build constraint is active
// (non-unix, non-amd64, disable_jit or purego builds). It does nothing and
// leaves rf and pad untouched.
func (f VMProgramFunc) ExecuteFull(rf *RegisterFile, pad *ScratchPad, dataset *RegisterLine, iterations uint64, ma, mx uint32, eMask [2]uint64) {

}
|
|
@ -1,9 +1,9 @@
|
|||
//go:build (arm64 || amd64 || 386) && !purego
|
||||
//go:build (arm64 || arm.6 || arm.7 || amd64 || 386) && !purego
|
||||
|
||||
package randomx
|
||||
|
||||
import (
|
||||
"git.gammaspectra.live/P2Pool/go-randomx/v2/asm"
|
||||
"git.gammaspectra.live/P2Pool/go-randomx/v3/internal/asm"
|
||||
"math"
|
||||
"math/bits"
|
||||
)
|
||||
|
@ -13,7 +13,7 @@ import (
|
|||
// It is the caller's responsibility to set and restore the mode to softfloat64.RoundingModeToNearest between full executions
|
||||
// Additionally, runtime.LockOSThread and defer runtime.UnlockOSThread is recommended to prevent other goroutines sharing these changes
|
||||
func (c *ByteCode) Execute(f *RegisterFile, pad *ScratchPad, eMask [2]uint64) {
|
||||
for pc := 0; pc < RANDOMX_PROGRAM_SIZE; pc++ {
|
||||
for pc := 0; pc < len(c); pc++ {
|
||||
i := &c[pc]
|
||||
switch i.Opcode {
|
||||
case VM_NOP: // we do nothing
|
||||
|
@ -108,11 +108,11 @@ func (c *ByteCode) Execute(f *RegisterFile, pad *ScratchPad, eMask [2]uint64) {
|
|||
f.E[i.Dst][HIGH] = math.Sqrt(f.E[i.Dst][HIGH])
|
||||
case VM_CFROUND:
|
||||
tmp := (bits.RotateLeft64(f.R[i.Src], 0-int(i.Imm))) % 4 // rotate right
|
||||
c.SetRoundingMode(f, uint8(tmp))
|
||||
SetRoundingMode(f, uint8(tmp))
|
||||
|
||||
case VM_CBRANCH:
|
||||
f.R[i.Src] += i.Imm
|
||||
if (f.R[i.Src] & uint64(i.MemMask)) == 0 {
|
||||
f.R[i.Dst] += i.Imm
|
||||
if (f.R[i.Dst] & uint64(i.MemMask)) == 0 {
|
||||
pc = i.jumpTarget()
|
||||
}
|
||||
case VM_ISTORE:
|
||||
|
@ -121,10 +121,17 @@ func (c *ByteCode) Execute(f *RegisterFile, pad *ScratchPad, eMask [2]uint64) {
|
|||
}
|
||||
}
|
||||
|
||||
func (c *ByteCode) SetRoundingMode(f *RegisterFile, mode uint8) {
|
||||
const lockThreadDueToRoundingMode = true
|
||||
|
||||
func SetRoundingMode(f *RegisterFile, mode uint8) {
|
||||
if f.FPRC == mode {
|
||||
return
|
||||
}
|
||||
f.FPRC = mode
|
||||
asm.SetRoundingMode(mode)
|
||||
}
|
||||
|
||||
func ResetRoundingMode(f *RegisterFile) {
|
||||
f.FPRC = 0
|
||||
asm.SetRoundingMode(uint8(0))
|
||||
}
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
//go:build (!arm64 && !amd64 && !386) || purego
|
||||
//go:build (!arm64 && !(arm.6 || arm.7) && !amd64 && !386) || purego
|
||||
|
||||
package randomx
|
||||
|
||||
|
@ -8,11 +8,11 @@ import (
|
|||
)
|
||||
|
||||
// Execute Runs a RandomX program with the given register file and scratchpad
|
||||
// Warning: This will call asm.SetRoundingMode directly
|
||||
// It is the caller's responsibility to set and restore the mode to softfloat64.RoundingModeToNearest between full executions
|
||||
// Warning: This will call float64 SetRoundingMode directly
|
||||
// It is the caller's responsibility to set and restore the mode to IEEE 754 roundTiesToEven between full executions
|
||||
// Additionally, runtime.LockOSThread and defer runtime.UnlockOSThread is recommended to prevent other goroutines sharing these changes
|
||||
func (c *ByteCode) Execute(f *RegisterFile, pad *ScratchPad, eMask [2]uint64) {
|
||||
for pc := 0; pc < RANDOMX_PROGRAM_SIZE; pc++ {
|
||||
for pc := 0; pc < len(c); pc++ {
|
||||
i := &c[pc]
|
||||
switch i.Opcode {
|
||||
case VM_NOP: // we do nothing
|
||||
|
@ -107,11 +107,11 @@ func (c *ByteCode) Execute(f *RegisterFile, pad *ScratchPad, eMask [2]uint64) {
|
|||
f.E[i.Dst][HIGH] = softfloat64.Sqrt(f.E[i.Dst][HIGH], softfloat64.RoundingMode(f.FPRC))
|
||||
case VM_CFROUND:
|
||||
tmp := (bits.RotateLeft64(f.R[i.Src], 0-int(i.Imm))) % 4 // rotate right
|
||||
c.SetRoundingMode(f, uint8(tmp))
|
||||
SetRoundingMode(f, uint8(tmp))
|
||||
|
||||
case VM_CBRANCH:
|
||||
f.R[i.Src] += i.Imm
|
||||
if (f.R[i.Src] & uint64(i.MemMask)) == 0 {
|
||||
f.R[i.Dst] += i.Imm
|
||||
if (f.R[i.Dst] & uint64(i.MemMask)) == 0 {
|
||||
pc = i.jumpTarget()
|
||||
}
|
||||
case VM_ISTORE:
|
||||
|
@ -120,6 +120,12 @@ func (c *ByteCode) Execute(f *RegisterFile, pad *ScratchPad, eMask [2]uint64) {
|
|||
}
|
||||
}
|
||||
|
||||
func (c *ByteCode) SetRoundingMode(f *RegisterFile, mode uint8) {
|
||||
const lockThreadDueToRoundingMode = false
|
||||
|
||||
func SetRoundingMode(f *RegisterFile, mode uint8) {
|
||||
f.FPRC = mode
|
||||
}
|
||||
|
||||
func ResetRoundingMode(f *RegisterFile) {
|
||||
f.FPRC = 0
|
||||
}
|
||||
|
|
|
@ -30,7 +30,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
package randomx
|
||||
|
||||
import (
|
||||
"git.gammaspectra.live/P2Pool/go-randomx/v2/aes"
|
||||
"unsafe"
|
||||
)
|
||||
import "encoding/binary"
|
||||
|
@ -43,6 +42,11 @@ type VM_Instruction [8]byte // it is hardcode 8 bytes
|
|||
func (ins VM_Instruction) IMM() uint32 {
|
||||
return binary.LittleEndian.Uint32(ins[4:])
|
||||
}
|
||||
|
||||
func (ins VM_Instruction) IMM64() uint64 {
|
||||
return signExtend2sCompl(ins.IMM())
|
||||
}
|
||||
|
||||
func (ins VM_Instruction) Mod() byte {
|
||||
return ins[3]
|
||||
}
|
||||
|
@ -58,14 +62,14 @@ func (ins VM_Instruction) Opcode() byte {
|
|||
|
||||
// CompileProgramToByteCode this will interpret single vm instruction into executable opcodes
|
||||
// reference https://github.com/tevador/RandomX/blob/master/doc/specs.md#52-integer-instructions
|
||||
func CompileProgramToByteCode(prog []byte) (bc ByteCode) {
|
||||
func CompileProgramToByteCode(prog []byte, bc *ByteCode) {
|
||||
|
||||
var registerUsage [RegistersCount]int
|
||||
for i := range registerUsage {
|
||||
registerUsage[i] = -1
|
||||
}
|
||||
|
||||
for i := 0; i < RANDOMX_PROGRAM_SIZE; i++ {
|
||||
for i := 0; i < len(bc); i++ {
|
||||
instr := VM_Instruction(prog[i*8:])
|
||||
ibc := &bc[i]
|
||||
|
||||
|
@ -84,13 +88,13 @@ func CompileProgramToByteCode(prog []byte) (bc ByteCode) {
|
|||
} else {
|
||||
//shift
|
||||
ibc.ImmB = (instr.Mod() >> 2) % 4
|
||||
ibc.Imm = signExtend2sCompl(instr.IMM())
|
||||
ibc.Imm = instr.IMM64()
|
||||
}
|
||||
registerUsage[dst] = i
|
||||
|
||||
case 16, 17, 18, 19, 20, 21, 22: // 7
|
||||
ibc.Opcode = VM_IADD_M
|
||||
ibc.Imm = signExtend2sCompl(instr.IMM())
|
||||
ibc.Imm = instr.IMM64()
|
||||
if src != dst {
|
||||
if (instr.Mod() % 4) != 0 {
|
||||
ibc.MemMask = ScratchpadL1Mask
|
||||
|
@ -107,13 +111,13 @@ func CompileProgramToByteCode(prog []byte) (bc ByteCode) {
|
|||
ibc.Opcode = VM_ISUB_R
|
||||
|
||||
if src == dst {
|
||||
ibc.Imm = signExtend2sCompl(instr.IMM())
|
||||
ibc.Imm = instr.IMM64()
|
||||
ibc.Opcode = VM_ISUB_I
|
||||
}
|
||||
registerUsage[dst] = i
|
||||
case 39, 40, 41, 42, 43, 44, 45: // 7
|
||||
ibc.Opcode = VM_ISUB_M
|
||||
ibc.Imm = signExtend2sCompl(instr.IMM())
|
||||
ibc.Imm = instr.IMM64()
|
||||
if src != dst {
|
||||
if (instr.Mod() % 4) != 0 {
|
||||
ibc.MemMask = ScratchpadL1Mask
|
||||
|
@ -130,13 +134,13 @@ func CompileProgramToByteCode(prog []byte) (bc ByteCode) {
|
|||
ibc.Opcode = VM_IMUL_R
|
||||
|
||||
if src == dst {
|
||||
ibc.Imm = signExtend2sCompl(instr.IMM())
|
||||
ibc.Imm = instr.IMM64()
|
||||
ibc.Opcode = VM_IMUL_I
|
||||
}
|
||||
registerUsage[dst] = i
|
||||
case 62, 63, 64, 65: //4
|
||||
ibc.Opcode = VM_IMUL_M
|
||||
ibc.Imm = signExtend2sCompl(instr.IMM())
|
||||
ibc.Imm = instr.IMM64()
|
||||
if src != dst {
|
||||
if (instr.Mod() % 4) != 0 {
|
||||
ibc.MemMask = ScratchpadL1Mask
|
||||
|
@ -154,7 +158,7 @@ func CompileProgramToByteCode(prog []byte) (bc ByteCode) {
|
|||
registerUsage[dst] = i
|
||||
case 70: //1
|
||||
ibc.Opcode = VM_IMULH_M
|
||||
ibc.Imm = signExtend2sCompl(instr.IMM())
|
||||
ibc.Imm = instr.IMM64()
|
||||
if src != dst {
|
||||
if (instr.Mod() % 4) != 0 {
|
||||
ibc.MemMask = ScratchpadL1Mask
|
||||
|
@ -172,7 +176,7 @@ func CompileProgramToByteCode(prog []byte) (bc ByteCode) {
|
|||
registerUsage[dst] = i
|
||||
case 75: //1
|
||||
ibc.Opcode = VM_ISMULH_M
|
||||
ibc.Imm = signExtend2sCompl(instr.IMM())
|
||||
ibc.Imm = instr.IMM64()
|
||||
if src != dst {
|
||||
if (instr.Mod() % 4) != 0 {
|
||||
ibc.MemMask = ScratchpadL1Mask
|
||||
|
@ -189,7 +193,7 @@ func CompileProgramToByteCode(prog []byte) (bc ByteCode) {
|
|||
divisor := instr.IMM()
|
||||
if !isZeroOrPowerOf2(divisor) {
|
||||
ibc.Opcode = VM_IMUL_I
|
||||
ibc.Imm = randomx_reciprocal(divisor)
|
||||
ibc.Imm = reciprocal(divisor)
|
||||
registerUsage[dst] = i
|
||||
} else {
|
||||
ibc.Opcode = VM_NOP
|
||||
|
@ -202,13 +206,13 @@ func CompileProgramToByteCode(prog []byte) (bc ByteCode) {
|
|||
ibc.Opcode = VM_IXOR_R
|
||||
|
||||
if src == dst {
|
||||
ibc.Imm = signExtend2sCompl(instr.IMM())
|
||||
ibc.Imm = instr.IMM64()
|
||||
ibc.Opcode = VM_IXOR_I
|
||||
}
|
||||
registerUsage[dst] = i
|
||||
case 101, 102, 103, 104, 105: //5
|
||||
ibc.Opcode = VM_IXOR_M
|
||||
ibc.Imm = signExtend2sCompl(instr.IMM())
|
||||
ibc.Imm = instr.IMM64()
|
||||
if src != dst {
|
||||
if (instr.Mod() % 4) != 0 {
|
||||
ibc.MemMask = ScratchpadL1Mask
|
||||
|
@ -224,7 +228,7 @@ func CompileProgramToByteCode(prog []byte) (bc ByteCode) {
|
|||
case 106, 107, 108, 109, 110, 111, 112, 113: //8
|
||||
ibc.Opcode = VM_IROR_R
|
||||
if src == dst {
|
||||
ibc.Imm = signExtend2sCompl(instr.IMM())
|
||||
ibc.Imm = instr.IMM64()
|
||||
ibc.Opcode = VM_IROR_I
|
||||
}
|
||||
registerUsage[dst] = i
|
||||
|
@ -232,7 +236,7 @@ func CompileProgramToByteCode(prog []byte) (bc ByteCode) {
|
|||
ibc.Opcode = VM_IROL_R
|
||||
|
||||
if src == dst {
|
||||
ibc.Imm = signExtend2sCompl(instr.IMM())
|
||||
ibc.Imm = instr.IMM64()
|
||||
ibc.Opcode = VM_IROL_I
|
||||
}
|
||||
registerUsage[dst] = i
|
||||
|
@ -269,7 +273,7 @@ func CompileProgramToByteCode(prog []byte) (bc ByteCode) {
|
|||
} else {
|
||||
ibc.MemMask = ScratchpadL2Mask
|
||||
}
|
||||
ibc.Imm = signExtend2sCompl(instr.IMM())
|
||||
ibc.Imm = instr.IMM64()
|
||||
|
||||
case 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160: //16
|
||||
ibc.Dst = instr.Dst() % RegistersCountFloat // bit shift optimization
|
||||
|
@ -283,7 +287,7 @@ func CompileProgramToByteCode(prog []byte) (bc ByteCode) {
|
|||
} else {
|
||||
ibc.MemMask = ScratchpadL2Mask
|
||||
}
|
||||
ibc.Imm = signExtend2sCompl(instr.IMM())
|
||||
ibc.Imm = instr.IMM64()
|
||||
|
||||
case 166, 167, 168, 169, 170, 171: //6
|
||||
ibc.Dst = instr.Dst() % RegistersCountFloat // bit shift optimization
|
||||
|
@ -300,22 +304,24 @@ func CompileProgramToByteCode(prog []byte) (bc ByteCode) {
|
|||
} else {
|
||||
ibc.MemMask = ScratchpadL2Mask
|
||||
}
|
||||
ibc.Imm = signExtend2sCompl(instr.IMM())
|
||||
ibc.Imm = instr.IMM64()
|
||||
case 208, 209, 210, 211, 212, 213: //6
|
||||
ibc.Dst = instr.Dst() % RegistersCountFloat // bit shift optimization
|
||||
ibc.Opcode = VM_FSQRT_R
|
||||
|
||||
case 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238: //25 // CBRANCH and CFROUND are interchanged
|
||||
ibc.Opcode = VM_CBRANCH
|
||||
ibc.Src = instr.Dst() % RegistersCount
|
||||
//TODO:??? it's +1 on other
|
||||
ibc.Dst = instr.Dst() % RegistersCount
|
||||
|
||||
target := uint16(int16(registerUsage[ibc.Src]))
|
||||
ibc.Dst = uint8(target)
|
||||
target := uint16(int16(registerUsage[ibc.Dst]))
|
||||
// set target!
|
||||
ibc.Src = uint8(target)
|
||||
ibc.ImmB = uint8(target >> 8)
|
||||
|
||||
shift := uint64(instr.Mod()>>4) + CONDITIONOFFSET
|
||||
//conditionmask := CONDITIONMASK << shift
|
||||
ibc.Imm = signExtend2sCompl(instr.IMM()) | (uint64(1) << shift)
|
||||
ibc.Imm = instr.IMM64() | (uint64(1) << shift)
|
||||
if CONDITIONOFFSET > 0 || shift > 0 {
|
||||
ibc.Imm &= ^(uint64(1) << (shift - 1))
|
||||
}
|
||||
|
@ -331,7 +337,7 @@ func CompileProgramToByteCode(prog []byte) (bc ByteCode) {
|
|||
|
||||
case 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255: //16
|
||||
ibc.Opcode = VM_ISTORE
|
||||
ibc.Imm = signExtend2sCompl(instr.IMM())
|
||||
ibc.Imm = instr.IMM64()
|
||||
if (instr.Mod() >> 4) < STOREL3CONDITION {
|
||||
if (instr.Mod() % 4) != 0 {
|
||||
ibc.MemMask = ScratchpadL1Mask
|
||||
|
@ -348,22 +354,15 @@ func CompileProgramToByteCode(prog []byte) (bc ByteCode) {
|
|||
|
||||
}
|
||||
}
|
||||
|
||||
return bc
|
||||
|
||||
}
|
||||
|
||||
type ScratchPad [ScratchpadSize]byte
|
||||
|
||||
func (pad *ScratchPad) Init(seed *[64]byte) {
|
||||
// calculate and fill scratchpad
|
||||
clear(pad[:])
|
||||
aes.FillAes1Rx4(seed, pad[:])
|
||||
}
|
||||
func (pad *ScratchPad) Store64(addr uint32, val uint64) {
|
||||
*(*uint64)(unsafe.Pointer(&pad[addr])) = val
|
||||
//binary.LittleEndian.PutUint64(pad[addr:], val)
|
||||
}
|
||||
|
||||
func (pad *ScratchPad) Load64(addr uint32) uint64 {
|
||||
return *(*uint64)(unsafe.Pointer(&pad[addr]))
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue