Compare commits

...

30 commits

Author SHA1 Message Date
DataHoarder b0265950b6
Disable large page testing on 32-bit platforms
All checks were successful
continuous-integration/drone/push Build is passing
2024-05-02 16:28:38 +02:00
DataHoarder c41d6c8080
Support large pages, implement aligned / paged / large paged allocators
Some checks failed
continuous-integration/drone/push Build is failing
2024-05-02 16:18:50 +02:00
DataHoarder 9aa3631f37
Ensure 16-byte alignment of dataset/scratchpad/register file and use more performant fetch/write SIMD on amd64
All checks were successful
continuous-integration/drone/push Build is passing
2024-05-02 12:06:38 +02:00
DataHoarder 9826b7beb4
Added partial hash and fill AES for First/Next/Last hashing modes in VM 2024-05-02 11:42:23 +02:00
DataHoarder acfff4a4ad
Add hard float support for arm platform, add tests
All checks were successful
continuous-integration/drone/push Build is passing
2024-05-02 04:16:52 +02:00
DataHoarder a458a18f07
Added CalculateCommitment api for RandomX v2 hashes, added further testing
All checks were successful
continuous-integration/drone/push Build is passing
2024-05-02 03:46:03 +02:00
DataHoarder cceea5b0ba
Simplify amd64 / 386 rounding mode set 2024-05-02 03:00:26 +02:00
DataHoarder 8b063bde61
Match functionality / API with upstream randomx
All checks were successful
continuous-integration/drone/push Build is passing
2024-05-02 02:25:17 +02:00
DataHoarder c50cbc56b5
Reduce heap allocations under VM 2024-05-01 16:58:49 +02:00
DataHoarder 1d83de4880
Split hard/soft AES implementations 2024-05-01 16:25:35 +02:00
DataHoarder 25b7fc4cc0
Move internal packages to internal directory 2024-05-01 11:36:43 +02:00
DataHoarder 3f70ec75be
Remove unused functions on cpuid_amd64
All checks were successful
continuous-integration/drone/push Build is passing
2024-05-01 07:36:26 +02:00
DataHoarder 55d6161f6e
Version v3.1.0, implement generic NewDataset and GetFlags
Some checks are pending
continuous-integration/drone/tag Build is passing
continuous-integration/drone/push Build is running
2024-04-23 14:36:43 +02:00
DataHoarder 36f1a90a20
Version v3.0.0, support full dataset mode in 64-bit targets, modified api, optimized allocations, full VM run JIT on amd64, optimize AES asm
All checks were successful
continuous-integration/drone/push Build is passing
continuous-integration/drone/tag Build is passing
2024-04-23 04:33:42 +02:00
DataHoarder 4903cd7407
Cleanup readme, superscalar 2024-04-20 20:22:05 +02:00
DataHoarder d20dd880ce
amd64: Implemented VM JIT
All checks were successful
continuous-integration/drone/push Build is passing
2024-04-20 19:53:47 +02:00
DataHoarder d72726b0fe
Added wasm testing to CI
All checks were successful
continuous-integration/drone/push Build is passing
2024-04-19 18:33:50 +02:00
DataHoarder 34cfab4176
redo JIT superscalar to include less custom assembly 2024-04-19 17:53:43 +02:00
DataHoarder a71d8f6a2e
allow lock-free vm execution in soft float mode 2024-04-18 12:08:49 +02:00
DataHoarder 14a10f544f
Support x86_64 aesenc/aesdec and roundtrip mode
All checks were successful
continuous-integration/drone/push Build is passing
continuous-integration/drone/tag Build is passing
2024-04-18 11:38:55 +02:00
DataHoarder ef069318b9
fix purego bytecode / rounding mode calls
All checks were successful
continuous-integration/drone/push Build is passing
continuous-integration/drone/tag Build is passing
2024-04-18 09:06:53 +02:00
DataHoarder 80f473de54
General cleanup of jit / VM / mmap usage
Some checks failed
continuous-integration/drone/push Build is failing
continuous-integration/drone/tag Build is failing
2024-04-18 07:57:15 +02:00
DataHoarder fe253fb825
cleanup vm_instruction IMM with sign extension 2024-04-18 07:11:51 +02:00
DataHoarder 699ce02f2d
hash register file memory at once instead on loop calls 2024-04-17 09:53:24 +02:00
DataHoarder b35751462b
hack: reserve stack on JIT call 2024-04-17 09:40:54 +02:00
DataHoarder 1ce9bff7d3
Initialize AES S-Box directly
All checks were successful
continuous-integration/drone/push Build is passing
continuous-integration/drone/tag Build is passing
2024-04-17 06:45:08 +02:00
DataHoarder aab8f99dd4
Include softfloat64 and allow for purego implementation
All checks were successful
continuous-integration/drone/push Build is passing
continuous-integration/drone/tag Build is passing
2024-04-17 06:04:29 +02:00
DataHoarder 432590f930
Move argon2 / float packages to their own folders, cleanup vm Run
All checks were successful
continuous-integration/drone/push Build is passing
2024-04-15 04:14:15 +02:00
DataHoarder 5b9b3c3565
Use direct register and scratchpad under bytecode execution 2024-04-15 02:22:04 +02:00
DataHoarder b72f79a653
Remove zero register from vm bytecode 2024-04-14 15:43:54 +02:00
79 changed files with 4413 additions and 1691 deletions

View file

@ -1,7 +1,7 @@
---
kind: pipeline
type: docker
name: from-source-amd64
name: go-amd64-asm-jit
platform:
os: linux
arch: amd64
@ -24,11 +24,65 @@ steps:
commands:
- apk update
- apk add --no-cache git
- go test -p 1 -failfast -timeout 20m -cover -gcflags=-d=checkptr -v .
- go test -p 1 -failfast -timeout 20m -cover -gcflags=-d=checkptr -short -v .
---
kind: pipeline
type: docker
name: from-source-386
name: go-amd64-asm
platform:
os: linux
arch: amd64
environment:
GOPROXY: direct
GOARCH: amd64
GOAMD64: v3
GOOS: linux
GOTRACEBACK: 2
GOEXPERIMENT: "cgocheck2,newinliner"
CGO_ENABLED: "0"
workspace:
path: /drone/src
steps:
- name: test
image: golang:1.22-alpine3.19
commands:
- apk update
- apk add --no-cache git
- go test -tags disable_jit -p 1 -failfast -timeout 20m -cover -gcflags=-d=checkptr -short -v .
---
kind: pipeline
type: docker
name: go-amd64-purego
platform:
os: linux
arch: amd64
environment:
GOPROXY: direct
GOARCH: amd64
GOAMD64: v3
GOOS: linux
GOTRACEBACK: 2
GOEXPERIMENT: "cgocheck2,newinliner"
CGO_ENABLED: "0"
workspace:
path: /drone/src
steps:
- name: test
image: golang:1.22-alpine3.19
commands:
- apk update
- apk add --no-cache git
- go test -tags purego -p 1 -failfast -timeout 20m -cover -gcflags=-d=checkptr -short -v .
---
kind: pipeline
type: docker
name: go-386-asm
platform:
os: linux
arch: amd64
@ -51,11 +105,38 @@ steps:
commands:
- apk update
- apk add --no-cache git
- go test -p 1 -failfast -timeout 20m -cover -gcflags=-d=checkptr -v .
- go test -p 1 -failfast -timeout 20m -cover -gcflags=-d=checkptr -short -v .
---
kind: pipeline
type: docker
name: from-source-arm64
name: go-386-purego
platform:
os: linux
arch: amd64
environment:
GOPROXY: direct
GOARCH: 386
GO386: sse2
GOOS: linux
GOTRACEBACK: 2
GOEXPERIMENT: "cgocheck2,newinliner"
CGO_ENABLED: "0"
workspace:
path: /drone/src
steps:
- name: test
image: golang:1.22-alpine3.19
commands:
- apk update
- apk add --no-cache git
- go test -tags purego -p 1 -failfast -timeout 20m -cover -gcflags=-d=checkptr -short -v .
---
kind: pipeline
type: docker
name: go-arm64-asm
platform:
os: linux
arch: arm64
@ -77,5 +158,113 @@ steps:
commands:
- apk update
- apk add --no-cache git
- go test -p 1 -failfast -timeout 20m -cover -gcflags=-d=checkptr -v .
- go test -p 1 -failfast -timeout 20m -cover -gcflags=-d=checkptr -short -v .
---
kind: pipeline
type: docker
name: go-arm64-purego
platform:
os: linux
arch: arm64
environment:
GOPROXY: direct
GOARCH: arm64
GOOS: linux
GOTRACEBACK: 2
GOEXPERIMENT: "cgocheck2,newinliner"
CGO_ENABLED: "0"
workspace:
path: /drone/src
steps:
- name: test
image: golang:1.22-alpine3.19
commands:
- apk update
- apk add --no-cache git
- go test -tags purego -p 1 -failfast -timeout 20m -cover -gcflags=-d=checkptr -short -v .
-
---
kind: pipeline
type: docker
name: go-arm-asm
platform:
os: linux
arch: arm64
environment:
GOPROXY: direct
GOARCH: arm
GOARM: 7
GOOS: linux
GOTRACEBACK: 2
GOEXPERIMENT: "cgocheck2,newinliner"
CGO_ENABLED: "0"
workspace:
path: /drone/src
steps:
- name: test
image: golang:1.22-alpine3.19
commands:
- apk update
- apk add --no-cache git
- go test -p 1 -failfast -timeout 20m -cover -gcflags=-d=checkptr -short -v .
---
kind: pipeline
type: docker
name: go-arm-purego
platform:
os: linux
arch: arm64
environment:
GOPROXY: direct
GOARCH: arm
GOARM: 7
GOOS: linux
GOTRACEBACK: 2
GOEXPERIMENT: "cgocheck2,newinliner"
CGO_ENABLED: "0"
workspace:
path: /drone/src
steps:
- name: test
image: golang:1.22-alpine3.19
commands:
- apk update
- apk add --no-cache git
- go test -tags purego -p 1 -failfast -timeout 20m -cover -gcflags=-d=checkptr -short -v .
---
kind: pipeline
type: docker
name: go-wasm-purego
platform:
os: linux
arch: arm64
environment:
GOPROXY: direct
GOARCH: wasm
GOOS: wasip1
GOTRACEBACK: 2
GOEXPERIMENT: "cgocheck2,newinliner"
CGO_ENABLED: "0"
workspace:
path: /drone/src
steps:
- name: test
image: golang:1.22-alpine3.19
commands:
- apk update
- apk add --no-cache git bash
- apk add --no-cache wasmtime --repository=https://dl-cdn.alpinelinux.org/alpine/edge/testing
- PATH=$PATH:$(go env GOROOT)/misc/wasm go test -tags purego -p 1 -failfast -timeout 20m -cover -gcflags=-d=checkptr -short -v .
...

View file

@ -1,24 +1,41 @@
# RandomX (Golang Implementation)
RandomX is a proof-of-work (PoW) algorithm that is optimized for general-purpose CPUs.
RandomX uses random code execution (hence the name) together with several memory-hard techniques to minimize the efficiency advantage of specialized hardware.
---
Fork from [git.dero.io/DERO_Foundation/RandomX](https://git.dero.io/DERO_Foundation/RandomX). Also related, their [Analysis of RandomX writeup](https://medium.com/deroproject/analysis-of-randomx-dde9dfe9bbc6).
Original code failed RandomX testcases and was implemented using big.Float.
This package implements RandomX without CGO, using only Golang code, pure float64 ops and two small assembly sections to implement CFROUND modes.
---
This package implements RandomX without CGO, using only Golang code, native float64 ops, some assembly, but with optional soft float _purego_ implementation.
All test cases pass properly.
Uses minimal Go assembly due to having to set rounding mode natively. Support can be added with supporting rounding mode under _asm_.
Supports Full mode and Light mode.
JIT is supported on a few platforms but can be hard-disabled via the `disable_jit` build flag, or at runtime.
For the C++ implementation and design of RandomX, see [github.com/tevador/RandomX](https://github.com/tevador/RandomX)
| Platform | Supported | SuperScalar JIT | Notes |
|:-----------:|:---------:|:---------------:|:----------------:|
| **386** | ✅ | ❌ | |
| **amd64** | ✅ | ✅* | JIT only on Unix |
| **arm** | ❌ | - | |
| **arm64** | ✅ | ❌ | |
| **mips** | ❌ | - | |
| **mips64** | ❌ | - | |
| **riscv64** | ❌ | - | |
| **wasm** | ❌ | - | |
| Feature | 386 | amd64 | arm | arm64 | mips | mips64 | riscv64 | wasm |
|:---------------------:|:-----------:|:------------:|:-----------:|:-----------:|:-----------:|:-----------:|:-----------:|:-----------:|
| purego | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| Full Mode | ❌ | ✅ | ❌ | ✅ | ❌ | ✅ | ✅ | ❌ |
| Float Operations | **hw** | **hw** | **hw** | **hw** | soft | soft | soft | soft |
| AES Operations | soft | **hw** | soft | soft | soft | soft | soft | soft |
| Superscalar Execution | interpreter | **compiler** | interpreter | interpreter | interpreter | interpreter | interpreter | interpreter |
| VM Execution | interpreter | **compiler** | interpreter | interpreter | soft | soft | soft | soft |
A pure Golang implementation can be used on platforms without hard float support or via the `purego` build tag manually.
[TinyGo](https://github.com/tinygo-org/tinygo) is supported under the `purego` build tag.
Any platform with no hard float support or when enabled manually will use soft float, using [softfloat64](https://git.gammaspectra.live/P2Pool/softfloat64). This will be very slow.
Full mode is NOT recommended in 32-bit systems and is unsupported, although depending on system it might be able to run. You might want to manually run `runtime.GC()` if cleaning up dataset to free memory.
Native hard float can be added with supporting rounding mode under _asm_.
JIT only supported under Unix systems (Linux, *BSD, macOS), and can be hard-disabled via the `disable_jit` build flag, or at runtime.

View file

@ -1,145 +0,0 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package aes implements AES encryption (formerly Rijndael), as defined in
// U.S. Federal Information Processing Standards Publication 197.
//
// The AES operations in this package are not implemented using constant-time algorithms.
// An exception is when running on systems with enabled hardware support for AES
// that makes these operations constant-time. Examples include amd64 systems using AES-NI
// extensions and s390x systems using Message-Security-Assist extensions.
// On such systems, when the result of NewCipher is passed to cipher.NewGCM,
// the GHASH operation used by GCM is also constant-time.
package aes
import (
"math/bits"
)
// Multiply b and c as GF(2) polynomials modulo poly
func mul(b, c uint32) uint32 {
i := b
j := c
s := uint32(0)
for k := uint32(1); k < 0x100 && j != 0; k <<= 1 {
// Invariant: k == 1<<n, i == b * xⁿ
if j&k != 0 {
// s += i in GF(2); xor in binary
s ^= i
j ^= k // turn off bit to end loop early
}
// i *= x in GF(2) modulo the polynomial
i <<= 1
if i&0x100 != 0 {
i ^= poly
}
}
return s
}
// This file contains AES constants - 8720 bytes of initialized data.
// https://csrc.nist.gov/publications/fips/fips197/fips-197.pdf
// AES is based on the mathematical behavior of binary polynomials
// (polynomials over GF(2)) modulo the irreducible polynomial x⁸ + x⁴ + x³ + x + 1.
// Addition of these binary polynomials corresponds to binary xor.
// Reducing mod poly corresponds to binary xor with poly every
// time a 0x100 bit appears.
const poly = 1<<8 | 1<<4 | 1<<3 | 1<<1 | 1<<0 // x⁸ + x⁴ + x³ + x + 1
// Powers of x mod poly in GF(2).
var powx = [16]byte{
0x01,
0x02,
0x04,
0x08,
0x10,
0x20,
0x40,
0x80,
0x1b,
0x36,
0x6c,
0xd8,
0xab,
0x4d,
0x9a,
0x2f,
}
// FIPS-197 Figure 7. S-box substitution values in hexadecimal format.
var sbox0 = [256]byte{
0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16,
}
// FIPS-197 Figure 14. Inverse S-box substitution values in hexadecimal format.
var sbox1 = [256]byte{
0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d,
}
// Lookup tables for encryption.
var encLut = func() (te [4][256]uint32) {
for i := 0; i < 256; i++ {
s := uint32(sbox0[i])
s2 := mul(s, 2)
s3 := mul(s, 3)
w := s2<<24 | s<<16 | s<<8 | s3
for j := 0; j < 4; j++ {
te[j][i] = bits.ReverseBytes32(w)
w = w<<24 | w>>8
}
}
return te
}()
// Lookup tables for decryption.
var decLut = func() (td [4][256]uint32) {
for i := 0; i < 256; i++ {
s := uint32(sbox1[i])
s9 := mul(s, 0x9)
sb := mul(s, 0xb)
sd := mul(s, 0xd)
se := mul(s, 0xe)
w := se<<24 | s9<<16 | sd<<8 | sb
for j := 0; j < 4; j++ {
td[j][i] = bits.ReverseBytes32(w)
w = w<<24 | w>>8
}
}
return td
}()

View file

@ -1,142 +0,0 @@
/*
Copyright (c) 2019 DERO Foundation. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its contributors
may be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package aes
import (
"git.gammaspectra.live/P2Pool/go-randomx/v2/keys"
"unsafe"
)
// HashAes1Rx4
//
// Calculate a 512-bit hash of 'input' using 4 lanes of AES.
// The input is treated as a set of round keys for the encryption
// of the initial state.
//
// 'inputSize' must be a multiple of 64.
//
// For a 2 MiB input, this has the same security as 32768-round
// AES encryption.
//
// Hashing throughput: >20 GiB/s per CPU core with hardware AES
func HashAes1Rx4(input []byte, output *[64]byte) {
if len(input)%64 != 0 {
panic("unsupported")
}
// states are copied
states := keys.AesHash1R_State
for input_ptr := 0; input_ptr < len(input); input_ptr += 64 {
in := (*[4][4]uint32)(unsafe.Pointer(unsafe.SliceData(input[input_ptr:])))
soft_aesenc(&states[0], &in[0])
soft_aesdec(&states[1], &in[1])
soft_aesenc(&states[2], &in[2])
soft_aesdec(&states[3], &in[3])
}
soft_aesenc(&states[0], &keys.AesHash1R_XKeys[0])
soft_aesdec(&states[1], &keys.AesHash1R_XKeys[0])
soft_aesenc(&states[2], &keys.AesHash1R_XKeys[0])
soft_aesdec(&states[3], &keys.AesHash1R_XKeys[0])
soft_aesenc(&states[0], &keys.AesHash1R_XKeys[1])
soft_aesdec(&states[1], &keys.AesHash1R_XKeys[1])
soft_aesenc(&states[2], &keys.AesHash1R_XKeys[1])
soft_aesdec(&states[3], &keys.AesHash1R_XKeys[1])
copy(output[:], (*[64]byte)(unsafe.Pointer(&states))[:])
}
// FillAes1Rx4
//
// Fill 'output' with pseudorandom data based on 512-bit 'state'.
// The state is encrypted using a single AES round per 16 bytes of output
// in 4 lanes.
//
// 'output' size must be a multiple of 64.
//
// The modified state is written back to 'state' to allow multiple
// calls to this function.
func FillAes1Rx4(state *[64]byte, output []byte) {
if len(output)%len(state) != 0 {
panic("unsupported")
}
// Reference to state without copying
states := (*[4][4]uint32)(unsafe.Pointer(state))
for outptr := 0; outptr < len(output); outptr += len(state) {
soft_aesdec(&states[0], &keys.AesGenerator1R_Keys[0])
soft_aesenc(&states[1], &keys.AesGenerator1R_Keys[1])
soft_aesdec(&states[2], &keys.AesGenerator1R_Keys[2])
soft_aesenc(&states[3], &keys.AesGenerator1R_Keys[3])
copy(output[outptr:], state[:])
}
}
// FillAes4Rx4 used to generate final program
func FillAes4Rx4(state [64]byte, output []byte) {
if len(output)%len(state) != 0 {
panic("unsupported")
}
// state is copied on caller
// Copy state
states := (*[4][4]uint32)(unsafe.Pointer(&state))
for outptr := 0; outptr < len(output); outptr += len(state) {
soft_aesdec(&states[0], &keys.AesGenerator4R_Keys[0])
soft_aesenc(&states[1], &keys.AesGenerator4R_Keys[0])
soft_aesdec(&states[2], &keys.AesGenerator4R_Keys[4])
soft_aesenc(&states[3], &keys.AesGenerator4R_Keys[4])
soft_aesdec(&states[0], &keys.AesGenerator4R_Keys[1])
soft_aesenc(&states[1], &keys.AesGenerator4R_Keys[1])
soft_aesdec(&states[2], &keys.AesGenerator4R_Keys[5])
soft_aesenc(&states[3], &keys.AesGenerator4R_Keys[5])
soft_aesdec(&states[0], &keys.AesGenerator4R_Keys[2])
soft_aesenc(&states[1], &keys.AesGenerator4R_Keys[2])
soft_aesdec(&states[2], &keys.AesGenerator4R_Keys[6])
soft_aesenc(&states[3], &keys.AesGenerator4R_Keys[6])
soft_aesdec(&states[0], &keys.AesGenerator4R_Keys[3])
soft_aesenc(&states[1], &keys.AesGenerator4R_Keys[3])
soft_aesdec(&states[2], &keys.AesGenerator4R_Keys[7])
soft_aesenc(&states[3], &keys.AesGenerator4R_Keys[7])
copy(output[outptr:], state[:])
}
}

View file

@ -1,58 +0,0 @@
package randomx
import "golang.org/x/crypto/blake2b"
import (
_ "golang.org/x/crypto/argon2"
_ "unsafe"
)
// see reference configuration.h
// Cache size in KiB. Must be a power of 2.
const RANDOMX_ARGON_MEMORY = 262144
// Number of Argon2d iterations for Cache initialization.
const RANDOMX_ARGON_ITERATIONS = 3
// Number of parallel lanes for Cache initialization.
const RANDOMX_ARGON_LANES = 1
// Argon2d salt
const RANDOMX_ARGON_SALT = "RandomX\x03"
const ArgonSaltSize uint32 = 8 //sizeof("" RANDOMX_ARGON_SALT) - 1
const ArgonBlockSize uint32 = 1024
type argonBlock [128]uint64
const syncPoints = 4
//go:linkname argon2_initHash golang.org/x/crypto/argon2.initHash
func argon2_initHash(password, salt, key, data []byte, time, memory, threads, keyLen uint32, mode int) [blake2b.Size + 8]byte
//go:linkname argon2_initBlocks golang.org/x/crypto/argon2.initBlocks
func argon2_initBlocks(h0 *[blake2b.Size + 8]byte, memory, threads uint32) []argonBlock
//go:linkname argon2_processBlocks golang.org/x/crypto/argon2.processBlocks
func argon2_processBlocks(B []argonBlock, time, memory, threads uint32, mode int)
// argon2_buildBlocks From golang.org/x/crypto/argon2.deriveKey without last deriveKey call
func argon2_buildBlocks(password, salt, secret, data []byte, time, memory uint32, threads uint8, keyLen uint32) []argonBlock {
if time < 1 {
panic("argon2: number of rounds too small")
}
if threads < 1 {
panic("argon2: parallelism degree too low")
}
const mode = 0 /* argon2d */
h0 := argon2_initHash(password, salt, secret, data, time, memory, uint32(threads), keyLen, mode)
memory = memory / (syncPoints * uint32(threads)) * (syncPoints * uint32(threads))
if memory < 2*syncPoints*uint32(threads) {
memory = 2 * syncPoints * uint32(threads)
}
B := argon2_initBlocks(&h0, memory, uint32(threads))
argon2_processBlocks(B, time, memory, uint32(threads), mode)
return B
}

View file

@ -1,14 +0,0 @@
package asm
type RoundingMode uint8
const (
RoundingModeToNearest = RoundingMode(iota)
RoundingModeToNegative
RoundingModeToPositive
RoundingModeToZero
)
func SetRoundingMode(mode RoundingMode) {
setRoundingMode(uint8(mode))
}

View file

@ -1,20 +0,0 @@
//go:build 386
package asm
// stmxcsr reads the MXCSR control and status register.
//
//go:noescape
func stmxcsr(addr *uint32)
// ldmxcsr writes to the MXCSR control and status register.
//
//go:noescape
func ldmxcsr(addr *uint32)
func setRoundingMode(mode uint8) {
var csr uint32
stmxcsr(&csr)
csr = (csr & (^uint32(0x6000))) | ((uint32(mode) & 3) << 13)
ldmxcsr(&csr)
}

View file

@ -1,13 +0,0 @@
#include "textflag.h"
// stmxcsr reads the MXCSR control and status register.
TEXT ·stmxcsr(SB),NOSPLIT|NOFRAME,$0-4
MOVL addr+0(FP), SI
STMXCSR (SI)
RET
// ldmxcsr writes to the MXCSR control and status register.
TEXT ·ldmxcsr(SB),NOSPLIT|NOFRAME,$0-4
MOVL addr+0(FP), SI
LDMXCSR (SI)
RET

View file

@ -1,20 +0,0 @@
//go:build amd64
package asm
// stmxcsr reads the MXCSR control and status register.
//
//go:noescape
func stmxcsr(addr *uint32)
// ldmxcsr writes to the MXCSR control and status register.
//
//go:noescape
func ldmxcsr(addr *uint32)
func setRoundingMode(mode uint8) {
var csr uint32
stmxcsr(&csr)
csr = (csr & (^uint32(0x6000))) | ((uint32(mode) & 3) << 13)
ldmxcsr(&csr)
}

View file

@ -1,13 +0,0 @@
#include "textflag.h"
// stmxcsr reads the MXCSR control and status register.
TEXT ·stmxcsr(SB),NOSPLIT|NOFRAME,$0-8
MOVQ addr+0(FP), SI
STMXCSR (SI)
RET
// ldmxcsr writes to the MXCSR control and status register.
TEXT ·ldmxcsr(SB),NOSPLIT|NOFRAME,$0-8
MOVQ addr+0(FP), SI
LDMXCSR (SI)
RET

View file

@ -1,50 +0,0 @@
package randomx
import (
"encoding/binary"
"golang.org/x/crypto/blake2b"
)
type Blake2Generator struct {
data [64]byte
dataindex int
allocRegIndex [8]int
allocRegisters [8]Register
}
func Init_Blake2Generator(key []byte, nonce uint32) *Blake2Generator {
var b Blake2Generator
b.dataindex = len(b.data)
if len(key) > 60 {
copy(b.data[:], key[0:60])
} else {
copy(b.data[:], key)
}
binary.LittleEndian.PutUint32(b.data[60:], nonce)
return &b
}
func (b *Blake2Generator) checkdata(bytesNeeded int) {
if b.dataindex+bytesNeeded > cap(b.data) {
//blake2b(data, sizeof(data), data, sizeof(data), nullptr, 0);
h := blake2b.Sum512(b.data[:])
copy(b.data[:], h[:])
b.dataindex = 0
}
}
func (b *Blake2Generator) GetByte() byte {
b.checkdata(1)
ret := b.data[b.dataindex]
b.dataindex++
return ret
}
func (b *Blake2Generator) GetUint32() uint32 {
b.checkdata(4)
ret := binary.LittleEndian.Uint32(b.data[b.dataindex:])
b.dataindex += 4
return ret
}

219
cache.go
View file

@ -1,54 +1,83 @@
package randomx
import (
"git.gammaspectra.live/P2Pool/go-randomx/v2/keys"
"errors"
"git.gammaspectra.live/P2Pool/go-randomx/v3/internal/argon2"
"git.gammaspectra.live/P2Pool/go-randomx/v3/internal/blake2"
"git.gammaspectra.live/P2Pool/go-randomx/v3/internal/keys"
"git.gammaspectra.live/P2Pool/go-randomx/v3/internal/memory"
"runtime"
"slices"
"unsafe"
)
type MemoryBlock [128]uint64
type MemoryBlock [argon2.BlockSize / 8]uint64
func (m *MemoryBlock) GetLine(addr uint64) *RegisterLine {
addr >>= 3
//[addr : addr+8 : addr+8]
return (*RegisterLine)(unsafe.Add(unsafe.Pointer(m), addr*8))
return (*RegisterLine)(unsafe.Pointer(unsafe.SliceData(m[addr : addr+8 : addr+8])))
}
type Randomx_Cache struct {
Blocks []MemoryBlock
type Cache struct {
blocks *[RANDOMX_ARGON_MEMORY]MemoryBlock
Programs [RANDOMX_PROGRAM_COUNT]SuperScalarProgram
programs [RANDOMX_PROGRAM_COUNT]SuperScalarProgram
JitPrograms [RANDOMX_PROGRAM_COUNT]ProgramFunc
jitPrograms [RANDOMX_PROGRAM_COUNT]SuperScalarProgramFunc
Flags uint64
flags Flags
}
func Randomx_alloc_cache(flags uint64) *Randomx_Cache {
if flags == RANDOMX_FLAG_DEFAULT {
flags = RANDOMX_FLAG_JIT
}
return &Randomx_Cache{
Flags: flags,
// NewCache Creates a randomx_cache structure and allocates memory for RandomX Cache.
// *
// * @param flags is any combination of these 2 flags (each flag can be set or not set):
// * RANDOMX_FLAG_LARGE_PAGES - allocate memory in large pages
// * RANDOMX_FLAG_JIT - create cache structure with JIT compilation support; this makes
// * subsequent Dataset initialization faster
// * Optionally, one of these two flags may be selected:
// * RANDOMX_FLAG_ARGON2_SSSE3 - optimized Argon2 for CPUs with the SSSE3 instruction set
// * makes subsequent cache initialization faster
// * RANDOMX_FLAG_ARGON2_AVX2 - optimized Argon2 for CPUs with the AVX2 instruction set
// * makes subsequent cache initialization faster
// *
// * @return Pointer to an allocated randomx_cache structure.
// * Returns NULL if:
// * (1) memory allocation fails
// * (2) the RANDOMX_FLAG_JIT is set and JIT compilation is not supported on the current platform
// * (3) an invalid or unsupported RANDOMX_FLAG_ARGON2 value is set
// */
func NewCache(flags Flags) (c *Cache, err error) {
var blocks *[RANDOMX_ARGON_MEMORY]MemoryBlock
if flags.Has(RANDOMX_FLAG_LARGE_PAGES) {
if largePageAllocator == nil {
return nil, errors.New("huge pages not supported")
}
blocks, err = memory.Allocate[[RANDOMX_ARGON_MEMORY]MemoryBlock](largePageAllocator)
if err != nil {
return nil, err
}
} else {
blocks, err = memory.Allocate[[RANDOMX_ARGON_MEMORY]MemoryBlock](cacheLineAlignedAllocator)
if err != nil {
return nil, err
}
}
return &Cache{
flags: flags,
blocks: blocks,
}, nil
}
func (cache *Randomx_Cache) HasJIT() bool {
return cache.Flags&RANDOMX_FLAG_JIT > 0 && cache.JitPrograms[0] != nil
func (c *Cache) hasInitializedJIT() bool {
return c.flags.HasJIT() && c.jitPrograms[0] != nil
}
func (cache *Randomx_Cache) VM_Initialize() *VM {
return &VM{
Dataset: &Randomx_DatasetLight{
Cache: cache,
},
}
}
func (cache *Randomx_Cache) Close() error {
for _, p := range cache.JitPrograms {
// Close Releases all memory occupied by the Cache structure.
func (c *Cache) Close() error {
for _, p := range c.jitPrograms {
if p != nil {
err := p.Close()
if err != nil {
@ -56,45 +85,63 @@ func (cache *Randomx_Cache) Close() error {
}
}
}
return nil
if c.flags.Has(RANDOMX_FLAG_LARGE_PAGES) {
return memory.Free(largePageAllocator, c.blocks)
} else {
return memory.Free(cacheLineAlignedAllocator, c.blocks)
}
}
func (cache *Randomx_Cache) Init(key []byte) {
// Lock due to external JIT madness
runtime.LockOSThread()
defer runtime.UnlockOSThread()
// Init Initializes the cache memory and SuperscalarHash using the provided key value.
// Does nothing if called again with the same key value.
func (c *Cache) Init(key []byte) {
//TODO: cache key and do not regenerate
kkey := slices.Clone(key)
argonBlocks := unsafe.Slice((*argon2.Block)(unsafe.Pointer(c.blocks)), len(c.blocks))
argonBlocks := argon2_buildBlocks(kkey, []byte(RANDOMX_ARGON_SALT), []byte{}, []byte{}, RANDOMX_ARGON_ITERATIONS, RANDOMX_ARGON_MEMORY, RANDOMX_ARGON_LANES, 0)
argon2.BuildBlocks(argonBlocks, key, []byte(RANDOMX_ARGON_SALT), RANDOMX_ARGON_ITERATIONS, RANDOMX_ARGON_MEMORY, RANDOMX_ARGON_LANES)
memoryBlocks := unsafe.Slice((*MemoryBlock)(unsafe.Pointer(unsafe.SliceData(argonBlocks))), int(unsafe.Sizeof(argonBlock{}))/int(unsafe.Sizeof(MemoryBlock{}))*len(argonBlocks))
const nonce uint32 = 0
cache.Blocks = memoryBlocks
gen := blake2.New(key, nonce)
for i := range c.programs {
// build a superscalar program
prog := BuildSuperScalarProgram(gen)
nonce := uint32(0) //uint32(len(key))
gen := Init_Blake2Generator(key, nonce)
for i := 0; i < 8; i++ {
cache.Programs[i] = Build_SuperScalar_Program(gen) // build a superscalar program
if cache.Flags&RANDOMX_FLAG_JIT > 0 {
cache.JitPrograms[i] = generateSuperscalarCode(cache.Programs[i])
if c.flags.HasJIT() {
c.jitPrograms[i] = generateSuperscalarCode(prog)
// fallback if can't compile program
if c.jitPrograms[i] == nil {
c.programs[i] = prog
} else if err := memory.PageReadExecute(c.jitPrograms[i]); err != nil {
c.programs[i] = prog
} else {
c.programs[i] = SuperScalarProgram{prog[0]}
}
} else {
c.programs[i] = prog
}
}
}
// GetMixBlock fetch a 64 byte block in uint64 form
func (cache *Randomx_Cache) GetMixBlock(addr uint64) *RegisterLine {
const Mask = CacheSize/CacheLineSize - 1
mask := CacheSize/CacheLineSize - 1
// getMixBlock fetch a 64 byte block in uint64 form
func (c *Cache) getMixBlock(addr uint64) *RegisterLine {
addr = (addr & mask) * CacheLineSize
addr = (addr & Mask) * CacheLineSize
block := addr / 1024
return cache.Blocks[block].GetLine(addr % 1024)
return c.blocks[block].GetLine(addr % 1024)
}
func (cache *Randomx_Cache) InitDatasetItem(rl *RegisterLine, itemNumber uint64) {
func (c *Cache) GetMemory() *[RANDOMX_ARGON_MEMORY]MemoryBlock {
return c.blocks
}
func (c *Cache) initDataset(rl *RegisterLine, itemNumber uint64) {
registerValue := itemNumber
rl[0] = (itemNumber + 1) * keys.SuperScalar_Constants[0]
@ -106,51 +153,45 @@ func (cache *Randomx_Cache) InitDatasetItem(rl *RegisterLine, itemNumber uint64)
rl[6] = rl[0] ^ keys.SuperScalar_Constants[6]
rl[7] = rl[0] ^ keys.SuperScalar_Constants[7]
for i := 0; i < RANDOMX_CACHE_ACCESSES; i++ {
mix := cache.GetMixBlock(registerValue)
program := cache.Programs[i]
executeSuperscalar(program.Program(), rl)
for q := range rl {
rl[q] ^= mix[q]
if c.hasInitializedJIT() {
if c.flags.HasJIT() {
// Lock due to external JIT madness
runtime.LockOSThread()
defer runtime.UnlockOSThread()
}
registerValue = rl[program.AddressRegister()]
for i := 0; i < RANDOMX_CACHE_ACCESSES; i++ {
mix := c.getMixBlock(registerValue)
c.jitPrograms[i].Execute(uintptr(unsafe.Pointer(rl)))
for q := range rl {
rl[q] ^= mix[q]
}
registerValue = rl[c.programs[i].AddressRegister()]
}
} else {
for i := 0; i < RANDOMX_CACHE_ACCESSES; i++ {
mix := c.getMixBlock(registerValue)
program := c.programs[i]
executeSuperscalar(program.Program(), rl)
for q := range rl {
rl[q] ^= mix[q]
}
registerValue = rl[program.AddressRegister()]
}
}
}
func (cache *Randomx_Cache) InitDatasetItemJIT(rl *RegisterLine, itemNumber uint64) {
registerValue := itemNumber
rl[0] = (itemNumber + 1) * keys.SuperScalar_Constants[0]
rl[1] = rl[0] ^ keys.SuperScalar_Constants[1]
rl[2] = rl[0] ^ keys.SuperScalar_Constants[2]
rl[3] = rl[0] ^ keys.SuperScalar_Constants[3]
rl[4] = rl[0] ^ keys.SuperScalar_Constants[4]
rl[5] = rl[0] ^ keys.SuperScalar_Constants[5]
rl[6] = rl[0] ^ keys.SuperScalar_Constants[6]
rl[7] = rl[0] ^ keys.SuperScalar_Constants[7]
for i := 0; i < RANDOMX_CACHE_ACCESSES; i++ {
mix := cache.GetMixBlock(registerValue)
cache.JitPrograms[i].Execute(rl)
for q := range rl {
rl[q] ^= mix[q]
}
registerValue = rl[cache.Programs[i].AddressRegister()]
}
}
func (cache *Randomx_Cache) initDataset(dataset []RegisterLine, startItem, endItem uint64) {
panic("todo")
func (c *Cache) datasetInit(dataset []RegisterLine, startItem, endItem uint64) {
for itemNumber := startItem; itemNumber < endItem; itemNumber, dataset = itemNumber+1, dataset[1:] {
cache.InitDatasetItem(&dataset[0], itemNumber)
c.initDataset(&dataset[0], itemNumber)
}
}

101
cache_test.go Normal file
View file

@ -0,0 +1,101 @@
package randomx
import "testing"
// Test_Cache_Init checks known 64-bit words of the Argon2-filled cache
// memory after initializing with a reference test key.
func Test_Cache_Init(t *testing.T) {
	t.Parallel()

	c, err := NewCache(GetFlags())
	if err != nil {
		t.Fatal(err)
	}
	defer c.Close()

	c.Init(Tests[1].key)
	mem := c.GetMemory()

	cases := []struct {
		index int
		value uint64
	}{
		{0, 0x191e0e1d23c02186},
		{1568413, 0xf1b62fe6210bf8b1},
		{33554431, 0x1f47f056d05cd99b},
	}

	for i, tc := range cases {
		// Each memory block holds 128 uint64 words.
		got := mem[tc.index/128][tc.index%128]
		if got != tc.value {
			t.Errorf("i=%d, index=%d", i, tc.index)
			t.Errorf("expected=%016x, actual=%016x", tc.value, got)
		}
	}
}
// Test_Cache_InitDataset checks the first word of selected dataset items
// against reference values, once via the interpreter (JIT disabled) and once
// via the superscalar JIT compiler (skipped where unsupported).
func Test_Cache_InitDataset(t *testing.T) {
	t.Parallel()
	// Reference dataset item values; index is the dataset item number.
	var tests = []struct {
		index int
		value uint64
	}{
		{0, 0x680588a85ae222db},
		{10000000, 0x7943a1f6186ffb72},
		{20000000, 0x9035244d718095e1},
		{30000000, 0x145a5091f7853099},
	}
	t.Run("interpreter", func(t *testing.T) {
		t.Parallel()
		flags := GetFlags()
		// Force the interpreter path by clearing the JIT flag.
		flags &^= RANDOMX_FLAG_JIT
		cache, err := NewCache(flags)
		if err != nil {
			t.Fatal(err)
		}
		defer cache.Close()
		cache.Init(Tests[1].key)
		var datasetItem RegisterLine
		for i, tt := range tests {
			cache.initDataset(&datasetItem, uint64(tt.index))
			// Only the first register of the item is checked here.
			if datasetItem[0] != tt.value {
				t.Errorf("i=%d, index=%d", i, tt.index)
				t.Errorf("expected=%016x, actual=%016x", tt.value, datasetItem[0])
			}
		}
	})
	t.Run("compiler", func(t *testing.T) {
		t.Parallel()
		flags := GetFlags()
		flags |= RANDOMX_FLAG_JIT
		// HasJIT also checks platform support, not just the flag.
		if !flags.HasJIT() {
			t.Skip("not supported on this platform")
		}
		cache, err := NewCache(flags)
		if err != nil {
			t.Fatal(err)
		}
		defer cache.Close()
		cache.Init(Tests[1].key)
		// The JIT may fail to compile at Init time; fall back to skipping.
		if !cache.hasInitializedJIT() {
			t.Skip("not supported on this platform")
		}
		var datasetItem RegisterLine
		for i, tt := range tests {
			cache.initDataset(&datasetItem, uint64(tt.index))
			if datasetItem[0] != tt.value {
				t.Errorf("i=%d, index=%d", i, tt.index)
				t.Errorf("expected=%016x, actual=%016x", tt.value, datasetItem[0])
			}
		}
	})
}

15
commitment.go Normal file
View file

@ -0,0 +1,15 @@
package randomx
import "golang.org/x/crypto/blake2b"
// CalculateCommitment Calculate a RandomX commitment from a RandomX hash and its input.
//
// The commitment is the BLAKE2b-256 digest of the original input followed by
// the RandomX hash of that input. hashIn and hashOut may alias the same array,
// since hashIn is fully consumed before hashOut is written.
func CalculateCommitment(input []byte, hashIn, hashOut *[RANDOMX_HASH_SIZE]byte) {
	h, err := blake2b.New(RANDOMX_HASH_SIZE, nil)
	if err != nil {
		// blake2b.New only errors on an invalid digest size or key.
		panic(err)
	}
	_, _ = h.Write(input)
	_, _ = h.Write(hashIn[:])
	// Sum appends into hashOut's backing array via the zero-length slice.
	h.Sum(hashOut[:0])
}

41
commitment_test.go Normal file
View file

@ -0,0 +1,41 @@
package randomx
import (
"encoding/hex"
"testing"
)
// Test_CalculateCommitment checks a RandomX v2 commitment against a
// reference hex vector, computing the RandomX hash first and then the
// commitment over the same buffer (hashIn and hashOut alias here).
func Test_CalculateCommitment(t *testing.T) {
	t.Parallel()
	cache, err := NewCache(GetFlags())
	if err != nil {
		t.Fatal(err)
	}
	defer cache.Close()
	test := Tests[1]
	cache.Init(test.key)
	// Light mode: no dataset is passed to the VM.
	vm, err := NewVM(GetFlags(), cache, nil)
	if err != nil {
		t.Fatal(err)
	}
	defer vm.Close()
	var outputHash [RANDOMX_HASH_SIZE]byte
	vm.CalculateHash(test.input, &outputHash)
	// Commitment is computed in place over the hash output buffer.
	CalculateCommitment(test.input, &outputHash, &outputHash)
	outputHex := hex.EncodeToString(outputHash[:])
	expected := "d53ccf348b75291b7be76f0a7ac8208bbced734b912f6fca60539ab6f86be919"
	if expected != outputHex {
		t.Errorf("key=%v, input=%v", test.key, test.input)
		t.Errorf("expected=%s, actual=%s", expected, outputHex)
		t.FailNow()
	}
}

View file

@ -29,6 +29,25 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package randomx
import (
"git.gammaspectra.live/P2Pool/go-randomx/v3/internal/argon2"
"git.gammaspectra.live/P2Pool/go-randomx/v3/internal/memory"
)
// see reference configuration.h
// Cache size in KiB. Must be a power of 2.
const RANDOMX_ARGON_MEMORY = 262144
// Number of Argon2d iterations for Cache initialization.
const RANDOMX_ARGON_ITERATIONS = 3
// Number of parallel lanes for Cache initialization.
const RANDOMX_ARGON_LANES = 1
// Argon2d salt
const RANDOMX_ARGON_SALT = "RandomX\x03"
const ArgonSaltSize uint32 = 8 //sizeof("" RANDOMX_ARGON_SALT) - 1
// Number of random Cache accesses per Dataset item. Minimum is 2.
const RANDOMX_CACHE_ACCESSES = 8
@ -65,7 +84,9 @@ const RANDOMX_JUMP_BITS = 8
// Jump condition mask offset in bits. The sum of RANDOMX_JUMP_BITS and RANDOMX_JUMP_OFFSET must not exceed 16.
const RANDOMX_JUMP_OFFSET = 8
const DATASETEXTRAITEMS = RANDOMX_DATASET_EXTRA_SIZE / RANDOMX_DATASET_ITEM_SIZE
const RANDOMX_HASH_SIZE = 32
const DatasetExtraItems = RANDOMX_DATASET_EXTRA_SIZE / RANDOMX_DATASET_ITEM_SIZE
const SuperscalarMaxSize = 3*RANDOMX_SUPERSCALAR_LATENCY + 2
const RANDOMX_DATASET_ITEM_SIZE uint64 = 64
@ -74,7 +95,7 @@ const ScratchpadSize uint32 = RANDOMX_SCRATCHPAD_L3
const CacheLineAlignMask = (RANDOMX_DATASET_BASE_SIZE - 1) & (^(CacheLineSize - 1))
const CacheSize uint64 = RANDOMX_ARGON_MEMORY * uint64(ArgonBlockSize)
const CacheSize uint64 = RANDOMX_ARGON_MEMORY * uint64(argon2.BlockSize)
const ScratchpadL1 = RANDOMX_SCRATCHPAD_L1 / 8
const ScratchpadL2 = RANDOMX_SCRATCHPAD_L2 / 8
@ -87,25 +108,13 @@ const ScratchpadL3Mask = (ScratchpadL3 - 1) * 8
const ScratchpadL3Mask64 = (ScratchpadL3/8 - 1) * 64
const CONDITIONOFFSET = RANDOMX_JUMP_OFFSET
const CONDITIONMASK = ((1 << RANDOMX_JUMP_BITS) - 1)
const CONDITIONMASK = (1 << RANDOMX_JUMP_BITS) - 1
const STOREL3CONDITION = 14
const REGISTERSCOUNT = 8
const REGISTERCOUNTFLT = 4
const mantissaSize = 52
const exponentSize = 11
const mantissaMask = (uint64(1) << mantissaSize) - 1
const exponentMask = (uint64(1) << exponentSize) - 1
const exponentBias = 1023
const dynamicExponentBits = 4
const staticExponentBits = 4
const constExponentBits uint64 = 0x300
const dynamicMantissaMask = (uint64(1) << (mantissaSize + dynamicExponentBits)) - 1
const RANDOMX_FLAG_DEFAULT = uint64(0)
const RANDOMX_FLAG_JIT = uint64(1 << iota)
// isZeroOrPowerOf2 reports whether x is zero or an exact power of two.
// A power of two has a single set bit, so clearing the lowest set bit
// (x & (x-1)) leaves zero; zero trivially satisfies the same test.
func isZeroOrPowerOf2(x uint32) bool {
	return (x & (x - 1)) == 0
}
// Package-level allocators shared by Cache, Dataset and JIT code paths.
// largePageAllocator may be nil on platforms without large-page support —
// callers check for nil before use (see NewDataset).
var largePageAllocator = memory.NewLargePageAllocator()

// pageAllocator provides page-granular allocations for JIT program buffers.
var pageAllocator = memory.NewPageAllocator()

// cacheLineAlignedAllocator guarantees CacheLineSize alignment for
// cache/dataset memory so SIMD fetch/write paths stay aligned.
var cacheLineAlignedAllocator = memory.NewAlignedAllocator(CacheLineSize)

View file

@ -1,7 +1,111 @@
package randomx
type Randomx_Dataset interface {
InitDataset(startItem, endItem uint64)
ReadDataset(address uint64, r, cache *RegisterLine)
PrefetchDataset(address uint64)
import (
"errors"
"git.gammaspectra.live/P2Pool/go-randomx/v3/internal/memory"
"sync"
)
const DatasetSize = RANDOMX_DATASET_BASE_SIZE + RANDOMX_DATASET_EXTRA_SIZE
const DatasetItemCount = DatasetSize / CacheLineSize
// Dataset holds the fully expanded RandomX dataset used in full (fast) mode.
type Dataset struct {
	// memory is the cache-line-aligned backing store, one RegisterLine per
	// dataset item (DatasetItemCount entries).
	memory []RegisterLine
	// flags records allocation options so Close frees with the same allocator.
	flags Flags
}
// NewDataset Creates a randomx_dataset structure and allocates memory for RandomX Dataset.
// Only one flag is supported (can be set or not set): RANDOMX_FLAG_LARGE_PAGES - allocate memory in large pages
// Returns nil if allocation fails
func NewDataset(flags Flags) (result *Dataset, err error) {
	defer func() {
		//catch too large memory allocation or unable to allocate, for example on 32-bit targets or out of memory
		if r := recover(); r != nil {
			// Convert the panic into the function's error return; named
			// results let the deferred closure override both values.
			result = nil
			if e, ok := r.(error); ok && e != nil {
				err = e
			} else {
				err = errors.New("out of memory")
			}
		}
	}()

	var alignedMemory []RegisterLine
	if flags.Has(RANDOMX_FLAG_LARGE_PAGES) {
		// largePageAllocator is nil on platforms without large-page support.
		if largePageAllocator == nil {
			return nil, errors.New("huge pages not supported")
		}
		alignedMemory, err = memory.AllocateSlice[RegisterLine](largePageAllocator, DatasetItemCount)
		if err != nil {
			return nil, err
		}
	} else {
		alignedMemory, err = memory.AllocateSlice[RegisterLine](cacheLineAlignedAllocator, DatasetItemCount)
		if err != nil {
			return nil, err
		}
	}

	return &Dataset{
		memory: alignedMemory,
		flags:  flags,
	}, nil
}
// prefetchDataset is a memory prefetch hint for the item at the given byte
// address. It is a no-op in this implementation — presumably a placeholder
// matching the upstream randomx_prefetch API; confirm against the VM caller.
func (d *Dataset) prefetchDataset(address uint64) {

}
// readDataset XORs the dataset cache line located at the given byte address
// into the caller's register line r.
func (d *Dataset) readDataset(address uint64, r *RegisterLine) {
	line := &d.memory[address/CacheLineSize]
	for i := range r {
		r[i] ^= line[i]
	}
}
// Memory Returns a pointer to the internal memory buffer of the dataset structure.
// The size of the internal memory buffer is DatasetItemCount * RANDOMX_DATASET_ITEM_SIZE.
// The returned slice aliases the dataset's backing store; it is invalidated by Close.
func (d *Dataset) Memory() []RegisterLine {
	return d.memory
}
// InitDataset fills itemCount dataset items starting at startItem using the
// provided (already initialized) cache. Panics if the requested range does
// not fit inside the dataset.
func (d *Dataset) InitDataset(cache *Cache, startItem, itemCount uint64) {
	endItem := startItem + itemCount
	if startItem >= DatasetItemCount || itemCount > DatasetItemCount || endItem > DatasetItemCount {
		panic("out of range")
	}

	cache.datasetInit(d.memory[startItem:endItem], startItem, endItem)
}
// Close releases the memory backing the dataset, using the same allocator
// that NewDataset selected for this flag set. The dataset must not be used
// after Close.
func (d *Dataset) Close() error {
	if d.flags.Has(RANDOMX_FLAG_LARGE_PAGES) {
		return memory.FreeSlice(largePageAllocator, d.memory)
	}
	return memory.FreeSlice(cacheLineAlignedAllocator, d.memory)
}
// InitDatasetParallel fills the whole dataset using n goroutines (including
// the calling one). Item ranges are disjoint, so no synchronization beyond
// the WaitGroup is needed.
func (d *Dataset) InitDatasetParallel(cache *Cache, n int) {
	n = max(1, n)
	var wg sync.WaitGroup
	// Workers 1..n-1 each take the slice [count*i/n, count*(i+1)/n);
	// integer division spreads any remainder across the ranges.
	for i := uint64(1); i < uint64(n); i++ {
		a := (DatasetItemCount * i) / uint64(n)
		b := (DatasetItemCount * (i + 1)) / uint64(n)

		wg.Add(1)
		go func(a, b uint64) {
			defer wg.Done()
			d.InitDataset(cache, a, b-a)
		}(a, b)
	}

	// The calling goroutine handles the first range [0, count/n).
	d.InitDataset(cache, 0, DatasetItemCount/uint64(n))
	wg.Wait()
}

View file

@ -1,26 +0,0 @@
package randomx
type Randomx_DatasetLight struct {
Cache *Randomx_Cache
Memory []uint64
}
func (d *Randomx_DatasetLight) PrefetchDataset(address uint64) {
}
func (d *Randomx_DatasetLight) ReadDataset(address uint64, r, cache *RegisterLine) {
if d.Cache.HasJIT() {
d.Cache.InitDatasetItemJIT(cache, address/CacheLineSize)
} else {
d.Cache.InitDatasetItem(cache, address/CacheLineSize)
}
for i := range r {
r[i] ^= cache[i]
}
}
func (d *Randomx_DatasetLight) InitDataset(startItem, endItem uint64) {
//d.Cache.initDataset(d.Cache.Programs)
}

14
exec.go
View file

@ -1,3 +1,15 @@
package randomx
type ProgramFunc []byte
import "git.gammaspectra.live/P2Pool/go-randomx/v3/internal/memory"
// SuperScalarProgramFunc is page-allocated executable machine code for a
// compiled SuperscalarHash program.
type SuperScalarProgramFunc []byte

// VMProgramFunc is page-allocated executable machine code for a compiled
// VM program.
type VMProgramFunc []byte

// Close releases the executable pages backing the program.
func (f SuperScalarProgramFunc) Close() error {
	return memory.FreeSlice(pageAllocator, f)
}

// Close releases the executable pages backing the program.
func (f VMProgramFunc) Close() error {
	return memory.FreeSlice(pageAllocator, f)
}

View file

@ -1,11 +0,0 @@
//go:build !unix || disable_jit
package randomx
func (f ProgramFunc) Execute(rl *RegisterLine) {
}
func (f ProgramFunc) Close() error {
}

View file

@ -1,50 +0,0 @@
//go:build unix && !disable_jit
package randomx
import (
"golang.org/x/sys/unix"
"unsafe"
)
func (f ProgramFunc) Execute(rl *RegisterLine) {
if f == nil {
panic("program is nil")
}
memoryPtr := &f
fun := *(*func(rl *RegisterLine))(unsafe.Pointer(&memoryPtr))
fun(rl)
}
func (f ProgramFunc) Close() error {
return unix.Munmap(f)
}
func mapProgram(program []byte) ProgramFunc {
// Write only
execFunc, err := unix.Mmap(-1, 0, len(program), unix.PROT_WRITE, unix.MAP_PRIVATE|unix.MAP_ANONYMOUS)
if err != nil {
panic(err)
}
// Introduce machine code into the memory region
copy(execFunc, program)
// uphold W^X
// Read and Exec only
err = unix.Mprotect(execFunc, unix.PROT_READ|unix.PROT_EXEC)
if err != nil {
defer func() {
// unmap if we err
err := unix.Munmap(execFunc)
if err != nil {
panic(err)
}
}()
panic(err)
}
return execFunc
}

68
flags.go Normal file
View file

@ -0,0 +1,68 @@
package randomx
import (
"git.gammaspectra.live/P2Pool/go-randomx/v3/internal/aes"
"golang.org/x/sys/cpu"
"runtime"
)
// Flags is a bit set of RandomX configuration options (RANDOMX_FLAG_*).
type Flags uint64

// Has reports whether every bit in flags is set in f.
func (f Flags) Has(flags Flags) bool {
	return f&flags == flags
}

// HasJIT reports whether the JIT flag is set and the current platform
// actually supports JIT compilation (supportsJIT is platform-defined).
func (f Flags) HasJIT() bool {
	return f.Has(RANDOMX_FLAG_JIT) && supportsJIT
}
const RANDOMX_FLAG_DEFAULT Flags = 0
const (
// RANDOMX_FLAG_LARGE_PAGES Select large page allocation for dataset
RANDOMX_FLAG_LARGE_PAGES = Flags(1 << iota)
// RANDOMX_FLAG_HARD_AES Selects between hardware or software AES
RANDOMX_FLAG_HARD_AES
// RANDOMX_FLAG_FULL_MEM Selects between full or light mode dataset
RANDOMX_FLAG_FULL_MEM
// RANDOMX_FLAG_JIT Enables JIT features
RANDOMX_FLAG_JIT
// RANDOMX_FLAG_SECURE Enables W^X for JIT code
RANDOMX_FLAG_SECURE
RANDOMX_FLAG_ARGON2_SSSE3
RANDOMX_FLAG_ARGON2_AVX2
RANDOMX_FLAG_ARGON2 = RANDOMX_FLAG_ARGON2_AVX2 | RANDOMX_FLAG_ARGON2_SSSE3
)
// GetFlags The recommended flags to be used on the current machine.
// Does not include:
// * RANDOMX_FLAG_LARGE_PAGES
// * RANDOMX_FLAG_FULL_MEM
// * RANDOMX_FLAG_SECURE
// These flags must be added manually if desired.
//
// On OpenBSD RANDOMX_FLAG_SECURE is enabled by default in JIT mode as W^X is enforced by the OS.
func GetFlags() Flags {
	f := RANDOMX_FLAG_DEFAULT

	if runtime.GOARCH == "amd64" {
		f |= RANDOMX_FLAG_JIT
		if aes.HasHardAESImplementation && cpu.X86.HasAES {
			f |= RANDOMX_FLAG_HARD_AES
		}
		if cpu.X86.HasSSSE3 {
			f |= RANDOMX_FLAG_ARGON2_SSSE3
		}
		if cpu.X86.HasAVX2 {
			f |= RANDOMX_FLAG_ARGON2_AVX2
		}
	}

	// Platforms that enforce W^X need SECURE mode for JIT code pages.
	switch runtime.GOOS {
	case "openbsd", "netbsd":
		f |= RANDOMX_FLAG_SECURE
	case "darwin", "ios":
		if runtime.GOARCH == "arm64" {
			f |= RANDOMX_FLAG_SECURE
		}
	}

	return f
}

4
go.mod
View file

@ -1,7 +1,9 @@
module git.gammaspectra.live/P2Pool/go-randomx/v2
module git.gammaspectra.live/P2Pool/go-randomx/v3
go 1.21
require golang.org/x/crypto v0.22.0
require golang.org/x/sys v0.19.0
require git.gammaspectra.live/P2Pool/softfloat64 v1.0.0

2
go.sum
View file

@ -1,3 +1,5 @@
git.gammaspectra.live/P2Pool/softfloat64 v1.0.0 h1:XqxDpowntpV8gvBzG9bMC8VVzxZJT/YEk7BfwmaCamU=
git.gammaspectra.live/P2Pool/softfloat64 v1.0.0/go.mod h1:ZhnGqXOS6F6aJpiiT38Cvk5eHoBNqjkKfp3w3AcnomA=
golang.org/x/crypto v0.22.0 h1:g1v0xeRhjcugydODzvb3mEM9SQ0HGp9s/nh3COQ/C30=
golang.org/x/crypto v0.22.0/go.mod h1:vr6Su+7cTlO45qkww3VDJlzDn0ctJvRgYbC2NvXHt+M=
golang.org/x/sys v0.19.0 h1:q5f1RH2jigJ1MoAWp2KTp3gm5zAGFUTarQZ5U386+4o=

127
internal/aes/const.go Normal file
View file

@ -0,0 +1,127 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package aes implements AES encryption (formerly Rijndael), as defined in
// U.S. Federal Information Processing Standards Publication 197.
//
// The AES operations in this package are not implemented using constant-time algorithms.
// An exception is when running on systems with enabled hardware support for AES
// that makes these operations constant-time. Examples include amd64 systems using AES-NI
// extensions and s390x systems using Message-Security-Assist extensions.
// On such systems, when the result of NewCipher is passed to cipher.NewGCM,
// the GHASH operation used by GCM is also constant-time.
package aes
import (
"bytes"
"math/bits"
)
// This file generates AES constants - 8720 bytes of initialized data.
// https://csrc.nist.gov/publications/fips/fips197/fips-197.pdf
// AES is based on the mathematical behavior of binary polynomials
// (polynomials over GF(2)) modulo the irreducible polynomial x⁸ + x⁴ + x³ + x + 1.
// Addition of these binary polynomials corresponds to binary xor.
// Reducing mod poly corresponds to binary xor with poly every
// time a 0x100 bit appears.
const poly = 1<<8 | 1<<4 | 1<<3 | 1<<1 | 1<<0 // x⁸ + x⁴ + x³ + x + 1

// mul multiplies b and c as GF(2) polynomials modulo poly
// (carry-less multiply with reduction, as used by AES).
func mul(b, c uint32) uint32 {
	var product uint32
	a, rem := b, c
	for bit := uint32(1); bit < 0x100 && rem != 0; bit <<= 1 {
		// Invariant: bit == 1<<n, a == b * xⁿ (mod poly)
		if rem&bit != 0 {
			product ^= a // addition in GF(2) is xor
			rem ^= bit   // clear the bit so the loop can end early
		}
		// a *= x in GF(2), reducing modulo the polynomial when a
		// 0x100 bit appears
		a <<= 1
		if a&0x100 != 0 {
			a ^= poly
		}
	}
	return product
}
// sbox0 FIPS-197 Figure 7. S-box substitution values generation
//
// p walks all 255 non-zero field elements by repeated multiplication by 3
// (a generator of GF(2⁸)*), while q tracks its multiplicative inverse by
// repeated division by 3, so sbox[p] can be computed from the affine
// transformation of q without a full inverse table.
var sbox0 = func() (sbox [256]byte) {
	var p, q uint8 = 1, 1
	for {
		/* multiply p by 3 */
		if p&0x80 != 0 {
			// reduce modulo the AES polynomial (0x11b, low byte 0x1b)
			p ^= (p << 1) ^ 0x1b
		} else {
			p ^= p << 1
		}

		/* divide q by 3 (equals multiplication by 0xf6) */
		q ^= q << 1
		q ^= q << 2
		q ^= q << 4
		if q&0x80 != 0 {
			q ^= 0x09
		}

		/* compute the affine transformation */
		xformed := q ^ bits.RotateLeft8(q, 1) ^ bits.RotateLeft8(q, 2) ^ bits.RotateLeft8(q, 3) ^ bits.RotateLeft8(q, 4)

		sbox[p] = xformed ^ 0x63

		// p returns to 1 after cycling through every non-zero element.
		if p == 1 {
			break
		}
	}

	/* 0 is a special case since it has no inverse */
	sbox[0] = 0x63
	return sbox
}()
// sbox1 FIPS-197 Figure 14. Inverse S-box substitution values generation
//
// Built by inverting sbox0: isbox[i] is the position of byte value i
// inside sbox0 (sbox0 is a permutation, so every value is found).
var sbox1 = func() (isbox [256]byte) {
	for i := 0; i < len(isbox); i++ {
		isbox[i] = uint8(bytes.IndexByte(sbox0[:], uint8(i)))
	}
	return isbox
}()
// encLut Lookup tables for encryption.
//
// te[j][i] is the combined SubBytes+MixColumns contribution of input byte i
// in column position j; the four tables are byte rotations of each other.
var encLut = func() (te [4][256]uint32) {
	for i := 0; i < 256; i++ {
		s := uint32(sbox0[i])
		s2 := mul(s, 2)
		s3 := mul(s, 3)
		// MixColumns row (2, 1, 1, 3) packed into one word.
		w := s2<<24 | s<<16 | s<<8 | s3
		for j := 0; j < 4; j++ {
			// Stored little-endian so table lookups match the in-memory
			// state layout used by the soft AES round functions.
			te[j][i] = bits.ReverseBytes32(w)
			w = w<<24 | w>>8
		}
	}
	return te
}()
// decLut Lookup tables for decryption.
//
// td[j][i] is the combined InvSubBytes+InvMixColumns contribution of input
// byte i in column position j; the four tables are byte rotations of each other.
var decLut = func() (td [4][256]uint32) {
	for i := 0; i < 256; i++ {
		s := uint32(sbox1[i])
		s9 := mul(s, 0x9)
		sb := mul(s, 0xb)
		sd := mul(s, 0xd)
		se := mul(s, 0xe)
		// InvMixColumns row (e, 9, d, b) packed into one word.
		w := se<<24 | s9<<16 | sd<<8 | sb
		for j := 0; j < 4; j++ {
			// Little-endian storage to match the soft AES state layout.
			td[j][i] = bits.ReverseBytes32(w)
			w = w<<24 | w>>8
		}
	}
	return td
}()

View file

@ -0,0 +1,69 @@
//go:build amd64 && !purego
package aes
import (
"git.gammaspectra.live/P2Pool/go-randomx/v3/internal/asm"
"git.gammaspectra.live/P2Pool/go-randomx/v3/internal/keys"
"golang.org/x/sys/cpu"
"runtime"
"unsafe"
)
// HasHardAESImplementation reports at compile time that this build (amd64,
// not purego) ships a hardware AES implementation.
const HasHardAESImplementation = true

// hardAES implements the AES interface using AES-NI via hand-written assembly.
type hardAES struct {
}

// NewHardAES returns the hardware AES implementation, or nil when the CPU
// lacks the AES-NI instruction set at runtime.
func NewHardAES() AES {
	if cpu.X86.HasAES {
		return hardAES{}
	}
	return nil
}
// HashAes1Rx4 computes the 512-bit AES hash of input into output using the
// assembly AES-NI routine. len(input) must be a multiple of 64.
// NOTE(review): the asm loop body runs before its length check — assumes
// len(input) > 0; confirm against callers.
func (aes hardAES) HashAes1Rx4(input []byte, output *[64]byte) {
	if len(input)%len(output) != 0 {
		panic("unsupported")
	}

	asm.HashAes1Rx4(&keys.AesHash1R_State, &keys.AesHash1R_XKeys, output, unsafe.SliceData(input), uint64(len(input)))
}
// FillAes1Rx4 fills output with pseudorandom data derived from state using
// one AES round per 16 bytes, writing the final state back into state.
// len(output) must be a multiple of 64.
func (aes hardAES) FillAes1Rx4(state *[64]byte, output []byte) {
	if len(output)%len(state) != 0 {
		panic("unsupported")
	}

	// Reference to state without copying
	states := (*[4][4]uint32)(unsafe.Pointer(state))

	asm.FillAes1Rx4(states, &keys.AesGenerator1R_Keys, unsafe.SliceData(output), uint64(len(output)))
	// Keep state alive across the unsafe.Pointer round-trip into assembly.
	runtime.KeepAlive(state)
}
// FillAes4Rx4 fills output with pseudorandom data using four AES rounds per
// 16 bytes of output. state is passed by value, so the caller's copy is not
// modified. len(output) must be a multiple of 64.
func (aes hardAES) FillAes4Rx4(state [64]byte, output []byte) {
	if len(output)%len(state) != 0 {
		panic("unsupported")
	}

	// state is copied on caller

	// Copy state
	states := (*[4][4]uint32)(unsafe.Pointer(&state))

	for outptr := 0; outptr < len(output); outptr += len(state) {
		// Four chained round trips with the fixed generator keys.
		asm.AESRoundTrip_DecEnc(states, &fillAes4Rx4Keys0)
		asm.AESRoundTrip_DecEnc(states, &fillAes4Rx4Keys1)
		asm.AESRoundTrip_DecEnc(states, &fillAes4Rx4Keys2)
		asm.AESRoundTrip_DecEnc(states, &fillAes4Rx4Keys3)
		copy(output[outptr:], state[:])
	}
}
// HashAndFillAes1Rx4 hashes the scratchpad into output and refills the
// scratchpad from fillState. Currently implemented as two separate passes
// rather than a fused single sweep.
func (aes hardAES) HashAndFillAes1Rx4(scratchpad []byte, output *[64]byte, fillState *[64]byte) {
	//TODO: fuse hash and fill into a single pass over the scratchpad
	aes.HashAes1Rx4(scratchpad, output)
	aes.FillAes1Rx4(fillState, scratchpad)
}

View file

@ -0,0 +1,9 @@
//go:build !amd64 || purego
package aes
// HasHardAESImplementation reports at compile time that this build
// (non-amd64 or purego) has no hardware AES implementation.
const HasHardAESImplementation = false

// NewHardAES always returns nil on this build; callers fall back to soft AES.
func NewHardAES() AES {
	return nil
}

59
internal/aes/hash.go Normal file
View file

@ -0,0 +1,59 @@
/*
Copyright (c) 2019 DERO Foundation. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its contributors
may be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package aes
import (
"git.gammaspectra.live/P2Pool/go-randomx/v3/internal/keys"
)
// fillAes4Rx4Keys0..3 are the fixed round-key sets for FillAes4Rx4.
// Each set feeds a four-lane round trip: lanes 0-1 use generator keys 0-3
// and lanes 2-3 use generator keys 4-7, one key index per round.
var fillAes4Rx4Keys0 = [4][4]uint32{
	keys.AesGenerator4R_Keys[0],
	keys.AesGenerator4R_Keys[0],
	keys.AesGenerator4R_Keys[4],
	keys.AesGenerator4R_Keys[4],
}

var fillAes4Rx4Keys1 = [4][4]uint32{
	keys.AesGenerator4R_Keys[1],
	keys.AesGenerator4R_Keys[1],
	keys.AesGenerator4R_Keys[5],
	keys.AesGenerator4R_Keys[5],
}

var fillAes4Rx4Keys2 = [4][4]uint32{
	keys.AesGenerator4R_Keys[2],
	keys.AesGenerator4R_Keys[2],
	keys.AesGenerator4R_Keys[6],
	keys.AesGenerator4R_Keys[6],
}

var fillAes4Rx4Keys3 = [4][4]uint32{
	keys.AesGenerator4R_Keys[3],
	keys.AesGenerator4R_Keys[3],
	keys.AesGenerator4R_Keys[7],
	keys.AesGenerator4R_Keys[7],
}

38
internal/aes/impl.go Normal file
View file

@ -0,0 +1,38 @@
package aes
// AES is the common interface of the hardware (AES-NI) and software AES
// implementations used by the VM; select one via NewHardAES / NewSoftAES.
type AES interface {
	// HashAes1Rx4
	//
	// Calculate a 512-bit hash of 'input' using 4 lanes of AES.
	// The input is treated as a set of round keys for the encryption
	// of the initial state.
	//
	// 'input' size must be a multiple of 64.
	//
	// For a 2 MiB input, this has the same security as 32768-round
	// AES encryption.
	//
	// Hashing throughput: >20 GiB/s per CPU core with hardware AES
	HashAes1Rx4(input []byte, output *[64]byte)

	// FillAes1Rx4
	//
	// Fill 'output' with pseudorandom data based on 512-bit 'state'.
	// The state is encrypted using a single AES round per 16 bytes of output
	// in 4 lanes.
	//
	// 'output' size must be a multiple of 64.
	//
	// The modified state is written back to 'state' to allow multiple
	// calls to this function.
	FillAes1Rx4(state *[64]byte, output []byte)

	// HashAndFillAes1Rx4 Hashes and fills scratchpad and output in one sweep
	HashAndFillAes1Rx4(scratchpad []byte, output *[64]byte, fillState *[64]byte)

	// FillAes4Rx4 used to generate final program
	//
	// 'state' is copied when calling
	FillAes4Rx4(state [64]byte, output []byte)
}

View file

@ -29,3 +29,24 @@ func soft_aesdec(state *[4]uint32, key *[4]uint32) {
state[2] = key[2] ^ td0[uint8(s2)] ^ td1[uint8(s1>>8)] ^ td2[uint8(s0>>16)] ^ td3[uint8(s3>>24)]
state[3] = key[3] ^ td0[uint8(s3)] ^ td1[uint8(s2>>8)] ^ td2[uint8(s1>>16)] ^ td3[uint8(s0>>24)]
}
// soft_aesroundtrip_decenc applies one AES round to each of the four lanes,
// alternating decrypt/encrypt per lane with a per-lane key (generator pattern).
func soft_aesroundtrip_decenc(states *[4][4]uint32, keys *[4][4]uint32) {
	soft_aesdec(&states[0], &keys[0])
	soft_aesenc(&states[1], &keys[1])
	soft_aesdec(&states[2], &keys[2])
	soft_aesenc(&states[3], &keys[3])
}

// soft_aesroundtrip_encdec is the mirror pattern of decenc: encrypt/decrypt
// alternation per lane with a per-lane key (hash absorb pattern).
func soft_aesroundtrip_encdec(states *[4][4]uint32, keys *[4][4]uint32) {
	soft_aesenc(&states[0], &keys[0])
	soft_aesdec(&states[1], &keys[1])
	soft_aesenc(&states[2], &keys[2])
	soft_aesdec(&states[3], &keys[3])
}

// soft_aesroundtrip_encdec1 applies the encdec alternation with a single
// shared key across all four lanes (hash finalization pattern).
func soft_aesroundtrip_encdec1(states *[4][4]uint32, key *[4]uint32) {
	soft_aesenc(&states[0], key)
	soft_aesdec(&states[1], key)
	soft_aesenc(&states[2], key)
	soft_aesdec(&states[3], key)
}

75
internal/aes/soft.go Normal file
View file

@ -0,0 +1,75 @@
package aes
import (
"git.gammaspectra.live/P2Pool/go-randomx/v3/internal/keys"
"runtime"
"unsafe"
)
// softAES implements the AES interface in pure Go using lookup tables;
// used when no hardware AES is available.
type softAES struct {
}

// NewSoftAES returns the software AES implementation (always available).
func NewSoftAES() AES {
	return softAES{}
}
// HashAes1Rx4 computes the 512-bit AES hash of input into output in pure Go.
// The output buffer doubles as the working state (reinterpreted as 4x4
// uint32 lanes via unsafe), so no extra allocation is needed.
// len(input) must be a multiple of 64.
func (aes softAES) HashAes1Rx4(input []byte, output *[64]byte) {
	if len(input)%len(output) != 0 {
		panic("unsupported")
	}

	// states are copied
	states := (*[4][4]uint32)(unsafe.Pointer(output))
	*states = keys.AesHash1R_State

	// Absorb each 64-byte chunk of input as round keys.
	for input_ptr := 0; input_ptr < len(input); input_ptr += 64 {
		in := (*[4][4]uint32)(unsafe.Pointer(unsafe.SliceData(input[input_ptr:])))

		soft_aesroundtrip_encdec(states, in)
	}

	// Two finalization rounds with the fixed extra keys.
	soft_aesroundtrip_encdec1(states, &keys.AesHash1R_XKeys[0])

	soft_aesroundtrip_encdec1(states, &keys.AesHash1R_XKeys[1])
	runtime.KeepAlive(output)
}
// FillAes1Rx4 fills output with pseudorandom data derived from state using
// one AES round trip per 64 bytes; the evolving state is written back into
// state for chained calls. len(output) must be a multiple of 64.
func (aes softAES) FillAes1Rx4(state *[64]byte, output []byte) {
	if len(output)%len(state) != 0 {
		panic("unsupported")
	}

	// Reference to state without copying
	states := (*[4][4]uint32)(unsafe.Pointer(state))

	for outptr := 0; outptr < len(output); outptr += len(state) {
		soft_aesroundtrip_decenc(states, &keys.AesGenerator1R_Keys)

		copy(output[outptr:], state[:])
	}
}
// FillAes4Rx4 fills output using four AES round trips per 64 bytes. state is
// received by value, so the caller's array is untouched.
// len(output) must be a multiple of 64.
func (aes softAES) FillAes4Rx4(state [64]byte, output []byte) {
	if len(output)%len(state) != 0 {
		panic("unsupported")
	}

	// state is copied on caller

	// Copy state
	states := (*[4][4]uint32)(unsafe.Pointer(&state))

	for outptr := 0; outptr < len(output); outptr += len(state) {
		// Four chained round trips with the fixed generator key sets.
		soft_aesroundtrip_decenc(states, &fillAes4Rx4Keys0)

		soft_aesroundtrip_decenc(states, &fillAes4Rx4Keys1)

		soft_aesroundtrip_decenc(states, &fillAes4Rx4Keys2)

		soft_aesroundtrip_decenc(states, &fillAes4Rx4Keys3)

		copy(output[outptr:], state[:])
	}
}
// HashAndFillAes1Rx4 hashes the scratchpad into output and refills it from
// fillState. Implemented as two sequential passes rather than one fused sweep.
func (aes softAES) HashAndFillAes1Rx4(scratchpad []byte, output *[64]byte, fillState *[64]byte) {
	//TODO: fuse hash and fill into a single pass over the scratchpad
	aes.HashAes1Rx4(scratchpad, output)
	aes.FillAes1Rx4(fillState, scratchpad)
}

76
internal/argon2/argon2.go Normal file
View file

@ -0,0 +1,76 @@
package argon2
import (
"encoding/binary"
"golang.org/x/crypto/blake2b"
)
import (
_ "golang.org/x/crypto/argon2"
_ "unsafe"
)
// BlockSize is the Argon2 block size in bytes.
const BlockSize uint32 = 1024

// Block is one Argon2 memory block, stored as 128 little-endian uint64 words.
type Block [BlockSize / 8]uint64

// syncPoints is the number of Argon2 synchronization slices per pass.
const syncPoints = 4
//go:linkname initHash golang.org/x/crypto/argon2.initHash
func initHash(password, salt, key, data []byte, time, memory, threads, keyLen uint32, mode int) [blake2b.Size + 8]byte
//go:linkname processBlocks golang.org/x/crypto/argon2.processBlocks
func processBlocks(B []Block, time, memory, threads uint32, mode int)
//go:linkname blake2bHash golang.org/x/crypto/argon2.blake2bHash
func blake2bHash(out []byte, in []byte)
// initBlocks From golang.org/x/crypto/argon2.initBlocks with external memory allocation
//
// Derives the first two blocks of each lane from the Argon2 seed hash h0,
// writing into caller-provided B instead of allocating. h0 is mutated
// in place (block index and lane counters appended past the digest).
func initBlocks(B []Block, h0 *[blake2b.Size + 8]byte, memory, threads uint32) {
	var block0 [1024]byte
	// Caller-provided memory may be reused; reset before filling.
	clear(B)
	for lane := uint32(0); lane < threads; lane++ {
		j := lane * (memory / threads)
		binary.LittleEndian.PutUint32(h0[blake2b.Size+4:], lane)

		// Block 0 of the lane: H'(h0 || 0 || lane).
		binary.LittleEndian.PutUint32(h0[blake2b.Size:], 0)
		blake2bHash(block0[:], h0[:])
		for i := range B[j+0] {
			B[j+0][i] = binary.LittleEndian.Uint64(block0[i*8:])
		}

		// Block 1 of the lane: H'(h0 || 1 || lane).
		binary.LittleEndian.PutUint32(h0[blake2b.Size:], 1)
		blake2bHash(block0[:], h0[:])
		for i := range B[j+1] {
			B[j+1][i] = binary.LittleEndian.Uint64(block0[i*8:])
		}
	}
}
// BuildBlocks From golang.org/x/crypto/argon2.deriveKey without last deriveKey call and external memory allocation
//
// Runs Argon2d block filling over the caller-provided memory B (len(B) must
// equal 'memory'), leaving the raw block matrix in B instead of extracting a
// key. Panics on invalid parameters, matching x/crypto/argon2 behavior.
func BuildBlocks(B []Block, password, salt []byte, time, memory uint32, threads uint8) {
	if time < 1 {
		panic("argon2: number of rounds too small")
	}
	if threads < 1 {
		panic("argon2: parallelism degree too low")
	}

	if len(B) != int(memory) {
		panic("argon2: invalid block size")
	}

	const mode = 0 /* argon2d */
	const keyLen = 0

	h0 := initHash(password, salt, nil, nil, time, memory, uint32(threads), keyLen, mode)

	// Round memory down to a multiple of syncPoints*threads, with a
	// lower bound of 2 blocks per slice-lane (same as upstream argon2).
	memory = memory / (syncPoints * uint32(threads)) * (syncPoints * uint32(threads))
	if memory < 2*syncPoints*uint32(threads) {
		memory = 2 * syncPoints * uint32(threads)
	}

	initBlocks(B, &h0, memory, uint32(threads))
	processBlocks(B, time, memory, uint32(threads), mode)
}

18
internal/asm/aes_amd64.go Normal file
View file

@ -0,0 +1,18 @@
//go:build amd64 && !purego
package asm
// FillAes1Rx4 runs one AES round trip per 64 bytes of output (assembly).
//
//go:noescape
func FillAes1Rx4(states *[4][4]uint32, keys *[4][4]uint32, output *byte, outputLen uint64)

// HashAes1Rx4 absorbs input as AES round keys into a 4-lane state (assembly).
//
//go:noescape
func HashAes1Rx4(initialState *[4][4]uint32, xKeys *[2][4]uint32, output *[64]byte, input *byte, inputLen uint64)

// AESRoundTrip_DecEnc applies dec/enc/dec/enc rounds across the 4 lanes (assembly).
//
//go:noescape
func AESRoundTrip_DecEnc(states *[4][4]uint32, keys *[4][4]uint32)

// AESRoundTrip_EncDec applies enc/dec/enc/dec rounds across the 4 lanes (assembly).
//
//go:noescape
func AESRoundTrip_EncDec(states *[4][4]uint32, keys *[4][4]uint32)

// AESRoundTrip_EncDec1 applies enc/dec/enc/dec rounds with one shared key (assembly).
//
//go:noescape
func AESRoundTrip_EncDec1(states *[4][4]uint32, key *[4]uint32)

172
internal/asm/aes_amd64.s Normal file
View file

@ -0,0 +1,172 @@
//go:build amd64 && !purego
#include "textflag.h"
// func FillAes1Rx4(states *[4][4]uint32, keys *[4][4]uint32, output *byte, outputLen uint64)
// One AES round per lane per 64-byte chunk; keys stay resident in X4-X7.
// Assumes outputLen > 0 and a multiple of 64 (checked by the Go caller).
TEXT ·FillAes1Rx4(SB),NOSPLIT|NOFRAME,$0-32
	MOVQ states+0(FP), AX
	MOVQ keys+8(FP), BX
	MOVQ output+16(FP), CX
	MOVQ outputLen+24(FP), DX

	// initial state
	VMOVDQU 0(AX), X0
	VMOVDQU 16(AX), X1
	VMOVDQU 32(AX), X2
	VMOVDQU 48(AX), X3

	// keys: X4-X7
	VMOVDQU 0(BX), X4
	VMOVDQU 16(BX), X5
	VMOVDQU 32(BX), X6
	VMOVDQU 48(BX), X7

loop:
	// dec/enc alternation per lane matches soft_aesroundtrip_decenc
	AESDEC X4, X0
	AESENC X5, X1
	AESDEC X6, X2
	AESENC X7, X3

	// store state onto output
	VMOVDQU X0, 0(CX)
	VMOVDQU X1, 16(CX)
	VMOVDQU X2, 32(CX)
	VMOVDQU X3, 48(CX)

	ADDQ $64, CX

	// outputLen -= 64, continue if not 0
	SUBQ $64, DX
	JNE loop

	// offload initial state
	VMOVDQU X0, 0(AX)
	VMOVDQU X1, 16(AX)
	VMOVDQU X2, 32(AX)
	VMOVDQU X3, 48(AX)

	RET
// func HashAes1Rx4(initialState *[4][4]uint32, xKeys *[2][4]uint32, output *[64]byte, input *byte, inputLen uint64)
// Absorbs each 64-byte input chunk as round keys (enc/dec alternation per
// lane), then finalizes with two shared extra keys.
// Assumes inputLen > 0 and a multiple of 64 (checked by the Go caller).
TEXT ·HashAes1Rx4(SB),NOSPLIT|NOFRAME,$0-40
	MOVQ initialState+0(FP), AX
	// initial state
	VMOVDQU 0(AX), X0
	VMOVDQU 16(AX), X1
	VMOVDQU 32(AX), X2
	VMOVDQU 48(AX), X3

	MOVQ xKeys+8(FP), AX
	MOVQ output+16(FP), BX
	MOVQ input+24(FP), CX
	MOVQ inputLen+32(FP), DX

loop:
	// input as keys: X4-X7
	VMOVDQU 0(CX), X4
	VMOVDQU 16(CX), X5
	VMOVDQU 32(CX), X6
	VMOVDQU 48(CX), X7

	AESENC X4, X0
	AESDEC X5, X1
	AESENC X6, X2
	AESDEC X7, X3

	ADDQ $64, CX

	// inputLen -= 64, continue if not 0
	SUBQ $64, DX
	JNE loop

	// do encdec1 with both keys!
	VMOVDQU 0(AX), X4
	VMOVDQU 16(AX), X5

	// finalization round 1: shared key X4 across all lanes
	AESENC X4, X0
	AESDEC X4, X1
	AESENC X4, X2
	AESDEC X4, X3

	// finalization round 2: shared key X5 across all lanes
	AESENC X5, X0
	AESDEC X5, X1
	AESENC X5, X2
	AESDEC X5, X3

	// offload into output
	VMOVDQU X0, 0(BX)
	VMOVDQU X1, 16(BX)
	VMOVDQU X2, 32(BX)
	VMOVDQU X3, 48(BX)

	RET
// func AESRoundTrip_DecEnc(states *[4][4]uint32, keys *[4][4]uint32)
//
// One in-place AES round per 16-byte column of states: AESDEC on
// columns 0/2, AESENC on columns 1/3, keyed column-for-column from keys.
TEXT ·AESRoundTrip_DecEnc(SB),NOSPLIT|NOFRAME,$0-16
	MOVQ states+0(FP), AX
	MOVQ keys+8(FP), BX

	// interleave state/key column loads
	VMOVDQU 0(AX), X0
	VMOVDQU 0(BX), X1
	VMOVDQU 16(AX), X2
	VMOVDQU 16(BX), X3
	VMOVDQU 32(AX), X4
	VMOVDQU 32(BX), X5
	VMOVDQU 48(AX), X6
	VMOVDQU 48(BX), X7

	AESDEC X1, X0
	AESENC X3, X2
	AESDEC X5, X4
	AESENC X7, X6

	// write the advanced columns back in place
	VMOVDQU X0, 0(AX)
	VMOVDQU X2, 16(AX)
	VMOVDQU X4, 32(AX)
	VMOVDQU X6, 48(AX)

	RET
// func AESRoundTrip_EncDec(states *[4][4]uint32, keys *[4][4]uint32)
//
// One in-place AES round per 16-byte column of states: AESENC on
// columns 0/2, AESDEC on columns 1/3, keyed column-for-column from keys.
TEXT ·AESRoundTrip_EncDec(SB),NOSPLIT|NOFRAME,$0-16
	MOVQ states+0(FP), AX
	MOVQ keys+8(FP), BX

	// interleave state/key column loads
	VMOVDQU 0(AX), X0
	VMOVDQU 0(BX), X1
	VMOVDQU 16(AX), X2
	VMOVDQU 16(BX), X3
	VMOVDQU 32(AX), X4
	VMOVDQU 32(BX), X5
	VMOVDQU 48(AX), X6
	VMOVDQU 48(BX), X7

	AESENC X1, X0
	AESDEC X3, X2
	AESENC X5, X4
	AESDEC X7, X6

	// write the advanced columns back in place
	VMOVDQU X0, 0(AX)
	VMOVDQU X2, 16(AX)
	VMOVDQU X4, 32(AX)
	VMOVDQU X6, 48(AX)

	RET
// func AESRoundTrip_EncDec1(states *[4][4]uint32, key *[4]uint32)
//
// One in-place AES round per 16-byte column of states, using the single
// 16-byte key for every column: AESENC on columns 0/2, AESDEC on 1/3.
TEXT ·AESRoundTrip_EncDec1(SB),NOSPLIT|NOFRAME,$0-16
	MOVQ states+0(FP), AX
	MOVQ key+8(FP), BX

	// X0 holds the shared round key; X1-X4 the four state columns
	VMOVDQU 0(BX), X0
	VMOVDQU 0(AX), X1
	VMOVDQU 16(AX), X2
	VMOVDQU 32(AX), X3
	VMOVDQU 48(AX), X4

	AESENC X0, X1
	AESDEC X0, X2
	AESENC X0, X3
	AESDEC X0, X4

	// write the advanced columns back in place
	VMOVDQU X1, 0(AX)
	VMOVDQU X2, 16(AX)
	VMOVDQU X3, 32(AX)
	VMOVDQU X4, 48(AX)

	RET

11
internal/asm/aes_noasm.go Normal file
View file

@ -0,0 +1,11 @@
//go:build !amd64 || purego
package asm
// AESRoundEncrypt would perform a single hardware AES encryption round
// on state with key; there is no software fallback in this package.
// NOTE(review): names differ from the amd64 declarations
// (AESRoundTrip_*) — confirm which symbols callers actually reference.
func AESRoundEncrypt(state *[4]uint32, key *[4]uint32) {
	panic("not implemented")
}

// AESRoundDecrypt would perform a single hardware AES decryption round
// on state with key; there is no software fallback in this package.
func AESRoundDecrypt(state *[4]uint32, key *[4]uint32) {
	panic("not implemented")
}

View file

@ -0,0 +1,5 @@
//go:build amd64 && !purego
package asm
func Cpuid(op uint32) (eax, ebx, ecx, edx uint32)

View file

@ -0,0 +1,15 @@
//go:build amd64 && !purego
#include "textflag.h"
// func Cpuid(op uint32) (eax, ebx, ecx, edx uint32)
// Fix: replace the legacy numeric text flag "7" with NOSPLIT (textflag.h
// is already included) and declare the 24-byte argument/result size so
// go vet's asmdecl pass can validate the FP offsets below.
TEXT ·Cpuid(SB), NOSPLIT, $0-24
	// Zero the sub-leaf index so leaves with ECX-dependent output
	// (e.g. 4, 7, 0xB) report sub-leaf 0 deterministically.
	XORQ CX, CX
	MOVL op+0(FP), AX
	CPUID
	MOVL AX, eax+8(FP)
	MOVL BX, ebx+12(FP)
	MOVL CX, ecx+16(FP)
	MOVL DX, edx+20(FP)
	RET

5
internal/asm/round.go Normal file
View file

@ -0,0 +1,5 @@
package asm
func SetRoundingMode[T ~uint64 | ~uint8](mode T) {
setRoundingMode(uint8(mode))
}

View file

@ -0,0 +1,6 @@
//go:build 386 && !purego
package asm
// setRoundingMode writes the low two bits of mode into the MXCSR
// rounding-control field. Implemented in round_386.s.
//
//go:noescape
func setRoundingMode(mode uint8)

21
internal/asm/round_386.s Normal file
View file

@ -0,0 +1,21 @@
//go:build 386 && !purego
#include "textflag.h"
// func setRoundingMode(mode uint8)
//
// Replaces the MXCSR rounding-control field (bits 13-14) with mode,
// preserving all other MXCSR state.
TEXT ·setRoundingMode(SB),NOSPLIT|NOFRAME,$4-1
	// Fix: the Go declaration names this argument "mode"; referencing it
	// as "addr" fails go vet's asmdecl check.
	MOVB mode+0(FP), AX
	ANDL $3, AX
	// rotate the 2-bit mode into the MXCSR RC position (bits 13-14)
	ROLL $13, AX
	// make a scratch stack slot and capture the current MXCSR
	PUSHL AX
	STMXCSR 0(SP)
	// clear the old rounding-control bits, merge in the new mode
	ANDL $~0x6000, 0(SP)
	ORL AX, 0(SP)
	// load the updated MXCSR
	LDMXCSR 0(SP)
	POPL AX
	RET

View file

@ -0,0 +1,6 @@
//go:build amd64 && !purego
package asm
// setRoundingMode writes the low two bits of mode into the MXCSR
// rounding-control field. Implemented in round_amd64.s.
//
//go:noescape
func setRoundingMode(mode uint8)

View file

@ -0,0 +1,21 @@
//go:build amd64 && !purego
#include "textflag.h"
// func setRoundingMode(mode uint8)
//
// Replaces the MXCSR rounding-control field (bits 13-14) with mode,
// preserving all other MXCSR state.
TEXT ·setRoundingMode(SB),NOSPLIT|NOFRAME,$8-1
	// Fix: the Go declaration names this argument "mode"; referencing it
	// as "addr" fails go vet's asmdecl check.
	MOVB mode+0(FP), AX
	ANDQ $3, AX
	// rotate the 2-bit mode into the MXCSR RC position (bits 13-14)
	ROLQ $13, AX
	// make a scratch stack slot and capture the current MXCSR
	PUSHQ AX
	STMXCSR 0(SP)
	// clear the old rounding-control bits, merge in the new mode
	ANDL $~0x6000, 0(SP)
	ORL AX, 0(SP)
	// load the updated MXCSR
	LDMXCSR 0(SP)
	POPQ AX
	RET

23
internal/asm/round_arm.go Normal file
View file

@ -0,0 +1,23 @@
//go:build (arm.6 || arm.7) && !purego
package asm
// getFPSCR returns the current value of the FPSCR register.
// Implemented in round_arm.s.
func getFPSCR() (value uint32)

// setFPSCR writes value to the FPSCR register.
// Implemented in round_arm.s.
func setFPSCR(value uint32)

// setRoundingMode maps the x86/MXCSR-style rounding mode used by the
// rest of this package onto the ARM FPSCR RMode field (bits 22-23).
func setRoundingMode(mode uint8) {
	switch mode {
	// The directed modes are swapped between the two encodings:
	// MXCSR uses 1 = toward -inf, 2 = toward +inf, while FPSCR RMode
	// uses 1 = toward +inf, 2 = toward -inf.
	case 1:
		mode = 2
	case 2:
		mode = 1
	}
	fpscr := getFPSCR()
	// clear bits 22-23 (0x00C00000) and insert the new mode there
	fpscr = (fpscr & (^uint32(0x0C00000))) | ((uint32(mode) & 3) << 22)
	setFPSCR(fpscr)
}

13
internal/asm/round_arm.s Normal file
View file

@ -0,0 +1,13 @@
//go:build (arm.6 || arm.7) && !purego
#include "textflag.h"
// func getFPSCR() (value uint32)
// Reads the VFP FPSCR register via a raw VMRS encoding (the assembler
// has no mnemonic for it here).
TEXT ·getFPSCR(SB),NOSPLIT,$0-4
	WORD $0xeef1ba10 // vmrs r11, fpscr
	MOVW R11, value+0(FP)
	RET

// func setFPSCR(value uint32)
// Writes the VFP FPSCR register via a raw VMSR encoding.
TEXT ·setFPSCR(SB),NOSPLIT,$0-4
	MOVW value+0(FP), R11
	WORD $0xeee1ba10 // vmsr fpscr, r11
	RET

View file

@ -1,4 +1,4 @@
//go:build arm64
//go:build arm64 && !purego
package asm

View file

@ -1,3 +1,5 @@
//go:build arm64 && !purego
#include "textflag.h"
TEXT ·getFPCR(SB),NOSPLIT,$0-8

View file

@ -1,4 +1,4 @@
//go:build !arm64 && !amd64 && !386
//go:build (!arm64 && !(arm.6 || arm.7) && !amd64 && !386) || purego
package asm

View file

@ -0,0 +1,46 @@
package blake2
import (
"encoding/binary"
"golang.org/x/crypto/blake2b"
)
// Generator is a deterministic pseudo-random byte/uint32 stream derived
// from a seed and nonce by repeatedly hashing a 64-byte internal state
// with BLAKE2b-512.
type Generator struct {
	state [blake2b.Size]byte
	i     int
}

// New creates a Generator from up to 60 bytes of seed plus a 32-bit
// little-endian nonce stored in the last 4 state bytes. The read index
// starts exhausted, so the first read performs the initial BLAKE2b pass.
func New(seed []byte, nonce uint32) *Generator {
	g := &Generator{i: blake2b.Size}
	copy(g.state[:60], seed)
	binary.LittleEndian.PutUint32(g.state[60:], nonce)
	return g
}

// GetUint32 returns the next little-endian uint32 from the stream,
// rehashing first when fewer than 4 bytes remain (leftover bytes are
// discarded).
func (g *Generator) GetUint32() (v uint32) {
	if len(g.state)-g.i < 4 {
		g.reseed()
	}
	v = binary.LittleEndian.Uint32(g.state[g.i:])
	g.i += 4
	return v
}

// GetByte returns the next byte from the stream, rehashing first when
// the state is exhausted.
func (g *Generator) GetByte() (v byte) {
	if g.i >= len(g.state) {
		g.reseed()
	}
	v = g.state[g.i]
	g.i++
	return v
}

// reseed replaces the state with its BLAKE2b-512 digest and rewinds the
// read index.
func (g *Generator) reseed() {
	g.state = blake2b.Sum512(g.state[:])
	g.i = 0
}

View file

@ -0,0 +1,32 @@
package memory
import "unsafe"
// AlignedAllocator hands out byte slices whose backing pointer is
// aligned to a power-of-two boundary; the value is the alignment itself.
type AlignedAllocator uint64

// NewAlignedAllocator returns an Allocator producing allocations aligned
// to alignment bytes. alignment must be zero or a power of two.
func NewAlignedAllocator(alignment uint64) Allocator {
	if !isZeroOrPowerOf2(alignment) {
		panic("alignment must be a power of 2")
	}
	return AlignedAllocator(alignment)
}

// AllocMemory returns a size-byte slice aligned to the allocator's
// boundary.
func (a AlignedAllocator) AllocMemory(size uint64) ([]byte, error) {
	if a <= 4 {
		// Small alignments are already satisfied by Go slice
		// allocations; fast path.
		return make([]byte, size, max(size, uint64(a))), nil
	}
	// Over-allocate by the alignment, then reslice at the first
	// aligned offset of the backing array.
	buf := make([]byte, size+uint64(a))
	base := uint64(uintptr(unsafe.Pointer(unsafe.SliceData(buf))))
	off := uint64(a) - (base & (uint64(a) - 1))
	if off == uint64(a) {
		// backing pointer is already aligned
		return buf[:size:size], nil
	}
	return buf[off : off+size : off+size], nil
}

// FreeMemory is a no-op: the garbage collector reclaims the slice.
func (a AlignedAllocator) FreeMemory(memory []byte) error {
	return nil
}

45
internal/memory/alloc.go Normal file
View file

@ -0,0 +1,45 @@
package memory
import (
"unsafe"
)
// Allocator abstracts raw memory allocation strategies (GC heap,
// aligned, page-backed, large-page).
type Allocator interface {
	AllocMemory(size uint64) ([]byte, error)
	FreeMemory(memory []byte) error
}

// Allocate obtains storage for a single value of type T from a and
// reinterprets it as *T.
func Allocate[T any](a Allocator) (*T, error) {
	var zero T
	buf, err := a.AllocMemory(uint64(unsafe.Sizeof(zero)))
	if err != nil {
		return nil, err
	}
	return (*T)(unsafe.Pointer(unsafe.SliceData(buf))), nil
}

// Free releases a value previously obtained via Allocate, handing its
// byte view back to the allocator.
func Free[T any](a Allocator, v *T) error {
	var zero T
	return a.FreeMemory(unsafe.Slice((*byte)(unsafe.Pointer(v)), uint64(unsafe.Sizeof(zero))))
}

// AllocateSlice obtains storage for size elements of type T from a.
func AllocateSlice[T any, T2 ~int | ~uint64 | ~uint32](a Allocator, size T2) ([]T, error) {
	var zero T
	buf, err := a.AllocMemory(uint64(unsafe.Sizeof(zero)) * uint64(size))
	if err != nil {
		return nil, err
	}
	return unsafe.Slice((*T)(unsafe.Pointer(unsafe.SliceData(buf))), size), nil
}

// FreeSlice releases a slice previously obtained via AllocateSlice.
func FreeSlice[T any](a Allocator, v []T) error {
	var zero T
	return a.FreeMemory(unsafe.Slice((*byte)(unsafe.Pointer(unsafe.SliceData(v))), uint64(unsafe.Sizeof(zero))*uint64(len(v))))
}

// isZeroOrPowerOf2 reports whether x is 0 or an exact power of two.
func isZeroOrPowerOf2(x uint64) bool {
	return x&(x-1) == 0
}

View file

@ -0,0 +1,45 @@
//go:build freebsd && !purego
package memory
import (
"golang.org/x/sys/unix"
)
// LargePageAllocator allocates anonymous memory while requesting FreeBSD
// superpage alignment via mmap.
type LargePageAllocator struct {
}

// NewLargePageAllocator returns an allocator that requests
// superpage-aligned anonymous mappings.
func NewLargePageAllocator() Allocator {
	return LargePageAllocator{}
}

/*
 * Request specific alignment (n == log2 of the desired alignment).
 *
 * MAP_ALIGNED_SUPER requests optimal superpage alignment, but does
 * not enforce a specific alignment.
 */
//#define MAP_ALIGNED(n) ((n) << MAP_ALIGNMENT_SHIFT)
//#define MAP_ALIGNMENT_SHIFT 24
//#define MAP_ALIGNMENT_MASK MAP_ALIGNED(0xff)
//#define MAP_ALIGNED_SUPER MAP_ALIGNED(1) /* align on a superpage */

// MAP_ALIGNED_SUPER mirrors FreeBSD's MAP_ALIGNED(1) flag above; the
// kernel treats it as a hint, not a guarantee.
const MAP_ALIGNED_SUPER = 1 << 24

// AllocMemory maps size bytes of private anonymous read-write memory
// with superpage alignment requested.
func (a LargePageAllocator) AllocMemory(size uint64) ([]byte, error) {
	memory, err := unix.Mmap(-1, 0, int(size), unix.PROT_READ|unix.PROT_WRITE, unix.MAP_PRIVATE|unix.MAP_ANONYMOUS|MAP_ALIGNED_SUPER)
	if err != nil {
		return nil, err
	}
	return memory, nil
}

// FreeMemory unmaps memory previously returned by AllocMemory; a nil
// slice is a no-op.
func (a LargePageAllocator) FreeMemory(memory []byte) error {
	if memory == nil {
		return nil
	}
	return unix.Munmap(memory)
}

View file

@ -0,0 +1,10 @@
//go:build openbsd || netbsd || dragonfly || darwin || ios || !unix || purego
package memory
// LargePageNoMemoryErr is always nil here: large pages are unsupported
// on this platform, so there is no allocation-failure error to match.
var LargePageNoMemoryErr error

// NewLargePageAllocator Not supported in platform; returns nil so
// callers can detect the missing capability.
func NewLargePageAllocator() Allocator {
	return nil
}

View file

@ -0,0 +1,31 @@
//go:build unix && !(freebsd || openbsd || netbsd || dragonfly || darwin || ios) && !purego
package memory
import (
"golang.org/x/sys/unix"
)
// LargePageAllocator allocates anonymous memory backed by huge pages
// (MAP_HUGETLB).
type LargePageAllocator struct {
}

// NewLargePageAllocator returns an allocator backed by anonymous
// huge-page mappings.
func NewLargePageAllocator() Allocator {
	return LargePageAllocator{}
}

// AllocMemory maps size bytes of private anonymous read-write huge-page
// memory. MAP_POPULATE pre-faults the pages so the mapping is usable
// immediately; insufficient huge-page reservation surfaces as an mmap
// error here rather than a fault later.
func (a LargePageAllocator) AllocMemory(size uint64) ([]byte, error) {
	memory, err := unix.Mmap(-1, 0, int(size), unix.PROT_READ|unix.PROT_WRITE, unix.MAP_PRIVATE|unix.MAP_ANONYMOUS|unix.MAP_HUGETLB|unix.MAP_POPULATE)
	if err != nil {
		return nil, err
	}
	return memory, nil
}

// FreeMemory unmaps memory previously returned by AllocMemory; a nil
// slice is a no-op.
func (a LargePageAllocator) FreeMemory(memory []byte) error {
	if memory == nil {
		return nil
	}
	return unix.Munmap(memory)
}

View file

@ -0,0 +1,22 @@
//go:build !unix || purego
package memory
// PageNoMemoryErr is always nil here: page allocation is unsupported on
// this platform, so there is no allocation-failure error to match.
var PageNoMemoryErr error

// NewPageAllocator returns nil: page-based allocation is not supported
// on this platform.
func NewPageAllocator() Allocator {
	return nil
}

// PageReadWrite is unsupported on this platform and always panics.
func PageReadWrite(memory []byte) error {
	panic("not supported")
}

// PageReadExecute is unsupported on this platform and always panics.
func PageReadExecute(memory []byte) error {
	panic("not supported")
}

// PageReadWriteExecute Insecure! Unsupported on this platform; always
// panics.
func PageReadWriteExecute(memory []byte) error {
	panic("not supported")
}

View file

@ -0,0 +1,46 @@
//go:build unix && !purego
package memory
import (
"golang.org/x/sys/unix"
)
// PageNoMemoryErr is the error mmap reports when memory cannot be
// allocated; callers use errors.Is against it to detect that case.
var PageNoMemoryErr = unix.ENOMEM

// PageAllocator allocates whole-page anonymous memory via mmap so its
// protection can later be changed with the Page* helpers below.
type PageAllocator struct {
}

// NewPageAllocator returns a page-backed Allocator.
func NewPageAllocator() Allocator {
	return PageAllocator{}
}

// AllocMemory maps size bytes of private anonymous read-write memory.
func (a PageAllocator) AllocMemory(size uint64) ([]byte, error) {
	memory, err := unix.Mmap(-1, 0, int(size), unix.PROT_READ|unix.PROT_WRITE, unix.MAP_PRIVATE|unix.MAP_ANONYMOUS)
	if err != nil {
		return nil, err
	}
	return memory, nil
}

// FreeMemory unmaps memory previously returned by AllocMemory; a nil
// slice is a no-op.
func (a PageAllocator) FreeMemory(memory []byte) error {
	if memory == nil {
		return nil
	}
	return unix.Munmap(memory)
}

// PageReadWrite marks memory readable and writable (e.g. before JIT
// code is written).
func PageReadWrite(memory []byte) error {
	return unix.Mprotect(memory, unix.PROT_READ|unix.PROT_WRITE)
}

// PageReadExecute marks memory readable and executable (W^X execution
// of generated code).
func PageReadExecute(memory []byte) error {
	return unix.Mprotect(memory, unix.PROT_READ|unix.PROT_EXEC)
}

// PageReadWriteExecute Insecure! Marks memory simultaneously writable
// and executable.
func PageReadWriteExecute(memory []byte) error {
	return unix.Mprotect(memory, unix.PROT_READ|unix.PROT_WRITE|unix.PROT_EXEC)
}

268
jit_amd64.go Normal file
View file

@ -0,0 +1,268 @@
//go:build unix && amd64 && !disable_jit && !purego
package randomx
import (
"encoding/binary"
"git.gammaspectra.live/P2Pool/go-randomx/v3/internal/asm"
)
const supportsJIT = true
/*
REGISTER ALLOCATION:
; rax -> temporary
; rbx -> todo: iteration counter "ic"
; rcx -> temporary
; rdx -> temporary
; rsi -> scratchpad pointer
; rdi -> todo: dataset pointer
; rbp -> (do not use, it's used by Golang sampling) jump target //todo: memory registers "ma" (high 32 bits), "mx" (low 32 bits)
; rsp -> stack pointer
; r8 -> "r0"
; r9 -> "r1"
; r10 -> "r2"
; r11 -> "r3"
; r12 -> "r4"
; r13 -> "r5"
; r14 -> "r6"
; r15 -> "r7"
; xmm0 -> "f0"
; xmm1 -> "f1"
; xmm2 -> "f2"
; xmm3 -> "f3"
; xmm4 -> "e0"
; xmm5 -> "e1"
; xmm6 -> "e2"
; xmm7 -> "e3"
; xmm8 -> "a0"
; xmm9 -> "a1"
; xmm10 -> "a2"
; xmm11 -> "a3"
; xmm12 -> temporary
; xmm13 -> E 'and' mask = 0x00ffffffffffffff00ffffffffffffff
; xmm14 -> E 'or' mask = 0x3*00000000******3*00000000******
; xmm15 -> scale mask = 0x81f000000000000081f0000000000000
*/
// Sizing constants for the generated JIT code buffer.
const MaxRandomXInstrCodeSize = 32 //FDIV_M requires up to 32 bytes of x86 code
const MaxSuperscalarInstrSize = 14 //IMUL_RCP requires 14 bytes of x86 code
const SuperscalarProgramHeader = 128 //overhead per superscalar program
const CodeAlign = 4096 //align code size to a multiple of 4 KiB
const ReserveCodeSize = CodeAlign //function prologue/epilogue + reserve
// alignSize rounds pos up to the next multiple of align (align must be
// non-zero).
//
// Fix: pos == 0 previously evaluated (pos-1), which underflows for the
// unsigned type parameters and produced a huge (overflowing) result;
// zero now correctly aligns to zero.
func alignSize[T ~uintptr | ~uint32 | ~uint64 | ~int64 | ~int32 | ~int](pos, align T) T {
	if pos == 0 {
		return 0
	}
	return ((pos-1)/align + 1) * align
}
// Page-aligned sizes of the two regions of the JIT code buffer: the
// per-program RandomX code and the superscalar (dataset init) code.
var RandomXCodeSize = alignSize[uint64](ReserveCodeSize+MaxRandomXInstrCodeSize*RANDOMX_PROGRAM_SIZE, CodeAlign)
var SuperscalarSize = alignSize[uint64](ReserveCodeSize+(SuperscalarProgramHeader+MaxSuperscalarInstrSize*SuperscalarMaxSize)*RANDOMX_CACHE_ACCESSES, CodeAlign)

// CodeSize is the total buffer size covering both regions.
var CodeSize = uint32(RandomXCodeSize + SuperscalarSize)

// superScalarHashOffset is the byte offset where superscalar hash code
// begins inside the buffer (immediately after the program region).
var superScalarHashOffset = int32(RandomXCodeSize)
// x86-64 machine-code fragments used by the JIT compiler. Each value is
// an opcode stem (REX prefix + opcode bytes); the emitter appends
// ModRM/SIB bytes and immediates as needed. Names follow the mnemonic
// plus operand kinds (R = register, M = memory, I = immediate).

// integer ALU / move stems
var REX_ADD_RR = []byte{0x4d, 0x03}
var REX_ADD_RM = []byte{0x4c, 0x03}
var REX_SUB_RR = []byte{0x4d, 0x2b}
var REX_SUB_RM = []byte{0x4c, 0x2b}
var REX_MOV_RR = []byte{0x41, 0x8b}
var REX_MOV_RR64 = []byte{0x49, 0x8b}
var REX_MOV_R64R = []byte{0x4c, 0x8b}

// multiply stems
var REX_IMUL_RR = []byte{0x4d, 0x0f, 0xaf}
var REX_IMUL_RRI = []byte{0x4d, 0x69}
var REX_IMUL_RM = []byte{0x4c, 0x0f, 0xaf}
var REX_MUL_R = []byte{0x49, 0xf7}
var REX_MUL_M = []byte{0x48, 0xf7}
var REX_81 = []byte{0x49, 0x81}
var AND_EAX_I byte = 0x25
var MOV_EAX_I byte = 0xb8
var MOV_RAX_I = []byte{0x48, 0xb8}
var MOV_RCX_I = []byte{0x48, 0xb9}
var REX_LEA = []byte{0x4f, 0x8d}
var REX_MUL_MEM = []byte{0x48, 0xf7, 0x24, 0x0e}
var REX_IMUL_MEM = []byte{0x48, 0xf7, 0x2c, 0x0e}
var REX_SHR_RAX = []byte{0x48, 0xc1, 0xe8}
var RAX_ADD_SBB_1 = []byte{0x48, 0x83, 0xC0, 0x01, 0x48, 0x83, 0xD8, 0x00}
var MUL_RCX = []byte{0x48, 0xf7, 0xe1}
var REX_SHR_RDX = []byte{0x48, 0xc1, 0xea}
var REX_SH = []byte{0x49, 0xc1}
var MOV_RCX_RAX_SAR_RCX_63 = []byte{0x48, 0x89, 0xc1, 0x48, 0xc1, 0xf9, 0x3f}
var AND_ECX_I = []byte{0x81, 0xe1}
var ADD_RAX_RCX = []byte{0x48, 0x01, 0xC8}
var SAR_RAX_I8 = []byte{0x48, 0xC1, 0xF8}
var NEG_RAX = []byte{0x48, 0xF7, 0xD8}
var ADD_R_RAX = []byte{0x4C, 0x03}
var XOR_EAX_EAX = []byte{0x33, 0xC0}
var ADD_RDX_R = []byte{0x4c, 0x01}
var SUB_RDX_R = []byte{0x4c, 0x29}
var SAR_RDX_I8 = []byte{0x48, 0xC1, 0xFA}
var TEST_RDX_RDX = []byte{0x48, 0x85, 0xD2}
var SETS_AL_ADD_RDX_RAX = []byte{0x0F, 0x98, 0xC0, 0x48, 0x03, 0xD0}
var REX_NEG = []byte{0x49, 0xF7}
var REX_XOR_RR = []byte{0x4D, 0x33}
var REX_XOR_RI = []byte{0x49, 0x81}
var REX_XOR_RM = []byte{0x4c, 0x33}
var REX_ROT_CL = []byte{0x49, 0xd3}
var REX_ROT_I8 = []byte{0x49, 0xc1}

// SSE/SSE2 floating-point stems (operate on xmm registers)
var SHUFPD = []byte{0x66, 0x0f, 0xc6}
var REX_ADDPD = []byte{0x66, 0x41, 0x0f, 0x58}
var REX_CVTDQ2PD_XMM12 = []byte{0xf3, 0x44, 0x0f, 0xe6, 0x24, 0x06}
var REX_SUBPD = []byte{0x66, 0x41, 0x0f, 0x5c}
var REX_XORPS = []byte{0x41, 0x0f, 0x57}
var REX_MULPD = []byte{0x66, 0x41, 0x0f, 0x59}
var REX_MAXPD = []byte{0x66, 0x41, 0x0f, 0x5f}
var REX_DIVPD = []byte{0x66, 0x41, 0x0f, 0x5e}
var SQRTPD = []byte{0x66, 0x0f, 0x51}
var AND_OR_MOV_LDMXCSR = []byte{0x25, 0x00, 0x60, 0x00, 0x00, 0x0D, 0xC0, 0x9F, 0x00, 0x00, 0x50, 0x0F, 0xAE, 0x14, 0x24, 0x58}
var ROL_RAX = []byte{0x48, 0xc1, 0xc0}
var XOR_ECX_ECX = []byte{0x33, 0xC9}
var REX_CMP_R32I = []byte{0x41, 0x81}
var REX_CMP_M32I = []byte{0x81, 0x3c, 0x06}
var MOVAPD = []byte{0x66, 0x0f, 0x29}
var REX_MOV_MR = []byte{0x4c, 0x89}
var REX_XOR_EAX = []byte{0x41, 0x33}

// control-flow fragments
var SUB_EBX = []byte{0x83, 0xEB, 0x01}
var JNZ = []byte{0x0f, 0x85}
var JMP byte = 0xe9
var REX_XOR_RAX_R64 = []byte{0x49, 0x33}
var REX_XCHG = []byte{0x4d, 0x87}
var REX_ANDPS_XMM12 = []byte{0x45, 0x0F, 0x54, 0xE5, 0x45, 0x0F, 0x56, 0xE6}
var REX_PADD = []byte{0x66, 0x44, 0x0f}
var PADD_OPCODES = []byte{0xfc, 0xfd, 0xfe, 0xd4}
// NOTE(review): CALL is an untyped int unlike JMP/JZ_SHORT/RET (byte) —
// confirm whether callers rely on the int type before normalizing.
var CALL = 0xe8
var REX_ADD_I = []byte{0x49, 0x81}
var REX_TEST = []byte{0x49, 0xF7}
var JZ = []byte{0x0f, 0x84}
var JZ_SHORT byte = 0x74
var RET byte = 0xc3
var LEA_32 = []byte{0x41, 0x8d}
var MOVNTI = []byte{0x4c, 0x0f, 0xc3}
var ADD_EBX_I = []byte{0x81, 0xc3}

// multi-byte NOP padding, indexed by length-1 via NOPX
var NOP1 = []byte{0x90}
var NOP2 = []byte{0x66, 0x90}
var NOP3 = []byte{0x66, 0x66, 0x90}
var NOP4 = []byte{0x0F, 0x1F, 0x40, 0x00}
var NOP5 = []byte{0x0F, 0x1F, 0x44, 0x00, 0x00}
var NOP6 = []byte{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00}
var NOP7 = []byte{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00}
var NOP8 = []byte{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}
var NOPX = [][]byte{NOP1, NOP2, NOP3, NOP4, NOP5, NOP6, NOP7, NOP8}

// JMP_ALIGN_PREFIX[n] is an n-byte padding sequence (0x2E CS-override
// prefixes, optionally led by a NOP) placed before a jump; presumably
// used to keep jumps from crossing 32-byte boundaries (see
// BranchesWithin32B) — confirm against the emitter.
var JMP_ALIGN_PREFIX = [14][]byte{
	{},
	{0x2E},
	{0x2E, 0x2E},
	{0x2E, 0x2E, 0x2E},
	{0x2E, 0x2E, 0x2E, 0x2E},
	{0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
	{0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
	{0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
	{0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
	{0x90, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
	{0x66, 0x90, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
	{0x66, 0x66, 0x90, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
	{0x0F, 0x1F, 0x40, 0x00, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
	{0x0F, 0x1F, 0x44, 0x00, 0x00, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
}
// genSIB encodes an x86 SIB byte from its three fields: scale in bits
// 6-7, index in bits 3-5 and base in bits 0-2.
func genSIB(scale, index, base int) byte {
	sib := scale<<6 | index<<3 | base
	return byte(sib)
}
// genAddressReg appends code computing a masked scratchpad address from
// instr: a 32-bit LEA of [src + Imm] followed by an AND with MemMask.
// The address is built in eax when rax is true, otherwise in ecx.
func genAddressReg(buf []byte, instr *ByteCodeInstruction, rax bool) []byte {
	buf = append(buf, LEA_32...)
	// ModRM: 0x80 selects disp32 addressing; +0/+8 picks eax vs ecx as
	// the destination register field
	if rax {
		buf = append(buf, 0x80+instr.Src+0)
	} else {
		buf = append(buf, 0x80+instr.Src+8)
	}
	// source registers whose encoding collides with the SIB escape need
	// an explicit SIB byte (0x24)
	if instr.Src == RegisterNeedsSib {
		buf = append(buf, 0x24)
	}
	buf = binary.LittleEndian.AppendUint32(buf, uint32(instr.Imm))
	// mask the computed address into the scratchpad range
	if rax {
		buf = append(buf, AND_EAX_I)
	} else {
		buf = append(buf, AND_ECX_I...)
	}
	buf = binary.LittleEndian.AppendUint32(buf, instr.MemMask)
	return buf
}
// valAsString reinterprets the given 32-bit words as a little-endian
// byte string, truncated at the first NUL byte. Used to decode CPUID
// vendor strings.
func valAsString(values ...uint32) []byte {
	out := make([]byte, 4*len(values))
	for i, v := range values {
		word := out[i*4 : i*4+4]
		binary.LittleEndian.PutUint32(word, v)
		for j, b := range word {
			if b == 0 {
				return out[:i*4+j]
			}
		}
	}
	return out
}
// familyModel decodes the CPU family, model and stepping fields from
// CPUID leaf 1 (EAX). Returns zeros when leaf 1 is unavailable
// (maxFunctionId from leaf 0 is below 1).
func familyModel(maxFunctionId uint32) (family, model, stepping int) {
	if maxFunctionId < 0x1 {
		return 0, 0, 0
	}
	eax, _, _, _ := asm.Cpuid(1)
	// If BaseFamily[3:0] is less than Fh then ExtendedFamily[7:0] is reserved and Family is equal to BaseFamily[3:0].
	family = int((eax >> 8) & 0xf)
	extFam := family == 0x6 // Intel is 0x6, needs extended model.
	if family == 0xf {
		// Add ExtFamily (bits 20-27)
		family += int((eax >> 20) & 0xff)
		extFam = true
	}
	// If BaseFamily[3:0] is less than 0Fh then ExtendedModel[3:0] is reserved and Model is equal to BaseModel[3:0].
	model = int((eax >> 4) & 0xf)
	if extFam {
		// Add ExtModel (bits 16-19) shifted into the high nibble
		model += int((eax >> 12) & 0xf0)
	}
	stepping = int(eax & 0xf)
	return family, model, stepping
}
// BranchesWithin32B is true on Intel CPUs affected by the JCC erratum,
// where jump instructions crossing a 32-byte boundary can be mispredicted;
// the JIT then pads branches to avoid those boundaries. Detected once at
// startup from CPUID vendor/family/model/stepping.
var BranchesWithin32B = func() bool {
	a, b, c, d := asm.Cpuid(0)
	// vendor string is EBX:EDX:ECX, e.g. "GenuineIntel"
	v := string(valAsString(b, d, c))
	if v == "GenuineIntel" {
		family, model, stepping := familyModel(a)
		// Intel JCC erratum mitigation
		if family == 6 {
			// Affected CPU models and stepping numbers are taken from https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
			return ((model == 0x4E) && (stepping == 0x3)) ||
				((model == 0x55) && ((stepping == 0x4) || (stepping == 0x7))) ||
				((model == 0x5E) && (stepping == 0x3)) ||
				((model == 0x8E) && (stepping >= 0x9) && (stepping <= 0xC)) ||
				((model == 0x9E) && (stepping >= 0x9) && (stepping <= 0xD)) ||
				((model == 0xA6) && (stepping == 0x0)) ||
				((model == 0xAE) && (stepping == 0xA))
		}
	}
	return false
}()

7
jit_generic.go Normal file
View file

@ -0,0 +1,7 @@
//go:build !unix || !amd64 || disable_jit || purego
package randomx
// supportsJIT is false on platforms/builds without a JIT backend
// (non-unix, non-amd64, disable_jit or purego).
const supportsJIT = false

// RandomXCodeSize is zero because no JIT code buffer is generated here.
var RandomXCodeSize uint64 = 0

79
math.go Normal file
View file

@ -0,0 +1,79 @@
package randomx
import (
"math"
"math/bits"
)
// IEEE-754 double-precision field sizes.
const (
	mantbits64 uint = 52
	expbits64  uint = 11
)

// Field masks and bias for IEEE-754 doubles.
const mantissaMask = (uint64(1) << mantbits64) - 1
const exponentMask = (uint64(1) << expbits64) - 1
const exponentBias = 1023

// RandomX group-E register exponent layout.
const dynamicExponentBits = 4
const staticExponentBits = 4
const constExponentBits uint64 = 0x300
const dynamicMantissaMask = (uint64(1) << (mantbits64 + dynamicExponentBits)) - 1

const mask22bit = (uint64(1) << 22) - 1

// MaskRegisterExponentMantissa keeps the mantissa and low dynamic
// exponent bits of f, then ORs in the precomputed exponent/sign mask
// mode.
func MaskRegisterExponentMantissa(f float64, mode uint64) float64 {
	masked := math.Float64bits(f)&dynamicMantissaMask | mode
	return math.Float64frombits(masked)
}

// ScaleNegate flips the sign bit and XORs the top exponent bits of f
// with the RandomX scale constant 0x80F0000000000000.
func ScaleNegate(f float64) float64 {
	const scaleMask = 0x80F0000000000000
	return math.Float64frombits(scaleMask ^ math.Float64bits(f))
}

// SmallPositiveFloatBits builds a positive double from entropy: the top
// 5 bits (0..31) select the unbiased exponent, the low 52 bits the
// mantissa, giving a value in [1, 2^32).
func SmallPositiveFloatBits(entropy uint64) float64 {
	exp := (entropy>>59 + exponentBias) & exponentMask
	return math.Float64frombits(exp<<mantbits64 | entropy&mantissaMask)
}

// StaticExponent derives the fixed exponent bits of a group-E register
// mask from the top staticExponentBits of entropy.
func StaticExponent(entropy uint64) uint64 {
	exp := constExponentBits | (entropy>>(64-staticExponentBits))<<dynamicExponentBits
	return exp << mantbits64
}

// ExponentMask combines the low 22 entropy bits with the static
// exponent to form the group-E OR mask.
func ExponentMask(entropy uint64) uint64 {
	return entropy&mask22bit | StaticExponent(entropy)
}

// Xor returns the bitwise XOR of the raw representations of a and b.
func Xor(a, b float64) float64 {
	return math.Float64frombits(math.Float64bits(a) ^ math.Float64bits(b))
}

// smulh returns the upper 64 bits of the signed 128-bit product a*b.
func smulh(a, b int64) uint64 {
	hi, _ := bits.Mul64(uint64(a), uint64(b))
	// correct the unsigned high word for negative operands
	if a < 0 {
		hi -= uint64(b)
	}
	if b < 0 {
		hi -= uint64(a)
	}
	return hi
}

// reciprocal
// Calculates rcp = 2**x / divisor for highest integer x such that rcp < 2**64.
// divisor must not be 0 or a power of 2
func reciprocal(divisor uint32) uint64 {
	const p2exp63 = uint64(1) << 63
	d := uint64(divisor)
	quotient, remainder := p2exp63/d, p2exp63%d
	shift := bits.Len32(divisor)
	return quotient<<shift + (remainder<<shift)/d
}

// signExtend2sCompl widens x to 64 bits, treating it as a signed 32-bit
// two's-complement value.
func signExtend2sCompl(x uint32) uint64 {
	return uint64(int64(int32(x)))
}

28
math_test.go Normal file
View file

@ -0,0 +1,28 @@
package randomx
import "testing"
// TestReciprocal checks reciprocal against known-good values matching
// the reference RandomX implementation.
func TestReciprocal(t *testing.T) {
	t.Parallel()

	// a is the divisor, b the expected 2**x/a result
	var tests = []struct {
		a uint32
		b uint64
	}{
		{3, 12297829382473034410},
		{13, 11351842506898185609},
		{33, 17887751829051686415},
		{65537, 18446462603027742720},
		{15000001, 10316166306300415204},
		{3845182035, 10302264209224146340},
		{0xffffffff, 9223372039002259456},
	}

	for i, tt := range tests {
		r := reciprocal(tt.a)
		if r != tt.b {
			t.Errorf("i=%d, a=%d", i, tt.a)
			t.Errorf("expected=%016x, actual=%016x", tt.b, r)
		}
	}
}

View file

@ -30,31 +30,95 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package randomx
import (
"fmt"
"encoding/hex"
"errors"
"git.gammaspectra.live/P2Pool/go-randomx/v3/internal/aes"
"git.gammaspectra.live/P2Pool/go-randomx/v3/internal/memory"
"os"
"runtime"
"slices"
"strings"
"unsafe"
)
import "testing"
var Tests = []struct {
key []byte // key
input []byte // input
expected string // expected result
}{
{[]byte("RandomX example key\x00"), []byte("RandomX example input\x00"), "8a48e5f9db45ab79d9080574c4d81954fe6ac63842214aff73c244b26330b7c9"},
{[]byte("test key 000"), []byte("This is a test"), "639183aae1bf4c9a35884cb46b09cad9175f04efd7684e7262a0ac1c2f0b4e3f"}, // test a
{[]byte("test key 000"), []byte("Lorem ipsum dolor sit amet"), "300a0adb47603dedb42228ccb2b211104f4da45af709cd7547cd049e9489c969"}, // test b
{[]byte("test key 000"), []byte("sed do eiusmod tempor incididunt ut labore et dolore magna aliqua"), "c36d4ed4191e617309867ed66a443be4075014e2b061bcdaf9ce7b721d2b77a8"}, // test c
{[]byte("test key 001"), []byte("sed do eiusmod tempor incididunt ut labore et dolore magna aliqua"), "e9ff4503201c0c2cca26d285c93ae883f9b1d30c9eb240b820756f2d5a7905fc"}, // test d
type testdata struct {
name string
key []byte
input []byte
// expected result, in hex
expected string
}
func Test_Randomx(t *testing.T) {
func mustHex(str string) []byte {
b, err := hex.DecodeString(str)
if err != nil {
panic(err)
}
return b
}
c := Randomx_alloc_cache(0)
var Tests = []testdata{
{"example", []byte("RandomX example key\x00"), []byte("RandomX example input\x00"), "8a48e5f9db45ab79d9080574c4d81954fe6ac63842214aff73c244b26330b7c9"},
{"test_a", []byte("test key 000"), []byte("This is a test"), "639183aae1bf4c9a35884cb46b09cad9175f04efd7684e7262a0ac1c2f0b4e3f"},
{"test_b", []byte("test key 000"), []byte("Lorem ipsum dolor sit amet"), "300a0adb47603dedb42228ccb2b211104f4da45af709cd7547cd049e9489c969"},
{"test_c", []byte("test key 000"), []byte("sed do eiusmod tempor incididunt ut labore et dolore magna aliqua"), "c36d4ed4191e617309867ed66a443be4075014e2b061bcdaf9ce7b721d2b77a8"},
{"test_d", []byte("test key 001"), []byte("sed do eiusmod tempor incididunt ut labore et dolore magna aliqua"), "e9ff4503201c0c2cca26d285c93ae883f9b1d30c9eb240b820756f2d5a7905fc"},
{"test_e", []byte("test key 001"), mustHex("0b0b98bea7e805e0010a2126d287a2a0cc833d312cb786385a7c2f9de69d25537f584a9bc9977b00000000666fd8753bf61a8631f12984e3fd44f4014eca629276817b56f32e9b68bd82f416"), "c56414121acda1713c2f2a819d8ae38aed7c80c35c2a769298d34f03833cd5f1"},
}
for ix, tt := range Tests {
func testFlags(name string, flags Flags) (f Flags, skip bool) {
flags |= GetFlags()
flags &^= RANDOMX_FLAG_LARGE_PAGES
t.Run(string(tt.key)+"_____"+string(tt.input), func(t *testing.T) {
c.Init(tt.key)
nn := strings.Split(name, "/")
switch nn[len(nn)-1] {
case "interpreter":
flags &^= RANDOMX_FLAG_JIT
case "compiler":
flags |= RANDOMX_FLAG_JIT
if !flags.HasJIT() {
return flags, true
}
case "softaes":
flags &^= RANDOMX_FLAG_HARD_AES
case "hardaes":
flags |= RANDOMX_FLAG_HARD_AES
if aes.NewHardAES() == nil {
return flags, true
}
case "largepages":
flags |= RANDOMX_FLAG_LARGE_PAGES
if largePageAllocator == nil {
return flags, true
}
if unsafe.Sizeof(uint(0)) < 8 {
//not 64-bit platforms
return flags, true
}
}
return flags, false
}
func Test_RandomXLight(t *testing.T) {
t.Parallel()
for _, n := range []string{"interpreter", "compiler", "softaes", "hardaes", "largepages"} {
t.Run(n, func(t *testing.T) {
t.Parallel()
tFlags, skip := testFlags(t.Name(), 0)
if skip {
t.Skip("not supported on this platform")
}
c, err := NewCache(tFlags)
if err != nil {
if tFlags.Has(RANDOMX_FLAG_LARGE_PAGES) && errors.Is(err, memory.PageNoMemoryErr) {
t.Skip("cannot allocate memory")
}
t.Fatal(err)
}
defer func() {
err := c.Close()
if err != nil {
@ -62,66 +126,282 @@ func Test_Randomx(t *testing.T) {
}
}()
vm := c.VM_Initialize()
for _, test := range Tests {
t.Run(test.name, func(t *testing.T) {
c.Init(test.key)
var output_hash [32]byte
vm.CalculateHash(tt.input, &output_hash)
vm, err := NewVM(tFlags, c, nil)
if err != nil {
t.Fatal(err)
}
defer func() {
err := vm.Close()
if err != nil {
t.Error(err)
}
}()
actual := fmt.Sprintf("%x", output_hash)
if actual != tt.expected {
t.Errorf("#%d Fib(%v): expected %s, actual %s", ix, tt.key, tt.expected, actual)
var outputHash [RANDOMX_HASH_SIZE]byte
vm.CalculateHash(test.input, &outputHash)
outputHex := hex.EncodeToString(outputHash[:])
if outputHex != test.expected {
t.Errorf("key=%v, input=%v", test.key, test.input)
t.Errorf("expected=%s, actual=%s", test.expected, outputHex)
t.FailNow()
}
})
}
})
}
}
func Test_RandomXBatch(t *testing.T) {
t.Parallel()
for _, n := range []string{"softaes", "hardaes"} {
t.Run(n, func(t *testing.T) {
t.Parallel()
tFlags, skip := testFlags(t.Name(), 0)
if skip {
t.Skip("not supported on this platform")
}
c, err := NewCache(tFlags)
if tFlags.Has(RANDOMX_FLAG_LARGE_PAGES) && errors.Is(err, memory.PageNoMemoryErr) {
t.Skip("cannot allocate memory")
}
if err != nil {
t.Fatal(err)
}
defer func() {
err := c.Close()
if err != nil {
t.Error(err)
}
}()
tests := Tests[1:4]
c.Init(tests[0].key)
vm, err := NewVM(tFlags, c, nil)
if err != nil {
t.Fatal(err)
}
defer func() {
err := vm.Close()
if err != nil {
t.Error(err)
}
}()
var outputHash [3][RANDOMX_HASH_SIZE]byte
vm.CalculateHashFirst(tests[0].input)
vm.CalculateHashNext(tests[1].input, &outputHash[0])
vm.CalculateHashNext(tests[2].input, &outputHash[1])
vm.CalculateHashLast(&outputHash[2])
for i, test := range tests {
outputHex := hex.EncodeToString(outputHash[i][:])
if outputHex != test.expected {
t.Errorf("key=%v, input=%v", test.key, test.input)
t.Errorf("expected=%s, actual=%s", test.expected, outputHex)
t.FailNow()
}
}
})
}
}
func Benchmark_RandomX(b *testing.B) {
func Test_RandomXFull(t *testing.T) {
if testing.Short() {
t.Skip("Skipping full mode with -short")
}
if os.Getenv("CI") != "" {
t.Skip("Skipping full mode in CI environment")
}
for _, n := range []string{"interpreter", "compiler", "softaes", "hardaes", "largepages"} {
t.Run(n, func(t *testing.T) {
tFlags, skip := testFlags(t.Name(), RANDOMX_FLAG_FULL_MEM)
if skip {
t.Skip("not supported on this platform")
}
c, err := NewCache(tFlags)
if tFlags.Has(RANDOMX_FLAG_LARGE_PAGES) && errors.Is(err, memory.PageNoMemoryErr) {
t.Skip("cannot allocate memory")
}
if err != nil {
t.Fatal(err)
}
defer func() {
err := c.Close()
if err != nil {
t.Error(err)
}
}()
dataset, err := NewDataset(tFlags)
if err != nil {
t.Fatal(err)
}
defer func() {
err := dataset.Close()
if err != nil {
t.Error(err)
}
}()
for _, test := range Tests {
t.Run(test.name, func(t *testing.T) {
c.Init(test.key)
dataset.InitDatasetParallel(c, runtime.NumCPU())
vm, err := NewVM(tFlags, nil, dataset)
if err != nil {
t.Fatal(err)
}
defer func() {
err := vm.Close()
if err != nil {
t.Error(err)
}
}()
var outputHash [RANDOMX_HASH_SIZE]byte
vm.CalculateHash(test.input, &outputHash)
outputHex := hex.EncodeToString(outputHash[:])
if outputHex != test.expected {
t.Errorf("key=%v, input=%v", test.key, test.input)
t.Errorf("expected=%s, actual=%s", test.expected, outputHex)
t.FailNow()
}
})
// cleanup between runs
runtime.GC()
}
})
// cleanup 2 GiB between runs
runtime.GC()
}
}
var BenchmarkTest = Tests[0]
var BenchmarkCache *Cache
var BenchmarkDataset *Dataset
var BenchmarkFlags = GetFlags()
func TestMain(m *testing.M) {
if slices.Contains(os.Args, "-test.bench") {
flags := GetFlags()
flags |= RANDOMX_FLAG_FULL_MEM
var err error
//init light and full dataset
BenchmarkCache, err = NewCache(flags | RANDOMX_FLAG_LARGE_PAGES)
if err != nil {
BenchmarkCache, err = NewCache(flags)
if err != nil {
panic(err)
}
}
defer BenchmarkCache.Close()
BenchmarkCache.Init(BenchmarkTest.key)
BenchmarkDataset, err = NewDataset(flags | RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_LARGE_PAGES)
if err != nil {
BenchmarkDataset, err = NewDataset(flags | RANDOMX_FLAG_FULL_MEM)
if err != nil {
panic(err)
}
}
defer BenchmarkDataset.Close()
BenchmarkDataset.InitDatasetParallel(BenchmarkCache, runtime.NumCPU())
}
os.Exit(m.Run())
}
func Benchmark_RandomXLight(b *testing.B) {
b.ReportAllocs()
tt := Tests[0]
vm, err := NewVM(BenchmarkFlags, BenchmarkCache, nil)
if err != nil {
b.Fatal(err)
}
defer vm.Close()
c := Randomx_alloc_cache(0)
c.Init(tt.key)
defer func() {
err := c.Close()
if err != nil {
b.Error(err)
}
}()
vm := c.VM_Initialize()
b.ResetTimer()
for i := 0; i < b.N; i++ {
var output_hash [32]byte
vm.CalculateHash(tt.input, &output_hash)
vm.CalculateHash(BenchmarkTest.input, &output_hash)
runtime.KeepAlive(output_hash)
}
}
func Benchmark_RandomXParallel(b *testing.B) {
func Benchmark_RandomXFull(b *testing.B) {
b.ReportAllocs()
tt := Tests[0]
c := Randomx_alloc_cache(0)
c.Init(tt.key)
defer func() {
err := c.Close()
if err != nil {
b.Error(err)
}
}()
vm, err := NewVM(BenchmarkFlags|RANDOMX_FLAG_FULL_MEM, nil, BenchmarkDataset)
if err != nil {
b.Fatal(err)
}
defer vm.Close()
b.ResetTimer()
for i := 0; i < b.N; i++ {
var output_hash [32]byte
vm.CalculateHash(BenchmarkTest.input, &output_hash)
runtime.KeepAlive(output_hash)
}
}
func Benchmark_RandomXLight_Parallel(b *testing.B) {
b.ReportAllocs()
b.ResetTimer()
b.RunParallel(func(pb *testing.PB) {
var output_hash [32]byte
vm := c.VM_Initialize()
vm, err := NewVM(BenchmarkFlags, BenchmarkCache, nil)
if err != nil {
b.Fatal(err)
}
defer vm.Close()
for pb.Next() {
vm.CalculateHash(tt.input, &output_hash)
vm.CalculateHash(BenchmarkTest.input, &output_hash)
runtime.KeepAlive(output_hash)
}
})
}
func Benchmark_RandomXFull_Parallel(b *testing.B) {
b.ReportAllocs()
b.ResetTimer()
b.RunParallel(func(pb *testing.PB) {
var output_hash [32]byte
vm, err := NewVM(BenchmarkFlags|RANDOMX_FLAG_FULL_MEM, nil, BenchmarkDataset)
if err != nil {
b.Fatal(err)
}
defer vm.Close()
for pb.Next() {
vm.CalculateHash(BenchmarkTest.input, &output_hash)
runtime.KeepAlive(output_hash)
}
})

View file

@ -1,3 +1,29 @@
package randomx
import "unsafe"

// RegistersCount is the number of 64-bit integer registers (r0-r7).
const RegistersCount = 8

// RegistersCountFloat is the number of 128-bit floating point register
// groups (each holding two float64 lanes) in each of F, E and A.
const RegistersCountFloat = 4

// LOW and HIGH index the two float64 lanes of a floating point register group.
const LOW = 0
const HIGH = 1

// RegisterLine is one line of integer registers r0-r7.
// (The stale duplicate declaration using the removed REGISTERSCOUNT
// constant has been dropped; it redeclared the same type.)
type RegisterLine [RegistersCount]uint64

// RegisterFile is the RandomX register file: integer registers R and
// floating point register groups F, E, A, plus the rounding-mode byte FPRC.
type RegisterFile struct {
	R RegisterLine
	F [RegistersCountFloat][2]float64
	E [RegistersCountFloat][2]float64
	A [RegistersCountFloat][2]float64

	FPRC uint8
}

// RegisterFileSize is the byte size of the R/F/E/A registers.
// FPRC is intentionally excluded: Memory/Clear only cover the first
// RegisterFileSize bytes.
const RegisterFileSize = RegistersCount*8 + RegistersCountFloat*2*8*3

// Memory returns the R/F/E/A registers as a flat fixed-size byte view.
func (rf *RegisterFile) Memory() *[RegisterFileSize]byte {
	return (*[RegisterFileSize]byte)(unsafe.Pointer(rf))
}

// Clear zeroes R, F, E and A; FPRC is left untouched.
func (rf *RegisterFile) Clear() {
	clear(rf.Memory()[:])
}

View file

@ -29,7 +29,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package randomx
import "math/bits"
import (
"git.gammaspectra.live/P2Pool/go-randomx/v3/internal/blake2"
"math/bits"
)
type ExecutionPort byte
@ -201,7 +204,7 @@ var buffer3 = []int{4, 9, 3}
var buffer4 = []int{4, 4, 4, 4}
var buffer5 = []int{3, 3, 10}
var Decoder_To_Instruction_Length = [][]int{
var decoderToInstructionSize = [][]int{
buffer0,
buffer1,
buffer2,
@ -258,7 +261,7 @@ func (d DecoderType) String() string {
}
}
func FetchNextDecoder(ins *Instruction, cycle int, mulcount int, gen *Blake2Generator) DecoderType {
func FetchNextDecoder(ins *Instruction, cycle int, mulcount int, gen *blake2.Generator) DecoderType {
if ins.Opcode == S_IMULH_R || ins.Opcode == S_ISMULH_R {
return Decoder3310
@ -295,172 +298,20 @@ func FetchNextDecoder(ins *Instruction, cycle int, mulcount int, gen *Blake2Gene
return Decoder484
}
var slot3 = []*Instruction{&ISUB_R, &IXOR_R} // 3 length instruction will be filled with these
var slot3L = []*Instruction{&ISUB_R, &IXOR_R, &IMULH_R, &ISMULH_R}
var slot4 = []*Instruction{&IROR_C, &IADD_RS}
var slot7 = []*Instruction{&IXOR_C7, &IADD_C7}
var slot8 = []*Instruction{&IXOR_C8, &IADD_C8}
var slot9 = []*Instruction{&IXOR_C9, &IADD_C9}
var slot10 = []*Instruction{&IMUL_RCP}
// SuperScalarInstruction superscalar program is built with superscalar instructions
// SuperScalarInstruction superscalar program is built with superscalar instructions
type SuperScalarInstruction struct {
	Opcode           byte         // S_* superscalar opcode
	Dst_Reg          int          // destination register index; -1 until selected (see Reset)
	Src_Reg          int          // source register index; -1 until selected (see Reset)
	Mod              byte         // mod byte drawn from the generator (only IADD_RS sets it non-zero)
	Imm32            uint32       // 32-bit immediate (shift amount, constant, or RCP divisor)
	Type             int
	OpGroup          int          // scheduling group used to reject trivially optimizable chains
	OpGroupPar       int          // group parameter: a register index or a random tag
	GroupParIsSource int          // non-zero: OpGroupPar tracks the selected source register
	ins              *Instruction // macro-op template this instruction was created from
	CanReuse         bool         // destination may equal source (set for IMULH_R/ISMULH_R)
}
// FixSrcReg falls back to the destination register as the source when no
// source was ever selected (Src_Reg still negative after Reset).
func (sins *SuperScalarInstruction) FixSrcReg() {
	if sins.Src_Reg < 0 {
		sins.Src_Reg = sins.Dst_Reg
	}
}
// Reset returns the instruction to its pre-selection state: an invalid
// opcode (99), unselected (-1) source/destination registers, and cleared
// reuse/group flags.
func (sins *SuperScalarInstruction) Reset() {
	sins.Opcode = 99
	sins.Src_Reg = -1
	sins.Dst_Reg = -1
	sins.CanReuse = false
	sins.GroupParIsSource = 0
}
// create initializes sins from the instruction template ins, drawing
// immediates and mod bits from the Blake2 generator and assigning the
// scheduling group (OpGroup/OpGroupPar) used to avoid optimizable chains.
func create(sins *SuperScalarInstruction, ins *Instruction, gen *Blake2Generator) {
	sins.Reset()
	sins.ins = ins
	sins.OpGroupPar = -1
	sins.Opcode = ins.Opcode

	switch ins.Opcode {
	case S_ISUB_R:
		sins.Mod = 0
		sins.Imm32 = 0
		// grouped with IADD_RS — presumably so add/sub chains share one
		// group; confirm against the reference implementation
		sins.OpGroup = S_IADD_RS
		sins.GroupParIsSource = 1
	case S_IXOR_R:
		sins.Mod = 0
		sins.Imm32 = 0
		sins.OpGroup = S_IXOR_R
		sins.GroupParIsSource = 1
	case S_IADD_RS:
		sins.Mod = gen.GetByte()
		// set modshift on Imm32
		sins.Imm32 = uint32((sins.Mod >> 2) % 4) // bits 2-3
		//sins.Imm32 = 0
		sins.OpGroup = S_IADD_RS
		sins.GroupParIsSource = 1
	case S_IMUL_R:
		sins.Mod = 0
		sins.Imm32 = 0
		sins.OpGroup = S_IMUL_R
		sins.GroupParIsSource = 1
	case S_IROR_C:
		sins.Mod = 0
		// rotate amount must be non-zero
		for sins.Imm32 = 0; sins.Imm32 == 0; {
			sins.Imm32 = uint32(gen.GetByte() & 63)
		}
		sins.OpGroup = S_IROR_C
		sins.OpGroupPar = -1
	case S_IADD_C7, S_IADD_C8, S_IADD_C9:
		sins.Mod = 0
		sins.Imm32 = gen.GetUint32()
		sins.OpGroup = S_IADD_C7
		sins.OpGroupPar = -1
	case S_IXOR_C7, S_IXOR_C8, S_IXOR_C9:
		sins.Mod = 0
		sins.Imm32 = gen.GetUint32()
		sins.OpGroup = S_IXOR_C7
		sins.OpGroupPar = -1
	case S_IMULH_R:
		sins.CanReuse = true
		sins.Mod = 0
		sins.Imm32 = 0
		sins.OpGroup = S_IMULH_R
		sins.OpGroupPar = int(gen.GetUint32())
	case S_ISMULH_R:
		sins.CanReuse = true
		sins.Mod = 0
		sins.Imm32 = 0
		sins.OpGroup = S_ISMULH_R
		sins.OpGroupPar = int(gen.GetUint32())
	case S_IMUL_RCP:
		sins.Mod = 0
		for {
			sins.Imm32 = gen.GetUint32()
			// NOTE(review): Go precedence parses this as (Imm32&Imm32)-1, i.e.
			// Imm32-1, so the loop accepts 0 and powers of two. The intended
			// zero-or-power-of-two rejection is Imm32&(Imm32-1) != 0 — confirm
			// against the reference RandomX superscalar generator.
			if (sins.Imm32&sins.Imm32 - 1) != 0 {
				break
			}
		}
		sins.OpGroup = S_IMUL_RCP
	default:
		panic("should not occur")
	}
}
func CreateSuperScalarInstruction(sins *SuperScalarInstruction, gen *Blake2Generator, instruction_len int, decoder_type int, islast, isfirst bool) {
switch instruction_len {
case 3:
if islast {
create(sins, slot3L[gen.GetByte()&3], gen)
} else {
create(sins, slot3[gen.GetByte()&1], gen)
}
case 4:
//if this is the 4-4-4-4 buffer, issue multiplications as the first 3 instructions
if decoder_type == int(Decoder4444) && !islast {
create(sins, &IMUL_R, gen)
} else {
create(sins, slot4[gen.GetByte()&1], gen)
}
case 7:
create(sins, slot7[gen.GetByte()&1], gen)
case 8:
create(sins, slot8[gen.GetByte()&1], gen)
case 9:
create(sins, slot9[gen.GetByte()&1], gen)
case 10:
create(sins, slot10[0], gen)
default:
panic("should not be possible")
}
}
type SuperScalarProgram []SuperScalarInstruction
func (p SuperScalarProgram) setAddressRegister(addressRegister int) {
p[0].Dst_Reg = addressRegister
func (p SuperScalarProgram) setAddressRegister(addressRegister uint8) {
p[0].Dst = addressRegister
}
func (p SuperScalarProgram) AddressRegister() int {
return p[0].Dst_Reg
func (p SuperScalarProgram) AddressRegister() uint8 {
return p[0].Dst
}
func (p SuperScalarProgram) Program() []SuperScalarInstruction {
return p[1:]
}
func Build_SuperScalar_Program(gen *Blake2Generator) SuperScalarProgram {
func BuildSuperScalarProgram(gen *blake2.Generator) SuperScalarProgram {
cycle := 0
depcycle := 0
//retire_cycle := 0
@ -474,12 +325,7 @@ func Build_SuperScalar_Program(gen *Blake2Generator) SuperScalarProgram {
code_size := 0
program := make(SuperScalarProgram, 1, 512)
preAllocatedRegisters := gen.allocRegIndex[:]
registers := gen.allocRegisters[:]
for i := range registers {
registers[i] = Register{}
}
var registers [8]Register
sins := &SuperScalarInstruction{}
sins.ins = &Instruction{Opcode: S_NOP}
@ -508,7 +354,7 @@ func Build_SuperScalar_Program(gen *Blake2Generator) SuperScalarProgram {
if ports_saturated || program_size >= SuperscalarMaxSize {
break
}
CreateSuperScalarInstruction(sins, gen, Decoder_To_Instruction_Length[int(decoder)][buffer_index], int(decoder), len(Decoder_To_Instruction_Length[decoder]) == (buffer_index+1), buffer_index == 0)
CreateSuperScalarInstruction(sins, gen, decoderToInstructionSize[decoder][buffer_index], decoder, len(decoderToInstructionSize[decoder]) == (buffer_index+1), buffer_index == 0)
macro_op_index = 0
}
@ -529,7 +375,7 @@ func Build_SuperScalar_Program(gen *Blake2Generator) SuperScalarProgram {
if macro_op_index == sins.ins.SrcOP { // FIXME
forward := 0
for ; forward < LOOK_FORWARD_CYCLES && !sins.SelectSource(preAllocatedRegisters, scheduleCycle, registers, gen); forward++ {
for ; forward < LOOK_FORWARD_CYCLES && !sins.SelectSource(scheduleCycle, &registers, gen); forward++ {
scheduleCycle++
cycle++
}
@ -547,7 +393,7 @@ func Build_SuperScalar_Program(gen *Blake2Generator) SuperScalarProgram {
if macro_op_index == sins.ins.DstOP { // FIXME
forward := 0
for ; forward < LOOK_FORWARD_CYCLES && !sins.SelectDestination(preAllocatedRegisters, scheduleCycle, throwAwayCount > 0, registers, gen); forward++ {
for ; forward < LOOK_FORWARD_CYCLES && !sins.SelectDestination(scheduleCycle, throwAwayCount > 0, &registers, gen); forward++ {
scheduleCycle++
cycle++
}
@ -569,9 +415,9 @@ func Build_SuperScalar_Program(gen *Blake2Generator) SuperScalarProgram {
depcycle = scheduleCycle + mop.GetLatency() // calculate when will the result be ready
if macro_op_index == sins.ins.ResultOP { // fix me
registers[sins.Dst_Reg].Latency = depcycle
registers[sins.Dst_Reg].LastOpGroup = sins.OpGroup
registers[sins.Dst_Reg].LastOpPar = sins.OpGroupPar
registers[sins.Dst].Latency = depcycle
registers[sins.Dst].LastOpGroup = sins.OpGroup
registers[sins.Dst].LastOpPar = sins.OpGroupPar
}
@ -609,12 +455,12 @@ func Build_SuperScalar_Program(gen *Blake2Generator) SuperScalarProgram {
if i == 0 {
continue
}
lastdst := asic_latencies[program[i].Dst_Reg] + 1
lastdst := asic_latencies[program[i].Dst] + 1
lastsrc := 0
if program[i].Dst_Reg != program[i].Src_Reg {
lastsrc = asic_latencies[program[i].Src_Reg] + 1
if program[i].Dst != program[i].Src {
lastsrc = asic_latencies[program[i].Src] + 1
}
asic_latencies[program[i].Dst_Reg] = max(lastdst, lastsrc)
asic_latencies[program[i].Dst] = max(lastdst, lastsrc)
}
asic_latency_max := 0
@ -628,7 +474,7 @@ func Build_SuperScalar_Program(gen *Blake2Generator) SuperScalarProgram {
}
// Set AddressRegister hack
program.setAddressRegister(address_reg)
program.setAddressRegister(uint8(address_reg))
return program
}
@ -702,122 +548,101 @@ type Register struct {
//RegisterNeedsSib = 4; //x86 r12 register
}
// RegisterNeedsDisplacement x86 r13 register.
// In x86-64 ModRM addressing, r13 as a base always requires an explicit
// displacement byte; SelectSource special-cases this register for IADD_RS.
const RegisterNeedsDisplacement = 5

// RegisterNeedsSib x86 r12 register.
// In x86-64 ModRM addressing, r12 as a base always requires a SIB byte.
const RegisterNeedsSib = 4
func (sins *SuperScalarInstruction) SelectSource(preAllocatedAvailableRegisters []int, cycle int, Registers []Register, gen *Blake2Generator) bool {
available_registers := preAllocatedAvailableRegisters[:0]
func (sins *SuperScalarInstruction) SelectSource(cycle int, registers *[8]Register, gen *blake2.Generator) bool {
availableRegisters := make([]uint8, 0, 8)
for i := range Registers {
if Registers[i].Latency <= cycle {
available_registers = append(available_registers, i)
for i := range registers {
if registers[i].Latency <= cycle {
availableRegisters = append(availableRegisters, uint8(i))
}
}
if len(available_registers) == 2 && sins.Opcode == S_IADD_RS {
if available_registers[0] == RegisterNeedsDisplacement || available_registers[1] == RegisterNeedsDisplacement {
sins.Src_Reg = RegisterNeedsDisplacement
sins.OpGroupPar = sins.Src_Reg
if len(availableRegisters) == 2 && sins.Opcode == S_IADD_RS {
if availableRegisters[0] == RegisterNeedsDisplacement || availableRegisters[1] == RegisterNeedsDisplacement {
sins.Src = RegisterNeedsDisplacement
sins.OpGroupPar = int(sins.Src)
return true
}
}
if selectRegister(available_registers, gen, &sins.Src_Reg) {
if selectRegister(availableRegisters, gen, &sins.Src) {
if sins.GroupParIsSource == 0 {
} else {
sins.OpGroupPar = sins.Src_Reg
sins.OpGroupPar = int(sins.Src)
}
return true
}
return false
}
func (sins *SuperScalarInstruction) SelectDestination(preAllocatedAvailableRegisters []int, cycle int, allowChainedMul bool, Registers []Register, gen *Blake2Generator) bool {
preAllocatedAvailableRegisters = preAllocatedAvailableRegisters[:0]
func (sins *SuperScalarInstruction) SelectDestination(cycle int, allowChainedMul bool, Registers *[8]Register, gen *blake2.Generator) bool {
var availableRegisters = make([]uint8, 0, 8)
for i := range Registers {
if Registers[i].Latency <= cycle && (sins.CanReuse || i != sins.Src_Reg) &&
if Registers[i].Latency <= cycle && (sins.CanReuse || uint8(i) != sins.Src) &&
(allowChainedMul || sins.OpGroup != S_IMUL_R || Registers[i].LastOpGroup != S_IMUL_R) &&
(Registers[i].LastOpGroup != sins.OpGroup || Registers[i].LastOpPar != sins.OpGroupPar) &&
(sins.Opcode != S_IADD_RS || i != RegisterNeedsDisplacement) {
preAllocatedAvailableRegisters = append(preAllocatedAvailableRegisters, i)
availableRegisters = append(availableRegisters, uint8(i))
}
}
return selectRegister(preAllocatedAvailableRegisters, gen, &sins.Dst_Reg)
return selectRegister(availableRegisters, gen, &sins.Dst)
}
func selectRegister(available_registers []int, gen *Blake2Generator, reg *int) bool {
func selectRegister(availableRegisters []uint8, gen *blake2.Generator, reg *uint8) bool {
index := 0
if len(available_registers) == 0 {
if len(availableRegisters) == 0 {
return false
}
if len(available_registers) > 1 {
if len(availableRegisters) > 1 {
tmp := gen.GetUint32()
index = int(tmp % uint32(len(available_registers)))
index = int(tmp % uint32(len(availableRegisters)))
} else {
index = 0
}
*reg = available_registers[index]
*reg = availableRegisters[index]
return true
}
// Mask is the number of 64-byte cache lines in the RandomX cache minus one —
// presumably used to wrap cache item indices; confirm at call sites.
const Mask = CacheSize/CacheLineSize - 1
// executeSuperscalar interprets the superscalar program p against the
// register line r. This is the non-JIT path; IMUL_RCP uses the reciprocal
// precomputed in Imm64. (Interleaved old/new diff lines collapsed into the
// current Dst/Src field names.)
func executeSuperscalar(p []SuperScalarInstruction, r *RegisterLine) {
	//TODO: produce around (14 * 8 * 8) = 896 different opcodes with hardcoded registers
	for i := range p {
		ins := &p[i]
		switch ins.Opcode {
		case S_ISUB_R:
			r[ins.Dst] -= r[ins.Src]
		case S_IXOR_R:
			r[ins.Dst] ^= r[ins.Src]
		case S_IADD_RS:
			// Imm32 carries the shift amount (mod bits 2-3)
			r[ins.Dst] += r[ins.Src] << ins.Imm32
		case S_IMUL_R:
			r[ins.Dst] *= r[ins.Src]
		case S_IROR_C:
			// rotate right by Imm32 == rotate left by -Imm32
			r[ins.Dst] = bits.RotateLeft64(r[ins.Dst], 0-int(ins.Imm32))
		case S_IADD_C7, S_IADD_C8, S_IADD_C9:
			r[ins.Dst] += signExtend2sCompl(ins.Imm32)
		case S_IXOR_C7, S_IXOR_C8, S_IXOR_C9:
			r[ins.Dst] ^= signExtend2sCompl(ins.Imm32)
		case S_IMULH_R:
			// unsigned high 64 bits of the 128-bit product
			r[ins.Dst], _ = bits.Mul64(r[ins.Dst], r[ins.Src])
		case S_ISMULH_R:
			// signed high 64 bits of the 128-bit product
			r[ins.Dst] = smulh(int64(r[ins.Dst]), int64(r[ins.Src]))
		case S_IMUL_RCP:
			r[ins.Dst] *= ins.Imm64
		}
	}
}
// smulh returns the high 64 bits of the signed 128-bit product a*b.
// It derives the signed high half from the unsigned one: subtract b when
// a is negative and a when b is negative (two's-complement correction).
func smulh(a, b int64) uint64 {
	hi, _ := bits.Mul64(uint64(a), uint64(b))
	correction := (a>>63)&b + (b>>63)&a
	return uint64(int64(hi) - correction)
}
// randomx_reciprocal computes the RandomX fixed-point reciprocal of divisor:
// the largest 64-bit multiplier m such that m*divisor fits the 2^(64+shift)
// scale, built from 2^63/divisor and its remainder. divisor must be non-zero.
func randomx_reciprocal(divisor uint32) uint64 {
	const p2exp63 = uint64(1) << 63
	d := uint64(divisor)
	q, r := p2exp63/d, p2exp63%d
	// shift by the bit length of the divisor to maximize precision
	shift := uint32(bits.Len32(divisor))
	return q<<shift + (r<<shift)/d
}
// signExtend2sCompl widens x to 64 bits, replicating its two's-complement
// sign bit through the upper half.
func signExtend2sCompl(x uint32) uint64 {
	if x&0x80000000 != 0 {
		return uint64(x) | 0xFFFFFFFF00000000
	}
	return uint64(x)
}

View file

@ -1,152 +0,0 @@
//go:build unix && amd64 && !disable_jit
package randomx
import (
"encoding/binary"
)
var REX_SUB_RR = []byte{0x4d, 0x2b}
var REX_MOV_RR64 = []byte{0x49, 0x8b}
var REX_MOV_R64R = []byte{0x4c, 0x8b}
var REX_IMUL_RR = []byte{0x4d, 0x0f, 0xaf}
var REX_IMUL_RM = []byte{0x4c, 0x0f, 0xaf}
var REX_MUL_R = []byte{0x49, 0xf7}
var REX_81 = []byte{0x49, 0x81}
var MOV_RAX_I = []byte{0x48, 0xb8}
var REX_LEA = []byte{0x4f, 0x8d}
var REX_XOR_RR = []byte{0x4D, 0x33}
var REX_XOR_RI = []byte{0x49, 0x81}
var REX_ROT_I8 = []byte{0x49, 0xc1}
// genSIB packs an x86 SIB byte: scale in bits 7-6, index in bits 5-3,
// base in bits 2-0.
func genSIB(scale, index, base int) byte {
	sib := scale << 6
	sib |= index << 3
	sib |= base
	return byte(sib)
}
/*
push rbp
push rbx
push rsi
push r12
push r13
push r14
push r15
mov rbp,rsp
sub rsp,(0x8*7)
mov rsi, rax; # register dataset
prefetchnta byte ptr [rsi]
mov r8, qword ptr [rsi+0]
mov r9, qword ptr [rsi+8]
mov r10, qword ptr [rsi+16]
mov r11, qword ptr [rsi+24]
mov r12, qword ptr [rsi+32]
mov r13, qword ptr [rsi+40]
mov r14, qword ptr [rsi+48]
mov r15, qword ptr [rsi+56]
*/
var codeInitBlock = []byte{0x55, 0x53, 0x56, 0x41, 0x54, 0x41, 0x55, 0x41, 0x56, 0x41, 0x57, 0x48, 0x89, 0xE5, 0x48, 0x83, 0xEC, 0x38, 0x48, 0x89, 0xC6, 0x0F, 0x18, 0x06, 0x4C, 0x8B, 0x06, 0x4C, 0x8B, 0x4E, 0x08, 0x4C, 0x8B, 0x56, 0x10, 0x4C, 0x8B, 0x5E, 0x18, 0x4C, 0x8B, 0x66, 0x20, 0x4C, 0x8B, 0x6E, 0x28, 0x4C, 0x8B, 0x76, 0x30, 0x4C, 0x8B, 0x7E, 0x38}
/*
prefetchw byte ptr [rsi]
mov qword ptr [rsi+0], r8
mov qword ptr [rsi+8], r9
mov qword ptr [rsi+16], r10
mov qword ptr [rsi+24], r11
mov qword ptr [rsi+32], r12
mov qword ptr [rsi+40], r13
mov qword ptr [rsi+48], r14
mov qword ptr [rsi+56], r15
add rsp,(0x8*7)
pop r15
pop r14
pop r13
pop r12
pop rsi
pop rbx
pop rbp
ret
*/
var codeRetBlock = []byte{0x0F, 0x0D, 0x0E, 0x4C, 0x89, 0x06, 0x4C, 0x89, 0x4E, 0x08, 0x4C, 0x89, 0x56, 0x10, 0x4C, 0x89, 0x5E, 0x18, 0x4C, 0x89, 0x66, 0x20, 0x4C, 0x89, 0x6E, 0x28, 0x4C, 0x89, 0x76, 0x30, 0x4C, 0x89, 0x7E, 0x38, 0x48, 0x83, 0xC4, 0x38, 0x41, 0x5F, 0x41, 0x5E, 0x41, 0x5D, 0x41, 0x5C, 0x5E, 0x5B, 0x5D, 0xC3}
// generateSuperscalarCode assembles the superscalar program into native
// amd64 machine code: a prologue that loads the register line into R8-R15
// (codeInitBlock), one instruction encoding per superscalar op, and an
// epilogue that stores R8-R15 back and returns (codeRetBlock). The result
// is mapped into executable memory via mapProgram.
func generateSuperscalarCode(scalarProgram SuperScalarProgram) ProgramFunc {
	var program []byte
	program = append(program, codeInitBlock...)
	p := scalarProgram.Program()
	for i := range p {
		instr := &p[i]
		// mask register indices into 0-7 to map onto the R8-R15 encodings
		dst := instr.Dst_Reg % REGISTERSCOUNT
		src := instr.Src_Reg % REGISTERSCOUNT
		switch instr.Opcode {
		case S_ISUB_R:
			program = append(program, REX_SUB_RR...)
			program = append(program, byte(0xc0+8*dst+src))
		case S_IXOR_R:
			program = append(program, REX_XOR_RR...)
			program = append(program, byte(0xc0+8*dst+src))
		case S_IADD_RS:
			// lea dst, [dst+src*scale] — Imm32 carries the scale bits
			program = append(program, REX_LEA...)
			program = append(program,
				byte(0x04+8*dst),
				genSIB(int(instr.Imm32), src, dst),
			)
		case S_IMUL_R:
			program = append(program, REX_IMUL_RR...)
			program = append(program, byte(0xc0+8*dst+src))
		case S_IROR_C:
			// ror dst, imm8 (amount masked to 0-63)
			program = append(program, REX_ROT_I8...)
			program = append(program,
				byte(0xc8+dst),
				byte(instr.Imm32&63),
			)
		case S_IADD_C7, S_IADD_C8, S_IADD_C9:
			program = append(program, REX_81...)
			program = append(program, byte(0xc0+dst))
			program = binary.LittleEndian.AppendUint32(program, instr.Imm32)
			//TODO: align NOP on C8/C9
		case S_IXOR_C7, S_IXOR_C8, S_IXOR_C9:
			program = append(program, REX_XOR_RI...)
			program = append(program, byte(0xf0+dst))
			program = binary.LittleEndian.AppendUint32(program, instr.Imm32)
			//TODO: align NOP on C8/C9
		case S_IMULH_R:
			// mov rax, dst; mul src; mov dst, rdx — unsigned high half
			program = append(program, REX_MOV_RR64...)
			program = append(program, byte(0xc0+dst))
			program = append(program, REX_MUL_R...)
			program = append(program, byte(0xe0+src))
			program = append(program, REX_MOV_R64R...)
			program = append(program, byte(0xc2+8*dst))
		case S_ISMULH_R:
			// mov rax, dst; imul src; mov dst, rdx — signed high half
			program = append(program, REX_MOV_RR64...)
			program = append(program, byte(0xc0+dst))
			program = append(program, REX_MUL_R...)
			program = append(program, byte(0xe8+src))
			program = append(program, REX_MOV_R64R...)
			program = append(program, byte(0xc2+8*dst))
		case S_IMUL_RCP:
			// mov rax, imm64 (reciprocal computed at emit time); imul dst, rax
			program = append(program, MOV_RAX_I...)
			program = binary.LittleEndian.AppendUint64(program, randomx_reciprocal(instr.Imm32))
			program = append(program, REX_IMUL_RM...)
			program = append(program, byte(0xc0+8*instr.Dst_Reg))
		default:
			panic("unreachable")
		}
	}
	program = append(program, codeRetBlock...)
	return mapProgram(program)
}

157
superscalar_instruction.go Normal file
View file

@ -0,0 +1,157 @@
package randomx
import "git.gammaspectra.live/P2Pool/go-randomx/v3/internal/blake2"
// SuperScalarInstruction superscalar program is built with superscalar instructions
type SuperScalarInstruction struct {
	Opcode byte   // S_* superscalar opcode
	Dst    uint8  // destination register index; 0xff sentinel until selected (see Reset)
	Src    uint8  // source register index; 0xff sentinel until selected (see Reset)
	Mod    byte   // mod byte drawn from the generator (only IADD_RS sets it non-zero)
	Imm32  uint32 // 32-bit immediate (shift amount, constant, or RCP divisor)
	Imm64  uint64 // precomputed reciprocal for IMUL_RCP

	OpGroup          int // scheduling group used to reject trivially optimizable chains
	OpGroupPar       int // group parameter: a register index or a random tag
	GroupParIsSource int // non-zero: OpGroupPar tracks the selected source register

	ins      *Instruction // macro-op template this instruction was created from
	CanReuse bool         // destination may equal source (set for IMULH_R/ISMULH_R)
}
// FixSrcReg falls back to the destination register as the source when no
// source was ever selected (Src still holds the 0xff sentinel set by Reset).
func (sins *SuperScalarInstruction) FixSrcReg() {
	if sins.Src == 0xff {
		sins.Src = sins.Dst
	}
}
// Reset returns the instruction to its pre-selection state: an invalid
// opcode (99), sentinel (0xff) source/destination registers, and cleared
// reuse/group flags.
func (sins *SuperScalarInstruction) Reset() {
	sins.Opcode = 99
	sins.Src = 0xff
	sins.Dst = 0xff
	sins.CanReuse = false
	sins.GroupParIsSource = 0
}
// createSuperScalarInstruction initializes sins from the instruction
// template ins, drawing immediates and mod bits from the Blake2 generator
// and assigning the scheduling group (OpGroup/OpGroupPar) used to avoid
// optimizable chains.
func createSuperScalarInstruction(sins *SuperScalarInstruction, ins *Instruction, gen *blake2.Generator) {
	sins.Reset()
	sins.ins = ins
	sins.OpGroupPar = -1
	sins.Opcode = ins.Opcode

	switch ins.Opcode {
	case S_ISUB_R:
		sins.Mod = 0
		sins.Imm32 = 0
		// grouped with IADD_RS so add/sub chains share one scheduling group
		sins.OpGroup = S_IADD_RS
		sins.GroupParIsSource = 1
	case S_IXOR_R:
		sins.Mod = 0
		sins.Imm32 = 0
		sins.OpGroup = S_IXOR_R
		sins.GroupParIsSource = 1
	case S_IADD_RS:
		sins.Mod = gen.GetByte()
		// set modshift on Imm32
		sins.Imm32 = uint32((sins.Mod >> 2) % 4) // bits 2-3
		sins.OpGroup = S_IADD_RS
		sins.GroupParIsSource = 1
	case S_IMUL_R:
		sins.Mod = 0
		sins.Imm32 = 0
		sins.OpGroup = S_IMUL_R
		sins.GroupParIsSource = 1
	case S_IROR_C:
		sins.Mod = 0
		// rotate amount must be non-zero
		for sins.Imm32 = 0; sins.Imm32 == 0; {
			sins.Imm32 = uint32(gen.GetByte() & 63)
		}
		sins.OpGroup = S_IROR_C
		sins.OpGroupPar = -1
	case S_IADD_C7, S_IADD_C8, S_IADD_C9:
		sins.Mod = 0
		sins.Imm32 = gen.GetUint32()
		sins.OpGroup = S_IADD_C7
		sins.OpGroupPar = -1
	case S_IXOR_C7, S_IXOR_C8, S_IXOR_C9:
		sins.Mod = 0
		sins.Imm32 = gen.GetUint32()
		sins.OpGroup = S_IXOR_C7
		sins.OpGroupPar = -1
	case S_IMULH_R:
		sins.CanReuse = true
		sins.Mod = 0
		sins.Imm32 = 0
		sins.OpGroup = S_IMULH_R
		sins.OpGroupPar = int(gen.GetUint32())
	case S_ISMULH_R:
		sins.CanReuse = true
		sins.Mod = 0
		sins.Imm32 = 0
		sins.OpGroup = S_ISMULH_R
		sins.OpGroupPar = int(gen.GetUint32())
	case S_IMUL_RCP:
		sins.Mod = 0
		// The multiplier must be neither zero (reciprocal would divide by
		// zero) nor a power of two, matching the reference generator.
		// FIX: the previous condition `(sins.Imm32&sins.Imm32 - 1) != 0`
		// parsed as `(Imm32-1) != 0` under Go operator precedence (& binds
		// tighter than -), accepting 0 and powers of two.
		for {
			sins.Imm32 = gen.GetUint32()
			if sins.Imm32&(sins.Imm32-1) != 0 {
				break
			}
		}
		sins.Imm64 = reciprocal(sins.Imm32)
		sins.OpGroup = S_IMUL_RCP
	default:
		panic("should not occur")
	}
}
// Candidate instruction pools per macro-op slot size; a slot of N bytes is
// filled by picking one of these templates from generator output.
var slot3 = []*Instruction{&ISUB_R, &IXOR_R} // 3 length instruction will be filled with these
var slot3L = []*Instruction{&ISUB_R, &IXOR_R, &IMULH_R, &ISMULH_R} // 3-byte slot in last position may also issue high multiplies
var slot4 = []*Instruction{&IROR_C, &IADD_RS}
var slot7 = []*Instruction{&IXOR_C7, &IADD_C7}
var slot8 = []*Instruction{&IXOR_C8, &IADD_C8}
var slot9 = []*Instruction{&IXOR_C9, &IADD_C9}
var slot10 = []*Instruction{&IMUL_RCP}
// CreateSuperScalarInstruction picks and initializes the superscalar
// instruction that fills a macro-op slot of instructionLen bytes.
// last/first flag whether the slot is the last/first of its decoder buffer:
// the last 3-byte slot may also issue high multiplies (slot3L), and the
// 4-4-4-4 decoder issues IMUL_R for every slot except its last.
func CreateSuperScalarInstruction(sins *SuperScalarInstruction, gen *blake2.Generator, instructionLen int, decoderType DecoderType, last, first bool) {
	switch instructionLen {
	case 3:
		if last {
			createSuperScalarInstruction(sins, slot3L[gen.GetByte()&3], gen)
		} else {
			createSuperScalarInstruction(sins, slot3[gen.GetByte()&1], gen)
		}
	case 4:
		//if this is the 4-4-4-4 buffer, issue multiplications as the first 3 instructions
		if decoderType == Decoder4444 && !last {
			createSuperScalarInstruction(sins, &IMUL_R, gen)
		} else {
			createSuperScalarInstruction(sins, slot4[gen.GetByte()&1], gen)
		}
	case 7:
		createSuperScalarInstruction(sins, slot7[gen.GetByte()&1], gen)
	case 8:
		createSuperScalarInstruction(sins, slot8[gen.GetByte()&1], gen)
	case 9:
		createSuperScalarInstruction(sins, slot9[gen.GetByte()&1], gen)
	case 10:
		createSuperScalarInstruction(sins, slot10[0], gen)
	default:
		panic("should not be possible")
	}
}

101
superscalar_jit_amd64.go Normal file
View file

@ -0,0 +1,101 @@
//go:build unix && amd64 && !disable_jit && !purego
package randomx
import (
"encoding/binary"
"git.gammaspectra.live/P2Pool/go-randomx/v3/internal/memory"
"unsafe"
)
//go:noescape
func superscalar_run(rf, jmp uintptr)
// Execute runs the JIT-compiled superscalar program. rf is the address of
// the RegisterLine to operate on; the machine code itself is the byte slice
// f, whose data pointer is passed to the assembly trampoline.
// Panics if the program is nil.
func (f SuperScalarProgramFunc) Execute(rf uintptr) {
	if f == nil {
		panic("program is nil")
	}
	superscalar_run(rf, uintptr(unsafe.Pointer(unsafe.SliceData(f))))
}
// generateSuperscalarCode assembles the superscalar program into native
// amd64 machine code. The emitted code assumes the register line r0-r7
// already lives in R8-R15 (loaded/stored by the superscalar_run trampoline)
// and ends with a RET. Returns nil if executable page memory cannot be
// allocated.
func generateSuperscalarCode(scalarProgram SuperScalarProgram) SuperScalarProgramFunc {
	var program []byte

	p := scalarProgram.Program()
	for i := range p {
		instr := &p[i]

		// mask register indices into 0-7 to map onto the R8-R15 encodings
		dst := instr.Dst % RegistersCount
		src := instr.Src % RegistersCount

		switch instr.Opcode {
		case S_ISUB_R:
			program = append(program, REX_SUB_RR...)
			program = append(program, byte(0xc0+8*dst+src))
		case S_IXOR_R:
			program = append(program, REX_XOR_RR...)
			program = append(program, byte(0xc0+8*dst+src))
		case S_IADD_RS:
			// lea dst, [dst+src*scale] — Imm32 carries the scale bits
			program = append(program, REX_LEA...)
			program = append(program,
				byte(0x04+8*dst),
				genSIB(int(instr.Imm32), int(src), int(dst)),
			)
		case S_IMUL_R:
			program = append(program, REX_IMUL_RR...)
			program = append(program, byte(0xc0+8*dst+src))
		case S_IROR_C:
			// ror dst, imm8 (amount masked to 0-63)
			program = append(program, REX_ROT_I8...)
			program = append(program,
				byte(0xc8+dst),
				byte(instr.Imm32&63),
			)
		case S_IADD_C7, S_IADD_C8, S_IADD_C9:
			program = append(program, REX_81...)
			program = append(program, byte(0xc0+dst))
			program = binary.LittleEndian.AppendUint32(program, instr.Imm32)
			//TODO: align NOP on C8/C9
		case S_IXOR_C7, S_IXOR_C8, S_IXOR_C9:
			program = append(program, REX_XOR_RI...)
			program = append(program, byte(0xf0+dst))
			program = binary.LittleEndian.AppendUint32(program, instr.Imm32)
			//TODO: align NOP on C8/C9
		case S_IMULH_R:
			// mov rax, dst; mul src; mov dst, rdx — unsigned high half
			program = append(program, REX_MOV_RR64...)
			program = append(program, byte(0xc0+dst))
			program = append(program, REX_MUL_R...)
			program = append(program, byte(0xe0+src))
			program = append(program, REX_MOV_R64R...)
			program = append(program, byte(0xc2+8*dst))
		case S_ISMULH_R:
			// mov rax, dst; imul src; mov dst, rdx — signed high half
			program = append(program, REX_MOV_RR64...)
			program = append(program, byte(0xc0+dst))
			program = append(program, REX_MUL_R...)
			program = append(program, byte(0xe8+src))
			program = append(program, REX_MOV_R64R...)
			program = append(program, byte(0xc2+8*dst))
		case S_IMUL_RCP:
			// mov rax, imm64 (reciprocal precomputed in Imm64); imul dst, rax
			program = append(program, MOV_RAX_I...)
			program = binary.LittleEndian.AppendUint64(program, instr.Imm64)
			program = append(program, REX_IMUL_RM...)
			program = append(program, byte(0xc0+8*instr.Dst))
		default:
			panic("unreachable")
		}
	}
	program = append(program, RET)

	pagedMemory, err := memory.AllocateSlice[byte](pageAllocator, len(program))
	if err != nil {
		return nil
	}
	copy(pagedMemory, program)

	return pagedMemory
}

42
superscalar_jit_amd64.s Normal file
View file

@ -0,0 +1,42 @@
//go:build unix && amd64 && !disable_jit && !purego
#include "textflag.h"
// superscalar_run(rf, jmp uintptr): trampoline for the JIT-compiled
// superscalar program. Loads the 8×uint64 register line at rf into R8-R15,
// CALLs the generated code at jmp (which ends in RET), then stores R8-R15
// back to rf.
TEXT ·superscalar_run(SB),$0-16
	MOVQ rf+0(FP), SI
	PREFETCHNTA 0(SI)
	// move register line to registers
	MOVQ 0(SI), R8
	MOVQ 8(SI), R9
	MOVQ 16(SI), R10
	MOVQ 24(SI), R11
	MOVQ 32(SI), R12
	MOVQ 40(SI), R13
	MOVQ 48(SI), R14
	MOVQ 56(SI), R15

	MOVQ jmp+8(FP), AX
	// jump to JIT code
	CALL AX

	// prefetchw BYTE PTR [rsi] — emitted as raw bytes; the Go assembler
	// has no PREFETCHW mnemonic here
	// PREFETCHW 0(SI)
	BYTE $0x0F
	BYTE $0x0D
	BYTE $0x0E

	// move registers back to register line
	MOVQ R8, 0(SI)
	MOVQ R9, 8(SI)
	MOVQ R10, 16(SI)
	MOVQ R11, 24(SI)
	MOVQ R12, 32(SI)
	MOVQ R13, 40(SI)
	MOVQ R14, 48(SI)
	MOVQ R15, 56(SI)

	RET

View file

@ -1,8 +1,12 @@
//go:build !unix || !amd64 || disable_jit
//go:build !unix || !amd64 || purego || disable_jit
package randomx
func (f SuperScalarProgramFunc) Execute(rf uintptr) {
}
// generateSuperscalarCode
func generateSuperscalarCode(scalarProgram SuperScalarProgram) ProgramFunc {
func generateSuperscalarCode(scalarProgram SuperScalarProgram) SuperScalarProgramFunc {
return nil
}

487
vm.go
View file

@ -30,267 +30,384 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package randomx
import (
"git.gammaspectra.live/P2Pool/go-randomx/v2/aes"
"git.gammaspectra.live/P2Pool/go-randomx/v2/asm"
"errors"
"git.gammaspectra.live/P2Pool/go-randomx/v3/internal/aes"
"git.gammaspectra.live/P2Pool/go-randomx/v3/internal/memory"
"math"
"runtime"
"unsafe"
)
import "encoding/binary"
import "golang.org/x/crypto/blake2b"
type REG struct {
Hi uint64
Lo uint64
}
type VM struct {
StateStart [64]byte
buffer [RANDOMX_PROGRAM_SIZE*8 + 16*8]byte // first 128 bytes are entropy below rest are program bytes
Prog []byte
ScratchPad [ScratchpadSize]byte
pad *ScratchPad
ByteCode [RANDOMX_PROGRAM_SIZE]InstructionByteCode
flags Flags
// program configuration see program.hpp
// buffer first 128 bytes are entropy below rest are program bytes
buffer [16*8 + RANDOMX_PROGRAM_SIZE*8]byte
entropy [16]uint64
hashState [blake2b.Size]byte
reg REGISTER_FILE // the register file
mem MemoryRegisters
config Config // configuration
datasetOffset uint64
registerFile *RegisterFile
Dataset Randomx_Dataset
AES aes.AES
Cache *Randomx_Cache // randomx cache
Cache *Cache
Dataset *Dataset
program ByteCode
jitProgram VMProgramFunc
}
// MaskRegisterExponentMantissa keeps only the dynamic mantissa bits of f
// (selected by dynamicMantissaMask) and ORs in the fixed sign/exponent bits
// from mode; used to mask the E register group after each scratchpad load.
func MaskRegisterExponentMantissa(f float64, mode uint64) float64 {
	return math.Float64frombits((math.Float64bits(f) & dynamicMantissaMask) | mode)
}
type Config struct {
eMask [2]uint64
readReg [4]uint64
}
type REGISTER_FILE struct {
r RegisterLine
f [4][2]float64
e [4][2]float64
a [4][2]float64
}
type MemoryRegisters struct {
mx, ma uint64
}
const LOW = 0
const HIGH = 1
// calculate hash based on input
func (vm *VM) Run(input_hash [64]byte) {
aes.FillAes4Rx4(input_hash, vm.buffer[:])
for i := range vm.entropy {
vm.entropy[i] = binary.LittleEndian.Uint64(vm.buffer[i*8:])
// NewVM Creates and initializes a RandomX virtual machine.
// *
// * @param flags is any combination of these 5 flags (each flag can be set or not set):
// * RANDOMX_FLAG_LARGE_PAGES - allocate scratchpad memory in large pages
// * RANDOMX_FLAG_HARD_AES - virtual machine will use hardware accelerated AES
// * RANDOMX_FLAG_FULL_MEM - virtual machine will use the full dataset
// * RANDOMX_FLAG_JIT - virtual machine will use a JIT compiler
// * RANDOMX_FLAG_SECURE - when combined with RANDOMX_FLAG_JIT, the JIT pages are never
// * writable and executable at the same time (W^X policy)
// * The numeric values of the first 4 flags are ordered so that a higher value will provide
// * faster hash calculation and a lower numeric value will provide higher portability.
// * Using RANDOMX_FLAG_DEFAULT (all flags not set) works on all platforms, but is the slowest.
// * @param cache is a pointer to an initialized randomx_cache structure. Can be
// * NULL if RANDOMX_FLAG_FULL_MEM is set.
// * @param dataset is a pointer to a randomx_dataset structure. Can be NULL
// * if RANDOMX_FLAG_FULL_MEM is not set.
// *
// * @return Pointer to an initialized randomx_vm structure.
// * Returns NULL if:
// * (1) Scratchpad memory allocation fails.
// * (2) The requested initialization flags are not supported on the current platform.
// * (3) cache parameter is NULL and RANDOMX_FLAG_FULL_MEM is not set
// * (4) dataset parameter is NULL and RANDOMX_FLAG_FULL_MEM is set
// */
// NewVM creates and initializes a RandomX virtual machine (see the flag
// documentation above). cache may be nil only with RANDOMX_FLAG_FULL_MEM;
// dataset may be nil only without it.
func NewVM(flags Flags, cache *Cache, dataset *Dataset) (*VM, error) {
	if cache == nil && !flags.Has(RANDOMX_FLAG_FULL_MEM) {
		return nil, errors.New("nil cache in light mode")
	}
	if dataset == nil && flags.Has(RANDOMX_FLAG_FULL_MEM) {
		return nil, errors.New("nil dataset in full mode")
	}

	// Allocate scratchpad and register file cache-line aligned.
	// FIX: the previous code discarded these allocations (`_ = pad`) and
	// filled the struct with plain new(...) values, losing the alignment
	// guarantee — confirm against the aligned-allocator change upstream.
	pad, err := memory.Allocate[ScratchPad](cacheLineAlignedAllocator)
	if err != nil {
		return nil, err
	}
	registerFile, err := memory.Allocate[RegisterFile](cacheLineAlignedAllocator)
	if err != nil {
		return nil, err
	}

	vm := &VM{
		Cache:        cache,
		Dataset:      dataset,
		flags:        flags,
		pad:          pad,
		registerFile: registerFile,
	}

	if flags.Has(RANDOMX_FLAG_HARD_AES) {
		vm.AES = aes.NewHardAES()
	}
	// fallback to software AES when hardware AES is unavailable or not requested
	if vm.AES == nil {
		vm.AES = aes.NewSoftAES()
	}

	if flags.HasJIT() {
		vm.jitProgram, err = memory.AllocateSlice[byte](pageAllocator, int(RandomXCodeSize))
		if err != nil {
			return nil, err
		}
		if !flags.Has(RANDOMX_FLAG_SECURE) {
			// without W^X, map the JIT buffer read-write-execute once up front
			err = memory.PageReadWriteExecute(vm.jitProgram)
			if err != nil {
				vm.jitProgram.Close()
				return nil, err
			}
		}
	}

	return vm, nil
}
// run calculate hash based on input. Not thread safe.
// Warning: Underlying callers will run float64 SetRoundingMode directly
// It is the caller's responsibility to set and restore the mode to IEEE 754 roundTiesToEven between full executions
// Additionally, runtime.LockOSThread and defer runtime.UnlockOSThread is recommended to prevent other goroutines sharing these changes
func (vm *VM) run() {
// buffer first 128 bytes are entropy below rest are program bytes
vm.AES.FillAes4Rx4(vm.hashState, vm.buffer[:])
entropy := (*[16]uint64)(unsafe.Pointer(&vm.buffer))
// do more initialization before we run
for i := range vm.entropy[:8] {
vm.reg.a[i/2][i%2] = math.Float64frombits(getSmallPositiveFloatBits(vm.entropy[i]))
reg := vm.registerFile
reg.Clear()
// initialize constant registers
for i := range entropy[:8] {
reg.A[i/2][i%2] = SmallPositiveFloatBits(entropy[i])
}
vm.mem.ma = vm.entropy[8] & CacheLineAlignMask
vm.mem.mx = vm.entropy[10]
// memory registers
var ma, mx uint32
addressRegisters := vm.entropy[12]
for i := range vm.config.readReg {
vm.config.readReg[i] = uint64(i*2) + (addressRegisters & 1)
ma = uint32(entropy[8] & CacheLineAlignMask)
mx = uint32(entropy[10])
addressRegisters := entropy[12]
var readReg [4]uint64
for i := range readReg {
readReg[i] = uint64(i*2) + (addressRegisters & 1)
addressRegisters >>= 1
}
vm.datasetOffset = (vm.entropy[13] % (DATASETEXTRAITEMS + 1)) * CacheLineSize
vm.config.eMask[LOW] = getFloatMask(vm.entropy[14])
vm.config.eMask[HIGH] = getFloatMask(vm.entropy[15])
datasetOffset := (entropy[13] % (DatasetExtraItems + 1)) * CacheLineSize
vm.Compile_TO_Bytecode()
eMask := [2]uint64{ExponentMask(entropy[14]), ExponentMask(entropy[15])}
spAddr0 := vm.mem.mx
spAddr1 := vm.mem.ma
prog := vm.buffer[len(entropy)*8:]
CompileProgramToByteCode(prog, &vm.program)
var jitProgram VMProgramFunc
if vm.jitProgram != nil {
if vm.Dataset == nil { //light mode
if vm.flags.Has(RANDOMX_FLAG_SECURE) {
err := memory.PageReadWrite(vm.jitProgram)
if err != nil {
panic(err)
}
jitProgram = vm.program.generateCode(vm.jitProgram, nil)
err = memory.PageReadExecute(vm.jitProgram)
if err != nil {
panic(err)
}
} else {
jitProgram = vm.program.generateCode(vm.jitProgram, nil)
}
} else {
// full mode and we have JIT
if vm.flags.Has(RANDOMX_FLAG_SECURE) {
err := memory.PageReadWrite(vm.jitProgram)
if err != nil {
panic(err)
}
jitProgram = vm.program.generateCode(vm.jitProgram, &readReg)
err = memory.PageReadExecute(vm.jitProgram)
if err != nil {
panic(err)
}
} else {
jitProgram = vm.program.generateCode(vm.jitProgram, &readReg)
}
vm.jitProgram.ExecuteFull(reg, vm.pad, &vm.Dataset.Memory()[datasetOffset/CacheLineSize], RANDOMX_PROGRAM_ITERATIONS, ma, mx, eMask)
return
}
}
spAddr0 := uint64(mx)
spAddr1 := uint64(ma)
var rlCache RegisterLine
for ic := 0; ic < RANDOMX_PROGRAM_ITERATIONS; ic++ {
spMix := vm.reg.r[vm.config.readReg[0]] ^ vm.reg.r[vm.config.readReg[1]]
spMix := reg.R[readReg[0]] ^ reg.R[readReg[1]]
spAddr0 ^= spMix
spAddr0 &= ScratchpadL3Mask64
spAddr1 ^= spMix >> 32
spAddr1 &= ScratchpadL3Mask64
for i := uint64(0); i < REGISTERSCOUNT; i++ {
vm.reg.r[i] ^= vm.Load64(spAddr0 + 8*i)
//TODO: optimize these loads!
for i := uint64(0); i < RegistersCount; i++ {
reg.R[i] ^= vm.pad.Load64(uint32(spAddr0 + 8*i))
}
for i := uint64(0); i < REGISTERCOUNTFLT; i++ {
vm.reg.f[i] = vm.Load32FA(spAddr1 + 8*i)
for i := uint64(0); i < RegistersCountFloat; i++ {
reg.F[i] = vm.pad.Load32FA(uint32(spAddr1 + 8*i))
}
for i := uint64(0); i < REGISTERCOUNTFLT; i++ {
vm.reg.e[i] = vm.Load32FA(spAddr1 + 8*(i+REGISTERCOUNTFLT))
for i := uint64(0); i < RegistersCountFloat; i++ {
reg.E[i] = vm.pad.Load32FA(uint32(spAddr1 + 8*(i+RegistersCountFloat)))
vm.reg.e[i][LOW] = MaskRegisterExponentMantissa(vm.reg.e[i][LOW], vm.config.eMask[LOW])
vm.reg.e[i][HIGH] = MaskRegisterExponentMantissa(vm.reg.e[i][HIGH], vm.config.eMask[HIGH])
reg.E[i][LOW] = MaskRegisterExponentMantissa(reg.E[i][LOW], eMask[LOW])
reg.E[i][HIGH] = MaskRegisterExponentMantissa(reg.E[i][HIGH], eMask[HIGH])
}
// todo: pass register file directly!
vm.InterpretByteCode()
// run the actual bytecode
if jitProgram != nil {
// light mode
jitProgram.Execute(reg, vm.pad, eMask)
} else {
vm.program.Execute(reg, vm.pad, eMask)
}
vm.mem.mx ^= vm.reg.r[vm.config.readReg[2]] ^ vm.reg.r[vm.config.readReg[3]]
vm.mem.mx &= CacheLineAlignMask
mx ^= uint32(reg.R[readReg[2]] ^ reg.R[readReg[3]])
mx &= uint32(CacheLineAlignMask)
vm.Dataset.PrefetchDataset(vm.datasetOffset + vm.mem.mx)
// execute diffuser superscalar program to get dataset 64 bytes
vm.Dataset.ReadDataset(vm.datasetOffset+vm.mem.ma, &vm.reg.r, &rlCache)
if vm.Dataset != nil {
// full mode
vm.Dataset.prefetchDataset(datasetOffset + uint64(mx))
// load output from superscalar program to get dataset 64 bytes
vm.Dataset.readDataset(datasetOffset+uint64(ma), &reg.R)
} else {
// light mode
// execute output from superscalar program to get dataset 64 bytes
vm.Cache.initDataset(&rlCache, (datasetOffset+uint64(ma))/CacheLineSize)
for i := range reg.R {
reg.R[i] ^= rlCache[i]
}
}
// swap the elements
vm.mem.mx, vm.mem.ma = vm.mem.ma, vm.mem.mx
mx, ma = ma, mx
for i := uint64(0); i < REGISTERSCOUNT; i++ {
binary.LittleEndian.PutUint64(vm.ScratchPad[spAddr1+8*i:], vm.reg.r[i])
for i := uint64(0); i < RegistersCount; i++ {
vm.pad.Store64(uint32(spAddr1+8*i), reg.R[i])
}
for i := uint64(0); i < REGISTERCOUNTFLT; i++ {
vm.reg.f[i][LOW] = math.Float64frombits(math.Float64bits(vm.reg.f[i][LOW]) ^ math.Float64bits(vm.reg.e[i][LOW]))
vm.reg.f[i][HIGH] = math.Float64frombits(math.Float64bits(vm.reg.f[i][HIGH]) ^ math.Float64bits(vm.reg.e[i][HIGH]))
for i := uint64(0); i < RegistersCountFloat; i++ {
reg.F[i][LOW] = Xor(reg.F[i][LOW], reg.E[i][LOW])
reg.F[i][HIGH] = Xor(reg.F[i][HIGH], reg.E[i][HIGH])
binary.LittleEndian.PutUint64(vm.ScratchPad[spAddr0+16*i:], math.Float64bits(vm.reg.f[i][LOW]))
binary.LittleEndian.PutUint64(vm.ScratchPad[spAddr0+16*i+8:], math.Float64bits(vm.reg.f[i][HIGH]))
vm.pad.Store64(uint32(spAddr0+16*i), math.Float64bits(reg.F[i][LOW]))
vm.pad.Store64(uint32(spAddr0+16*i+8), math.Float64bits(reg.F[i][HIGH]))
}
spAddr0 = 0
spAddr1 = 0
}
}
func (vm *VM) InitScratchpad(seed *[64]byte) {
// calculate and fill scratchpad
clear(vm.ScratchPad[:])
aes.FillAes1Rx4(seed, vm.ScratchPad[:])
func (vm *VM) initScratchpad(seed *[64]byte) {
clear(vm.pad[:])
vm.AES.FillAes1Rx4(seed, vm.pad[:])
}
func (vm *VM) CalculateHash(input []byte, output *[32]byte) {
var buf [8]byte
func (vm *VM) runLoops() {
if lockThreadDueToRoundingMode {
// Lock thread due to rounding mode flags
runtime.LockOSThread()
defer runtime.UnlockOSThread()
}
// Lock thread due to rounding mode flags
runtime.LockOSThread()
defer runtime.UnlockOSThread()
//restore rounding mode to golang expected one
defer asm.SetRoundingMode(asm.RoundingModeToNearest)
// always force a restore before startup
ResetRoundingMode(vm.registerFile)
// reset rounding mode if new hash being calculated
asm.SetRoundingMode(asm.RoundingModeToNearest)
tempHash := blake2b.Sum512(input)
vm.InitScratchpad(&tempHash)
hash512, _ := blake2b.New512(nil)
// restore rounding mode at the end
defer ResetRoundingMode(vm.registerFile)
for chain := 0; chain < RANDOMX_PROGRAM_COUNT-1; chain++ {
vm.Run(tempHash)
vm.run()
hash512.Reset()
for i := range vm.reg.r {
binary.LittleEndian.PutUint64(buf[:], vm.reg.r[i])
hash512.Write(buf[:])
}
for i := range vm.reg.f {
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(vm.reg.f[i][LOW]))
hash512.Write(buf[:])
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(vm.reg.f[i][HIGH]))
hash512.Write(buf[:])
}
for i := range vm.reg.e {
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(vm.reg.e[i][LOW]))
hash512.Write(buf[:])
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(vm.reg.e[i][HIGH]))
hash512.Write(buf[:])
}
for i := range vm.reg.a {
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(vm.reg.a[i][LOW]))
hash512.Write(buf[:])
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(vm.reg.a[i][HIGH]))
hash512.Write(buf[:])
}
hash512.Sum(tempHash[:0])
// write R, F, E, A registers
vm.hashState = blake2b.Sum512(vm.registerFile.Memory()[:])
}
// final loop executes here
vm.Run(tempHash)
vm.run()
}
// now hash the scratch pad and place into register a
aes.HashAes1Rx4(vm.ScratchPad[:], &tempHash)
hash256, _ := blake2b.New256(nil)
hash256.Reset()
for i := range vm.reg.r {
binary.LittleEndian.PutUint64(buf[:], vm.reg.r[i])
hash256.Write(buf[:])
// SetCache Reinitializes a virtual machine with a new Cache.
// This function should be called anytime the Cache is reinitialized with a new key.
// Does nothing if called with a Cache containing the same key value as already set.
// VM must be initialized without RANDOMX_FLAG_FULL_MEM.
func (vm *VM) SetCache(cache *Cache) {
if vm.flags.Has(RANDOMX_FLAG_FULL_MEM) {
panic("unsupported")
}
vm.Cache = cache
//todo
}
for i := range vm.reg.f {
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(vm.reg.f[i][LOW]))
hash256.Write(buf[:])
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(vm.reg.f[i][HIGH]))
hash256.Write(buf[:])
// SetDataset Reinitializes a virtual machine with a new Dataset.
// VM must be initialized with RANDOMX_FLAG_FULL_MEM.
func (vm *VM) SetDataset(dataset *Dataset) {
if !vm.flags.Has(RANDOMX_FLAG_FULL_MEM) {
panic("unsupported")
}
vm.Dataset = dataset
}
for i := range vm.reg.e {
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(vm.reg.e[i][LOW]))
hash256.Write(buf[:])
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(vm.reg.e[i][HIGH]))
hash256.Write(buf[:])
// CalculateHash Calculates a RandomX hash value.
func (vm *VM) CalculateHash(input []byte, output *[RANDOMX_HASH_SIZE]byte) {
vm.hashState = blake2b.Sum512(input)
vm.initScratchpad(&vm.hashState)
vm.runLoops()
// now hash the scratch pad as it will act as register A
vm.AES.HashAes1Rx4(vm.pad[:], &vm.hashState)
regMem := vm.registerFile.Memory()
// write hash onto register A
copy(regMem[RegisterFileSize-RegistersCountFloat*2*8:], vm.hashState[:])
// write R, F, E, A registers
*output = blake2b.Sum256(regMem[:])
}
// CalculateHashFirst will begin a hash calculation.
func (vm *VM) CalculateHashFirst(input []byte) {
vm.hashState = blake2b.Sum512(input)
vm.initScratchpad(&vm.hashState)
}
// CalculateHashNext will output the hash value of the previous input and begin the calculation of the next hash.
func (vm *VM) CalculateHashNext(nextInput []byte, output *[RANDOMX_HASH_SIZE]byte) {
vm.runLoops()
// now hash the scratch pad as it will act as register A
vm.AES.HashAes1Rx4(vm.pad[:], &vm.hashState)
// Finish current hash and fill the scratchpad for the next hash at the same time
regMem := vm.registerFile.Memory()
vm.hashState = blake2b.Sum512(nextInput)
// write hash onto register A
vm.AES.HashAndFillAes1Rx4(vm.pad[:], (*[64]byte)(unsafe.Pointer(unsafe.SliceData(regMem[RegisterFileSize-RegistersCountFloat*2*8:]))), &vm.hashState)
runtime.KeepAlive(regMem)
// write R, F, E, A registers
*output = blake2b.Sum256(regMem[:])
}
// CalculateHashLast will output the hash value of the previous input.
func (vm *VM) CalculateHashLast(output *[RANDOMX_HASH_SIZE]byte) {
vm.runLoops()
// now hash the scratch pad as it will act as register A
vm.AES.HashAes1Rx4(vm.pad[:], &vm.hashState)
regMem := vm.registerFile.Memory()
// write hash onto register A
copy(regMem[RegisterFileSize-RegistersCountFloat*2*8:], vm.hashState[:])
// write R, F, E, A registers
*output = blake2b.Sum256(regMem[:])
}
// Close Releases all memory occupied by the structure.
func (vm *VM) Close() error {
memory.Free(cacheLineAlignedAllocator, vm.pad)
memory.Free(cacheLineAlignedAllocator, vm.registerFile)
if vm.jitProgram != nil {
return vm.jitProgram.Close()
}
// copy tempHash as it first copied to register and then hashed
hash256.Write(tempHash[:])
hash256.Sum(output[:0])
}
const mask22bit = (uint64(1) << 22) - 1
func getSmallPositiveFloatBits(entropy uint64) uint64 {
exponent := entropy >> 59 //0..31
mantissa := entropy & mantissaMask
exponent += exponentBias
exponent &= exponentMask
exponent = exponent << mantissaSize
return exponent | mantissa
}
func getStaticExponent(entropy uint64) uint64 {
exponent := constExponentBits
exponent |= (entropy >> (64 - staticExponentBits)) << dynamicExponentBits
exponent <<= mantissaSize
return exponent
}
func getFloatMask(entropy uint64) uint64 {
return (entropy & mask22bit) | getStaticExponent(entropy)
return nil
}

91
vm_bytecode.go Normal file
View file

@ -0,0 +1,91 @@
package randomx
// ByteCodeInstruction is one decoded RandomX instruction in its
// interpreter/JIT-friendly form: operand registers, pre-computed masks and
// the sign-extended immediate are all resolved at compile time.
type ByteCodeInstruction struct {
	Dst, Src byte
	ImmB     uint8
	Opcode   ByteCodeInstructionOp
	MemMask  uint32
	Imm      uint64
	EMask    uint64
}

// jumpTarget reconstructs the signed 16-bit branch target that is packed
// into ImmB (high byte) and Src (low byte).
func (i ByteCodeInstruction) jumpTarget() int {
	packed := uint16(i.Src) | (uint16(i.ImmB) << 8)
	return int(int16(packed))
}

// getScratchpadAddress returns the masked scratchpad offset for a
// register-relative memory operand (ptr + Imm, truncated and masked).
func (i ByteCodeInstruction) getScratchpadAddress(ptr uint64) uint32 {
	addr := ptr + i.Imm
	return uint32(addr) & i.MemMask
}

// getScratchpadZeroAddress returns the masked scratchpad offset for an
// absolute (zero-base) memory operand.
func (i ByteCodeInstruction) getScratchpadZeroAddress() uint32 {
	return i.MemMask & uint32(i.Imm)
}
// ByteCode is a fully decoded RandomX program: one ByteCodeInstruction per
// original program instruction, in program order.
type ByteCode [RANDOMX_PROGRAM_SIZE]ByteCodeInstruction

// ByteCodeInstructionOp identifies the concrete operation of a decoded
// instruction. The *_I variants carry an immediate operand and the *_MZ
// variants address the scratchpad with a constant (zero-base) offset.
// NOTE: the iota ordering below is relied upon only as a closed enum; do not
// reorder without checking every switch over this type.
type ByteCodeInstructionOp int

const (
	VM_NOP = ByteCodeInstructionOp(iota)
	// integer arithmetic
	VM_IADD_RS
	VM_IADD_M
	VM_IADD_MZ
	VM_ISUB_R
	VM_ISUB_I
	VM_ISUB_M
	VM_ISUB_MZ
	VM_IMUL_R
	VM_IMUL_I
	VM_IMUL_M
	VM_IMUL_MZ
	// high-half multiplies (unsigned / signed)
	VM_IMULH_R
	VM_IMULH_M
	VM_IMULH_MZ
	VM_ISMULH_R
	VM_ISMULH_M
	VM_ISMULH_MZ
	VM_INEG_R
	// bitwise / rotate
	VM_IXOR_R
	VM_IXOR_I
	VM_IXOR_M
	VM_IXOR_MZ
	VM_IROR_R
	VM_IROR_I
	VM_IROL_R
	VM_IROL_I
	VM_ISWAP_R
	// floating point (F group additive, E group multiplicative)
	VM_FSWAP_RF
	VM_FSWAP_RE
	VM_FADD_R
	VM_FADD_M
	VM_FSUB_R
	VM_FSUB_M
	VM_FSCAL_R
	VM_FMUL_R
	VM_FDIV_M
	VM_FSQRT_R
	// control / store
	VM_CFROUND
	VM_CBRANCH
	VM_ISTORE
)

466
vm_bytecode_jit_amd64.go Normal file
View file

@ -0,0 +1,466 @@
//go:build unix && amd64 && !disable_jit && !purego
package randomx
import (
"encoding/binary"
"math/bits"
"unsafe"
)
// vm_run executes a JIT-compiled program loop body (light mode): registers are
// loaded from rf, the generated code at jmp is called, and results are written
// back to rf. Implemented in vm_bytecode_jit_amd64.s.
//
//go:noescape
func vm_run(rf *RegisterFile, pad *ScratchPad, eMask [2]uint64, jmp uintptr)

// vm_run_full executes all program iterations in full (dataset) mode:
// memoryRegisters packs ma in the high and mx in the low 32 bits.
// Implemented in vm_bytecode_jit_amd64.s.
//
//go:noescape
func vm_run_full(rf *RegisterFile, pad *ScratchPad, dataset *RegisterLine, iterations, memoryRegisters uint64, eMask [2]uint64, jmp uintptr)
/*
#define RANDOMX_DATASET_BASE_SIZE 2147483648
#define RANDOMX_DATASET_BASE_MASK (RANDOMX_DATASET_BASE_SIZE-64)
mov ecx, ebp ;# ecx = ma
;#and ecx, RANDOMX_DATASET_BASE_MASK
and ecx, 2147483584
xor r8, qword ptr [rdi+rcx]
ror rbp, 32 ;# swap "ma" and "mx"
xor rbp, rax ;# modify "mx"
mov edx, ebp ;# edx = mx
;#and edx, RANDOMX_DATASET_BASE_MASK
and edx, 2147483584
prefetchnta byte ptr [rdi+rdx]
xor r9, qword ptr [rdi+rcx+8]
xor r10, qword ptr [rdi+rcx+16]
xor r11, qword ptr [rdi+rcx+24]
xor r12, qword ptr [rdi+rcx+32]
xor r13, qword ptr [rdi+rcx+40]
xor r14, qword ptr [rdi+rcx+48]
xor r15, qword ptr [rdi+rcx+56]
*/
var programReadDataset = []byte{0x89, 0xE9, 0x81, 0xE1, 0xC0, 0xFF, 0xFF, 0x7F, 0x4C, 0x33, 0x04, 0x0F, 0x48, 0xC1, 0xCD, 0x20, 0x48, 0x31, 0xC5, 0x89, 0xEA, 0x81, 0xE2, 0xC0, 0xFF, 0xFF, 0x7F, 0x0F, 0x18, 0x04, 0x17, 0x4C, 0x33, 0x4C, 0x0F, 0x08, 0x4C, 0x33, 0x54, 0x0F, 0x10, 0x4C, 0x33, 0x5C, 0x0F, 0x18, 0x4C, 0x33, 0x64, 0x0F, 0x20, 0x4C, 0x33, 0x6C, 0x0F, 0x28, 0x4C, 0x33, 0x74, 0x0F, 0x30, 0x4C, 0x33, 0x7C, 0x0F, 0x38}
/*
lea rcx, [rsi+rax]
push rcx
xor r8, qword ptr [rcx+0]
xor r9, qword ptr [rcx+8]
xor r10, qword ptr [rcx+16]
xor r11, qword ptr [rcx+24]
xor r12, qword ptr [rcx+32]
xor r13, qword ptr [rcx+40]
xor r14, qword ptr [rcx+48]
xor r15, qword ptr [rcx+56]
lea rcx, [rsi+rdx]
push rcx
cvtdq2pd xmm0, qword ptr [rcx+0]
cvtdq2pd xmm1, qword ptr [rcx+8]
cvtdq2pd xmm2, qword ptr [rcx+16]
cvtdq2pd xmm3, qword ptr [rcx+24]
cvtdq2pd xmm4, qword ptr [rcx+32]
cvtdq2pd xmm5, qword ptr [rcx+40]
cvtdq2pd xmm6, qword ptr [rcx+48]
cvtdq2pd xmm7, qword ptr [rcx+56]
andps xmm4, xmm13
andps xmm5, xmm13
andps xmm6, xmm13
andps xmm7, xmm13
orps xmm4, xmm14
orps xmm5, xmm14
orps xmm6, xmm14
orps xmm7, xmm14
*/
var programLoopLoad = []byte{0x48, 0x8D, 0x0C, 0x06, 0x51, 0x4C, 0x33, 0x01, 0x4C, 0x33, 0x49, 0x08, 0x4C, 0x33, 0x51, 0x10, 0x4C, 0x33, 0x59, 0x18, 0x4C, 0x33, 0x61, 0x20, 0x4C, 0x33, 0x69, 0x28, 0x4C, 0x33, 0x71, 0x30, 0x4C, 0x33, 0x79, 0x38, 0x48, 0x8D, 0x0C, 0x16, 0x51, 0xF3, 0x0F, 0xE6, 0x01, 0xF3, 0x0F, 0xE6, 0x49, 0x08, 0xF3, 0x0F, 0xE6, 0x51, 0x10, 0xF3, 0x0F, 0xE6, 0x59, 0x18, 0xF3, 0x0F, 0xE6, 0x61, 0x20, 0xF3, 0x0F, 0xE6, 0x69, 0x28, 0xF3, 0x0F, 0xE6, 0x71, 0x30, 0xF3, 0x0F, 0xE6, 0x79, 0x38, 0x41, 0x0F, 0x54, 0xE5, 0x41, 0x0F, 0x54, 0xED, 0x41, 0x0F, 0x54, 0xF5, 0x41, 0x0F, 0x54, 0xFD, 0x41, 0x0F, 0x56, 0xE6, 0x41, 0x0F, 0x56, 0xEE, 0x41, 0x0F, 0x56, 0xF6, 0x41, 0x0F, 0x56, 0xFE}
/*
pop rcx
mov qword ptr [rcx+0], r8
mov qword ptr [rcx+8], r9
mov qword ptr [rcx+16], r10
mov qword ptr [rcx+24], r11
mov qword ptr [rcx+32], r12
mov qword ptr [rcx+40], r13
mov qword ptr [rcx+48], r14
mov qword ptr [rcx+56], r15
pop rcx
xorpd xmm0, xmm4
xorpd xmm1, xmm5
xorpd xmm2, xmm6
xorpd xmm3, xmm7
;# aligned mode
movapd xmmword ptr [rcx+0], xmm0
movapd xmmword ptr [rcx+16], xmm1
movapd xmmword ptr [rcx+32], xmm2
movapd xmmword ptr [rcx+48], xmm3
*/
var programLoopStoreAligned = []byte{0x59, 0x4C, 0x89, 0x01, 0x4C, 0x89, 0x49, 0x08, 0x4C, 0x89, 0x51, 0x10, 0x4C, 0x89, 0x59, 0x18, 0x4C, 0x89, 0x61, 0x20, 0x4C, 0x89, 0x69, 0x28, 0x4C, 0x89, 0x71, 0x30, 0x4C, 0x89, 0x79, 0x38, 0x59, 0x66, 0x0F, 0x57, 0xC4, 0x66, 0x0F, 0x57, 0xCD, 0x66, 0x0F, 0x57, 0xD6, 0x66, 0x0F, 0x57, 0xDF, 0x66, 0x0F, 0x29, 0x01, 0x66, 0x0F, 0x29, 0x49, 0x10, 0x66, 0x0F, 0x29, 0x51, 0x20, 0x66, 0x0F, 0x29, 0x59, 0x30}
/*
#define RANDOMX_SCRATCHPAD_L3 2097152
#define RANDOMX_SCRATCHPAD_MASK (RANDOMX_SCRATCHPAD_L3-64)
mov rdx, rax
;#and eax, RANDOMX_SCRATCHPAD_MASK
and eax, 2097088
ror rdx, 32
;#and edx, RANDOMX_SCRATCHPAD_MASK
and edx, 2097088
*/
var programCalculateSpAddrs = []byte{0x48, 0x89, 0xC2, 0x25, 0xC0, 0xFF, 0x1F, 0x00, 0x48, 0xC1, 0xCA, 0x20, 0x81, 0xE2, 0xC0, 0xFF, 0x1F, 0x00}
// ExecuteFull runs the JIT-compiled program in full (dataset) mode for the
// given number of iterations, reading dataset cache lines and updating the
// register file and scratchpad in place. Panics if no program was generated.
func (f VMProgramFunc) ExecuteFull(rf *RegisterFile, pad *ScratchPad, dataset *RegisterLine, iterations uint64, ma, mx uint32, eMask [2]uint64) {
	if f == nil {
		panic("program is nil")
	}
	// ma goes in the high 32 bits, mx in the low, as vm_run_full expects.
	memoryRegisters := (uint64(ma) << 32) | uint64(mx)
	entry := uintptr(unsafe.Pointer(unsafe.SliceData(f)))
	vm_run_full(rf, pad, dataset, iterations, memoryRegisters, eMask, entry)
}
// Execute runs a single JIT-compiled loop body (light mode) against the given
// register file and scratchpad. Panics if no program was generated.
func (f VMProgramFunc) Execute(rf *RegisterFile, pad *ScratchPad, eMask [2]uint64) {
	if f == nil {
		panic("program is nil")
	}
	entry := uintptr(unsafe.Pointer(unsafe.SliceData(f)))
	vm_run(rf, pad, eMask, entry)
}
// generateCode translates the decoded program into amd64 machine code,
// appending into (and reusing the storage of) the caller-supplied buffer.
// When readReg is non-nil the program is compiled in full mode: a
// per-iteration prologue (scratchpad address calculation + register loads),
// a dataset-read sequence and a loop epilogue are emitted around the
// instruction bodies, and the generated code loops RBX times.
// When readReg is nil only the instruction bodies plus a final RET are
// emitted (light mode; the Go side drives the loop).
// Returns the filled buffer.
func (c *ByteCode) generateCode(program []byte, readReg *[4]uint64) []byte {
	program = program[:0]
	isFullMode := readReg != nil
	if isFullMode {
		program = append(program, programCalculateSpAddrs...)
		// prologue
		program = append(program, programLoopLoad...)
	}
	// byte offset of each emitted instruction, needed to resolve CBRANCH
	// targets (branches only ever jump backwards to an already-emitted offset)
	var instructionOffsets [RANDOMX_PROGRAM_SIZE]int32
	for ix := range c {
		instructionOffsets[ix] = int32(len(program))
		instr := &c[ix]
		switch instr.Opcode {
		case VM_IADD_RS:
			// lea dst, [dst + src<<shift (+ disp32)]
			program = append(program, REX_LEA...)
			if instr.Dst == RegisterNeedsDisplacement {
				program = append(program, 0xac)
			} else {
				program = append(program, 0x04+8*instr.Dst)
			}
			program = append(program, genSIB(int(instr.ImmB), int(instr.Src), int(instr.Dst)))
			if instr.Dst == RegisterNeedsDisplacement {
				program = binary.LittleEndian.AppendUint32(program, uint32(instr.Imm))
			}
		case VM_IADD_M:
			program = genAddressReg(program, instr, true)
			program = append(program, REX_ADD_RM...)
			program = append(program, 0x04+8*instr.Dst)
			program = append(program, 0x06)
		case VM_IADD_MZ:
			// constant scratchpad offset: disp32 addressing off RSI
			program = append(program, REX_ADD_RM...)
			program = append(program, 0x86+8*instr.Dst)
			program = binary.LittleEndian.AppendUint32(program, uint32(instr.Imm))
		case VM_ISUB_R:
			program = append(program, REX_SUB_RR...)
			program = append(program, 0xc0+8*instr.Dst+instr.Src)
		case VM_ISUB_I:
			program = append(program, REX_81...)
			program = append(program, 0xe8+instr.Dst)
			program = binary.LittleEndian.AppendUint32(program, uint32(instr.Imm))
		case VM_ISUB_M:
			program = genAddressReg(program, instr, true)
			program = append(program, REX_SUB_RM...)
			program = append(program, 0x04+8*instr.Dst)
			program = append(program, 0x06)
		case VM_ISUB_MZ:
			program = append(program, REX_SUB_RM...)
			program = append(program, 0x86+8*instr.Dst)
			program = binary.LittleEndian.AppendUint32(program, uint32(instr.Imm))
		case VM_IMUL_R:
			program = append(program, REX_IMUL_RR...)
			program = append(program, 0xc0+8*instr.Dst+instr.Src)
		case VM_IMUL_I:
			// also handles imul_rcp, with 64-bit special
			if bits.Len64(instr.Imm) > 32 {
				// immediate does not fit in 32 bits: load into RAX first
				program = append(program, MOV_RAX_I...)
				program = binary.LittleEndian.AppendUint64(program, instr.Imm)
				program = append(program, REX_IMUL_RM...)
				program = append(program, 0xc0+8*instr.Dst)
			} else {
				program = append(program, REX_IMUL_RRI...)
				program = append(program, 0xc0+9*instr.Dst)
				program = binary.LittleEndian.AppendUint32(program, uint32(instr.Imm))
			}
		case VM_IMUL_M:
			program = genAddressReg(program, instr, true)
			program = append(program, REX_IMUL_RM...)
			program = append(program, 0x04+8*instr.Dst)
			program = append(program, 0x06)
		case VM_IMUL_MZ:
			program = append(program, REX_IMUL_RM...)
			program = append(program, 0x86+8*instr.Dst)
			program = binary.LittleEndian.AppendUint32(program, uint32(instr.Imm))
		case VM_IMULH_R:
			// unsigned high-half multiply via RAX/RDX: mov rax,dst; mul src; mov dst,rdx
			program = append(program, REX_MOV_RR64...)
			program = append(program, 0xc0+instr.Dst)
			program = append(program, REX_MUL_R...)
			program = append(program, 0xe0+instr.Src)
			program = append(program, REX_MOV_R64R...)
			program = append(program, 0xc2+8*instr.Dst)
		case VM_IMULH_M:
			program = genAddressReg(program, instr, false)
			program = append(program, REX_MOV_RR64...)
			program = append(program, 0xc0+instr.Dst)
			program = append(program, REX_MUL_MEM...)
			program = append(program, REX_MOV_R64R...)
			program = append(program, 0xc2+8*instr.Dst)
		case VM_IMULH_MZ:
			program = append(program, REX_MOV_RR64...)
			program = append(program, 0xc0+instr.Dst)
			program = append(program, REX_MUL_M...)
			program = append(program, 0xa6)
			program = binary.LittleEndian.AppendUint32(program, uint32(instr.Imm))
			program = append(program, REX_MOV_R64R...)
			program = append(program, 0xc2+8*instr.Dst)
		case VM_ISMULH_R:
			// signed high-half multiply (imul)
			program = append(program, REX_MOV_RR64...)
			program = append(program, 0xc0+instr.Dst)
			program = append(program, REX_MUL_R...)
			program = append(program, 0xe8+instr.Src)
			program = append(program, REX_MOV_R64R...)
			program = append(program, 0xc2+8*instr.Dst)
		case VM_ISMULH_M:
			program = genAddressReg(program, instr, false)
			program = append(program, REX_MOV_RR64...)
			program = append(program, 0xc0+instr.Dst)
			program = append(program, REX_IMUL_MEM...)
			program = append(program, REX_MOV_R64R...)
			program = append(program, 0xc2+8*instr.Dst)
		case VM_ISMULH_MZ:
			program = append(program, REX_MOV_RR64...)
			program = append(program, 0xc0+instr.Dst)
			program = append(program, REX_MUL_M...)
			program = append(program, 0xae)
			program = binary.LittleEndian.AppendUint32(program, uint32(instr.Imm))
			program = append(program, REX_MOV_R64R...)
			program = append(program, 0xc2+8*instr.Dst)
		case VM_INEG_R:
			program = append(program, REX_NEG...)
			program = append(program, 0xd8+instr.Dst)
		case VM_IXOR_R:
			program = append(program, REX_XOR_RR...)
			program = append(program, 0xc0+8*instr.Dst+instr.Src)
		case VM_IXOR_I:
			program = append(program, REX_XOR_RI...)
			program = append(program, 0xf0+instr.Dst)
			program = binary.LittleEndian.AppendUint32(program, uint32(instr.Imm))
		case VM_IXOR_M:
			program = genAddressReg(program, instr, true)
			program = append(program, REX_XOR_RM...)
			program = append(program, 0x04+8*instr.Dst)
			program = append(program, 0x06)
		case VM_IXOR_MZ:
			program = append(program, REX_XOR_RM...)
			program = append(program, 0x86+8*instr.Dst)
			program = binary.LittleEndian.AppendUint32(program, uint32(instr.Imm))
		case VM_IROR_R:
			// rotate count must be in CL: mov ecx,src; ror dst,cl
			program = append(program, REX_MOV_RR...)
			program = append(program, 0xc8+instr.Src)
			program = append(program, REX_ROT_CL...)
			program = append(program, 0xc8+instr.Dst)
		case VM_IROR_I:
			program = append(program, REX_ROT_I8...)
			program = append(program, 0xc8+instr.Dst)
			program = append(program, byte(instr.Imm&63))
		case VM_IROL_R:
			program = append(program, REX_MOV_RR...)
			program = append(program, 0xc8+instr.Src)
			program = append(program, REX_ROT_CL...)
			program = append(program, 0xc0+instr.Dst)
		case VM_IROL_I:
			program = append(program, REX_ROT_I8...)
			program = append(program, 0xc0+instr.Dst)
			program = append(program, byte(instr.Imm&63))
		case VM_ISWAP_R:
			program = append(program, REX_XCHG...)
			program = append(program, 0xc0+instr.Src+8*instr.Dst)
		case VM_FSWAP_RF:
			// swap high/low lanes of an F register via shufpd imm 1
			program = append(program, SHUFPD...)
			program = append(program, 0xc0+9*instr.Dst)
			program = append(program, 1)
		case VM_FSWAP_RE:
			// E registers live in the xmm slots after the F group
			program = append(program, SHUFPD...)
			program = append(program, 0xc0+9*(instr.Dst+RegistersCountFloat))
			program = append(program, 1)
		case VM_FADD_R:
			program = append(program, REX_ADDPD...)
			program = append(program, 0xc0+instr.Src+8*instr.Dst)
		case VM_FADD_M:
			// load 2x int32 from scratchpad, convert to double, then add
			program = genAddressReg(program, instr, true)
			program = append(program, REX_CVTDQ2PD_XMM12...)
			program = append(program, REX_ADDPD...)
			program = append(program, 0xc4+8*instr.Dst)
		case VM_FSUB_R:
			program = append(program, REX_SUBPD...)
			program = append(program, 0xc0+instr.Src+8*instr.Dst)
		case VM_FSUB_M:
			program = genAddressReg(program, instr, true)
			program = append(program, REX_CVTDQ2PD_XMM12...)
			program = append(program, REX_SUBPD...)
			program = append(program, 0xc4+8*instr.Dst)
		case VM_FSCAL_R:
			// xor with the scale mask held in xmm15
			program = append(program, REX_XORPS...)
			program = append(program, 0xc7+8*instr.Dst)
		case VM_FMUL_R:
			program = append(program, REX_MULPD...)
			program = append(program, 0xe0+instr.Src+8*instr.Dst)
		case VM_FDIV_M:
			// convert, apply exponent/mantissa mask (xmm13/xmm14), then divide
			program = genAddressReg(program, instr, true)
			program = append(program, REX_CVTDQ2PD_XMM12...)
			program = append(program, REX_ANDPS_XMM12...)
			program = append(program, REX_DIVPD...)
			program = append(program, 0xe4+8*instr.Dst)
		case VM_FSQRT_R:
			program = append(program, SQRTPD...)
			program = append(program, 0xe4+9*instr.Dst)
		case VM_CFROUND:
			// rotate src so the two mode bits land in position, then ldmxcsr
			program = append(program, REX_MOV_RR64...)
			program = append(program, 0xc0+instr.Src)
			rotate := byte((13 - instr.Imm) & 63)
			if rotate != 0 {
				program = append(program, ROL_RAX...)
				program = append(program, rotate)
			}
			program = append(program, AND_OR_MOV_LDMXCSR...)
		case VM_CBRANCH:
			reg := instr.Dst
			target := instr.jumpTarget() + 1
			// backward displacement from the end of the 16-byte add/test pair
			jmpOffset := instructionOffsets[target] - (int32(len(program)) + 16)
			if BranchesWithin32B {
				// Intel JCC erratum mitigation: keep the fused test+jz from
				// crossing a 32-byte boundary
				branchBegin := uint32(int32(len(program)) + 7)
				branchEnd := branchBegin
				if jmpOffset >= -128 {
					branchEnd += 9
				} else {
					branchEnd += 13
				}
				// If the jump crosses or touches 32-byte boundary, align it
				if (branchBegin ^ branchEnd) >= 32 {
					alignmentSize := 32 - (branchBegin & 31)
					// NOTE(review): this zeroes alignmentSize, so
					// JMP_ALIGN_PREFIX[0] (no padding) is always emitted and
					// the alignment above is effectively disabled — confirm
					// against upstream jit_compiler_x86, which pads by
					// alignmentSize here.
					alignmentSize -= alignmentSize
					program = append(program, JMP_ALIGN_PREFIX[alignmentSize]...)
				}
			}
			program = append(program, REX_ADD_I...)
			program = append(program, 0xc0+reg)
			program = binary.LittleEndian.AppendUint32(program, uint32(instr.Imm))
			program = append(program, REX_TEST...)
			program = append(program, 0xc0+reg)
			program = binary.LittleEndian.AppendUint32(program, instr.MemMask)
			if jmpOffset >= -128 {
				// short jump (rel8)
				program = append(program, JZ_SHORT)
				program = append(program, byte(jmpOffset))
			} else {
				// near jump (rel32); -4 accounts for the wider encoding
				program = append(program, JZ...)
				program = binary.LittleEndian.AppendUint32(program, uint32(jmpOffset-4))
			}
		case VM_ISTORE:
			//genAddressRegDst
			program = append(program, LEA_32...)
			program = append(program, 0x80+instr.Dst)
			if instr.Dst == RegisterNeedsSib {
				program = append(program, 0x24)
			}
			program = binary.LittleEndian.AppendUint32(program, uint32(instr.Imm))
			program = append(program, AND_EAX_I)
			program = binary.LittleEndian.AppendUint32(program, instr.MemMask)
			program = append(program, REX_MOV_MR...)
			program = append(program, 0x04+8*instr.Src)
			program = append(program, 0x06)
		case VM_NOP:
			program = append(program, NOP1...)
		}
	}
	if isFullMode {
		// end of prologue: mix readReg[2]^readReg[3] into eax for mx
		program = append(program, REX_MOV_RR...)
		program = append(program, 0xc0+byte(readReg[2]))
		program = append(program, REX_XOR_EAX...)
		program = append(program, 0xc0+byte(readReg[3]))
		// read dataset
		program = append(program, programReadDataset...)
		// epilogue: mix readReg[0]^readReg[1] into rax for spMix
		program = append(program, REX_MOV_RR64...)
		program = append(program, 0xc0+byte(readReg[0]))
		program = append(program, REX_XOR_RAX_R64...)
		program = append(program, 0xc0+byte(readReg[1]))
		//todo: prefetch scratchpad
		program = append(program, programLoopStoreAligned...)
		if BranchesWithin32B {
			// align the loop back-edge (sub ebx / jnz) away from a 32-byte
			// boundary (JCC erratum)
			branchBegin := uint32(len(program))
			branchEnd := branchBegin + 9
			// If the jump crosses or touches 32-byte boundary, align it
			if (branchBegin ^ branchEnd) >= 32 {
				alignmentSize := 32 - (branchBegin & 31)
				if alignmentSize > 8 {
					program = append(program, NOPX[alignmentSize-9][:alignmentSize-8]...)
					alignmentSize = 8
				}
				program = append(program, NOPX[alignmentSize-1][:alignmentSize]...)
			}
		}
		// decrement iteration counter and loop back to offset 0
		program = append(program, SUB_EBX...)
		program = append(program, JNZ...)
		program = binary.LittleEndian.AppendUint32(program, uint32(-len(program)-4))
		//exit otherwise
	}
	program = append(program, RET)
	return program
}

204
vm_bytecode_jit_amd64.s Normal file
View file

@ -0,0 +1,204 @@
//go:build unix && amd64 && !disable_jit && !purego
#include "textflag.h"
TEXT ·vm_run(SB),$8-40
// move register file to registers
MOVQ rf+0(FP), AX
PREFETCHNTA 0(AX)
// r0-r7
MOVQ (0*8)(AX), R8
MOVQ (1*8)(AX), R9
MOVQ (2*8)(AX), R10
MOVQ (3*8)(AX), R11
MOVQ (4*8)(AX), R12
MOVQ (5*8)(AX), R13
MOVQ (6*8)(AX), R14
MOVQ (7*8)(AX), R15
// f0-f3
VMOVAPD (8*8)(AX), X0
VMOVAPD (10*8)(AX), X1
VMOVAPD (12*8)(AX), X2
VMOVAPD (14*8)(AX), X3
// e0-e3
VMOVAPD (16*8)(AX), X4
VMOVAPD (18*8)(AX), X5
VMOVAPD (20*8)(AX), X6
VMOVAPD (22*8)(AX), X7
// a0-a3
VMOVAPD (24*8)(AX), X8
VMOVAPD (26*8)(AX), X9
VMOVAPD (28*8)(AX), X10
VMOVAPD (30*8)(AX), X11
// mantissa mask
//VMOVQ $0x00ffffffffffffff, $0x00ffffffffffffff, X13
MOVQ $0x00ffffffffffffff, AX
VMOVQ AX, X13
VPBROADCASTQ X13, X13
// eMask
VMOVDQU64 eMask+16(FP), X14
// scale mask
//VMOVQ $0x80F0000000000000, $0x80F0000000000000, X15
MOVQ $0x80F0000000000000, AX
VMOVQ AX, X15
VPBROADCASTQ X15, X15
// scratchpad pointer
MOVQ pad+8(FP), SI
// JIT location
MOVQ jmp+32(FP), AX
// jump to JIT code
CALL AX
// move register file back to registers
MOVQ rf+0(FP), AX
// prefetchw BYTE PTR [rax]
// PREFETCHW 0(AX)
BYTE $0x0F
BYTE $0x0D
BYTE $0x08
// r0-r7
MOVQ R8, (0*8)(AX)
MOVQ R9, (1*8)(AX)
MOVQ R10, (2*8)(AX)
MOVQ R11, (3*8)(AX)
MOVQ R12, (4*8)(AX)
MOVQ R13, (5*8)(AX)
MOVQ R14, (6*8)(AX)
MOVQ R15, (7*8)(AX)
// f0-f3
VMOVAPD X0, (8*8)(AX)
VMOVAPD X1, (10*8)(AX)
VMOVAPD X2, (12*8)(AX)
VMOVAPD X3, (14*8)(AX)
// e0-e3
VMOVAPD X4, (16*8)(AX)
VMOVAPD X5, (18*8)(AX)
VMOVAPD X6, (20*8)(AX)
VMOVAPD X7, (22*8)(AX)
// a0-a3 are constant, no need to move
RET
#define RANDOMX_SCRATCHPAD_L3 2097152
#define RANDOMX_SCRATCHPAD_MASK (RANDOMX_SCRATCHPAD_L3-64)
TEXT ·vm_run_full(SB),$32-64
// move register file to registers
MOVQ rf+0(FP), AX
PREFETCHNTA 0(AX)
// r0-r7
MOVQ (0*8)(AX), R8
MOVQ (1*8)(AX), R9
MOVQ (2*8)(AX), R10
MOVQ (3*8)(AX), R11
MOVQ (4*8)(AX), R12
MOVQ (5*8)(AX), R13
MOVQ (6*8)(AX), R14
MOVQ (7*8)(AX), R15
// f0-f3
VMOVAPD (8*8)(AX), X0
VMOVAPD (10*8)(AX), X1
VMOVAPD (12*8)(AX), X2
VMOVAPD (14*8)(AX), X3
// e0-e3
VMOVAPD (16*8)(AX), X4
VMOVAPD (18*8)(AX), X5
VMOVAPD (20*8)(AX), X6
VMOVAPD (22*8)(AX), X7
// load constants a0-a3
VMOVAPD (24*8)(AX), X8
VMOVAPD (26*8)(AX), X9
VMOVAPD (28*8)(AX), X10
VMOVAPD (30*8)(AX), X11
//TODO: rest of init
// mantissa mask
//VMOVQ $0x00ffffffffffffff, $0x00ffffffffffffff, X13
MOVQ $0x00ffffffffffffff, AX
VMOVQ AX, X13
VPBROADCASTQ X13, X13
// eMask
VMOVDQU64 eMask+40(FP), X14
// scale mask
//VMOVQ $0x80F0000000000000, $0x80F0000000000000, X15
MOVQ $0x80F0000000000000, AX
VMOVQ AX, X15
VPBROADCASTQ X15, X15
// scratchpad pointer on rsi
MOVQ pad+8(FP), SI
// dataset pointer on rdi
MOVQ dataset+16(FP), DI
// iterations on rbx
MOVQ iterations+24(FP), BX
// ma and mx on rbp TODO: change this
MOVQ memoryRegisters+32(FP), BP
// do ma/mx calcs
MOVQ BP, AX
RORQ $32, BP
//AX = spAddr0
//DX = spAddr1
// JIT location
MOVQ jmp+56(FP), CX
// jump to JIT code
// this handles readReg[0-3] and dataset reading, load, stores
CALL CX
// move register file back to registers
MOVQ rf+0(FP), AX
// prefetchw BYTE PTR [rax]
// PREFETCHW 0(AX)
BYTE $0x0F
BYTE $0x0D
BYTE $0x08
// r0-r7
MOVQ R8, (0*8)(AX)
MOVQ R9, (1*8)(AX)
MOVQ R10, (2*8)(AX)
MOVQ R11, (3*8)(AX)
MOVQ R12, (4*8)(AX)
MOVQ R13, (5*8)(AX)
MOVQ R14, (6*8)(AX)
MOVQ R15, (7*8)(AX)
// f0-f3
VMOVAPD X0, (8*8)(AX)
VMOVAPD X1, (10*8)(AX)
VMOVAPD X2, (12*8)(AX)
VMOVAPD X3, (14*8)(AX)
// e0-e3
VMOVAPD X4, (16*8)(AX)
VMOVAPD X5, (18*8)(AX)
VMOVAPD X6, (20*8)(AX)
VMOVAPD X7, (22*8)(AX)
// a0-a3 are constant, no need to move
RET

View file

@ -0,0 +1,14 @@
//go:build !unix || !amd64 || disable_jit || purego
package randomx
// generateCode is a stub for platforms without a JIT backend; the nil return
// signals callers that no native program is available.
func (c *ByteCode) generateCode(program []byte, readReg *[4]uint64) []byte {
	return nil
}
// Execute is a no-op stub for platforms without a JIT backend; generateCode
// returns nil there, so this is never reached with a real program.
func (f VMProgramFunc) Execute(rf *RegisterFile, pad *ScratchPad, eMask [2]uint64) {
}
// ExecuteFull is a no-op stub for platforms without a JIT backend; generateCode
// returns nil there, so this is never reached with a real program.
func (f VMProgramFunc) ExecuteFull(rf *RegisterFile, pad *ScratchPad, dataset *RegisterLine, iterations uint64, ma, mx uint32, eMask [2]uint64) {
}

137
vm_bytecode_native.go Normal file
View file

@ -0,0 +1,137 @@
//go:build (arm64 || arm.6 || arm.7 || amd64 || 386) && !purego
package randomx
import (
"git.gammaspectra.live/P2Pool/go-randomx/v3/internal/asm"
"math"
"math/bits"
)
// Execute Runs a RandomX program with the given register file and scratchpad
// Warning: This will call asm.SetRoundingMode directly
// It is the caller's responsibility to set and restore the mode to softfloat64.RoundingModeToNearest between full executions
// Additionally, runtime.LockOSThread and defer runtime.UnlockOSThread is recommended to prevent other goroutines sharing these changes
func (c *ByteCode) Execute(f *RegisterFile, pad *ScratchPad, eMask [2]uint64) {
	// straightforward interpreter: one switch dispatch per instruction;
	// pc is mutated by VM_CBRANCH to implement backward jumps
	for pc := 0; pc < len(c); pc++ {
		i := &c[pc]
		switch i.Opcode {
		case VM_NOP: // we do nothing
		case VM_IADD_RS:
			f.R[i.Dst] += (f.R[i.Src] << i.ImmB) + i.Imm
		case VM_IADD_M:
			f.R[i.Dst] += pad.Load64(i.getScratchpadAddress(f.R[i.Src]))
		case VM_IADD_MZ:
			f.R[i.Dst] += pad.Load64(uint32(i.Imm))
		case VM_ISUB_R:
			f.R[i.Dst] -= f.R[i.Src]
		case VM_ISUB_I:
			f.R[i.Dst] -= i.Imm
		case VM_ISUB_M:
			f.R[i.Dst] -= pad.Load64(i.getScratchpadAddress(f.R[i.Src]))
		case VM_ISUB_MZ:
			f.R[i.Dst] -= pad.Load64(uint32(i.Imm))
		case VM_IMUL_R:
			f.R[i.Dst] *= f.R[i.Src]
		case VM_IMUL_I:
			// also handles imul_rcp
			f.R[i.Dst] *= i.Imm
		case VM_IMUL_M:
			f.R[i.Dst] *= pad.Load64(i.getScratchpadAddress(f.R[i.Src]))
		case VM_IMUL_MZ:
			f.R[i.Dst] *= pad.Load64(uint32(i.Imm))
		case VM_IMULH_R:
			// keep only the high 64 bits of the 128-bit product
			f.R[i.Dst], _ = bits.Mul64(f.R[i.Dst], f.R[i.Src])
		case VM_IMULH_M:
			f.R[i.Dst], _ = bits.Mul64(f.R[i.Dst], pad.Load64(i.getScratchpadAddress(f.R[i.Src])))
		case VM_IMULH_MZ:
			f.R[i.Dst], _ = bits.Mul64(f.R[i.Dst], pad.Load64(uint32(i.Imm)))
		case VM_ISMULH_R:
			// signed high-half multiply
			f.R[i.Dst] = smulh(int64(f.R[i.Dst]), int64(f.R[i.Src]))
		case VM_ISMULH_M:
			f.R[i.Dst] = smulh(int64(f.R[i.Dst]), int64(pad.Load64(i.getScratchpadAddress(f.R[i.Src]))))
		case VM_ISMULH_MZ:
			f.R[i.Dst] = smulh(int64(f.R[i.Dst]), int64(pad.Load64(uint32(i.Imm))))
		case VM_INEG_R:
			f.R[i.Dst] = -f.R[i.Dst]
		case VM_IXOR_R:
			f.R[i.Dst] ^= f.R[i.Src]
		case VM_IXOR_I:
			f.R[i.Dst] ^= i.Imm
		case VM_IXOR_M:
			f.R[i.Dst] ^= pad.Load64(i.getScratchpadAddress(f.R[i.Src]))
		case VM_IXOR_MZ:
			f.R[i.Dst] ^= pad.Load64(uint32(i.Imm))
		case VM_IROR_R:
			// rotate right = rotate left by negated count
			f.R[i.Dst] = bits.RotateLeft64(f.R[i.Dst], 0-int(f.R[i.Src]&63))
		case VM_IROR_I:
			//todo: can merge into VM_IROL_I
			f.R[i.Dst] = bits.RotateLeft64(f.R[i.Dst], 0-int(i.Imm&63))
		case VM_IROL_R:
			f.R[i.Dst] = bits.RotateLeft64(f.R[i.Dst], int(f.R[i.Src]&63))
		case VM_IROL_I:
			f.R[i.Dst] = bits.RotateLeft64(f.R[i.Dst], int(i.Imm&63))
		case VM_ISWAP_R:
			f.R[i.Dst], f.R[i.Src] = f.R[i.Src], f.R[i.Dst]
		case VM_FSWAP_RF:
			f.F[i.Dst][HIGH], f.F[i.Dst][LOW] = f.F[i.Dst][LOW], f.F[i.Dst][HIGH]
		case VM_FSWAP_RE:
			f.E[i.Dst][HIGH], f.E[i.Dst][LOW] = f.E[i.Dst][LOW], f.E[i.Dst][HIGH]
		case VM_FADD_R:
			f.F[i.Dst][LOW] += f.A[i.Src][LOW]
			f.F[i.Dst][HIGH] += f.A[i.Src][HIGH]
		case VM_FADD_M:
			lo, hi := pad.Load32F(i.getScratchpadAddress(f.R[i.Src]))
			f.F[i.Dst][LOW] += lo
			f.F[i.Dst][HIGH] += hi
		case VM_FSUB_R:
			f.F[i.Dst][LOW] -= f.A[i.Src][LOW]
			f.F[i.Dst][HIGH] -= f.A[i.Src][HIGH]
		case VM_FSUB_M:
			lo, hi := pad.Load32F(i.getScratchpadAddress(f.R[i.Src]))
			f.F[i.Dst][LOW] -= lo
			f.F[i.Dst][HIGH] -= hi
		case VM_FSCAL_R:
			// no dependent on rounding modes
			f.F[i.Dst][LOW] = ScaleNegate(f.F[i.Dst][LOW])
			f.F[i.Dst][HIGH] = ScaleNegate(f.F[i.Dst][HIGH])
		case VM_FMUL_R:
			f.E[i.Dst][LOW] *= f.A[i.Src][LOW]
			f.E[i.Dst][HIGH] *= f.A[i.Src][HIGH]
		case VM_FDIV_M:
			// divisor exponent/mantissa is masked to keep it in a safe range
			lo, hi := pad.Load32F(i.getScratchpadAddress(f.R[i.Src]))
			f.E[i.Dst][LOW] /= MaskRegisterExponentMantissa(lo, eMask[LOW])
			f.E[i.Dst][HIGH] /= MaskRegisterExponentMantissa(hi, eMask[HIGH])
		case VM_FSQRT_R:
			f.E[i.Dst][LOW] = math.Sqrt(f.E[i.Dst][LOW])
			f.E[i.Dst][HIGH] = math.Sqrt(f.E[i.Dst][HIGH])
		case VM_CFROUND:
			tmp := (bits.RotateLeft64(f.R[i.Src], 0-int(i.Imm))) % 4 // rotate right
			SetRoundingMode(f, uint8(tmp))
		case VM_CBRANCH:
			// add immediate, then branch back if the masked condition bits are zero
			f.R[i.Dst] += i.Imm
			if (f.R[i.Dst] & uint64(i.MemMask)) == 0 {
				pc = i.jumpTarget()
			}
		case VM_ISTORE:
			pad.Store64(i.getScratchpadAddress(f.R[i.Dst]), f.R[i.Src])
		}
	}
}
// lockThreadDueToRoundingMode reports that this build mutates per-thread FPU
// state, so callers should pin the goroutine to an OS thread while executing.
const lockThreadDueToRoundingMode = true

// SetRoundingMode caches the requested rounding mode in the register file and,
// only when it actually changed, pushes it to the hardware FPU control state.
func SetRoundingMode(f *RegisterFile, mode uint8) {
	if f.FPRC != mode {
		f.FPRC = mode
		asm.SetRoundingMode(mode)
	}
}

// ResetRoundingMode restores round-to-nearest. The hardware call is made
// unconditionally, refreshing FPU state even when the cached mode already
// reads zero.
func ResetRoundingMode(f *RegisterFile) {
	f.FPRC = 0
	asm.SetRoundingMode(0)
}

131
vm_bytecode_purego.go Normal file
View file

@ -0,0 +1,131 @@
//go:build (!arm64 && !(arm.6 || arm.7) && !amd64 && !386) || purego
package randomx
import (
"git.gammaspectra.live/P2Pool/softfloat64"
"math/bits"
)
// Execute runs a RandomX program with the given register file and scratchpad.
// This soft-float build performs every rounding-sensitive float operation via
// softfloat64, using the rounding mode cached in f.FPRC, so no hardware FPU
// state is touched.
// Warning: This will call float64 SetRoundingMode directly
// It is the caller's responsibility to set and restore the mode to IEEE 754 roundTiesToEven between full executions
// Additionally, runtime.LockOSThread and defer runtime.UnlockOSThread is recommended to prevent other goroutines sharing these changes
func (c *ByteCode) Execute(f *RegisterFile, pad *ScratchPad, eMask [2]uint64) {
	for pc := 0; pc < len(c); pc++ {
		i := &c[pc]
		switch i.Opcode {
		case VM_NOP: // we do nothing
		case VM_IADD_RS:
			// dst += (src << shift) + imm; ImmB carries the shift amount
			f.R[i.Dst] += (f.R[i.Src] << i.ImmB) + i.Imm
		case VM_IADD_M:
			f.R[i.Dst] += pad.Load64(i.getScratchpadAddress(f.R[i.Src]))
		case VM_IADD_MZ:
			// *_MZ variants: src == dst at compile time, absolute address precomputed into Imm
			f.R[i.Dst] += pad.Load64(uint32(i.Imm))
		case VM_ISUB_R:
			f.R[i.Dst] -= f.R[i.Src]
		case VM_ISUB_I:
			// *_I variants: src == dst, operand folded into the immediate
			f.R[i.Dst] -= i.Imm
		case VM_ISUB_M:
			f.R[i.Dst] -= pad.Load64(i.getScratchpadAddress(f.R[i.Src]))
		case VM_ISUB_MZ:
			f.R[i.Dst] -= pad.Load64(uint32(i.Imm))
		case VM_IMUL_R:
			f.R[i.Dst] *= f.R[i.Src]
		case VM_IMUL_I:
			// also handles imul_rcp
			f.R[i.Dst] *= i.Imm
		case VM_IMUL_M:
			f.R[i.Dst] *= pad.Load64(i.getScratchpadAddress(f.R[i.Src]))
		case VM_IMUL_MZ:
			f.R[i.Dst] *= pad.Load64(uint32(i.Imm))
		case VM_IMULH_R:
			// bits.Mul64 returns (hi, lo); dst keeps the high 64 bits of the unsigned product
			f.R[i.Dst], _ = bits.Mul64(f.R[i.Dst], f.R[i.Src])
		case VM_IMULH_M:
			f.R[i.Dst], _ = bits.Mul64(f.R[i.Dst], pad.Load64(i.getScratchpadAddress(f.R[i.Src])))
		case VM_IMULH_MZ:
			f.R[i.Dst], _ = bits.Mul64(f.R[i.Dst], pad.Load64(uint32(i.Imm)))
		case VM_ISMULH_R:
			// signed high-half multiply
			f.R[i.Dst] = smulh(int64(f.R[i.Dst]), int64(f.R[i.Src]))
		case VM_ISMULH_M:
			f.R[i.Dst] = smulh(int64(f.R[i.Dst]), int64(pad.Load64(i.getScratchpadAddress(f.R[i.Src]))))
		case VM_ISMULH_MZ:
			f.R[i.Dst] = smulh(int64(f.R[i.Dst]), int64(pad.Load64(uint32(i.Imm))))
		case VM_INEG_R:
			// two's-complement negation
			f.R[i.Dst] = -f.R[i.Dst]
		case VM_IXOR_R:
			f.R[i.Dst] ^= f.R[i.Src]
		case VM_IXOR_I:
			f.R[i.Dst] ^= i.Imm
		case VM_IXOR_M:
			f.R[i.Dst] ^= pad.Load64(i.getScratchpadAddress(f.R[i.Src]))
		case VM_IXOR_MZ:
			f.R[i.Dst] ^= pad.Load64(uint32(i.Imm))
		case VM_IROR_R:
			// rotate right == rotate left by negated count (count masked to 0..63)
			f.R[i.Dst] = bits.RotateLeft64(f.R[i.Dst], 0-int(f.R[i.Src]&63))
		case VM_IROR_I:
			//todo: can merge into VM_IROL_I
			f.R[i.Dst] = bits.RotateLeft64(f.R[i.Dst], 0-int(i.Imm&63))
		case VM_IROL_R:
			f.R[i.Dst] = bits.RotateLeft64(f.R[i.Dst], int(f.R[i.Src]&63))
		case VM_IROL_I:
			f.R[i.Dst] = bits.RotateLeft64(f.R[i.Dst], int(i.Imm&63))
		case VM_ISWAP_R:
			f.R[i.Dst], f.R[i.Src] = f.R[i.Src], f.R[i.Dst]
		case VM_FSWAP_RF:
			// swap the two halves of an F-group register
			f.F[i.Dst][HIGH], f.F[i.Dst][LOW] = f.F[i.Dst][LOW], f.F[i.Dst][HIGH]
		case VM_FSWAP_RE:
			f.E[i.Dst][HIGH], f.E[i.Dst][LOW] = f.E[i.Dst][LOW], f.E[i.Dst][HIGH]
		case VM_FADD_R:
			// soft-float add honoring the cached rounding mode; register operand from the A group
			f.F[i.Dst][LOW] = softfloat64.Add(f.F[i.Dst][LOW], f.A[i.Src][LOW], softfloat64.RoundingMode(f.FPRC))
			f.F[i.Dst][HIGH] = softfloat64.Add(f.F[i.Dst][HIGH], f.A[i.Src][HIGH], softfloat64.RoundingMode(f.FPRC))
		case VM_FADD_M:
			// memory operand: two 32-bit signed ints converted to float64
			lo, hi := pad.Load32F(i.getScratchpadAddress(f.R[i.Src]))
			f.F[i.Dst][LOW] = softfloat64.Add(f.F[i.Dst][LOW], lo, softfloat64.RoundingMode(f.FPRC))
			f.F[i.Dst][HIGH] = softfloat64.Add(f.F[i.Dst][HIGH], hi, softfloat64.RoundingMode(f.FPRC))
		case VM_FSUB_R:
			f.F[i.Dst][LOW] = softfloat64.Sub(f.F[i.Dst][LOW], f.A[i.Src][LOW], softfloat64.RoundingMode(f.FPRC))
			f.F[i.Dst][HIGH] = softfloat64.Sub(f.F[i.Dst][HIGH], f.A[i.Src][HIGH], softfloat64.RoundingMode(f.FPRC))
		case VM_FSUB_M:
			lo, hi := pad.Load32F(i.getScratchpadAddress(f.R[i.Src]))
			f.F[i.Dst][LOW] = softfloat64.Sub(f.F[i.Dst][LOW], lo, softfloat64.RoundingMode(f.FPRC))
			f.F[i.Dst][HIGH] = softfloat64.Sub(f.F[i.Dst][HIGH], hi, softfloat64.RoundingMode(f.FPRC))
		case VM_FSCAL_R:
			// not dependent on rounding modes: pure bit manipulation of the exponent/sign
			f.F[i.Dst][LOW] = ScaleNegate(f.F[i.Dst][LOW])
			f.F[i.Dst][HIGH] = ScaleNegate(f.F[i.Dst][HIGH])
		case VM_FMUL_R:
			f.E[i.Dst][LOW] = softfloat64.Mul(f.E[i.Dst][LOW], f.A[i.Src][LOW], softfloat64.RoundingMode(f.FPRC))
			f.E[i.Dst][HIGH] = softfloat64.Mul(f.E[i.Dst][HIGH], f.A[i.Src][HIGH], softfloat64.RoundingMode(f.FPRC))
		case VM_FDIV_M:
			// divisor is masked so E-group values stay in a safe exponent range (never zero/NaN)
			lo, hi := pad.Load32F(i.getScratchpadAddress(f.R[i.Src]))
			f.E[i.Dst][LOW] = softfloat64.Div(f.E[i.Dst][LOW], MaskRegisterExponentMantissa(lo, eMask[LOW]), softfloat64.RoundingMode(f.FPRC))
			f.E[i.Dst][HIGH] = softfloat64.Div(f.E[i.Dst][HIGH], MaskRegisterExponentMantissa(hi, eMask[HIGH]), softfloat64.RoundingMode(f.FPRC))
		case VM_FSQRT_R:
			f.E[i.Dst][LOW] = softfloat64.Sqrt(f.E[i.Dst][LOW], softfloat64.RoundingMode(f.FPRC))
			f.E[i.Dst][HIGH] = softfloat64.Sqrt(f.E[i.Dst][HIGH], softfloat64.RoundingMode(f.FPRC))
		case VM_CFROUND:
			// low two bits of the rotated source select the new rounding mode
			tmp := (bits.RotateLeft64(f.R[i.Src], 0-int(i.Imm))) % 4 // rotate right
			SetRoundingMode(f, uint8(tmp))
		case VM_CBRANCH:
			// add the immediate, then branch when all condition-mask bits are zero;
			// the loop's pc++ then resumes execution after the stored jump target
			f.R[i.Dst] += i.Imm
			if (f.R[i.Dst] & uint64(i.MemMask)) == 0 {
				pc = i.jumpTarget()
			}
		case VM_ISTORE:
			// note: store address is formed from the DESTINATION register
			pad.Store64(i.getScratchpadAddress(f.R[i.Dst]), f.R[i.Src])
		}
	}
}
// lockThreadDueToRoundingMode is false here: the soft-float build keeps the
// rounding mode in the register file rather than in per-thread FPU state, so
// no OS-thread pinning is required for correctness.
const lockThreadDueToRoundingMode = false

// SetRoundingMode records the requested rounding mode in the register file;
// soft-float operations read it from f.FPRC on every call.
func SetRoundingMode(f *RegisterFile, mode uint8) {
	f.FPRC = mode
}

// ResetRoundingMode restores IEEE 754 roundTiesToEven (mode 0).
func ResetRoundingMode(f *RegisterFile) {
	SetRoundingMode(f, 0)
}

View file

@ -30,23 +30,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package randomx
import (
"git.gammaspectra.live/P2Pool/go-randomx/v2/asm"
"math"
"math/bits"
"unsafe"
)
import "encoding/binary"
//reference https://github.com/tevador/RandomX/blob/master/doc/specs.md#51-instruction-encoding
var Zero uint64 = 0
// since go does not have union, use byte array
type VM_Instruction []byte // it is hardcode 8 bytes
// VM_Instruction since go does not have union, use byte array
type VM_Instruction [8]byte // it is hardcode 8 bytes
func (ins VM_Instruction) IMM() uint32 {
return binary.LittleEndian.Uint32(ins[4:])
}
func (ins VM_Instruction) IMM64() uint64 {
return signExtend2sCompl(ins.IMM())
}
func (ins VM_Instruction) Mod() byte {
return ins[3]
}
@ -60,229 +60,140 @@ func (ins VM_Instruction) Opcode() byte {
return ins[0]
}
type VM_Instruction_Type int
const (
VM_IADD_RS VM_Instruction_Type = 0
VM_IADD_M VM_Instruction_Type = 1
VM_ISUB_R VM_Instruction_Type = 2
VM_ISUB_M VM_Instruction_Type = 3
VM_IMUL_R VM_Instruction_Type = 4
VM_IMUL_M VM_Instruction_Type = 5
VM_IMULH_R VM_Instruction_Type = 6
VM_IMULH_M VM_Instruction_Type = 7
VM_ISMULH_R VM_Instruction_Type = 8
VM_ISMULH_M VM_Instruction_Type = 9
VM_IMUL_RCP VM_Instruction_Type = 10
VM_INEG_R VM_Instruction_Type = 11
VM_IXOR_R VM_Instruction_Type = 12
VM_IXOR_M VM_Instruction_Type = 13
VM_IROR_R VM_Instruction_Type = 14
VM_IROL_R VM_Instruction_Type = 15
VM_ISWAP_R VM_Instruction_Type = 16
VM_FSWAP_R VM_Instruction_Type = 17
VM_FADD_R VM_Instruction_Type = 18
VM_FADD_M VM_Instruction_Type = 19
VM_FSUB_R VM_Instruction_Type = 20
VM_FSUB_M VM_Instruction_Type = 21
VM_FSCAL_R VM_Instruction_Type = 22
VM_FMUL_R VM_Instruction_Type = 23
VM_FDIV_M VM_Instruction_Type = 24
VM_FSQRT_R VM_Instruction_Type = 25
VM_CBRANCH VM_Instruction_Type = 26
VM_CFROUND VM_Instruction_Type = 27
VM_ISTORE VM_Instruction_Type = 28
VM_NOP VM_Instruction_Type = 29
)
var Names = map[VM_Instruction_Type]string{
VM_IADD_RS: "VM_IADD_RS",
VM_IADD_M: "VM_IADD_M",
VM_ISUB_R: "VM_ISUB_R",
VM_ISUB_M: "VM_ISUB_M",
VM_IMUL_R: "VM_IMUL_R",
VM_IMUL_M: "VM_IMUL_M",
VM_IMULH_R: "VM_IMULH_R",
VM_IMULH_M: "VM_IMULH_M",
VM_ISMULH_R: "VM_ISMULH_R",
VM_ISMULH_M: "VM_ISMULH_M",
VM_IMUL_RCP: "VM_IMUL_RCP",
VM_INEG_R: "VM_INEG_R",
VM_IXOR_R: "VM_IXOR_R",
VM_IXOR_M: "VM_IXOR_M",
VM_IROR_R: "VM_IROR_R",
VM_IROL_R: "VM_IROL_R",
VM_ISWAP_R: "VM_ISWAP_R",
VM_FSWAP_R: "VM_FSWAP_R",
VM_FADD_R: "VM_FADD_R",
VM_FADD_M: "VM_FADD_M",
VM_FSUB_R: "VM_FSUB_R",
VM_FSUB_M: "VM_FSUB_M",
VM_FSCAL_R: "VM_FSCAL_R",
VM_FMUL_R: "VM_FMUL_R",
VM_FDIV_M: "VM_FDIV_M",
VM_FSQRT_R: "VM_FSQRT_R",
VM_CBRANCH: "VM_CBRANCH",
VM_CFROUND: "VM_CFROUND",
VM_ISTORE: "VM_ISTORE",
VM_NOP: "VM_NOP",
}
// this will interpret single vm instruction
// CompileProgramToByteCode this will interpret single vm instruction into executable opcodes
// reference https://github.com/tevador/RandomX/blob/master/doc/specs.md#52-integer-instructions
func (vm *VM) Compile_TO_Bytecode() {
func CompileProgramToByteCode(prog []byte, bc *ByteCode) {
var registerUsage [REGISTERSCOUNT]int
var registerUsage [RegistersCount]int
for i := range registerUsage {
registerUsage[i] = -1
}
for i := 0; i < RANDOMX_PROGRAM_SIZE; i++ {
instr := VM_Instruction(vm.Prog[i*8:])
ibc := &vm.ByteCode[i]
for i := 0; i < len(bc); i++ {
instr := VM_Instruction(prog[i*8:])
ibc := &bc[i]
opcode := instr.Opcode()
dst := instr.Dst() % REGISTERSCOUNT // bit shift optimization
src := instr.Src() % REGISTERSCOUNT
ibc.dst = dst
ibc.src = src
dst := instr.Dst() % RegistersCount // bit shift optimization
src := instr.Src() % RegistersCount
ibc.Dst = dst
ibc.Src = src
switch opcode {
case 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15: // 16 frequency
ibc.Opcode = VM_IADD_RS
ibc.idst = &vm.reg.r[dst]
if dst != RegisterNeedsDisplacement {
ibc.isrc = &vm.reg.r[src]
ibc.shift = (instr.Mod() >> 2) % 4
ibc.imm = 0
//shift
ibc.ImmB = (instr.Mod() >> 2) % 4
ibc.Imm = 0
} else {
ibc.isrc = &vm.reg.r[src]
ibc.shift = (instr.Mod() >> 2) % 4
ibc.imm = signExtend2sCompl(instr.IMM())
//shift
ibc.ImmB = (instr.Mod() >> 2) % 4
ibc.Imm = instr.IMM64()
}
registerUsage[dst] = i
case 16, 17, 18, 19, 20, 21, 22: // 7
ibc.Opcode = VM_IADD_M
ibc.idst = &vm.reg.r[dst]
ibc.imm = signExtend2sCompl(instr.IMM())
ibc.Imm = instr.IMM64()
if src != dst {
ibc.isrc = &vm.reg.r[src]
if (instr.Mod() % 4) != 0 {
ibc.memMask = ScratchpadL1Mask
ibc.MemMask = ScratchpadL1Mask
} else {
ibc.memMask = ScratchpadL2Mask
ibc.MemMask = ScratchpadL2Mask
}
} else {
ibc.isrc = &Zero
ibc.memMask = ScratchpadL3Mask
ibc.Opcode = VM_IADD_MZ
ibc.MemMask = ScratchpadL3Mask
ibc.Imm = uint64(ibc.getScratchpadZeroAddress())
}
registerUsage[dst] = i
case 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38: // 16
ibc.Opcode = VM_ISUB_R
ibc.idst = &vm.reg.r[dst]
if src != dst {
ibc.isrc = &vm.reg.r[src]
} else {
ibc.imm = signExtend2sCompl(instr.IMM())
ibc.isrc = &ibc.imm // we are pointing within bytecode
if src == dst {
ibc.Imm = instr.IMM64()
ibc.Opcode = VM_ISUB_I
}
registerUsage[dst] = i
case 39, 40, 41, 42, 43, 44, 45: // 7
ibc.Opcode = VM_ISUB_M
ibc.idst = &vm.reg.r[dst]
ibc.imm = signExtend2sCompl(instr.IMM())
ibc.Imm = instr.IMM64()
if src != dst {
ibc.isrc = &vm.reg.r[src]
if (instr.Mod() % 4) != 0 {
ibc.memMask = ScratchpadL1Mask
ibc.MemMask = ScratchpadL1Mask
} else {
ibc.memMask = ScratchpadL2Mask
ibc.MemMask = ScratchpadL2Mask
}
} else {
ibc.isrc = &Zero
ibc.memMask = ScratchpadL3Mask
ibc.Opcode = VM_ISUB_MZ
ibc.MemMask = ScratchpadL3Mask
ibc.Imm = uint64(ibc.getScratchpadZeroAddress())
}
registerUsage[dst] = i
case 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61: // 16
ibc.Opcode = VM_IMUL_R
ibc.idst = &vm.reg.r[dst]
if src != dst {
ibc.isrc = &vm.reg.r[src]
} else {
ibc.imm = signExtend2sCompl(instr.IMM())
ibc.isrc = &ibc.imm // we are pointing within bytecode
if src == dst {
ibc.Imm = instr.IMM64()
ibc.Opcode = VM_IMUL_I
}
registerUsage[dst] = i
case 62, 63, 64, 65: //4
ibc.Opcode = VM_IMUL_M
ibc.idst = &vm.reg.r[dst]
ibc.imm = signExtend2sCompl(instr.IMM())
ibc.Imm = instr.IMM64()
if src != dst {
ibc.isrc = &vm.reg.r[src]
if (instr.Mod() % 4) != 0 {
ibc.memMask = ScratchpadL1Mask
ibc.MemMask = ScratchpadL1Mask
} else {
ibc.memMask = ScratchpadL2Mask
ibc.MemMask = ScratchpadL2Mask
}
} else {
ibc.isrc = &Zero
ibc.memMask = ScratchpadL3Mask
ibc.Opcode = VM_IMUL_MZ
ibc.MemMask = ScratchpadL3Mask
ibc.Imm = uint64(ibc.getScratchpadZeroAddress())
}
registerUsage[dst] = i
case 66, 67, 68, 69: //4
ibc.Opcode = VM_IMULH_R
ibc.idst = &vm.reg.r[dst]
ibc.isrc = &vm.reg.r[src]
registerUsage[dst] = i
case 70: //1
ibc.Opcode = VM_IMULH_M
ibc.idst = &vm.reg.r[dst]
ibc.imm = signExtend2sCompl(instr.IMM())
ibc.Imm = instr.IMM64()
if src != dst {
ibc.isrc = &vm.reg.r[src]
if (instr.Mod() % 4) != 0 {
ibc.memMask = ScratchpadL1Mask
ibc.MemMask = ScratchpadL1Mask
} else {
ibc.memMask = ScratchpadL2Mask
ibc.MemMask = ScratchpadL2Mask
}
} else {
ibc.isrc = &Zero
ibc.memMask = ScratchpadL3Mask
ibc.Opcode = VM_IMULH_MZ
ibc.MemMask = ScratchpadL3Mask
ibc.Imm = uint64(ibc.getScratchpadZeroAddress())
}
registerUsage[dst] = i
case 71, 72, 73, 74: //4
ibc.Opcode = VM_ISMULH_R
ibc.idst = &vm.reg.r[dst]
ibc.isrc = &vm.reg.r[src]
registerUsage[dst] = i
case 75: //1
ibc.Opcode = VM_ISMULH_M
ibc.idst = &vm.reg.r[dst]
ibc.imm = signExtend2sCompl(instr.IMM())
ibc.Imm = instr.IMM64()
if src != dst {
ibc.isrc = &vm.reg.r[src]
if (instr.Mod() % 4) != 0 {
ibc.memMask = ScratchpadL1Mask
ibc.MemMask = ScratchpadL1Mask
} else {
ibc.memMask = ScratchpadL2Mask
ibc.MemMask = ScratchpadL2Mask
}
} else {
ibc.isrc = &Zero
ibc.memMask = ScratchpadL3Mask
ibc.Opcode = VM_ISMULH_MZ
ibc.MemMask = ScratchpadL3Mask
ibc.Imm = uint64(ibc.getScratchpadZeroAddress())
}
registerUsage[dst] = i
case 76, 77, 78, 79, 80, 81, 82, 83: // 8
divisor := instr.IMM()
if !isZeroOrPowerOf2(divisor) {
ibc.Opcode = VM_IMUL_R
ibc.idst = &vm.reg.r[dst]
ibc.imm = randomx_reciprocal(divisor)
ibc.isrc = &ibc.imm
ibc.Opcode = VM_IMUL_I
ibc.Imm = reciprocal(divisor)
registerUsage[dst] = i
} else {
ibc.Opcode = VM_NOP
@ -290,66 +201,49 @@ func (vm *VM) Compile_TO_Bytecode() {
case 84, 85: //2
ibc.Opcode = VM_INEG_R
ibc.idst = &vm.reg.r[dst]
registerUsage[dst] = i
case 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100: //15
ibc.Opcode = VM_IXOR_R
ibc.idst = &vm.reg.r[dst]
if src != dst {
ibc.isrc = &vm.reg.r[src]
} else {
ibc.imm = signExtend2sCompl(instr.IMM())
ibc.isrc = &ibc.imm // we are pointing within bytecode
if src == dst {
ibc.Imm = instr.IMM64()
ibc.Opcode = VM_IXOR_I
}
registerUsage[dst] = i
case 101, 102, 103, 104, 105: //5
ibc.Opcode = VM_IXOR_M
ibc.idst = &vm.reg.r[dst]
ibc.imm = signExtend2sCompl(instr.IMM())
ibc.Imm = instr.IMM64()
if src != dst {
ibc.isrc = &vm.reg.r[src]
if (instr.Mod() % 4) != 0 {
ibc.memMask = ScratchpadL1Mask
ibc.MemMask = ScratchpadL1Mask
} else {
ibc.memMask = ScratchpadL2Mask
ibc.MemMask = ScratchpadL2Mask
}
} else {
ibc.isrc = &Zero
ibc.memMask = ScratchpadL3Mask
ibc.Opcode = VM_IXOR_MZ
ibc.MemMask = ScratchpadL3Mask
ibc.Imm = uint64(ibc.getScratchpadZeroAddress())
}
registerUsage[dst] = i
case 106, 107, 108, 109, 110, 111, 112, 113: //8
ibc.Opcode = VM_IROR_R
ibc.idst = &vm.reg.r[dst]
if src != dst {
ibc.isrc = &vm.reg.r[src]
} else {
ibc.imm = signExtend2sCompl(instr.IMM())
ibc.isrc = &ibc.imm // we are pointing within bytecode
if src == dst {
ibc.Imm = instr.IMM64()
ibc.Opcode = VM_IROR_I
}
registerUsage[dst] = i
case 114, 115: // 2 IROL_R
ibc.Opcode = VM_IROL_R
ibc.idst = &vm.reg.r[dst]
if src != dst {
ibc.isrc = &vm.reg.r[src]
} else {
ibc.imm = signExtend2sCompl(instr.IMM())
ibc.isrc = &ibc.imm // we are pointing within bytecode
if src == dst {
ibc.Imm = instr.IMM64()
ibc.Opcode = VM_IROL_I
}
registerUsage[dst] = i
case 116, 117, 118, 119: //4
if src != dst {
ibc.Opcode = VM_ISWAP_R
ibc.idst = &vm.reg.r[dst]
ibc.isrc = &vm.reg.r[src]
registerUsage[dst] = i
registerUsage[src] = i
} else {
@ -359,111 +253,100 @@ func (vm *VM) Compile_TO_Bytecode() {
// below are floating point instructions
case 120, 121, 122, 123: // 4
ibc.Opcode = VM_FSWAP_R
if dst < REGISTERCOUNTFLT {
ibc.fdst = &vm.reg.f[dst]
//ibc.Opcode = VM_FSWAP_R
if dst < RegistersCountFloat {
ibc.Opcode = VM_FSWAP_RF
} else {
ibc.fdst = &vm.reg.e[dst-REGISTERCOUNTFLT]
ibc.Opcode = VM_FSWAP_RE
ibc.Dst = dst - RegistersCountFloat
}
case 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139: //16
dst := instr.Dst() % REGISTERCOUNTFLT // bit shift optimization
src := instr.Src() % REGISTERCOUNTFLT
ibc.Dst = instr.Dst() % RegistersCountFloat // bit shift optimization
ibc.Src = instr.Src() % RegistersCountFloat
ibc.Opcode = VM_FADD_R
ibc.fdst = &vm.reg.f[dst]
ibc.fsrc = &vm.reg.a[src]
case 140, 141, 142, 143, 144: //5
dst := instr.Dst() % REGISTERCOUNTFLT // bit shift optimization
ibc.Dst = instr.Dst() % RegistersCountFloat // bit shift optimization
ibc.Opcode = VM_FADD_M
ibc.fdst = &vm.reg.f[dst]
ibc.isrc = &vm.reg.r[src]
if (instr.Mod() % 4) != 0 {
ibc.memMask = ScratchpadL1Mask
ibc.MemMask = ScratchpadL1Mask
} else {
ibc.memMask = ScratchpadL2Mask
ibc.MemMask = ScratchpadL2Mask
}
ibc.imm = signExtend2sCompl(instr.IMM())
ibc.Imm = instr.IMM64()
case 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160: //16
dst := instr.Dst() % REGISTERCOUNTFLT // bit shift optimization
src := instr.Src() % REGISTERCOUNTFLT
ibc.Dst = instr.Dst() % RegistersCountFloat // bit shift optimization
ibc.Src = instr.Src() % RegistersCountFloat
ibc.Opcode = VM_FSUB_R
ibc.fdst = &vm.reg.f[dst]
ibc.fsrc = &vm.reg.a[src]
case 161, 162, 163, 164, 165: //5
dst := instr.Dst() % REGISTERCOUNTFLT // bit shift optimization
ibc.Dst = instr.Dst() % RegistersCountFloat // bit shift optimization
ibc.Opcode = VM_FSUB_M
ibc.fdst = &vm.reg.f[dst]
ibc.isrc = &vm.reg.r[src]
if (instr.Mod() % 4) != 0 {
ibc.memMask = ScratchpadL1Mask
ibc.MemMask = ScratchpadL1Mask
} else {
ibc.memMask = ScratchpadL2Mask
ibc.MemMask = ScratchpadL2Mask
}
ibc.imm = signExtend2sCompl(instr.IMM())
ibc.Imm = instr.IMM64()
case 166, 167, 168, 169, 170, 171: //6
dst := instr.Dst() % REGISTERCOUNTFLT // bit shift optimization
ibc.Dst = instr.Dst() % RegistersCountFloat // bit shift optimization
ibc.Opcode = VM_FSCAL_R
ibc.fdst = &vm.reg.f[dst]
case 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203: //32
dst := instr.Dst() % REGISTERCOUNTFLT // bit shift optimization
src := instr.Src() % REGISTERCOUNTFLT
ibc.Dst = instr.Dst() % RegistersCountFloat // bit shift optimization
ibc.Src = instr.Src() % RegistersCountFloat
ibc.Opcode = VM_FMUL_R
ibc.fdst = &vm.reg.e[dst]
ibc.fsrc = &vm.reg.a[src]
case 204, 205, 206, 207: //4
dst := instr.Dst() % REGISTERCOUNTFLT // bit shift optimization
ibc.Dst = instr.Dst() % RegistersCountFloat // bit shift optimization
ibc.Opcode = VM_FDIV_M
ibc.fdst = &vm.reg.e[dst]
ibc.isrc = &vm.reg.r[src]
if (instr.Mod() % 4) != 0 {
ibc.memMask = ScratchpadL1Mask
ibc.MemMask = ScratchpadL1Mask
} else {
ibc.memMask = ScratchpadL2Mask
ibc.MemMask = ScratchpadL2Mask
}
ibc.imm = signExtend2sCompl(instr.IMM())
ibc.Imm = instr.IMM64()
case 208, 209, 210, 211, 212, 213: //6
dst := instr.Dst() % REGISTERCOUNTFLT // bit shift optimization
ibc.Dst = instr.Dst() % RegistersCountFloat // bit shift optimization
ibc.Opcode = VM_FSQRT_R
ibc.fdst = &vm.reg.e[dst]
case 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238: //25 // CBRANCH and CFROUND are interchanged
ibc.Opcode = VM_CBRANCH
reg := instr.Dst() % REGISTERSCOUNT
ibc.isrc = &vm.reg.r[reg]
ibc.target = int16(registerUsage[reg])
//TODO:??? it's +1 on other
ibc.Dst = instr.Dst() % RegistersCount
target := uint16(int16(registerUsage[ibc.Dst]))
// set target!
ibc.Src = uint8(target)
ibc.ImmB = uint8(target >> 8)
shift := uint64(instr.Mod()>>4) + CONDITIONOFFSET
//conditionmask := CONDITIONMASK << shift
ibc.imm = signExtend2sCompl(instr.IMM()) | (uint64(1) << shift)
ibc.Imm = instr.IMM64() | (uint64(1) << shift)
if CONDITIONOFFSET > 0 || shift > 0 {
ibc.imm &= (^(uint64(1) << (shift - 1)))
ibc.Imm &= ^(uint64(1) << (shift - 1))
}
ibc.memMask = CONDITIONMASK << shift
ibc.MemMask = CONDITIONMASK << shift
for j := 0; j < REGISTERSCOUNT; j++ {
for j := 0; j < RegistersCount; j++ {
registerUsage[j] = i
}
case 239: //1
ibc.Opcode = VM_CFROUND
ibc.isrc = &vm.reg.r[src]
ibc.imm = uint64(instr.IMM() & 63)
ibc.Imm = uint64(instr.IMM() & 63)
case 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255: //16
ibc.Opcode = VM_ISTORE
ibc.idst = &vm.reg.r[dst]
ibc.isrc = &vm.reg.r[src]
ibc.imm = signExtend2sCompl(instr.IMM())
ibc.Imm = instr.IMM64()
if (instr.Mod() >> 4) < STOREL3CONDITION {
if (instr.Mod() % 4) != 0 {
ibc.memMask = ScratchpadL1Mask
ibc.MemMask = ScratchpadL1Mask
} else {
ibc.memMask = ScratchpadL2Mask
ibc.MemMask = ScratchpadL2Mask
}
} else {
ibc.memMask = ScratchpadL3Mask
ibc.MemMask = ScratchpadL3Mask
}
default:
@ -471,145 +354,18 @@ func (vm *VM) Compile_TO_Bytecode() {
}
}
}
type InstructionByteCode struct {
dst, src byte
idst, isrc *uint64
fdst, fsrc *[2]float64
imm uint64
simm int64
Opcode VM_Instruction_Type
target int16
shift uint8
memMask uint32
/*
union {
int_reg_t* idst;
rx_vec_f128* fdst;
};
union {
int_reg_t* isrc;
rx_vec_f128* fsrc;
};
union {
uint64_t imm;
int64_t simm;
};
InstructionType type;
union {
int16_t target;
uint16_t shift;
};
uint32_t memMask;
*/
type ScratchPad [ScratchpadSize]byte
// Store64 writes val into the scratchpad at byte offset addr via a direct
// unsafe memory write, i.e. in the host's native byte order.
// NOTE(review): this assumes a little-endian host (matching upstream RandomX)
// and an in-bounds addr — confirm; the commented line is the portable
// little-endian equivalent.
func (pad *ScratchPad) Store64(addr uint32, val uint64) {
	*(*uint64)(unsafe.Pointer(&pad[addr])) = val
	//binary.LittleEndian.PutUint64(pad[addr:], val)
}
func (ibc *InstructionByteCode) getScratchpadAddress() uint64 {
return (*ibc.isrc + ibc.imm) & uint64(ibc.memMask)
// Load64 reads a 64-bit word from the scratchpad at byte offset addr via a
// direct unsafe load (host-native byte order; counterpart of Store64).
// NOTE(review): assumes addr is in-bounds for an 8-byte read — confirm callers
// always pass masked addresses.
func (pad *ScratchPad) Load64(addr uint32) uint64 {
	return *(*uint64)(unsafe.Pointer(&pad[addr]))
}
func (ibc *InstructionByteCode) getScratchpadDestAddress() uint64 {
return (*ibc.idst + ibc.imm) & uint64(ibc.memMask)
}
func (vm *VM) Load64(addr uint64) uint64 {
return *(*uint64)(unsafe.Pointer(&vm.ScratchPad[addr]))
}
func (vm *VM) Load32(addr uint64) uint32 {
return *(*uint32)(unsafe.Pointer(&vm.ScratchPad[addr]))
}
func (vm *VM) Load32F(addr uint64) (lo, hi float64) {
a := *(*[2]int32)(unsafe.Pointer(&vm.ScratchPad[addr]))
return float64(a[LOW]), float64(a[HIGH])
}
func (vm *VM) Load32FA(addr uint64) [2]float64 {
a := *(*[2]int32)(unsafe.Pointer(&vm.ScratchPad[addr]))
return [2]float64{float64(a[LOW]), float64(a[HIGH])}
}
func (vm *VM) InterpretByteCode() {
for pc := 0; pc < RANDOMX_PROGRAM_SIZE; pc++ {
ibc := &vm.ByteCode[pc]
switch ibc.Opcode {
case VM_IADD_RS:
*ibc.idst += (*ibc.isrc << ibc.shift) + ibc.imm
case VM_IADD_M:
*ibc.idst += vm.Load64(ibc.getScratchpadAddress())
case VM_ISUB_R:
*ibc.idst -= *ibc.isrc
case VM_ISUB_M:
*ibc.idst -= vm.Load64(ibc.getScratchpadAddress())
case VM_IMUL_R:
// also handles imul_rcp
*ibc.idst *= *ibc.isrc
case VM_IMUL_M:
*ibc.idst *= vm.Load64(ibc.getScratchpadAddress())
case VM_IMULH_R:
*ibc.idst, _ = bits.Mul64(*ibc.idst, *ibc.isrc)
case VM_IMULH_M:
*ibc.idst, _ = bits.Mul64(*ibc.idst, vm.Load64(ibc.getScratchpadAddress()))
case VM_ISMULH_R:
*ibc.idst = smulh(int64(*ibc.idst), int64(*ibc.isrc))
case VM_ISMULH_M:
*ibc.idst = smulh(int64(*ibc.idst), int64(vm.Load64(ibc.getScratchpadAddress())))
case VM_INEG_R:
*ibc.idst = (^(*ibc.idst)) + 1 // 2's complement negative
case VM_IXOR_R:
*ibc.idst ^= *ibc.isrc
case VM_IXOR_M:
*ibc.idst ^= vm.Load64(ibc.getScratchpadAddress())
case VM_IROR_R:
*ibc.idst = bits.RotateLeft64(*ibc.idst, 0-int(*ibc.isrc&63))
case VM_IROL_R:
*ibc.idst = bits.RotateLeft64(*ibc.idst, int(*ibc.isrc&63))
case VM_ISWAP_R:
*ibc.idst, *ibc.isrc = *ibc.isrc, *ibc.idst
case VM_FSWAP_R:
ibc.fdst[HIGH], ibc.fdst[LOW] = ibc.fdst[LOW], ibc.fdst[HIGH]
case VM_FADD_R:
ibc.fdst[LOW] += ibc.fsrc[LOW]
ibc.fdst[HIGH] += ibc.fsrc[HIGH]
case VM_FADD_M:
lo, hi := vm.Load32F(ibc.getScratchpadAddress())
ibc.fdst[LOW] += lo
ibc.fdst[HIGH] += hi
case VM_FSUB_R:
ibc.fdst[LOW] -= ibc.fsrc[LOW]
ibc.fdst[HIGH] -= ibc.fsrc[HIGH]
case VM_FSUB_M:
lo, hi := vm.Load32F(ibc.getScratchpadAddress())
ibc.fdst[LOW] -= lo
ibc.fdst[HIGH] -= hi
case VM_FSCAL_R:
// no dependent on rounding modes
ibc.fdst[LOW] = math.Float64frombits(math.Float64bits(ibc.fdst[LOW]) ^ 0x80F0000000000000)
ibc.fdst[HIGH] = math.Float64frombits(math.Float64bits(ibc.fdst[HIGH]) ^ 0x80F0000000000000)
case VM_FMUL_R:
ibc.fdst[LOW] *= ibc.fsrc[LOW]
ibc.fdst[HIGH] *= ibc.fsrc[HIGH]
case VM_FDIV_M:
lo, hi := vm.Load32F(ibc.getScratchpadAddress())
ibc.fdst[LOW] /= MaskRegisterExponentMantissa(lo, vm.config.eMask[LOW])
ibc.fdst[HIGH] /= MaskRegisterExponentMantissa(hi, vm.config.eMask[HIGH])
case VM_FSQRT_R:
ibc.fdst[LOW] = math.Sqrt(ibc.fdst[LOW])
ibc.fdst[HIGH] = math.Sqrt(ibc.fdst[HIGH])
case VM_CBRANCH:
*ibc.isrc += ibc.imm
if (*ibc.isrc & uint64(ibc.memMask)) == 0 {
pc = int(ibc.target)
}
case VM_CFROUND:
tmp := (bits.RotateLeft64(*ibc.isrc, 0-int(ibc.imm))) % 4 // rotate right
asm.SetRoundingMode(asm.RoundingMode(tmp))
case VM_ISTORE:
binary.LittleEndian.PutUint64(vm.ScratchPad[(*ibc.idst+ibc.imm)&uint64(ibc.memMask):], *ibc.isrc)
case VM_NOP: // we do nothing
}
}
// Load32 reads a 32-bit word from the scratchpad at byte offset addr via a
// direct unsafe load (host-native byte order).
func (pad *ScratchPad) Load32(addr uint32) uint32 {
	return *(*uint32)(unsafe.Pointer(&pad[addr]))
}

15
vm_instruction_native.go Normal file
View file

@ -0,0 +1,15 @@
//go:build (arm64 || amd64 || 386) && !purego
package randomx
import "unsafe"
// Load32F loads two consecutive signed 32-bit integers from the scratchpad at
// addr and returns them converted to float64 as the (low, high) halves of a
// float register operand. Uses the hardware int-to-float conversion; the
// purego build performs the same conversion through softfloat64.
func (pad *ScratchPad) Load32F(addr uint32) (lo, hi float64) {
	a := *(*[2]int32)(unsafe.Pointer(&pad[addr]))
	return float64(a[LOW]), float64(a[HIGH])
}
// Load32FA loads two consecutive signed 32-bit scratchpad words at addr and
// returns them as a [2]float64 pair — the same conversion as Load32F,
// packaged as an array.
func (pad *ScratchPad) Load32FA(addr uint32) [2]float64 {
	lo, hi := pad.Load32F(addr)
	return [2]float64{lo, hi}
}

18
vm_instruction_purego.go Normal file
View file

@ -0,0 +1,18 @@
//go:build (!arm64 && !amd64 && !386) || purego
package randomx
import (
"git.gammaspectra.live/P2Pool/softfloat64"
"unsafe"
)
// Load32F loads two consecutive signed 32-bit integers from the scratchpad at
// addr and returns them converted to float64 as the (low, high) halves of a
// float register operand. The conversion goes through softfloat64 so results
// are bit-identical on hosts without IEEE 754 hardware semantics.
func (pad *ScratchPad) Load32F(addr uint32) (lo, hi float64) {
	a := *(*[2]int32)(unsafe.Pointer(&pad[addr]))
	return softfloat64.Int32ToFloat64(a[LOW]), softfloat64.Int32ToFloat64(a[HIGH])
}
// Load32FA loads two consecutive signed 32-bit scratchpad words at addr and
// returns them as a [2]float64 pair — the same soft-float conversion as
// Load32F, packaged as an array.
func (pad *ScratchPad) Load32FA(addr uint32) [2]float64 {
	lo, hi := pad.Load32F(addr)
	return [2]float64{lo, hi}
}