Compare commits

...

4 commits

Author SHA1 Message Date
DataHoarder 6b20460fbb
Match functionality / API with upstream randomx
Some checks reported errors
continuous-integration/drone/push Build was killed
2024-05-01 22:43:52 +02:00
DataHoarder c50cbc56b5
Reduce heap allocations under VM 2024-05-01 16:58:49 +02:00
DataHoarder 1d83de4880
Split hard/soft AES implementations 2024-05-01 16:25:35 +02:00
DataHoarder 25b7fc4cc0
Move internal packages to internal directory 2024-05-01 11:36:43 +02:00
47 changed files with 752 additions and 600 deletions

View file

@ -28,10 +28,14 @@ For the C++ implementation and design of RandomX, see [github.com/tevador/Random
| VM Execution | **native** | **native+jit** | soft | **native** | soft | soft | soft | soft |
A pure Golang implementation can be used on platforms without hard float support or via the `purego` build flag manually.
A pure Golang implementation can be used on platforms without hard float support or via the `purego` build tag manually.
[TinyGo](https://github.com/tinygo-org/tinygo) is supported under the `purego` build tag.
Any platform with no hard float support or when enabled manually will use soft float, using [softfloat64](https://git.gammaspectra.live/P2Pool/softfloat64). This will be very slow.
Full mode is NOT recommended on 32-bit systems and is unsupported, although depending on the system it might be able to run. You might want to manually run `runtime.GC()` after cleaning up the dataset to free its memory.
Native hard float can be added with supporting rounding mode under _asm_.
JIT is only supported on Unix systems (Linux, *BSD, macOS), and can be hard-disabled via the `disable_jit` build tag, or disabled at runtime.

View file

@ -1,50 +0,0 @@
//go:build amd64 && !purego
package aes
import (
"git.gammaspectra.live/P2Pool/go-randomx/v3/asm"
"git.gammaspectra.live/P2Pool/go-randomx/v3/keys"
"golang.org/x/sys/cpu"
"unsafe"
)
var supportsAES = cpu.X86.HasAES
// fillAes1Rx4 fills output with data generated from state, one len(state)
// (64-byte) chunk at a time. state is modified in place.
//
// When supportsAES is set the whole job is delegated to the assembly routine;
// otherwise each chunk is produced by one aesroundtrip_decenc pass over the
// state followed by a copy of the state into output.
func fillAes1Rx4(state *[64]byte, output []byte) {
	// View the byte state as four 4-word lanes without copying it.
	lanes := (*[4][4]uint32)(unsafe.Pointer(state))

	if !supportsAES {
		for off := 0; off < len(output); off += len(state) {
			aesroundtrip_decenc(lanes, &keys.AesGenerator1R_Keys)
			copy(output[off:], state[:])
		}
		return
	}

	asm.FillAes1Rx4(lanes, &keys.AesGenerator1R_Keys, unsafe.SliceData(output), uint64(len(output)))
}
// hashAes1Rx4 hashes input into the 64-byte output.
//
// With hardware AES available the assembly routine does all the work. The
// software path starts from a copy of keys.AesHash1R_State, mixes in every
// 64-byte chunk of input as round keys, applies the two extra keys and then
// copies the resulting state into output.
func hashAes1Rx4(input []byte, output *[64]byte) {
	if supportsAES {
		asm.HashAes1Rx4(&keys.AesHash1R_State, &keys.AesHash1R_XKeys, output, unsafe.SliceData(input), uint64(len(input)))
		return
	}

	// Private copy of the initial state; the package-level value is not touched.
	lanes := keys.AesHash1R_State

	for off := 0; off < len(input); off += 64 {
		chunk := (*[4][4]uint32)(unsafe.Pointer(unsafe.SliceData(input[off:])))
		aesroundtrip_encdec(&lanes, chunk)
	}

	aesroundtrip_encdec1(&lanes, &keys.AesHash1R_XKeys[0])
	aesroundtrip_encdec1(&lanes, &keys.AesHash1R_XKeys[1])

	raw := (*[64]byte)(unsafe.Pointer(&lanes))
	copy(output[:], raw[:])
}

View file

@ -1,36 +0,0 @@
//go:build !amd64 || purego
package aes
import (
"git.gammaspectra.live/P2Pool/go-randomx/v3/keys"
"unsafe"
)
// fillAes1Rx4 fills output in len(state)-sized (64-byte) steps: each step runs
// one aesroundtrip_decenc pass over the state and copies the state to output.
// state is updated in place.
func fillAes1Rx4(state *[64]byte, output []byte) {
	// Alias the state as four 4-word lanes; no copy is made.
	lanes := (*[4][4]uint32)(unsafe.Pointer(state))

	off := 0
	for off < len(output) {
		aesroundtrip_decenc(lanes, &keys.AesGenerator1R_Keys)
		copy(output[off:], state[:])
		off += len(state)
	}
}
// hashAes1Rx4 hashes input into the 64-byte output, starting from a copy of
// keys.AesHash1R_State and using each 64-byte chunk of input as round keys.
func hashAes1Rx4(input []byte, output *[64]byte) {
	// Work on a private copy of the initial state.
	lanes := keys.AesHash1R_State

	for off := 0; off < len(input); off += 64 {
		chunk := (*[4][4]uint32)(unsafe.Pointer(unsafe.SliceData(input[off:])))
		aesroundtrip_encdec(&lanes, chunk)
	}

	// Two finishing rounds with the extra keys.
	aesroundtrip_encdec1(&lanes, &keys.AesHash1R_XKeys[0])
	aesroundtrip_encdec1(&lanes, &keys.AesHash1R_XKeys[1])

	raw := (*[64]byte)(unsafe.Pointer(&lanes))
	copy(output[:], raw[:])
}

View file

@ -1,40 +0,0 @@
//go:build amd64 && !purego
package aes
import (
"git.gammaspectra.live/P2Pool/go-randomx/v3/asm"
)
// aesroundtrip_decenc applies one round to each of the four lanes:
// decrypt on lanes 0 and 2, encrypt on lanes 1 and 3, each with its own key.
// The assembly routine is used when supportsAES is set.
func aesroundtrip_decenc(states *[4][4]uint32, keys *[4][4]uint32) {
	if !supportsAES {
		soft_aesdec(&states[0], &keys[0])
		soft_aesenc(&states[1], &keys[1])
		soft_aesdec(&states[2], &keys[2])
		soft_aesenc(&states[3], &keys[3])
		return
	}
	asm.AESRoundTrip_DecEnc(states, keys)
}
// aesroundtrip_encdec applies one round to each of the four lanes:
// encrypt on lanes 0 and 2, decrypt on lanes 1 and 3, each with its own key.
// The assembly routine is used when supportsAES is set.
func aesroundtrip_encdec(states *[4][4]uint32, keys *[4][4]uint32) {
	if !supportsAES {
		soft_aesenc(&states[0], &keys[0])
		soft_aesdec(&states[1], &keys[1])
		soft_aesenc(&states[2], &keys[2])
		soft_aesdec(&states[3], &keys[3])
		return
	}
	asm.AESRoundTrip_EncDec(states, keys)
}
// aesroundtrip_encdec1 is like aesroundtrip_encdec but uses the same single
// key for all four lanes (encrypt on 0 and 2, decrypt on 1 and 3).
func aesroundtrip_encdec1(states *[4][4]uint32, key *[4]uint32) {
	if !supportsAES {
		soft_aesenc(&states[0], key)
		soft_aesdec(&states[1], key)
		soft_aesenc(&states[2], key)
		soft_aesdec(&states[3], key)
		return
	}
	asm.AESRoundTrip_EncDec1(states, key)
}

View file

@ -1,32 +0,0 @@
//go:build !amd64 || purego
package aes
// aesenc runs the software AES encryption round on state with key.
func aesenc(state, key *[4]uint32) {
	soft_aesenc(state, key)
}
// aesdec runs the software AES decryption round on state with key.
func aesdec(state, key *[4]uint32) {
	soft_aesdec(state, key)
}
// aesroundtrip_decenc applies one round per lane with per-lane keys:
// even lanes (0, 2) are decrypted, odd lanes (1, 3) are encrypted.
func aesroundtrip_decenc(states *[4][4]uint32, keys *[4][4]uint32) {
	for lane := 0; lane < len(states); lane += 2 {
		aesdec(&states[lane], &keys[lane])
		aesenc(&states[lane+1], &keys[lane+1])
	}
}
// aesroundtrip_encdec applies one round per lane with per-lane keys:
// even lanes (0, 2) are encrypted, odd lanes (1, 3) are decrypted.
func aesroundtrip_encdec(states *[4][4]uint32, keys *[4][4]uint32) {
	for lane := 0; lane < len(states); lane += 2 {
		aesenc(&states[lane], &keys[lane])
		aesdec(&states[lane+1], &keys[lane+1])
	}
}
// aesroundtrip_encdec1 applies one round per lane using the same key for all
// lanes: even lanes (0, 2) are encrypted, odd lanes (1, 3) are decrypted.
func aesroundtrip_encdec1(states *[4][4]uint32, key *[4]uint32) {
	for lane := 0; lane < len(states); lane += 2 {
		aesenc(&states[lane], key)
		aesdec(&states[lane+1], key)
	}
}

168
cache.go
View file

@ -1,9 +1,9 @@
package randomx
import (
"git.gammaspectra.live/P2Pool/go-randomx/v3/argon2"
"git.gammaspectra.live/P2Pool/go-randomx/v3/blake2"
"git.gammaspectra.live/P2Pool/go-randomx/v3/keys"
"git.gammaspectra.live/P2Pool/go-randomx/v3/internal/argon2"
"git.gammaspectra.live/P2Pool/go-randomx/v3/internal/blake2"
"git.gammaspectra.live/P2Pool/go-randomx/v3/internal/keys"
"runtime"
"slices"
"unsafe"
@ -17,30 +17,46 @@ func (m *MemoryBlock) GetLine(addr uint64) *RegisterLine {
}
type Cache struct {
Blocks []MemoryBlock
blocks []MemoryBlock
Programs [RANDOMX_PROGRAM_COUNT]SuperScalarProgram
programs [RANDOMX_PROGRAM_COUNT]SuperScalarProgram
JitPrograms [RANDOMX_PROGRAM_COUNT]SuperScalarProgramFunc
jitPrograms [RANDOMX_PROGRAM_COUNT]SuperScalarProgramFunc
Flags uint64
flags Flags
}
func NewCache(flags uint64) *Cache {
if flags == RANDOMX_FLAG_DEFAULT {
flags = RANDOMX_FLAG_JIT
}
// NewCache creates a Cache structure (upstream: randomx_cache) and allocates memory for RandomX Cache.
//
// flags is any combination of these 2 flags (each flag can be set or not set):
//   - RANDOMX_FLAG_LARGE_PAGES - allocate memory in large pages
//   - RANDOMX_FLAG_JIT - create cache structure with JIT compilation support; this makes
//     subsequent Dataset initialization faster
//
// Optionally, one of these two flags may be selected:
//   - RANDOMX_FLAG_ARGON2_SSSE3 - optimized Argon2 for CPUs with the SSSE3 instruction set;
//     makes subsequent cache initialization faster
//   - RANDOMX_FLAG_ARGON2_AVX2 - optimized Argon2 for CPUs with the AVX2 instruction set;
//     makes subsequent cache initialization faster
//
// Returns a pointer to an allocated Cache structure.
// The upstream C API documents a NULL return if:
//
//	(1) memory allocation fails
//	(2) the RANDOMX_FLAG_JIT is set and JIT compilation is not supported on the current platform
//	(3) an invalid or unsupported RANDOMX_FLAG_ARGON2 value is set
//
// NOTE(review): confirm which of these cases, if any, can produce a nil result here.
func NewCache(flags Flags) *Cache {
return &Cache{
Flags: flags,
flags: flags,
}
}
func (cache *Cache) HasJIT() bool {
return cache.Flags&RANDOMX_FLAG_JIT > 0 && cache.JitPrograms[0] != nil
func (c *Cache) hasInitializedJIT() bool {
return c.flags.HasJIT() && c.jitPrograms[0] != nil
}
func (cache *Cache) Close() error {
for _, p := range cache.JitPrograms {
// Close Releases all memory occupied by the Cache structure.
func (c *Cache) Close() error {
for _, p := range c.jitPrograms {
if p != nil {
err := p.Close()
if err != nil {
@ -51,28 +67,35 @@ func (cache *Cache) Close() error {
return nil
}
func (cache *Cache) Init(key []byte) {
if cache.Flags&RANDOMX_FLAG_JIT > 0 {
// Lock due to external JIT madness
runtime.LockOSThread()
defer runtime.UnlockOSThread()
}
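// Init initializes the cache memory and SuperscalarHash using the provided key value.
// Does nothing if called again with the same key value.
// NOTE(review): no same-key check is visible in the body below - confirm the sentence above still holds.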
func (c *Cache) Init(key []byte) {
kkey := slices.Clone(key)
argonBlocks := argon2.BuildBlocks(kkey, []byte(RANDOMX_ARGON_SALT), []byte{}, []byte{}, RANDOMX_ARGON_ITERATIONS, RANDOMX_ARGON_MEMORY, RANDOMX_ARGON_LANES, 0)
argonBlocks := argon2.BuildBlocks(kkey, []byte(RANDOMX_ARGON_SALT), RANDOMX_ARGON_ITERATIONS, RANDOMX_ARGON_MEMORY, RANDOMX_ARGON_LANES)
memoryBlocks := unsafe.Slice((*MemoryBlock)(unsafe.Pointer(unsafe.SliceData(argonBlocks))), int(unsafe.Sizeof(argon2.Block{}))/int(unsafe.Sizeof(MemoryBlock{}))*len(argonBlocks))
cache.Blocks = memoryBlocks
c.blocks = memoryBlocks
const nonce uint32 = 0
gen := blake2.New(key, nonce)
for i := 0; i < 8; i++ {
cache.Programs[i] = BuildSuperScalarProgram(gen) // build a superscalar program
if cache.Flags&RANDOMX_FLAG_JIT > 0 {
cache.JitPrograms[i] = generateSuperscalarCode(cache.Programs[i])
gen := blake2.New(kkey, nonce)
for i := range c.programs {
// build a superscalar program
prog := BuildSuperScalarProgram(gen)
if c.flags.HasJIT() {
c.jitPrograms[i] = generateSuperscalarCode(prog)
// fallback if can't compile program
if c.jitPrograms[i] == nil {
c.programs[i] = prog
} else {
c.programs[i] = SuperScalarProgram{prog[0]}
}
} else {
c.programs[i] = prog
}
}
@ -80,16 +103,20 @@ func (cache *Cache) Init(key []byte) {
const Mask = CacheSize/CacheLineSize - 1
// GetMixBlock fetch a 64 byte block in uint64 form
func (cache *Cache) GetMixBlock(addr uint64) *RegisterLine {
// getMixBlock fetch a 64 byte block in uint64 form
func (c *Cache) getMixBlock(addr uint64) *RegisterLine {
addr = (addr & Mask) * CacheLineSize
block := addr / 1024
return cache.Blocks[block].GetLine(addr % 1024)
return c.blocks[block].GetLine(addr % 1024)
}
func (cache *Cache) InitDatasetItem(rl *RegisterLine, itemNumber uint64) {
// GetMemory returns the cache's internal block slice (c.blocks).
// The slice is returned as-is, not copied.
func (c *Cache) GetMemory() []MemoryBlock {
return c.blocks
}
func (c *Cache) initDataset(rl *RegisterLine, itemNumber uint64) {
registerValue := itemNumber
rl[0] = (itemNumber + 1) * keys.SuperScalar_Constants[0]
@ -101,54 +128,45 @@ func (cache *Cache) InitDatasetItem(rl *RegisterLine, itemNumber uint64) {
rl[6] = rl[0] ^ keys.SuperScalar_Constants[6]
rl[7] = rl[0] ^ keys.SuperScalar_Constants[7]
for i := 0; i < RANDOMX_CACHE_ACCESSES; i++ {
mix := cache.GetMixBlock(registerValue)
program := cache.Programs[i]
executeSuperscalar(program.Program(), rl)
for q := range rl {
rl[q] ^= mix[q]
if c.hasInitializedJIT() {
if c.flags.HasJIT() {
// Lock due to external JIT madness
runtime.LockOSThread()
defer runtime.UnlockOSThread()
}
registerValue = rl[program.AddressRegister()]
for i := 0; i < RANDOMX_CACHE_ACCESSES; i++ {
mix := c.getMixBlock(registerValue)
c.jitPrograms[i].Execute(uintptr(unsafe.Pointer(rl)))
for q := range rl {
rl[q] ^= mix[q]
}
registerValue = rl[c.programs[i].AddressRegister()]
}
} else {
for i := 0; i < RANDOMX_CACHE_ACCESSES; i++ {
mix := c.getMixBlock(registerValue)
program := c.programs[i]
executeSuperscalar(program.Program(), rl)
for q := range rl {
rl[q] ^= mix[q]
}
registerValue = rl[program.AddressRegister()]
}
}
}
func (cache *Cache) InitDatasetItemJIT(rl *RegisterLine, itemNumber uint64) {
registerValue := itemNumber
rl[0] = (itemNumber + 1) * keys.SuperScalar_Constants[0]
rl[1] = rl[0] ^ keys.SuperScalar_Constants[1]
rl[2] = rl[0] ^ keys.SuperScalar_Constants[2]
rl[3] = rl[0] ^ keys.SuperScalar_Constants[3]
rl[4] = rl[0] ^ keys.SuperScalar_Constants[4]
rl[5] = rl[0] ^ keys.SuperScalar_Constants[5]
rl[6] = rl[0] ^ keys.SuperScalar_Constants[6]
rl[7] = rl[0] ^ keys.SuperScalar_Constants[7]
for i := 0; i < RANDOMX_CACHE_ACCESSES; i++ {
mix := cache.GetMixBlock(registerValue)
cache.JitPrograms[i].Execute(uintptr(unsafe.Pointer(rl)))
for q := range rl {
rl[q] ^= mix[q]
}
registerValue = rl[cache.Programs[i].AddressRegister()]
}
}
func (cache *Cache) InitDataset(dataset []RegisterLine, startItem, endItem uint64) {
func (c *Cache) datasetInit(dataset []RegisterLine, startItem, endItem uint64) {
for itemNumber := startItem; itemNumber < endItem; itemNumber, dataset = itemNumber+1, dataset[1:] {
if cache.HasJIT() {
cache.InitDatasetItemJIT(&dataset[0], itemNumber)
} else {
cache.InitDatasetItem(&dataset[0], itemNumber)
}
c.initDataset(&dataset[0], itemNumber)
}
}

View file

@ -29,7 +29,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package randomx
import "git.gammaspectra.live/P2Pool/go-randomx/v3/argon2"
import "git.gammaspectra.live/P2Pool/go-randomx/v3/internal/argon2"
// see reference configuration.h
// Cache size in KiB. Must be a power of 2.
@ -81,6 +81,8 @@ const RANDOMX_JUMP_BITS = 8
// Jump condition mask offset in bits. The sum of RANDOMX_JUMP_BITS and RANDOMX_JUMP_OFFSET must not exceed 16.
const RANDOMX_JUMP_OFFSET = 8
const RANDOMX_HASH_SIZE = 32
const DatasetExtraItems = RANDOMX_DATASET_EXTRA_SIZE / RANDOMX_DATASET_ITEM_SIZE
const SuperscalarMaxSize = 3*RANDOMX_SUPERSCALAR_LATENCY + 2

View file

@ -1,27 +1,77 @@
package randomx
import "sync"
import (
"errors"
"sync"
)
type Dataset interface {
InitDataset(startItem, itemCount uint64)
ReadDataset(address uint64, r *RegisterLine)
PrefetchDataset(address uint64)
Flags() uint64
Cache() *Cache
Memory() []RegisterLine
const DatasetSize = RANDOMX_DATASET_BASE_SIZE + RANDOMX_DATASET_EXTRA_SIZE
const DatasetItemCount = DatasetSize / CacheLineSize
// Dataset holds the dataset storage as a slice of RegisterLine items.
type Dataset struct {
// memory is the backing storage; readDataset indexes it by address/CacheLineSize.
memory []RegisterLine
}
func NewDataset(cache *Cache) Dataset {
if cache.Flags&RANDOMX_FLAG_FULL_MEM > 0 {
if ds := NewFullDataset(cache); ds != nil {
return ds
// NewDataset creates a Dataset structure (upstream: randomx_dataset) and allocates memory for RandomX Dataset.
// Only one flag is supported (can be set or not set): RANDOMX_FLAG_LARGE_PAGES - allocate memory in large pages
// Returns a nil Dataset and a non-nil error if allocation fails
func NewDataset(flags Flags) (result *Dataset, err error) {
defer func() {
//catch too large memory allocation or unable to allocate, for example on 32-bit targets or out of memory
if r := recover(); r != nil {
result = nil
if e, ok := r.(error); ok && e != nil {
err = e
} else {
err = errors.New("out of memory")
}
}
return nil
}
return NewLightDataset(cache)
}()
//todo: implement large pages, align allocation
alignedMemory := make([]RegisterLine, DatasetItemCount)
//todo: err on not large pages
return &Dataset{
memory: alignedMemory,
}, nil
}
func InitDatasetParallel(dataset Dataset, n int) {
// prefetchDataset is a no-op in this implementation; address is ignored.
func (d *Dataset) prefetchDataset(address uint64) {
}
// readDataset XORs the dataset item located at address into r.
// The item index is address/CacheLineSize; an index outside d.memory panics.
func (d *Dataset) readDataset(address uint64, r *RegisterLine) {
	item := &d.memory[address/CacheLineSize]
	for i := 0; i < len(r); i++ {
		r[i] ^= item[i]
	}
}
// Memory returns the internal memory buffer of the dataset structure as a slice.
// The slice is returned as-is, not copied.
// The size of the internal memory buffer is DatasetItemCount * RANDOMX_DATASET_ITEM_SIZE.
func (d *Dataset) Memory() []RegisterLine {
return d.memory
}
// InitDataset fills dataset items [startItem, startItem+itemCount) using cache.
//
// It panics with "out of range" when startItem is not below DatasetItemCount,
// when itemCount exceeds DatasetItemCount, or when the requested range runs
// past DatasetItemCount.
func (d *Dataset) InitDataset(cache *Cache, startItem, itemCount uint64) {
	// Cases are evaluated in order, so the sum is only computed once both
	// operands are known to be bounded.
	switch {
	case startItem >= DatasetItemCount,
		itemCount > DatasetItemCount,
		startItem+itemCount > DatasetItemCount:
		panic("out of range")
	}

	endItem := startItem + itemCount
	cache.datasetInit(d.memory[startItem:endItem], startItem, endItem)
}
// Close currently does nothing and always returns nil.
func (d *Dataset) Close() error {
return nil
}
func (d *Dataset) InitDatasetParallel(cache *Cache, n int) {
n = max(1, n)
var wg sync.WaitGroup
@ -32,10 +82,10 @@ func InitDatasetParallel(dataset Dataset, n int) {
wg.Add(1)
go func(a, b uint64) {
defer wg.Done()
dataset.InitDataset(a, b-a)
d.InitDataset(cache, a, b-a)
}(a, b)
}
dataset.InitDataset(0, DatasetItemCount/uint64(n))
d.InitDataset(cache, 0, DatasetItemCount/uint64(n))
wg.Wait()
}

View file

@ -1,52 +0,0 @@
//go:build amd64 || arm64 || arm64be || loong64 || mips64 || mips64le || ppc64 || ppc64le || riscv64 || s390x || sparc64
package randomx
const DatasetSize = RANDOMX_DATASET_BASE_SIZE + RANDOMX_DATASET_EXTRA_SIZE
const DatasetItemCount = DatasetSize / CacheLineSize
// DatasetFull is a dataset whose items are all stored in memory.
type DatasetFull struct {
	// cache is the Cache used to compute the items in InitDataset.
	cache *Cache
	// memory holds all DatasetItemCount items inline.
	memory [DatasetItemCount]RegisterLine
}

// NewFullDataset returns a DatasetFull bound to cache. Its memory is zeroed
// until InitDataset is called.
func NewFullDataset(cache *Cache) *DatasetFull {
	ds := new(DatasetFull)
	ds.cache = cache
	return ds
}

// PrefetchDataset is a no-op; address is ignored.
func (d *DatasetFull) PrefetchDataset(address uint64) {
}

// ReadDataset XORs the stored item at index address/CacheLineSize into r.
func (d *DatasetFull) ReadDataset(address uint64, r *RegisterLine) {
	item := &d.memory[address/CacheLineSize]
	for i := 0; i < len(r); i++ {
		r[i] ^= item[i]
	}
}

// Cache returns the Cache this dataset was created with.
func (d *DatasetFull) Cache() *Cache {
	return d.cache
}

// Flags returns the Flags field of the underlying Cache.
func (d *DatasetFull) Flags() uint64 {
	return d.cache.Flags
}

// Memory returns a slice over the whole in-memory item array (no copy).
func (d *DatasetFull) Memory() []RegisterLine {
	return d.memory[:]
}

// InitDataset computes items [startItem, startItem+itemCount) from the cache.
// It panics with "out of range" if the requested range is not within
// DatasetItemCount.
func (d *DatasetFull) InitDataset(startItem, itemCount uint64) {
	// Cases are evaluated in order, so the sum is only computed once both
	// operands are known to be bounded.
	switch {
	case startItem >= DatasetItemCount,
		itemCount > DatasetItemCount,
		startItem+itemCount > DatasetItemCount:
		panic("out of range")
	}

	endItem := startItem + itemCount
	d.cache.InitDataset(d.memory[startItem:endItem], startItem, endItem)
}

View file

@ -1,38 +0,0 @@
//go:build !(amd64 || arm64 || arm64be || loong64 || mips64 || mips64le || ppc64 || ppc64le || riscv64 || s390x || sparc64)
package randomx
const DatasetSize = RANDOMX_DATASET_BASE_SIZE + RANDOMX_DATASET_EXTRA_SIZE
const DatasetItemCount = DatasetSize / CacheLineSize
// DatasetFull is an empty placeholder for builds excluded by this file's
// build constraint; every method below is a stub.
type DatasetFull struct {
}
// NewFullDataset always returns nil in this build.
func NewFullDataset(cache *Cache) *DatasetFull {
return nil
}
// PrefetchDataset does nothing.
func (d *DatasetFull) PrefetchDataset(address uint64) {
}
// ReadDataset does nothing; r is left unchanged.
func (d *DatasetFull) ReadDataset(address uint64, r *RegisterLine) {
}
// Cache always returns nil.
func (d *DatasetFull) Cache() *Cache {
return nil
}
// Flags always returns 0.
func (d *DatasetFull) Flags() uint64 {
return 0
}
// Memory always returns nil.
func (d *DatasetFull) Memory() []RegisterLine {
return nil
}
// InitDataset does nothing.
func (d *DatasetFull) InitDataset(startItem, itemCount uint64) {
}

View file

@ -1,44 +0,0 @@
package randomx
// DatasetLight is a dataset that stores no items: every read recomputes the
// requested item from the cache.
type DatasetLight struct {
	cache *Cache
}

// NewLightDataset returns a DatasetLight bound to cache.
func NewLightDataset(cache *Cache) *DatasetLight {
	ds := new(DatasetLight)
	ds.cache = cache
	return ds
}

// PrefetchDataset is a no-op; address is ignored.
func (d *DatasetLight) PrefetchDataset(address uint64) {
}

// ReadDataset computes the item at index address/CacheLineSize from the cache
// (through the JIT path when the cache reports HasJIT) and XORs it into r.
func (d *DatasetLight) ReadDataset(address uint64, r *RegisterLine) {
	var item RegisterLine
	itemNumber := address / CacheLineSize

	if d.cache.HasJIT() {
		d.cache.InitDatasetItemJIT(&item, itemNumber)
	} else {
		d.cache.InitDatasetItem(&item, itemNumber)
	}

	for i := 0; i < len(r); i++ {
		r[i] ^= item[i]
	}
}

// Flags returns the Flags field of the underlying Cache.
func (d *DatasetLight) Flags() uint64 {
	return d.cache.Flags
}

// Cache returns the Cache this dataset was created with.
func (d *DatasetLight) Cache() *Cache {
	return d.cache
}

// Memory always returns nil: no items are stored.
func (d *DatasetLight) Memory() []RegisterLine {
	return nil
}

// InitDataset does nothing.
func (d *DatasetLight) InitDataset(startItem, itemCount uint64) {
}

View file

@ -1,16 +1,27 @@
package randomx
import (
"git.gammaspectra.live/P2Pool/go-randomx/v3/internal/aes"
"golang.org/x/sys/cpu"
"runtime"
)
const RANDOMX_FLAG_DEFAULT = 0
// Flags is a bit set of RANDOMX_FLAG_* options.
type Flags uint64

// Has reports whether every bit set in flags is also set in f.
// An empty flags value is always contained.
func (f Flags) Has(flags Flags) bool {
	// Clearing f's bits from flags leaves nothing iff flags is a subset of f.
	missing := flags &^ f
	return missing == 0
}
// HasJIT reports whether RANDOMX_FLAG_JIT is set in f and the build has JIT
// support (supportsJIT).
func (f Flags) HasJIT() bool {
	if !supportsJIT {
		return false
	}
	return f.Has(RANDOMX_FLAG_JIT)
}
const RANDOMX_FLAG_DEFAULT Flags = 0
const (
// RANDOMX_FLAG_LARGE_PAGES not implemented
RANDOMX_FLAG_LARGE_PAGES = 1 << iota
// RANDOMX_FLAG_HARD_AES not implemented
RANDOMX_FLAG_LARGE_PAGES = Flags(1 << iota)
// RANDOMX_FLAG_HARD_AES Selects between hardware or software AES
RANDOMX_FLAG_HARD_AES
// RANDOMX_FLAG_FULL_MEM Selects between full or light mode dataset
RANDOMX_FLAG_FULL_MEM
@ -20,17 +31,38 @@ const (
RANDOMX_FLAG_SECURE
RANDOMX_FLAG_ARGON2_SSSE3
RANDOMX_FLAG_ARGON2_AVX2
RANDOMX_FLAG_ARGON2
RANDOMX_FLAG_ARGON2 = RANDOMX_FLAG_ARGON2_AVX2 | RANDOMX_FLAG_ARGON2_SSSE3
)
func GetFlags() (flags uint64) {
// GetFlags returns the recommended flags to be used on the current machine.
// Does not include:
// * RANDOMX_FLAG_LARGE_PAGES
// * RANDOMX_FLAG_FULL_MEM
// * RANDOMX_FLAG_SECURE
// These flags must be added manually if desired.
//
// On OpenBSD, NetBSD and arm64 darwin/ios RANDOMX_FLAG_SECURE is enabled by default, as W^X is enforced by the OS.
func GetFlags() (flags Flags) {
flags = RANDOMX_FLAG_DEFAULT
if runtime.GOARCH == "amd64" {
flags |= RANDOMX_FLAG_JIT
if cpu.X86.HasAES {
if aes.HasHardAESImplementation && cpu.X86.HasAES {
flags |= RANDOMX_FLAG_HARD_AES
}
if cpu.X86.HasSSSE3 {
flags |= RANDOMX_FLAG_ARGON2_SSSE3
}
if cpu.X86.HasAVX2 {
flags |= RANDOMX_FLAG_ARGON2_AVX2
}
}
if runtime.GOOS == "openbsd" || runtime.GOOS == "netbsd" || ((runtime.GOOS == "darwin" || runtime.GOOS == "ios") && runtime.GOARCH == "arm64") {
flags |= RANDOMX_FLAG_SECURE
}
return flags
}

View file

@ -0,0 +1,63 @@
//go:build amd64 && !purego
package aes
import (
"git.gammaspectra.live/P2Pool/go-randomx/v3/internal/asm"
"git.gammaspectra.live/P2Pool/go-randomx/v3/internal/keys"
"golang.org/x/sys/cpu"
"runtime"
"unsafe"
)
const HasHardAESImplementation = true
// hardAES implements AES on top of the assembly routines in package asm.
type hardAES struct{}

// NewHardAES returns the hardware-backed implementation, or nil when the CPU
// does not report AES support.
func NewHardAES() AES {
	if !cpu.X86.HasAES {
		return nil
	}
	return hardAES{}
}

// HashAes1Rx4 hashes input into output using the assembly routine.
// It panics with "unsupported" unless len(input) is a multiple of 64.
func (h hardAES) HashAes1Rx4(input []byte, output *[64]byte) {
	if rem := len(input) % len(output); rem != 0 {
		panic("unsupported")
	}

	asm.HashAes1Rx4(&keys.AesHash1R_State, &keys.AesHash1R_XKeys, output, unsafe.SliceData(input), uint64(len(input)))
}

// FillAes1Rx4 fills output from state using the assembly routine; state is
// passed by reference, not copied.
// It panics with "unsupported" unless len(output) is a multiple of 64.
func (h hardAES) FillAes1Rx4(state *[64]byte, output []byte) {
	if rem := len(output) % len(state); rem != 0 {
		panic("unsupported")
	}

	// The state is handed to the assembly as four 4-word lanes.
	asm.FillAes1Rx4((*[4][4]uint32)(unsafe.Pointer(state)), &keys.AesGenerator1R_Keys, unsafe.SliceData(output), uint64(len(output)))
	runtime.KeepAlive(state)
}

// FillAes4Rx4 fills output in 64-byte chunks; each chunk is the state after
// four DecEnc passes with the four fillAes4Rx4 key sets.
// state is received by value, so the caller's array is never modified.
// It panics with "unsupported" unless len(output) is a multiple of 64.
func (h hardAES) FillAes4Rx4(state [64]byte, output []byte) {
	if rem := len(output) % len(state); rem != 0 {
		panic("unsupported")
	}

	// Alias the local copy of the state as four 4-word lanes.
	lanes := (*[4][4]uint32)(unsafe.Pointer(&state))

	// The length check above guarantees every remaining tail holds a full chunk.
	for rest := output; len(rest) > 0; rest = rest[len(state):] {
		asm.AESRoundTrip_DecEnc(lanes, &fillAes4Rx4Keys0)
		asm.AESRoundTrip_DecEnc(lanes, &fillAes4Rx4Keys1)
		asm.AESRoundTrip_DecEnc(lanes, &fillAes4Rx4Keys2)
		asm.AESRoundTrip_DecEnc(lanes, &fillAes4Rx4Keys3)

		copy(rest, state[:])
	}
}

View file

@ -0,0 +1,9 @@
//go:build !amd64 || purego
package aes
const HasHardAESImplementation = false
// NewHardAES always returns nil in this build (HasHardAESImplementation is false).
func NewHardAES() AES {
return nil
}

View file

@ -30,46 +30,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package aes
import (
"git.gammaspectra.live/P2Pool/go-randomx/v3/keys"
"unsafe"
"git.gammaspectra.live/P2Pool/go-randomx/v3/internal/keys"
)
// HashAes1Rx4
//
// Computes a 512-bit hash of 'input' using 4 lanes of AES.
// The input is treated as a set of round keys for the encryption
// of the initial state.
//
// The size of 'input' must be a multiple of 64; otherwise this
// function panics with "unsupported".
//
// For a 2 MiB input, this has the same security as 32768-round
// AES encryption.
//
// Hashing throughput: >20 GiB/s per CPU core with hardware AES
func HashAes1Rx4(input []byte, output *[64]byte) {
	if rem := len(input) % 64; rem != 0 {
		panic("unsupported")
	}

	hashAes1Rx4(input, output)
}
// FillAes1Rx4
//
// Fills 'output' with pseudorandom data based on the 512-bit 'state'.
// The state is encrypted using a single AES round per 16 bytes of output
// in 4 lanes.
//
// The size of 'output' must be a multiple of 64; otherwise this
// function panics with "unsupported".
//
// The modified state is written back to 'state' to allow multiple
// calls to this function.
func FillAes1Rx4(state *[64]byte, output []byte) {
	if rem := len(output) % len(state); rem != 0 {
		panic("unsupported")
	}

	fillAes1Rx4(state, output)
}
var fillAes4Rx4Keys0 = [4][4]uint32{
keys.AesGenerator4R_Keys[0],
keys.AesGenerator4R_Keys[0],
@ -94,25 +57,3 @@ var fillAes4Rx4Keys3 = [4][4]uint32{
keys.AesGenerator4R_Keys[7],
keys.AesGenerator4R_Keys[7],
}
// FillAes4Rx4 used to generate final program
func FillAes4Rx4(state [64]byte, output []byte) {
if len(output)%len(state) != 0 {
panic("unsupported")
}
// state is copied on caller
// Copy state
states := (*[4][4]uint32)(unsafe.Pointer(&state))
for outptr := 0; outptr < len(output); outptr += len(state) {
aesroundtrip_decenc(states, &fillAes4Rx4Keys0)
aesroundtrip_decenc(states, &fillAes4Rx4Keys1)
aesroundtrip_decenc(states, &fillAes4Rx4Keys2)
aesroundtrip_decenc(states, &fillAes4Rx4Keys3)
copy(output[outptr:], state[:])
}
}

35
internal/aes/impl.go Normal file
View file

@ -0,0 +1,35 @@
package aes
// AES is the set of AES-based primitives that an implementation provides.
// The implementations in this package panic with "unsupported" when a size
// requirement stated below is not met.
type AES interface {
// HashAes1Rx4
//
// Calculate a 512-bit hash of 'input' using 4 lanes of AES.
// The input is treated as a set of round keys for the encryption
// of the initial state.
//
// 'input' size must be a multiple of 64.
//
// For a 2 MiB input, this has the same security as 32768-round
// AES encryption.
//
// Hashing throughput: >20 GiB/s per CPU core with hardware AES
HashAes1Rx4(input []byte, output *[64]byte)
// FillAes1Rx4
//
// Fill 'output' with pseudorandom data based on 512-bit 'state'.
// The state is encrypted using a single AES round per 16 bytes of output
// in 4 lanes.
//
// 'output' size must be a multiple of 64.
//
// The modified state is written back to 'state' to allow multiple
// calls to this function.
FillAes1Rx4(state *[64]byte, output []byte)
// FillAes4Rx4 used to generate final program
//
// 'output' size must be a multiple of 64.
//
// 'state' is copied when calling, so the caller's array is not modified.
FillAes4Rx4(state [64]byte, output []byte)
}

View file

@ -29,3 +29,24 @@ func soft_aesdec(state *[4]uint32, key *[4]uint32) {
state[2] = key[2] ^ td0[uint8(s2)] ^ td1[uint8(s1>>8)] ^ td2[uint8(s0>>16)] ^ td3[uint8(s3>>24)]
state[3] = key[3] ^ td0[uint8(s3)] ^ td1[uint8(s2>>8)] ^ td2[uint8(s1>>16)] ^ td3[uint8(s0>>24)]
}
// soft_aesroundtrip_decenc applies one software round per lane with per-lane
// keys: even lanes (0, 2) are decrypted, odd lanes (1, 3) are encrypted.
func soft_aesroundtrip_decenc(states *[4][4]uint32, keys *[4][4]uint32) {
	for lane := 0; lane < len(states); lane += 2 {
		soft_aesdec(&states[lane], &keys[lane])
		soft_aesenc(&states[lane+1], &keys[lane+1])
	}
}
// soft_aesroundtrip_encdec applies one software round per lane with per-lane
// keys: even lanes (0, 2) are encrypted, odd lanes (1, 3) are decrypted.
func soft_aesroundtrip_encdec(states *[4][4]uint32, keys *[4][4]uint32) {
	for lane := 0; lane < len(states); lane += 2 {
		soft_aesenc(&states[lane], &keys[lane])
		soft_aesdec(&states[lane+1], &keys[lane+1])
	}
}
// soft_aesroundtrip_encdec1 applies one software round per lane using the same
// key for all lanes: even lanes (0, 2) are encrypted, odd lanes (1, 3) decrypted.
func soft_aesroundtrip_encdec1(states *[4][4]uint32, key *[4]uint32) {
	for lane := 0; lane < len(states); lane += 2 {
		soft_aesenc(&states[lane], key)
		soft_aesdec(&states[lane+1], key)
	}
}

69
internal/aes/soft.go Normal file
View file

@ -0,0 +1,69 @@
package aes
import (
"git.gammaspectra.live/P2Pool/go-randomx/v3/internal/keys"
"runtime"
"unsafe"
)
// softAES implements AES with the table-based software rounds.
type softAES struct{}

// NewSoftAES returns the software implementation.
func NewSoftAES() AES {
	return softAES{}
}

// HashAes1Rx4 hashes input directly into output: output is initialised with
// keys.AesHash1R_State and then transformed in place, so no separate state
// copy exists.
// It panics with "unsupported" unless len(input) is a multiple of 64.
func (s softAES) HashAes1Rx4(input []byte, output *[64]byte) {
	if rem := len(input) % len(output); rem != 0 {
		panic("unsupported")
	}

	// The hash state aliases output.
	// NOTE(review): output is a byte array, so this cast presumes it is
	// suitably aligned for uint32 access - confirm for all callers.
	lanes := (*[4][4]uint32)(unsafe.Pointer(output))
	*lanes = keys.AesHash1R_State

	// The length check above guarantees every remaining tail holds 64 bytes.
	for rest := input; len(rest) > 0; rest = rest[64:] {
		chunk := (*[4][4]uint32)(unsafe.Pointer(unsafe.SliceData(rest)))
		soft_aesroundtrip_encdec(lanes, chunk)
	}

	soft_aesroundtrip_encdec1(lanes, &keys.AesHash1R_XKeys[0])
	soft_aesroundtrip_encdec1(lanes, &keys.AesHash1R_XKeys[1])

	runtime.KeepAlive(output)
}

// FillAes1Rx4 fills output in 64-byte chunks; each chunk is the state after
// one decenc pass. state is updated in place.
// It panics with "unsupported" unless len(output) is a multiple of 64.
func (s softAES) FillAes1Rx4(state *[64]byte, output []byte) {
	if rem := len(output) % len(state); rem != 0 {
		panic("unsupported")
	}

	// Alias the caller's state as four 4-word lanes; no copy is made.
	lanes := (*[4][4]uint32)(unsafe.Pointer(state))

	for rest := output; len(rest) > 0; rest = rest[len(state):] {
		soft_aesroundtrip_decenc(lanes, &keys.AesGenerator1R_Keys)
		copy(rest, state[:])
	}
}

// FillAes4Rx4 fills output in 64-byte chunks; each chunk is the state after
// four decenc passes, one per fillAes4Rx4 key set. state is received by value,
// so the caller's array is never modified.
// It panics with "unsupported" unless len(output) is a multiple of 64.
func (s softAES) FillAes4Rx4(state [64]byte, output []byte) {
	if rem := len(output) % len(state); rem != 0 {
		panic("unsupported")
	}

	// Alias the local copy of the state as four 4-word lanes.
	lanes := (*[4][4]uint32)(unsafe.Pointer(&state))

	for rest := output; len(rest) > 0; rest = rest[len(state):] {
		soft_aesroundtrip_decenc(lanes, &fillAes4Rx4Keys0)
		soft_aesroundtrip_decenc(lanes, &fillAes4Rx4Keys1)
		soft_aesroundtrip_decenc(lanes, &fillAes4Rx4Keys2)
		soft_aesroundtrip_decenc(lanes, &fillAes4Rx4Keys3)

		copy(rest, state[:])
	}
}

View file

@ -23,7 +23,7 @@ func initBlocks(h0 *[blake2b.Size + 8]byte, memory, threads uint32) []Block
func processBlocks(B []Block, time, memory, threads uint32, mode int)
// BuildBlocks From golang.org/x/crypto/argon2.deriveKey without last deriveKey call
func BuildBlocks(password, salt, secret, data []byte, time, memory uint32, threads uint8, keyLen uint32) []Block {
func BuildBlocks(password, salt []byte, time, memory uint32, threads uint8) []Block {
if time < 1 {
panic("argon2: number of rounds too small")
}
@ -31,7 +31,8 @@ func BuildBlocks(password, salt, secret, data []byte, time, memory uint32, threa
panic("argon2: parallelism degree too low")
}
const mode = 0 /* argon2d */
h0 := initHash(password, salt, secret, data, time, memory, uint32(threads), keyLen, mode)
const keyLen = 0
h0 := initHash(password, salt, nil, nil, time, memory, uint32(threads), keyLen, mode)
memory = memory / (syncPoints * uint32(threads)) * (syncPoints * uint32(threads))
if memory < 2*syncPoints*uint32(threads) {

View file

@ -4,9 +4,11 @@ package randomx
import (
"encoding/binary"
"git.gammaspectra.live/P2Pool/go-randomx/v3/asm"
"git.gammaspectra.live/P2Pool/go-randomx/v3/internal/asm"
)
const supportsJIT = true
/*
REGISTER ALLOCATION:

View file

@ -2,4 +2,6 @@
package randomx
const supportsJIT = false
var RandomXCodeSize uint64 = 0

View file

@ -30,33 +30,77 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package randomx
import (
"fmt"
"encoding/hex"
"git.gammaspectra.live/P2Pool/go-randomx/v3/internal/aes"
"os"
"runtime"
"slices"
"strings"
)
import "testing"
var Tests = []struct {
key []byte // key
input []byte // input
expected string // expected result
}{
{[]byte("RandomX example key\x00"), []byte("RandomX example input\x00"), "8a48e5f9db45ab79d9080574c4d81954fe6ac63842214aff73c244b26330b7c9"},
{[]byte("test key 000"), []byte("This is a test"), "639183aae1bf4c9a35884cb46b09cad9175f04efd7684e7262a0ac1c2f0b4e3f"}, // test a
{[]byte("test key 000"), []byte("Lorem ipsum dolor sit amet"), "300a0adb47603dedb42228ccb2b211104f4da45af709cd7547cd049e9489c969"}, // test b
{[]byte("test key 000"), []byte("sed do eiusmod tempor incididunt ut labore et dolore magna aliqua"), "c36d4ed4191e617309867ed66a443be4075014e2b061bcdaf9ce7b721d2b77a8"}, // test c
{[]byte("test key 001"), []byte("sed do eiusmod tempor incididunt ut labore et dolore magna aliqua"), "e9ff4503201c0c2cca26d285c93ae883f9b1d30c9eb240b820756f2d5a7905fc"}, // test d
// testdata is one hash test vector.
type testdata struct {
// name is used as the subtest name.
name string
// key is passed to Cache.Init.
key []byte
// input is the message passed to CalculateHash.
input []byte
// expected result, in hex
expected string
}
// mustHex decodes the hexadecimal string str and returns the bytes.
// It panics with the decoding error if str is not valid hex.
func mustHex(str string) []byte {
	decoded, err := hex.DecodeString(str)
	if err == nil {
		return decoded
	}
	panic(err)
}
var Tests = []testdata{
{"example", []byte("RandomX example key\x00"), []byte("RandomX example input\x00"), "8a48e5f9db45ab79d9080574c4d81954fe6ac63842214aff73c244b26330b7c9"},
{"test_a", []byte("test key 000"), []byte("This is a test"), "639183aae1bf4c9a35884cb46b09cad9175f04efd7684e7262a0ac1c2f0b4e3f"},
{"test_b", []byte("test key 000"), []byte("Lorem ipsum dolor sit amet"), "300a0adb47603dedb42228ccb2b211104f4da45af709cd7547cd049e9489c969"},
{"test_c", []byte("test key 000"), []byte("sed do eiusmod tempor incididunt ut labore et dolore magna aliqua"), "c36d4ed4191e617309867ed66a443be4075014e2b061bcdaf9ce7b721d2b77a8"},
{"test_d", []byte("test key 001"), []byte("sed do eiusmod tempor incididunt ut labore et dolore magna aliqua"), "e9ff4503201c0c2cca26d285c93ae883f9b1d30c9eb240b820756f2d5a7905fc"},
{"test_e", []byte("test key 001"), mustHex("0b0b98bea7e805e0010a2126d287a2a0cc833d312cb786385a7c2f9de69d25537f584a9bc9977b00000000666fd8753bf61a8631f12984e3fd44f4014eca629276817b56f32e9b68bd82f416"), "c56414121acda1713c2f2a819d8ae38aed7c80c35c2a769298d34f03833cd5f1"},
}
// testFlags derives the flag set for a test variant.
//
// The variant is the last "/"-separated element of name. flags is first
// combined with GetFlags(), then adjusted:
//   - "interpreter": JIT cleared
//   - "compiler":    JIT set; skip if the resulting flags report no JIT
//   - "softaes":     hard AES cleared
//   - "hardaes":     hard AES set; skip if aes.NewHardAES() returns nil
//
// Any other variant leaves the combined flags unchanged. skip tells the
// caller the variant cannot run on this platform.
func testFlags(name string, flags Flags) (f Flags, skip bool) {
	f = flags | GetFlags()

	// Everything after the last "/" (the whole name when there is none).
	variant := name[strings.LastIndex(name, "/")+1:]

	switch variant {
	case "interpreter":
		f &^= RANDOMX_FLAG_JIT
	case "compiler":
		f |= RANDOMX_FLAG_JIT
		skip = !f.HasJIT()
	case "softaes":
		f &^= RANDOMX_FLAG_HARD_AES
	case "hardaes":
		f |= RANDOMX_FLAG_HARD_AES
		skip = aes.NewHardAES() == nil
	}

	return f, skip
}
func Test_RandomXLight(t *testing.T) {
for _, n := range []string{"interpreter", "compiler", "softaes", "hardaes"} {
t.Run(n, func(t *testing.T) {
tFlags, skip := testFlags(t.Name(), 0)
if skip {
t.Skip("not supported on this platform")
}
c := NewCache(0)
for ix, tt := range Tests {
t.Run(string(tt.key)+"_____"+string(tt.input), func(t *testing.T) {
c.Init(tt.key)
c := NewCache(tFlags)
if c == nil {
t.Fatal("nil cache")
}
defer func() {
err := c.Close()
if err != nil {
@ -64,19 +108,35 @@ func Test_RandomXLight(t *testing.T) {
}
}()
dataset := NewLightDataset(c)
dataset.InitDataset(0, DatasetItemCount)
for _, test := range Tests {
t.Run(test.name, func(t *testing.T) {
c.Init(test.key)
vm := NewVM(dataset)
defer vm.Close()
vm, err := NewVM(tFlags, c, nil)
if err != nil {
t.Fatal(err)
}
defer func() {
err := vm.Close()
if err != nil {
t.Error(err)
}
}()
var output_hash [32]byte
vm.CalculateHash(tt.input, &output_hash)
var outputHash [RANDOMX_HASH_SIZE]byte
actual := fmt.Sprintf("%x", output_hash)
if actual != tt.expected {
t.Errorf("#%d Fib(%v): expected %s, actual %s", ix, tt.key, tt.expected, actual)
vm.CalculateHash(test.input, &outputHash)
outputHex := hex.EncodeToString(outputHash[:])
if outputHex != test.expected {
t.Errorf("key=%v, input=%v", test.key, test.input)
t.Errorf("expected=%s, actual=%s", test.expected, outputHex)
t.FailNow()
}
})
}
})
}
}
@ -86,12 +146,18 @@ func Test_RandomXFull(t *testing.T) {
t.Skip("Skipping full mode in CI environment")
}
c := NewCache(0)
for _, n := range []string{"interpreter", "compiler", "softaes", "hardaes"} {
t.Run(n, func(t *testing.T) {
for ix, tt := range Tests {
tFlags, skip := testFlags(t.Name(), RANDOMX_FLAG_FULL_MEM)
if skip {
t.Skip("not supported on this platform")
}
t.Run(string(tt.key)+"_____"+string(tt.input), func(t *testing.T) {
c.Init(tt.key)
c := NewCache(tFlags)
if c == nil {
t.Fatal("nil cache")
}
defer func() {
err := c.Close()
if err != nil {
@ -99,44 +165,79 @@ func Test_RandomXFull(t *testing.T) {
}
}()
dataset := NewFullDataset(c)
if dataset == nil {
t.Skip("Skipping full mode in 32-bit environment")
dataset, err := NewDataset(tFlags)
if err != nil {
t.Fatal(err)
}
InitDatasetParallel(dataset, runtime.NumCPU())
defer func() {
err := dataset.Close()
if err != nil {
t.Error(err)
}
}()
vm := NewVM(dataset)
defer vm.Close()
for _, test := range Tests {
t.Run(test.name, func(t *testing.T) {
c.Init(test.key)
dataset.InitDatasetParallel(c, runtime.NumCPU())
var output_hash [32]byte
vm.CalculateHash(tt.input, &output_hash)
vm, err := NewVM(tFlags, nil, dataset)
if err != nil {
t.Fatal(err)
}
defer func() {
err := vm.Close()
if err != nil {
t.Error(err)
}
}()
actual := fmt.Sprintf("%x", output_hash)
if actual != tt.expected {
t.Errorf("#%d Fib(%v): expected %s, actual %s", ix, tt.key, tt.expected, actual)
var outputHash [RANDOMX_HASH_SIZE]byte
vm.CalculateHash(test.input, &outputHash)
outputHex := hex.EncodeToString(outputHash[:])
if outputHex != test.expected {
t.Errorf("key=%v, input=%v", test.key, test.input)
t.Errorf("expected=%s, actual=%s", test.expected, outputHex)
t.FailNow()
}
})
// cleanup between runs
runtime.GC()
}
})
// cleanup 2GiB between runs
// cleanup 2 GiB between runs
runtime.GC()
}
}
var BenchmarkTest = Tests[0]
var BenchmarkCache *Cache
var BenchmarkDatasetLight *DatasetLight
var BenchmarkDatasetFull *DatasetFull
var BenchmarkDataset *Dataset
var BenchmarkFlags = GetFlags()
func TestMain(m *testing.M) {
if slices.Contains(os.Args, "-test.bench") {
flags := GetFlags()
flags |= RANDOMX_FLAG_FULL_MEM
var err error
//init light and full dataset
BenchmarkCache = NewCache(0)
BenchmarkCache.Init(BenchmarkTest.key)
BenchmarkDatasetLight = NewLightDataset(BenchmarkCache)
BenchmarkDatasetLight.InitDataset(0, DatasetItemCount)
BenchmarkDatasetFull = NewFullDataset(BenchmarkCache)
InitDatasetParallel(BenchmarkDatasetFull, runtime.NumCPU())
BenchmarkCache = NewCache(flags)
defer BenchmarkCache.Close()
BenchmarkCache.Init(BenchmarkTest.key)
BenchmarkDataset, err = NewDataset(flags | RANDOMX_FLAG_FULL_MEM)
if err != nil {
panic(err)
}
defer BenchmarkDataset.Close()
BenchmarkDataset.InitDatasetParallel(BenchmarkCache, runtime.NumCPU())
}
os.Exit(m.Run())
}
@ -144,7 +245,10 @@ func TestMain(m *testing.M) {
func Benchmark_RandomXLight(b *testing.B) {
b.ReportAllocs()
vm := NewVM(BenchmarkDatasetLight)
vm, err := NewVM(BenchmarkFlags, BenchmarkCache, nil)
if err != nil {
b.Fatal(err)
}
defer vm.Close()
b.ResetTimer()
@ -158,7 +262,10 @@ func Benchmark_RandomXLight(b *testing.B) {
func Benchmark_RandomXFull(b *testing.B) {
b.ReportAllocs()
vm := NewVM(BenchmarkDatasetFull)
vm, err := NewVM(BenchmarkFlags|RANDOMX_FLAG_FULL_MEM, nil, BenchmarkDataset)
if err != nil {
b.Fatal(err)
}
defer vm.Close()
b.ResetTimer()
@ -176,7 +283,10 @@ func Benchmark_RandomXLight_Parallel(b *testing.B) {
b.RunParallel(func(pb *testing.PB) {
var output_hash [32]byte
vm := NewVM(BenchmarkDatasetLight)
vm, err := NewVM(BenchmarkFlags, BenchmarkCache, nil)
if err != nil {
b.Fatal(err)
}
defer vm.Close()
for pb.Next() {
@ -193,7 +303,10 @@ func Benchmark_RandomXFull_Parallel(b *testing.B) {
b.RunParallel(func(pb *testing.PB) {
var output_hash [32]byte
vm := NewVM(BenchmarkDatasetFull)
vm, err := NewVM(BenchmarkFlags|RANDOMX_FLAG_FULL_MEM, nil, BenchmarkDataset)
if err != nil {
b.Fatal(err)
}
defer vm.Close()
for pb.Next() {

View file

@ -24,3 +24,6 @@ const RegisterFileSize = RegistersCount*8 + RegistersCountFloat*2*8*3
// Memory reinterprets the register file as an array of RegisterFileSize bytes.
// The returned pointer aliases rf (nothing is copied), so reads and writes
// through it observe and modify the register file in place.
func (rf *RegisterFile) Memory() *[RegisterFileSize]byte {
return (*[RegisterFileSize]byte)(unsafe.Pointer(rf))
}
// Clear zeroes every byte exposed by Memory(), i.e. the first
// RegisterFileSize bytes of the register file.
func (rf *RegisterFile) Clear() {
	mem := rf.Memory()
	clear(mem[:])
}

View file

@ -30,7 +30,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package randomx
import (
"git.gammaspectra.live/P2Pool/go-randomx/v3/blake2"
"git.gammaspectra.live/P2Pool/go-randomx/v3/internal/blake2"
"math/bits"
)
@ -300,11 +300,11 @@ func FetchNextDecoder(ins *Instruction, cycle int, mulcount int, gen *blake2.Gen
type SuperScalarProgram []SuperScalarInstruction
func (p SuperScalarProgram) setAddressRegister(addressRegister int) {
func (p SuperScalarProgram) setAddressRegister(addressRegister uint8) {
p[0].Dst = addressRegister
}
func (p SuperScalarProgram) AddressRegister() int {
func (p SuperScalarProgram) AddressRegister() uint8 {
return p[0].Dst
}
func (p SuperScalarProgram) Program() []SuperScalarInstruction {
@ -474,7 +474,7 @@ func BuildSuperScalarProgram(gen *blake2.Generator) SuperScalarProgram {
}
// Set AddressRegister hack
program.setAddressRegister(address_reg)
program.setAddressRegister(uint8(address_reg))
return program
}
@ -555,18 +555,18 @@ const RegisterNeedsDisplacement = 5
const RegisterNeedsSib = 4
func (sins *SuperScalarInstruction) SelectSource(cycle int, registers *[8]Register, gen *blake2.Generator) bool {
availableRegisters := make([]int, 0, 8)
availableRegisters := make([]uint8, 0, 8)
for i := range registers {
if registers[i].Latency <= cycle {
availableRegisters = append(availableRegisters, i)
availableRegisters = append(availableRegisters, uint8(i))
}
}
if len(availableRegisters) == 2 && sins.Opcode == S_IADD_RS {
if availableRegisters[0] == RegisterNeedsDisplacement || availableRegisters[1] == RegisterNeedsDisplacement {
sins.Src = RegisterNeedsDisplacement
sins.OpGroupPar = sins.Src
sins.OpGroupPar = int(sins.Src)
return true
}
}
@ -576,7 +576,7 @@ func (sins *SuperScalarInstruction) SelectSource(cycle int, registers *[8]Regist
if sins.GroupParIsSource == 0 {
} else {
sins.OpGroupPar = sins.Src
sins.OpGroupPar = int(sins.Src)
}
return true
}
@ -584,21 +584,21 @@ func (sins *SuperScalarInstruction) SelectSource(cycle int, registers *[8]Regist
}
func (sins *SuperScalarInstruction) SelectDestination(cycle int, allowChainedMul bool, Registers *[8]Register, gen *blake2.Generator) bool {
var availableRegisters = make([]int, 0, 8)
var availableRegisters = make([]uint8, 0, 8)
for i := range Registers {
if Registers[i].Latency <= cycle && (sins.CanReuse || i != sins.Src) &&
if Registers[i].Latency <= cycle && (sins.CanReuse || uint8(i) != sins.Src) &&
(allowChainedMul || sins.OpGroup != S_IMUL_R || Registers[i].LastOpGroup != S_IMUL_R) &&
(Registers[i].LastOpGroup != sins.OpGroup || Registers[i].LastOpPar != sins.OpGroupPar) &&
(sins.Opcode != S_IADD_RS || i != RegisterNeedsDisplacement) {
availableRegisters = append(availableRegisters, i)
availableRegisters = append(availableRegisters, uint8(i))
}
}
return selectRegister(availableRegisters, gen, &sins.Dst)
}
func selectRegister(availableRegisters []int, gen *blake2.Generator, reg *int) bool {
func selectRegister(availableRegisters []uint8, gen *blake2.Generator, reg *uint8) bool {
index := 0
if len(availableRegisters) == 0 {
return false
@ -617,6 +617,7 @@ func selectRegister(availableRegisters []int, gen *blake2.Generator, reg *int) b
// executeSuperscalar execute the superscalar program
func executeSuperscalar(p []SuperScalarInstruction, r *RegisterLine) {
//TODO: produce around (14 * 8 * 8) = 896 different opcodes with hardcoded registers
for i := range p {
ins := &p[i]

View file

@ -1,12 +1,12 @@
package randomx
import "git.gammaspectra.live/P2Pool/go-randomx/v3/blake2"
import "git.gammaspectra.live/P2Pool/go-randomx/v3/internal/blake2"
// SuperScalarInstruction superscalar program is built with superscalar instructions
type SuperScalarInstruction struct {
Opcode byte
Dst int
Src int
Dst uint8
Src uint8
Mod byte
Imm32 uint32
Imm64 uint64

View file

@ -41,7 +41,7 @@ func generateSuperscalarCode(scalarProgram SuperScalarProgram) SuperScalarProgra
program = append(program, REX_LEA...)
program = append(program,
byte(0x04+8*dst),
genSIB(int(instr.Imm32), src, dst),
genSIB(int(instr.Imm32), int(src), int(dst)),
)
case S_IMUL_R:
program = append(program, REX_IMUL_RR...)

208
vm.go
View file

@ -30,56 +30,108 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package randomx
import (
"git.gammaspectra.live/P2Pool/go-randomx/v3/aes"
"errors"
"git.gammaspectra.live/P2Pool/go-randomx/v3/internal/aes"
"math"
"runtime"
"unsafe"
)
import "golang.org/x/crypto/blake2b"
// REG groups two 64-bit words, Hi followed by Lo.
type REG struct {
	Hi, Lo uint64
}
type VM struct {
ScratchPad ScratchPad
pad ScratchPad
Dataset Dataset
flags Flags
// buffer first 128 bytes are entropy below rest are program bytes
buffer [16*8 + RANDOMX_PROGRAM_SIZE*8]byte
hashState [blake2b.Size]byte
registerFile RegisterFile
AES aes.AES
Cache *Cache
Dataset *Dataset
program ByteCode
jitProgram VMProgramFunc
}
func NewVM(dataset Dataset) *VM {
vm := &VM{
Dataset: dataset,
// NewVM Creates and initializes a RandomX virtual machine.
// *
// * @param flags is any combination of these 5 flags (each flag can be set or not set):
// * RANDOMX_FLAG_LARGE_PAGES - allocate scratchpad memory in large pages
// * RANDOMX_FLAG_HARD_AES - virtual machine will use hardware accelerated AES
// * RANDOMX_FLAG_FULL_MEM - virtual machine will use the full dataset
// * RANDOMX_FLAG_JIT - virtual machine will use a JIT compiler
// * RANDOMX_FLAG_SECURE - when combined with RANDOMX_FLAG_JIT, the JIT pages are never
// * writable and executable at the same time (W^X policy)
// * The numeric values of the first 4 flags are ordered so that a higher value will provide
// * faster hash calculation and a lower numeric value will provide higher portability.
// * Using RANDOMX_FLAG_DEFAULT (all flags not set) works on all platforms, but is the slowest.
// * @param cache is a pointer to an initialized randomx_cache structure. Can be
// * NULL if RANDOMX_FLAG_FULL_MEM is set.
// * @param dataset is a pointer to a randomx_dataset structure. Can be NULL
// * if RANDOMX_FLAG_FULL_MEM is not set.
// *
// * @return Pointer to an initialized randomx_vm structure.
// * Returns NULL if:
// * (1) Scratchpad memory allocation fails.
// * (2) The requested initialization flags are not supported on the current platform.
// * (3) cache parameter is NULL and RANDOMX_FLAG_FULL_MEM is not set
// * (4) dataset parameter is NULL and RANDOMX_FLAG_FULL_MEM is set
// */
func NewVM(flags Flags, cache *Cache, dataset *Dataset) (*VM, error) {
if cache == nil && !flags.Has(RANDOMX_FLAG_FULL_MEM) {
return nil, errors.New("nil cache in light mode")
}
if dataset.Cache().HasJIT() {
if dataset == nil && flags.Has(RANDOMX_FLAG_FULL_MEM) {
return nil, errors.New("nil dataset in full mode")
}
vm := &VM{
Cache: cache,
Dataset: dataset,
flags: flags,
}
if flags.Has(RANDOMX_FLAG_HARD_AES) {
vm.AES = aes.NewHardAES()
}
// fallback
if vm.AES == nil {
vm.AES = aes.NewSoftAES()
}
if flags.HasJIT() {
vm.jitProgram = mapProgram(nil, int(RandomXCodeSize))
if dataset.Flags()&RANDOMX_FLAG_SECURE == 0 {
if !flags.Has(RANDOMX_FLAG_SECURE) {
mapProgramRWX(vm.jitProgram)
}
}
return vm
return vm, nil
}
// run calculate hash based on input. Not thread safe.
// Warning: Underlying callers will run float64 SetRoundingMode directly
// It is the caller's responsibility to set and restore the mode to IEEE 754 roundTiesToEven between full executions
// Additionally, runtime.LockOSThread and defer runtime.UnlockOSThread is recommended to prevent other goroutines sharing these changes
func (vm *VM) run(inputHash [64]byte, roundingMode uint8) (reg RegisterFile) {
reg.FPRC = roundingMode
func (vm *VM) run() {
// buffer first 128 bytes are entropy below rest are program bytes
var buffer [16*8 + RANDOMX_PROGRAM_SIZE*8]byte
aes.FillAes4Rx4(inputHash, buffer[:])
vm.AES.FillAes4Rx4(vm.hashState, vm.buffer[:])
entropy := (*[16]uint64)(unsafe.Pointer(&buffer))
entropy := (*[16]uint64)(unsafe.Pointer(&vm.buffer))
// do more initialization before we run
reg := &vm.registerFile
reg.Clear()
// initialize constant registers
for i := range entropy[:8] {
reg.A[i/2][i%2] = SmallPositiveFloatBits(entropy[i])
}
@ -102,16 +154,14 @@ func (vm *VM) run(inputHash [64]byte, roundingMode uint8) (reg RegisterFile) {
eMask := [2]uint64{ExponentMask(entropy[14]), ExponentMask(entropy[15])}
prog := buffer[len(entropy)*8:]
prog := vm.buffer[len(entropy)*8:]
CompileProgramToByteCode(prog, &vm.program)
datasetMemory := vm.Dataset.Memory()
var jitProgram VMProgramFunc
if vm.jitProgram != nil {
if datasetMemory == nil {
if vm.Dataset.Flags()&RANDOMX_FLAG_SECURE > 0 {
if vm.Dataset == nil { //light mode
if vm.flags.Has(RANDOMX_FLAG_SECURE) {
mapProgramRW(vm.jitProgram)
jitProgram = vm.program.generateCode(vm.jitProgram, nil)
mapProgramRX(vm.jitProgram)
@ -120,7 +170,7 @@ func (vm *VM) run(inputHash [64]byte, roundingMode uint8) (reg RegisterFile) {
}
} else {
// full mode and we have JIT
if vm.Dataset.Flags()&RANDOMX_FLAG_SECURE > 0 {
if vm.flags.Has(RANDOMX_FLAG_SECURE) {
mapProgramRW(vm.jitProgram)
jitProgram = vm.program.generateCode(vm.jitProgram, &readReg)
mapProgramRX(vm.jitProgram)
@ -128,14 +178,16 @@ func (vm *VM) run(inputHash [64]byte, roundingMode uint8) (reg RegisterFile) {
jitProgram = vm.program.generateCode(vm.jitProgram, &readReg)
}
vm.jitProgram.ExecuteFull(&reg, &vm.ScratchPad, &datasetMemory[datasetOffset/CacheLineSize], RANDOMX_PROGRAM_ITERATIONS, ma, mx, eMask)
return reg
vm.jitProgram.ExecuteFull(reg, &vm.pad, &vm.Dataset.Memory()[datasetOffset/CacheLineSize], RANDOMX_PROGRAM_ITERATIONS, ma, mx, eMask)
return
}
}
spAddr0 := uint64(mx)
spAddr1 := uint64(ma)
var rlCache RegisterLine
for ic := 0; ic < RANDOMX_PROGRAM_ITERATIONS; ic++ {
spMix := reg.R[readReg[0]] ^ reg.R[readReg[1]]
@ -146,15 +198,15 @@ func (vm *VM) run(inputHash [64]byte, roundingMode uint8) (reg RegisterFile) {
//TODO: optimize these loads!
for i := uint64(0); i < RegistersCount; i++ {
reg.R[i] ^= vm.ScratchPad.Load64(uint32(spAddr0 + 8*i))
reg.R[i] ^= vm.pad.Load64(uint32(spAddr0 + 8*i))
}
for i := uint64(0); i < RegistersCountFloat; i++ {
reg.F[i] = vm.ScratchPad.Load32FA(uint32(spAddr1 + 8*i))
reg.F[i] = vm.pad.Load32FA(uint32(spAddr1 + 8*i))
}
for i := uint64(0); i < RegistersCountFloat; i++ {
reg.E[i] = vm.ScratchPad.Load32FA(uint32(spAddr1 + 8*(i+RegistersCountFloat)))
reg.E[i] = vm.pad.Load32FA(uint32(spAddr1 + 8*(i+RegistersCountFloat)))
reg.E[i][LOW] = MaskRegisterExponentMantissa(reg.E[i][LOW], eMask[LOW])
reg.E[i][HIGH] = MaskRegisterExponentMantissa(reg.E[i][HIGH], eMask[HIGH])
@ -163,97 +215,119 @@ func (vm *VM) run(inputHash [64]byte, roundingMode uint8) (reg RegisterFile) {
// run the actual bytecode
if jitProgram != nil {
// light mode
jitProgram.Execute(&reg, &vm.ScratchPad, eMask)
jitProgram.Execute(reg, &vm.pad, eMask)
} else {
vm.program.Execute(&reg, &vm.ScratchPad, eMask)
vm.program.Execute(reg, &vm.pad, eMask)
}
mx ^= uint32(reg.R[readReg[2]] ^ reg.R[readReg[3]])
mx &= uint32(CacheLineAlignMask)
vm.Dataset.PrefetchDataset(datasetOffset + uint64(mx))
// execute / load output from diffuser superscalar program to get dataset 64 bytes
vm.Dataset.ReadDataset(datasetOffset+uint64(ma), &reg.R)
if vm.Dataset != nil {
// full mode
vm.Dataset.prefetchDataset(datasetOffset + uint64(mx))
// load output from superscalar program to get dataset 64 bytes
vm.Dataset.readDataset(datasetOffset+uint64(ma), &reg.R)
} else {
// light mode
// execute output from superscalar program to get dataset 64 bytes
vm.Cache.initDataset(&rlCache, (datasetOffset+uint64(ma))/CacheLineSize)
for i := range reg.R {
reg.R[i] ^= rlCache[i]
}
}
// swap the elements
mx, ma = ma, mx
for i := uint64(0); i < RegistersCount; i++ {
vm.ScratchPad.Store64(uint32(spAddr1+8*i), reg.R[i])
vm.pad.Store64(uint32(spAddr1+8*i), reg.R[i])
}
for i := uint64(0); i < RegistersCountFloat; i++ {
reg.F[i][LOW] = Xor(reg.F[i][LOW], reg.E[i][LOW])
reg.F[i][HIGH] = Xor(reg.F[i][HIGH], reg.E[i][HIGH])
vm.ScratchPad.Store64(uint32(spAddr0+16*i), math.Float64bits(reg.F[i][LOW]))
vm.ScratchPad.Store64(uint32(spAddr0+16*i+8), math.Float64bits(reg.F[i][HIGH]))
vm.pad.Store64(uint32(spAddr0+16*i), math.Float64bits(reg.F[i][LOW]))
vm.pad.Store64(uint32(spAddr0+16*i+8), math.Float64bits(reg.F[i][HIGH]))
}
spAddr0 = 0
spAddr1 = 0
}
runtime.KeepAlive(buffer)
return reg
}
func (vm *VM) initScratchpad(seed *[64]byte) {
vm.ScratchPad.Init(seed)
clear(vm.pad[:])
vm.AES.FillAes1Rx4(seed, vm.pad[:])
}
func (vm *VM) runLoops(tempHash [64]byte) RegisterFile {
func (vm *VM) runLoops() {
if lockThreadDueToRoundingMode {
// Lock thread due to rounding mode flags
runtime.LockOSThread()
defer runtime.UnlockOSThread()
}
roundingMode := uint8(0)
// always force a restore
ResetRoundingMode(&vm.registerFile)
for chain := 0; chain < RANDOMX_PROGRAM_COUNT-1; chain++ {
reg := vm.run(tempHash, roundingMode)
roundingMode = reg.FPRC
vm.run()
// write R, F, E, A registers
tempHash = blake2b.Sum512(reg.Memory()[:])
runtime.KeepAlive(reg)
vm.hashState = blake2b.Sum512(vm.registerFile.Memory()[:])
}
// final loop executes here
reg := vm.run(tempHash, roundingMode)
// always force a restore
reg.FPRC = 0xff
vm.run()
// restore rounding mode to 0
SetRoundingMode(&reg, 0)
return reg
// restore rounding mode
ResetRoundingMode(&vm.registerFile)
}
// CalculateHash Not thread safe.
func (vm *VM) CalculateHash(input []byte, output *[32]byte) {
tempHash := blake2b.Sum512(input)
// SetCache points the virtual machine at a different Cache.
// Call it whenever the Cache has been reinitialized with a new key.
// The Cache pointer is replaced unconditionally; no key comparison is made.
// The VM must have been created without RANDOMX_FLAG_FULL_MEM, otherwise
// SetCache panics with "unsupported".
func (vm *VM) SetCache(cache *Cache) {
	fullMem := vm.flags.Has(RANDOMX_FLAG_FULL_MEM)
	if fullMem {
		panic("unsupported")
	}

	// TODO: left open by the original author.
	vm.Cache = cache
}
vm.initScratchpad(&tempHash)
// SetDataset points the virtual machine at a different Dataset.
// The VM must have been created with RANDOMX_FLAG_FULL_MEM, otherwise
// SetDataset panics with "unsupported".
func (vm *VM) SetDataset(dataset *Dataset) {
	if fullMem := vm.flags.Has(RANDOMX_FLAG_FULL_MEM); !fullMem {
		panic("unsupported")
	}

	vm.Dataset = dataset
}
reg := vm.runLoops(tempHash)
// CalculateHash Calculates a RandomX hash value.
func (vm *VM) CalculateHash(input []byte, output *[RANDOMX_HASH_SIZE]byte) {
vm.hashState = blake2b.Sum512(input)
vm.initScratchpad(&vm.hashState)
vm.runLoops()
// now hash the scratch pad as it will act as register A
aes.HashAes1Rx4(vm.ScratchPad[:], &tempHash)
vm.AES.HashAes1Rx4(vm.pad[:], &vm.hashState)
regMem := reg.Memory()
regMem := vm.registerFile.Memory()
// write hash onto register A
copy(regMem[RegisterFileSize-RegistersCountFloat*2*8:], tempHash[:])
copy(regMem[RegisterFileSize-RegistersCountFloat*2*8:], vm.hashState[:])
// write R, F, E, A registers
*output = blake2b.Sum256(regMem[:])
runtime.KeepAlive(reg)
}
// Close Releases all memory occupied by the structure.
func (vm *VM) Close() error {
if vm.jitProgram != nil {
return vm.jitProgram.Close()

View file

@ -3,7 +3,7 @@
package randomx
import (
"git.gammaspectra.live/P2Pool/go-randomx/v3/asm"
"git.gammaspectra.live/P2Pool/go-randomx/v3/internal/asm"
"math"
"math/bits"
)
@ -130,3 +130,8 @@ func SetRoundingMode(f *RegisterFile, mode uint8) {
f.FPRC = mode
asm.SetRoundingMode(mode)
}
// ResetRoundingMode forces rounding mode 0: it stores 0 in f.FPRC and passes
// the same mode to asm.SetRoundingMode. Both steps always run, whatever value
// f.FPRC held before.
func ResetRoundingMode(f *RegisterFile) {
	const mode uint8 = 0

	f.FPRC = mode
	asm.SetRoundingMode(mode)
}

View file

@ -125,3 +125,7 @@ const lockThreadDueToRoundingMode = false
// SetRoundingMode records mode in f.FPRC. This variant only updates the
// register file field; it makes no call into the asm package.
func SetRoundingMode(f *RegisterFile, mode uint8) {
f.FPRC = mode
}
// ResetRoundingMode sets f.FPRC back to 0. This variant only updates the
// register file field; it makes no call into the asm package.
func ResetRoundingMode(f *RegisterFile) {
f.FPRC = 0
}

View file

@ -30,7 +30,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package randomx
import (
"git.gammaspectra.live/P2Pool/go-randomx/v3/aes"
"unsafe"
)
import "encoding/binary"
@ -359,15 +358,11 @@ func CompileProgramToByteCode(prog []byte, bc *ByteCode) {
type ScratchPad [ScratchpadSize]byte
func (pad *ScratchPad) Init(seed *[64]byte) {
// calculate and fill scratchpad
clear(pad[:])
aes.FillAes1Rx4(seed, pad[:])
}
// Store64 writes val into the 8 bytes starting at pad[addr] with a direct
// pointer store, so the bytes land in the host's byte order.
// Only the index addr itself is bounds-checked by the indexing expression;
// the following 7 bytes are not, so callers must keep addr+8 within the pad.
// NOTE(review): the commented-out little-endian encoding below suggests a
// little-endian host is assumed — confirm for big-endian targets.
func (pad *ScratchPad) Store64(addr uint32, val uint64) {
*(*uint64)(unsafe.Pointer(&pad[addr])) = val
//binary.LittleEndian.PutUint64(pad[addr:], val)
}
// Load64 reads the 8 bytes starting at pad[addr] as a uint64 with a direct
// pointer load, so the value is interpreted in the host's byte order.
// Only the index addr itself is bounds-checked by the indexing expression;
// callers must keep addr+8 within the pad.
func (pad *ScratchPad) Load64(addr uint32) uint64 {
return *(*uint64)(unsafe.Pointer(&pad[addr]))
}