Include superscalar JIT for amd64, optimize memory allocations

This commit is contained in:
DataHoarder 2024-04-12 02:00:46 +02:00
parent 9214202406
commit 0a681cd2da
Signed by: DataHoarder
SSH key fingerprint: SHA256:OLTRf6Fl87G52SiR7sWLGNzlJt4WOX+tfI2yxo0z7xk
15 changed files with 411 additions and 83 deletions

View file

@ -8,4 +8,17 @@ This package implements RandomX without CGO, using only Golang code, pure float6
All test cases pass properly.
Supports `386` `amd64` `arm64` platforms due to rounding mode set via assembly. More can be added with supporting rounding mode under _asm_.
Uses minimal Go assembly, because the rounding mode has to be set natively. Support for more platforms can be added by implementing rounding mode control under _asm_.
JIT is supported on a few platforms but can be hard-disabled via the `disable_jit` build flag, or at runtime.
| Platform | Supported | SuperScalar JIT | Notes |
|:-----------:|:---------:|:---------------:|:----------------:|
| **386** | ✅ | ❌ | |
| **amd64** | ✅ | ✅* | JIT only on Unix |
| **arm** | ❌ | - | |
| **arm64** | ✅ | ❌ | |
| **mips** | ❌ | - | |
| **mips64** | ❌ | - | |
| **riscv64** | ❌ | - | |
| **wasm** | ❌ | - | |

View file

@ -16,10 +16,19 @@ type Randomx_Cache struct {
Blocks []MemoryBlock
Programs [RANDOMX_PROGRAM_COUNT]SuperScalarProgram
JitPrograms [RANDOMX_PROGRAM_COUNT]ProgramFunc
Flags uint64
}
func Randomx_alloc_cache(flags uint64) *Randomx_Cache {
return &Randomx_Cache{}
if flags == RANDOMX_FLAG_DEFAULT {
flags = RANDOMX_FLAG_JIT
}
return &Randomx_Cache{
Flags: flags,
}
}
func (cache *Randomx_Cache) VM_Initialize() *VM {
@ -31,6 +40,18 @@ func (cache *Randomx_Cache) VM_Initialize() *VM {
}
}
// Close releases every JIT program stored in cache.JitPrograms.
//
// All non-nil entries are closed even when one of them fails, so a single
// failure does not leak the remaining programs. Each closed slot is reset to
// nil so that a released program is never executed or closed a second time.
//
// It returns the first error reported by a program's Close, or nil when every
// program was released successfully. Calling Close again is a no-op.
func (cache *Randomx_Cache) Close() error {
	var firstErr error
	for i, p := range cache.JitPrograms {
		if p == nil {
			continue
		}
		if err := p.Close(); err != nil && firstErr == nil {
			firstErr = err
		}
		// Drop the reference regardless of the outcome: the slot must not be
		// treated as a usable program after a close attempt.
		cache.JitPrograms[i] = nil
	}
	return firstErr
}
func (cache *Randomx_Cache) Init(key []byte) {
//fmt.Printf("appending null byte is not necessary but only done for testing")
kkey := append([]byte{}, key...)
@ -41,6 +62,16 @@ func (cache *Randomx_Cache) Init(key []byte) {
memoryBlocks := unsafe.Slice((*MemoryBlock)(unsafe.Pointer(unsafe.SliceData(argonBlocks))), int(unsafe.Sizeof(argonBlock{}))/int(unsafe.Sizeof(MemoryBlock{}))*len(argonBlocks))
cache.Blocks = memoryBlocks
nonce := uint32(0) //uint32(len(key))
gen := Init_Blake2Generator(key, nonce)
for i := 0; i < 8; i++ {
cache.Programs[i] = Build_SuperScalar_Program(gen) // build a superscalar program
if cache.Flags&RANDOMX_FLAG_JIT > 0 {
cache.JitPrograms[i] = generateSuperscalarCode(cache.Programs[i])
}
}
}
// GetMixBlock fetch a 64 byte block in uint64 form
@ -64,8 +95,7 @@ func (cache *Randomx_Cache) InitDatasetItem(rl *RegisterLine, itemNumber uint64)
const superscalarAdd6 uint64 = 3398623926847679864
const superscalarAdd7 uint64 = 9549104520008361294
register_value := itemNumber
_ = register_value
registerValue := itemNumber
rl[0] = (itemNumber + 1) * superscalarMul0
rl[1] = rl[0] ^ superscalarAdd1
@ -76,23 +106,39 @@ func (cache *Randomx_Cache) InitDatasetItem(rl *RegisterLine, itemNumber uint64)
rl[6] = rl[0] ^ superscalarAdd6
rl[7] = rl[0] ^ superscalarAdd7
for i := 0; i < RANDOMX_CACHE_ACCESSES; i++ {
mix := cache.GetMixBlock(register_value)
if cache.JitPrograms[0] != nil {
for i := 0; i < RANDOMX_CACHE_ACCESSES; i++ {
mix := cache.GetMixBlock(registerValue)
program := cache.Programs[i]
cache.JitPrograms[i].Execute(rl)
executeSuperscalar(program.Program(), rl)
for q := range rl {
rl[q] ^= mix[q]
}
registerValue = rl[cache.Programs[i].AddressRegister()]
for q := range rl {
rl[q] ^= mix[q]
}
} else {
for i := 0; i < RANDOMX_CACHE_ACCESSES; i++ {
mix := cache.GetMixBlock(registerValue)
register_value = rl[program.AddressRegister()]
program := cache.Programs[i]
executeSuperscalar(program.Program(), rl)
for q := range rl {
rl[q] ^= mix[q]
}
registerValue = rl[program.AddressRegister()]
}
}
}
func (cache *Randomx_Cache) initDataset(dataset []RegisterLine, startItem, endItem uint64) {
panic("todo")
for itemNumber := startItem; itemNumber < endItem; itemNumber, dataset = itemNumber+1, dataset[1:] {
cache.InitDatasetItem(&dataset[0], itemNumber)
}

View file

@ -67,7 +67,7 @@ const RANDOMX_JUMP_OFFSET = 8
const DATASETEXTRAITEMS = RANDOMX_DATASET_EXTRA_SIZE / RANDOMX_DATASET_ITEM_SIZE
const SuperscalarMaxSize int = 3*RANDOMX_SUPERSCALAR_LATENCY + 2
const SuperscalarMaxSize = 3*RANDOMX_SUPERSCALAR_LATENCY + 2
const RANDOMX_DATASET_ITEM_SIZE uint64 = 64
const CacheLineSize uint64 = RANDOMX_DATASET_ITEM_SIZE
const ScratchpadSize uint32 = RANDOMX_SCRATCHPAD_L3
@ -102,9 +102,8 @@ const staticExponentBits = 4
const constExponentBits uint64 = 0x300
const dynamicMantissaMask = (uint64(1) << (mantissaSize + dynamicExponentBits)) - 1
const RANDOMX_FLAG_DEFAULT = 0
const RANDOMX_FLAG_JIT = 1
const RANDOMX_FLAG_LARGE_PAGES = 2
// Cache flags accepted by Randomx_alloc_cache. Values are spelled out
// explicitly: each flag occupies its own bit of a uint64 flag set.
const (
	// RANDOMX_FLAG_DEFAULT requests the default behaviour (no bits set).
	RANDOMX_FLAG_DEFAULT uint64 = 0
	// RANDOMX_FLAG_JIT requests JIT compilation of superscalar programs (bit 0).
	RANDOMX_FLAG_JIT uint64 = 1
)
func isZeroOrPowerOf2(x uint32) bool {
return (x & (x - 1)) == 0

View file

@ -2,6 +2,6 @@ package randomx
type Randomx_Dataset interface {
InitDataset(startItem, endItem uint64)
ReadDataset(address uint64, r *RegisterLine)
ReadDataset(address uint64, r, cache *RegisterLine)
PrefetchDataset(address uint64)
}

View file

@ -9,13 +9,11 @@ func (d *Randomx_DatasetLight) PrefetchDataset(address uint64) {
}
func (d *Randomx_DatasetLight) ReadDataset(address uint64, r *RegisterLine) {
var out RegisterLine
d.Cache.InitDatasetItem(&out, address/CacheLineSize)
func (d *Randomx_DatasetLight) ReadDataset(address uint64, r, cache *RegisterLine) {
d.Cache.InitDatasetItem(cache, address/CacheLineSize)
for i := range r {
r[i] ^= out[i]
r[i] ^= cache[i]
}
}

3
exec.go Normal file
View file

@ -0,0 +1,3 @@
package randomx
// ProgramFunc is a generated program represented as a byte slice.
// Its Execute and Close methods are supplied by build-tagged files, so their
// behaviour depends on the target platform and on the disable_jit build tag.
type ProgramFunc []byte

11
exec_generic.go Normal file
View file

@ -0,0 +1,11 @@
//go:build !unix || disable_jit
package randomx
// Execute does nothing in builds without JIT support (non-unix targets or the
// disable_jit build tag); rl is left untouched.
func (f ProgramFunc) Execute(rl *RegisterLine) {
}
// Close is a no-op in builds without JIT support: no executable memory is ever
// mapped here, so there is nothing to release. It always returns nil.
func (f ProgramFunc) Close() error {
	return nil
}

41
exec_mmap_unix.go Normal file
View file

@ -0,0 +1,41 @@
//go:build unix && !disable_jit
package randomx
import (
"golang.org/x/sys/unix"
"unsafe"
)
// Execute calls the bytes of f as a function, passing rl as the only argument.
//
// The call is made by reinterpreting a pointer to f's slice header as a Go
// func value. This relies on the runtime's in-memory layout of slices and func
// values, which is an implementation detail rather than a language guarantee.
//
// NOTE(review): f is expected to be a non-empty region produced by mapProgram;
// nothing here guards against a nil or already closed program - confirm that
// all callers check for that.
func (f ProgramFunc) Execute(rl *RegisterLine) {
	// Address of the slice header; its first word is the address of the code bytes.
	memoryPtr := &f
	// Read &memoryPtr as a *func: the resulting func value is memoryPtr itself.
	fun := *(*func(rl *RegisterLine))(unsafe.Pointer(&memoryPtr))
	fun(rl)
}
// Close unmaps the memory backing f and returns the error reported by
// unix.Munmap, if any.
//
// A nil or empty program owns no mapping, so closing it is a no-op that
// returns nil. This matches the Close used in builds without JIT support.
// f must not be executed after Close has returned.
func (f ProgramFunc) Close() error {
	if len(f) == 0 {
		return nil
	}
	return unix.Munmap(f)
}
// mapProgram copies program into a fresh anonymous private memory mapping and
// returns that mapping with read+execute permissions.
//
// The region is mapped read+write first and only switched to read+execute once
// the code has been copied, so it is never writable and executable at the same
// time. The returned ProgramFunc must be released with Close.
//
// It panics if the mapping cannot be created (this includes an empty program,
// since a zero-length mapping is rejected) or if its protection cannot be
// changed.
func mapProgram(program []byte) ProgramFunc {
	execFunc, err := unix.Mmap(
		-1,
		0,
		len(program),
		unix.PROT_READ|unix.PROT_WRITE,
		unix.MAP_PRIVATE|unix.MAP_ANONYMOUS)
	if err != nil {
		panic(err)
	}

	copy(execFunc, program)

	// Drop PROT_WRITE and add PROT_EXEC in a single step.
	if err = unix.Mprotect(execFunc, unix.PROT_READ|unix.PROT_EXEC); err != nil {
		// Best effort: do not leak the mapping. The Mprotect failure is the
		// error worth reporting, so a secondary Munmap error is dropped.
		_ = unix.Munmap(execFunc)
		panic(err)
	}

	return execFunc
}

2
go.mod
View file

@ -4,4 +4,4 @@ go 1.21
require golang.org/x/crypto v0.22.0
require golang.org/x/sys v0.19.0 // indirect
require golang.org/x/sys v0.19.0

40
go.sum
View file

@ -1,44 +1,4 @@
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU=
golang.org/x/crypto v0.22.0 h1:g1v0xeRhjcugydODzvb3mEM9SQ0HGp9s/nh3COQ/C30=
golang.org/x/crypto v0.22.0/go.mod h1:vr6Su+7cTlO45qkww3VDJlzDn0ctJvRgYbC2NvXHt+M=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.19.0 h1:q5f1RH2jigJ1MoAWp2KTp3gm5zAGFUTarQZ5U386+4o=
golang.org/x/sys v0.19.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo=
golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk=
golang.org/x/term v0.19.0/go.mod h1:2CuTdWZ7KHSQwUzKva0cbMg6q2DMI3Mmxp+gKJbskEk=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=

View file

@ -55,16 +55,17 @@ func Test_Randomx(t *testing.T) {
t.Run(string(tt.key)+"_____"+string(tt.input), func(t *testing.T) {
c.Init(tt.key)
defer func() {
err := c.Close()
if err != nil {
t.Error(err)
}
}()
nonce := uint32(0) //uint32(len(key))
gen := Init_Blake2Generator(tt.key, nonce)
for i := 0; i < 8; i++ {
c.Programs[i] = Build_SuperScalar_Program(gen) // build a superscalar program
}
vm := c.VM_Initialize()
var output_hash [32]byte
vm.CalculateHash(tt.input, output_hash[:])
vm.CalculateHash(tt.input, &output_hash)
actual := fmt.Sprintf("%x", output_hash)
if actual != tt.expected {
@ -83,18 +84,19 @@ func Benchmark_RandomX(b *testing.B) {
c := Randomx_alloc_cache(0)
c.Init(tt.key)
defer func() {
err := c.Close()
if err != nil {
b.Error(err)
}
}()
nonce := uint32(0) //uint32(len(key))
gen := Init_Blake2Generator(tt.key, nonce)
for i := 0; i < 8; i++ {
c.Programs[i] = Build_SuperScalar_Program(gen) // build a superscalar program
}
vm := c.VM_Initialize()
b.ResetTimer()
for i := 0; i < b.N; i++ {
var output_hash [32]byte
vm.CalculateHash(tt.input, output_hash[:])
vm.CalculateHash(tt.input, &output_hash)
runtime.KeepAlive(output_hash)
}
}

View file

@ -398,7 +398,8 @@ func create(sins *SuperScalarInstruction, ins *Instruction, gen *Blake2Generator
//fmt.Printf("q %s \n", ins.Name)
sins.Name = ins.Name
sins.Mod = gen.GetByte()
sins.Imm32 = uint32((sins.Mod & 0b1100) >> 2) // bits 2-3
// set modshift on Imm32
sins.Imm32 = uint32((sins.Mod >> 2) % 4) // bits 2-3
//sins.Imm32 = 0
sins.OpGroup = S_IADD_RS
sins.GroupParIsSource = 1

248
superscalar_amd64.go Normal file
View file

@ -0,0 +1,248 @@
//go:build unix && amd64 && !disable_jit
package randomx
import (
"encoding/binary"
)
// Size limits used to dimension the JIT code buffers.
const (
	// MaxRandomXInstrCodeSize: FDIV_M requires up to 32 bytes of x86 code.
	MaxRandomXInstrCodeSize = 32
	// MaxSuperscalarInstrSize: IMUL_RCP requires 14 bytes of x86 code.
	MaxSuperscalarInstrSize = 14
	// SuperscalarProgramHeader is the overhead per superscalar program.
	SuperscalarProgramHeader = 128
	// CodeAlign: code sizes are aligned to a multiple of 4 KiB.
	CodeAlign = 4096
	// ReserveCodeSize covers the function prologue/epilogue plus a reserve.
	ReserveCodeSize = CodeAlign
)
// alignSize rounds pos up to the nearest multiple of align.
//
// A pos of zero is already aligned and yields zero for every supported type;
// it is handled separately because pos-1 would wrap around on unsigned types.
// pos is expected to be non-negative and align must be non-zero (a zero align
// panics with a division by zero).
func alignSize[T ~uintptr | ~uint32 | ~uint64 | ~int64 | ~int32 | ~int](pos, align T) T {
	if pos == 0 {
		return 0
	}
	return ((pos-1)/align + 1) * align
}
// Sizes and offsets of the JIT code regions. Both region sizes are rounded up
// to a multiple of CodeAlign.
var (
	// RandomXCodeSize reserves MaxRandomXInstrCodeSize bytes per program
	// instruction on top of ReserveCodeSize.
	RandomXCodeSize = alignSize[uint64](
		ReserveCodeSize+MaxRandomXInstrCodeSize*RANDOMX_PROGRAM_SIZE,
		CodeAlign,
	)

	// SuperscalarSize reserves, for each of the RANDOMX_CACHE_ACCESSES
	// programs, a header plus the largest possible superscalar program, on top
	// of ReserveCodeSize.
	SuperscalarSize = alignSize[uint64](
		ReserveCodeSize+(SuperscalarProgramHeader+MaxSuperscalarInstrSize*SuperscalarMaxSize)*RANDOMX_CACHE_ACCESSES,
		CodeAlign,
	)

	// CodeSize is the sum of both regions.
	CodeSize = uint32(RandomXCodeSize + SuperscalarSize)

	// superScalarHashOffset equals RandomXCodeSize.
	superScalarHashOffset = int32(RandomXCodeSize)
)
// Raw x86-64 encoding fragments, named after the instruction they stand for.
// generateSuperscalarCode appends some of them and follows each with a
// ModRM/SIB/immediate byte computed from the instruction operands. The others
// are not referenced by the superscalar generator in this file.
//
// NOTE(review): JMP, CALL and JZ_SHORT are declared without an explicit type
// and are therefore int, unlike the byte-typed AND_EAX_I, MOV_EAX_I and RET.

// Integer arithmetic and moves.
var REX_ADD_RR = []byte{0x4d, 0x03}
var REX_ADD_RM = []byte{0x4c, 0x03}
var REX_SUB_RR = []byte{0x4d, 0x2b}
var REX_SUB_RM = []byte{0x4c, 0x2b}
var REX_MOV_RR = []byte{0x41, 0x8b}
var REX_MOV_RR64 = []byte{0x49, 0x8b}
var REX_MOV_R64R = []byte{0x4c, 0x8b}
var REX_IMUL_RR = []byte{0x4d, 0x0f, 0xaf}
var REX_IMUL_RRI = []byte{0x4d, 0x69}
var REX_IMUL_RM = []byte{0x4c, 0x0f, 0xaf}
var REX_MUL_R = []byte{0x49, 0xf7}
var REX_MUL_M = []byte{0x48, 0xf7}
var REX_81 = []byte{0x49, 0x81}
var AND_EAX_I byte = 0x25
var MOV_EAX_I byte = 0xb8
var MOV_RAX_I = []byte{0x48, 0xb8}
var MOV_RCX_I = []byte{0x48, 0xb9}
var REX_LEA = []byte{0x4f, 0x8d}
var REX_MUL_MEM = []byte{0x48, 0xf7, 0x24, 0x0e}
var REX_IMUL_MEM = []byte{0x48, 0xf7, 0x2c, 0x0e}
var REX_SHR_RAX = []byte{0x48, 0xc1, 0xe8}
var RAX_ADD_SBB_1 = []byte{0x48, 0x83, 0xC0, 0x01, 0x48, 0x83, 0xD8, 0x00}
var MUL_RCX = []byte{0x48, 0xf7, 0xe1}
var REX_SHR_RDX = []byte{0x48, 0xc1, 0xea}
var REX_SH = []byte{0x49, 0xc1}
var MOV_RCX_RAX_SAR_RCX_63 = []byte{0x48, 0x89, 0xc1, 0x48, 0xc1, 0xf9, 0x3f}
var AND_ECX_I = []byte{0x81, 0xe1}
var ADD_RAX_RCX = []byte{0x48, 0x01, 0xC8}
var SAR_RAX_I8 = []byte{0x48, 0xC1, 0xF8}
var NEG_RAX = []byte{0x48, 0xF7, 0xD8}
var ADD_R_RAX = []byte{0x4C, 0x03}
var XOR_EAX_EAX = []byte{0x33, 0xC0}
var ADD_RDX_R = []byte{0x4c, 0x01}
var SUB_RDX_R = []byte{0x4c, 0x29}
var SAR_RDX_I8 = []byte{0x48, 0xC1, 0xFA}
var TEST_RDX_RDX = []byte{0x48, 0x85, 0xD2}
var SETS_AL_ADD_RDX_RAX = []byte{0x0F, 0x98, 0xC0, 0x48, 0x03, 0xD0}
var REX_NEG = []byte{0x49, 0xF7}
var REX_XOR_RR = []byte{0x4D, 0x33}
var REX_XOR_RI = []byte{0x49, 0x81}
var REX_XOR_RM = []byte{0x4c, 0x33}
var REX_ROT_CL = []byte{0x49, 0xd3}
var REX_ROT_I8 = []byte{0x49, 0xc1}

// Packed floating-point (SSE) encodings; not used by the superscalar generator.
var SHUFPD = []byte{0x66, 0x0f, 0xc6}
var REX_ADDPD = []byte{0x66, 0x41, 0x0f, 0x58}
var REX_CVTDQ2PD_XMM12 = []byte{0xf3, 0x44, 0x0f, 0xe6, 0x24, 0x06}
var REX_SUBPD = []byte{0x66, 0x41, 0x0f, 0x5c}
var REX_XORPS = []byte{0x41, 0x0f, 0x57}
var REX_MULPD = []byte{0x66, 0x41, 0x0f, 0x59}
var REX_MAXPD = []byte{0x66, 0x41, 0x0f, 0x5f}
var REX_DIVPD = []byte{0x66, 0x41, 0x0f, 0x5e}
var SQRTPD = []byte{0x66, 0x0f, 0x51}
var AND_OR_MOV_LDMXCSR = []byte{0x25, 0x00, 0x60, 0x00, 0x00, 0x0D, 0xC0, 0x9F, 0x00, 0x00, 0x50, 0x0F, 0xAE, 0x14, 0x24, 0x58}

// Comparisons, stores and control flow; not used by the superscalar generator.
var ROL_RAX = []byte{0x48, 0xc1, 0xc0}
var XOR_ECX_ECX = []byte{0x33, 0xC9}
var REX_CMP_R32I = []byte{0x41, 0x81}
var REX_CMP_M32I = []byte{0x81, 0x3c, 0x06}
var MOVAPD = []byte{0x66, 0x0f, 0x29}
var REX_MOV_MR = []byte{0x4c, 0x89}
var REX_XOR_EAX = []byte{0x41, 0x33}
var SUB_EBX = []byte{0x83, 0xEB, 0x01}
var JNZ = []byte{0x0f, 0x85}
var JMP = 0xe9
var REX_XOR_RAX_R64 = []byte{0x49, 0x33}
var REX_XCHG = []byte{0x4d, 0x87}
var REX_ANDPS_XMM12 = []byte{0x45, 0x0F, 0x54, 0xE5, 0x45, 0x0F, 0x56, 0xE6}
var REX_PADD = []byte{0x66, 0x44, 0x0f}
var PADD_OPCODES = []byte{0xfc, 0xfd, 0xfe, 0xd4}
var CALL = 0xe8
var REX_ADD_I = []byte{0x49, 0x81}
var REX_TEST = []byte{0x49, 0xF7}
var JZ = []byte{0x0f, 0x84}
var JZ_SHORT = 0x74
var RET byte = 0xc3
var LEA_32 = []byte{0x41, 0x8d}
var MOVNTI = []byte{0x4c, 0x0f, 0xc3}
var ADD_EBX_I = []byte{0x81, 0xc3}

// NOP1 through NOP8 are no-operation encodings of one to eight bytes.
var NOP1 = []byte{0x90}
var NOP2 = []byte{0x66, 0x90}
var NOP3 = []byte{0x66, 0x66, 0x90}
var NOP4 = []byte{0x0F, 0x1F, 0x40, 0x00}
var NOP5 = []byte{0x0F, 0x1F, 0x44, 0x00, 0x00}
var NOP6 = []byte{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00}
var NOP7 = []byte{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00}
var NOP8 = []byte{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}
// genSIB packs the three fields of an x86 SIB byte: scale goes into bits 6-7,
// index into bits 3-5 and base into bits 0-2. The arguments are not masked, so
// out-of-range values overlap neighbouring fields and the result is truncated
// to eight bits.
func genSIB(scale, index, base int) byte {
	sib := scale << 6
	sib |= index << 3
	sib |= base
	return byte(sib)
}
/*
codeInitBlock is the prologue prepended to every generated superscalar program.
It saves rbp, rbx, rsi and r12-r15, reserves 0x38 bytes of stack, copies rax
into rsi and loads the eight 64-bit words at [rsi] into r8-r15.

NOTE(review): rax is assumed to carry the *RegisterLine argument passed by
ProgramFunc.Execute (Go's register-based calling convention) - confirm.

Assembly encoded by the bytes below:

	push rbp
	push rbx
	push rsi
	push r12
	push r13
	push r14
	push r15
	mov rbp,rsp
	sub rsp,(0x8*7)
	mov rsi, rax; # register dataset
	prefetchnta byte ptr [rsi]
	mov r8, qword ptr [rsi+0]
	mov r9, qword ptr [rsi+8]
	mov r10, qword ptr [rsi+16]
	mov r11, qword ptr [rsi+24]
	mov r12, qword ptr [rsi+32]
	mov r13, qword ptr [rsi+40]
	mov r14, qword ptr [rsi+48]
	mov r15, qword ptr [rsi+56]
*/
var codeInitBlock = []byte{0x55, 0x53, 0x56, 0x41, 0x54, 0x41, 0x55, 0x41, 0x56, 0x41, 0x57, 0x48, 0x89, 0xE5, 0x48, 0x83, 0xEC, 0x38, 0x48, 0x89, 0xC6, 0x0F, 0x18, 0x06, 0x4C, 0x8B, 0x06, 0x4C, 0x8B, 0x4E, 0x08, 0x4C, 0x8B, 0x56, 0x10, 0x4C, 0x8B, 0x5E, 0x18, 0x4C, 0x8B, 0x66, 0x20, 0x4C, 0x8B, 0x6E, 0x28, 0x4C, 0x8B, 0x76, 0x30, 0x4C, 0x8B, 0x7E, 0x38}
/*
codeRetBlock is the epilogue appended to every generated superscalar program.
It stores r8-r15 back to the eight 64-bit words at [rsi], releases the 0x38
bytes of stack, restores the registers saved by the prologue in reverse order
and returns to the caller.

Assembly encoded by the bytes below:

	prefetchw byte ptr [rsi]
	mov qword ptr [rsi+0], r8
	mov qword ptr [rsi+8], r9
	mov qword ptr [rsi+16], r10
	mov qword ptr [rsi+24], r11
	mov qword ptr [rsi+32], r12
	mov qword ptr [rsi+40], r13
	mov qword ptr [rsi+48], r14
	mov qword ptr [rsi+56], r15
	add rsp,(0x8*7)
	pop r15
	pop r14
	pop r13
	pop r12
	pop rsi
	pop rbx
	pop rbp
	ret
*/
var codeRetBlock = []byte{0x0F, 0x0D, 0x0E, 0x4C, 0x89, 0x06, 0x4C, 0x89, 0x4E, 0x08, 0x4C, 0x89, 0x56, 0x10, 0x4C, 0x89, 0x5E, 0x18, 0x4C, 0x89, 0x66, 0x20, 0x4C, 0x89, 0x6E, 0x28, 0x4C, 0x89, 0x76, 0x30, 0x4C, 0x89, 0x7E, 0x38, 0x48, 0x83, 0xC4, 0x38, 0x41, 0x5F, 0x41, 0x5E, 0x41, 0x5D, 0x41, 0x5C, 0x5E, 0x5B, 0x5D, 0xC3}
// generateSuperscalarCode translates scalarProgram into amd64 machine code and
// returns it mapped into executable memory via mapProgram.
//
// The output is codeInitBlock, one encoded instruction sequence per
// superscalar instruction, then codeRetBlock. Register operands are reduced
// modulo REGISTERSCOUNT and encoded against r8-r15.
//
// It panics on an opcode outside the superscalar instruction set, and
// propagates any panic raised by mapProgram. The returned ProgramFunc must be
// released with Close.
func generateSuperscalarCode(scalarProgram SuperScalarProgram) ProgramFunc {
	p := scalarProgram.Program()

	// Reserve the worst case up front (MaxSuperscalarInstrSize bytes per
	// instruction) so that the appends below never have to grow the buffer.
	program := make([]byte, 0, len(codeInitBlock)+len(p)*MaxSuperscalarInstrSize+len(codeRetBlock))
	program = append(program, codeInitBlock...)

	for i := range p {
		instr := &p[i]

		dst := instr.Dst_Reg % REGISTERSCOUNT
		src := instr.Src_Reg % REGISTERSCOUNT

		switch instr.Opcode {
		case S_ISUB_R:
			program = append(program, REX_SUB_RR...)
			program = append(program, byte(0xc0+8*dst+src))
		case S_IXOR_R:
			program = append(program, REX_XOR_RR...)
			program = append(program, byte(0xc0+8*dst+src))
		case S_IADD_RS:
			// Imm32 carries the shift amount, used here as the SIB scale.
			program = append(program, REX_LEA...)
			program = append(program,
				byte(0x04+8*dst),
				genSIB(int(instr.Imm32), src, dst),
			)
		case S_IMUL_R:
			program = append(program, REX_IMUL_RR...)
			program = append(program, byte(0xc0+8*dst+src))
		case S_IROR_C:
			program = append(program, REX_ROT_I8...)
			program = append(program,
				byte(0xc8+dst),
				byte(instr.Imm32&63),
			)
		case S_IADD_C7, S_IADD_C8, S_IADD_C9:
			program = append(program, REX_81...)
			program = append(program, byte(0xc0+dst))
			program = binary.LittleEndian.AppendUint32(program, instr.Imm32)
			//TODO: align NOP on C8/C9
		case S_IXOR_C7, S_IXOR_C8, S_IXOR_C9:
			program = append(program, REX_XOR_RI...)
			program = append(program, byte(0xf0+dst))
			program = binary.LittleEndian.AppendUint32(program, instr.Imm32)
			//TODO: align NOP on C8/C9
		case S_IMULH_R:
			program = append(program, REX_MOV_RR64...)
			program = append(program, byte(0xc0+dst))
			program = append(program, REX_MUL_R...)
			program = append(program, byte(0xe0+src))
			program = append(program, REX_MOV_R64R...)
			program = append(program, byte(0xc2+8*dst))
		case S_ISMULH_R:
			program = append(program, REX_MOV_RR64...)
			program = append(program, byte(0xc0+dst))
			program = append(program, REX_MUL_R...)
			program = append(program, byte(0xe8+src))
			program = append(program, REX_MOV_R64R...)
			program = append(program, byte(0xc2+8*dst))
		case S_IMUL_RCP:
			// The reciprocal of Imm32 is loaded as a 64-bit immediate into rax.
			program = append(program, MOV_RAX_I...)
			program = binary.LittleEndian.AppendUint64(program, randomx_reciprocal(instr.Imm32))
			program = append(program, REX_IMUL_RM...)
			// Use the reduced register index, like every other case.
			program = append(program, byte(0xc0+8*dst))
		default:
			panic("unreachable")
		}
	}

	program = append(program, codeRetBlock...)

	return mapProgram(program)
}

8
superscalar_noasm.go Normal file
View file

@ -0,0 +1,8 @@
//go:build !unix || !amd64 || disable_jit
package randomx
// generateSuperscalarCode is the fallback used when the superscalar JIT is not
// built (non-unix or non-amd64 targets, or the disable_jit build tag).
// It ignores scalarProgram and always returns nil.
func generateSuperscalarCode(scalarProgram SuperScalarProgram) ProgramFunc {
return nil
}

14
vm.go
View file

@ -137,6 +137,8 @@ func (vm *VM) Run(input_hash []byte) {
spAddr0 := vm.mem.mx
spAddr1 := vm.mem.ma
var rlCache RegisterLine
for ic := 0; ic < RANDOMX_PROGRAM_ITERATIONS; ic++ {
spMix := vm.reg.r[vm.config.readReg0] ^ vm.reg.r[vm.config.readReg1]
@ -169,7 +171,7 @@ func (vm *VM) Run(input_hash []byte) {
vm.Dataset.PrefetchDataset(vm.datasetOffset + vm.mem.mx)
// execute diffuser superscalar program to get dataset 64 bytes
vm.Dataset.ReadDataset(vm.datasetOffset+vm.mem.ma, &vm.reg.r)
vm.Dataset.ReadDataset(vm.datasetOffset+vm.mem.ma, &vm.reg.r, &rlCache)
// swap the elements
vm.mem.mx, vm.mem.ma = vm.mem.ma, vm.mem.mx
@ -193,7 +195,7 @@ func (vm *VM) Run(input_hash []byte) {
}
func (vm *VM) CalculateHash(input []byte, output []byte) {
func (vm *VM) CalculateHash(input []byte, output *[32]byte) {
var buf [8]byte
// Lock thread due to rounding mode flags
@ -244,7 +246,7 @@ func (vm *VM) CalculateHash(input []byte, output []byte) {
hash512.Write(buf[:])
}
temp_hash = hash512.Sum(nil)
temp_hash = hash512.Sum(input_hash[:0])
//fmt.Printf("%d temphash %x\n", chain, temp_hash)
}
@ -280,11 +282,7 @@ func (vm *VM) CalculateHash(input []byte, output []byte) {
// copy temp_hash as it first copied to register and then hashed
hash256.Write(temp_hash)
final_hash := hash256.Sum(nil)
copy(output, final_hash)
//fmt.Printf("final %x\n", final_hash)
hash256.Sum(output[:0])
}
/*