Include superscalar JIT for amd64, optimize memory allocations
This commit is contained in:
parent
9214202406
commit
0a681cd2da
15
README.md
15
README.md
|
@ -8,4 +8,17 @@ This package implements RandomX without CGO, using only Golang code, pure float6
|
|||
|
||||
All test cases pass properly.
|
||||
|
||||
Supports `386` `amd64` `arm64` platforms due to rounding mode set via assembly. More can be added with supporting rounding mode under _asm_.
|
||||
Uses minimal Go assembly, required only to set the rounding mode natively. Support for additional platforms can be added by implementing rounding-mode control under _asm_.
|
||||
|
||||
JIT is supported on a few platforms but can be hard-disabled via the `disable_jit` build flag, or at runtime.
|
||||
|
||||
| Platform | Supported | SuperScalar JIT | Notes |
|
||||
|:-----------:|:---------:|:---------------:|:----------------:|
|
||||
| **386** | ✅ | ❌ | |
|
||||
| **amd64** | ✅ | ✅* | JIT only on Unix |
|
||||
| **arm** | ❌ | - | |
|
||||
| **arm64** | ✅ | ❌ | |
|
||||
| **mips** | ❌ | - | |
|
||||
| **mips64** | ❌ | - | |
|
||||
| **riscv64** | ❌ | - | |
|
||||
| **wasm** | ❌ | - | |
|
||||
|
|
66
cache.go
66
cache.go
|
@ -16,10 +16,19 @@ type Randomx_Cache struct {
|
|||
Blocks []MemoryBlock
|
||||
|
||||
Programs [RANDOMX_PROGRAM_COUNT]SuperScalarProgram
|
||||
|
||||
JitPrograms [RANDOMX_PROGRAM_COUNT]ProgramFunc
|
||||
|
||||
Flags uint64
|
||||
}
|
||||
|
||||
func Randomx_alloc_cache(flags uint64) *Randomx_Cache {
|
||||
return &Randomx_Cache{}
|
||||
if flags == RANDOMX_FLAG_DEFAULT {
|
||||
flags = RANDOMX_FLAG_JIT
|
||||
}
|
||||
return &Randomx_Cache{
|
||||
Flags: flags,
|
||||
}
|
||||
}
|
||||
|
||||
func (cache *Randomx_Cache) VM_Initialize() *VM {
|
||||
|
@ -31,6 +40,18 @@ func (cache *Randomx_Cache) VM_Initialize() *VM {
|
|||
}
|
||||
}
|
||||
|
||||
func (cache *Randomx_Cache) Close() error {
|
||||
for _, p := range cache.JitPrograms {
|
||||
if p != nil {
|
||||
err := p.Close()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cache *Randomx_Cache) Init(key []byte) {
|
||||
//fmt.Printf("appending null byte is not necessary but only done for testing")
|
||||
kkey := append([]byte{}, key...)
|
||||
|
@ -41,6 +62,16 @@ func (cache *Randomx_Cache) Init(key []byte) {
|
|||
memoryBlocks := unsafe.Slice((*MemoryBlock)(unsafe.Pointer(unsafe.SliceData(argonBlocks))), int(unsafe.Sizeof(argonBlock{}))/int(unsafe.Sizeof(MemoryBlock{}))*len(argonBlocks))
|
||||
|
||||
cache.Blocks = memoryBlocks
|
||||
|
||||
nonce := uint32(0) //uint32(len(key))
|
||||
gen := Init_Blake2Generator(key, nonce)
|
||||
for i := 0; i < 8; i++ {
|
||||
cache.Programs[i] = Build_SuperScalar_Program(gen) // build a superscalar program
|
||||
if cache.Flags&RANDOMX_FLAG_JIT > 0 {
|
||||
cache.JitPrograms[i] = generateSuperscalarCode(cache.Programs[i])
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// GetMixBlock fetch a 64 byte block in uint64 form
|
||||
|
@ -64,8 +95,7 @@ func (cache *Randomx_Cache) InitDatasetItem(rl *RegisterLine, itemNumber uint64)
|
|||
const superscalarAdd6 uint64 = 3398623926847679864
|
||||
const superscalarAdd7 uint64 = 9549104520008361294
|
||||
|
||||
register_value := itemNumber
|
||||
_ = register_value
|
||||
registerValue := itemNumber
|
||||
|
||||
rl[0] = (itemNumber + 1) * superscalarMul0
|
||||
rl[1] = rl[0] ^ superscalarAdd1
|
||||
|
@ -76,23 +106,39 @@ func (cache *Randomx_Cache) InitDatasetItem(rl *RegisterLine, itemNumber uint64)
|
|||
rl[6] = rl[0] ^ superscalarAdd6
|
||||
rl[7] = rl[0] ^ superscalarAdd7
|
||||
|
||||
for i := 0; i < RANDOMX_CACHE_ACCESSES; i++ {
|
||||
mix := cache.GetMixBlock(register_value)
|
||||
if cache.JitPrograms[0] != nil {
|
||||
for i := 0; i < RANDOMX_CACHE_ACCESSES; i++ {
|
||||
mix := cache.GetMixBlock(registerValue)
|
||||
|
||||
program := cache.Programs[i]
|
||||
cache.JitPrograms[i].Execute(rl)
|
||||
|
||||
executeSuperscalar(program.Program(), rl)
|
||||
for q := range rl {
|
||||
rl[q] ^= mix[q]
|
||||
}
|
||||
|
||||
registerValue = rl[cache.Programs[i].AddressRegister()]
|
||||
|
||||
for q := range rl {
|
||||
rl[q] ^= mix[q]
|
||||
}
|
||||
} else {
|
||||
for i := 0; i < RANDOMX_CACHE_ACCESSES; i++ {
|
||||
mix := cache.GetMixBlock(registerValue)
|
||||
|
||||
register_value = rl[program.AddressRegister()]
|
||||
program := cache.Programs[i]
|
||||
|
||||
executeSuperscalar(program.Program(), rl)
|
||||
|
||||
for q := range rl {
|
||||
rl[q] ^= mix[q]
|
||||
}
|
||||
|
||||
registerValue = rl[program.AddressRegister()]
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (cache *Randomx_Cache) initDataset(dataset []RegisterLine, startItem, endItem uint64) {
|
||||
panic("todo")
|
||||
for itemNumber := startItem; itemNumber < endItem; itemNumber, dataset = itemNumber+1, dataset[1:] {
|
||||
cache.InitDatasetItem(&dataset[0], itemNumber)
|
||||
}
|
||||
|
|
|
@ -67,7 +67,7 @@ const RANDOMX_JUMP_OFFSET = 8
|
|||
|
||||
const DATASETEXTRAITEMS = RANDOMX_DATASET_EXTRA_SIZE / RANDOMX_DATASET_ITEM_SIZE
|
||||
|
||||
const SuperscalarMaxSize int = 3*RANDOMX_SUPERSCALAR_LATENCY + 2
|
||||
const SuperscalarMaxSize = 3*RANDOMX_SUPERSCALAR_LATENCY + 2
|
||||
const RANDOMX_DATASET_ITEM_SIZE uint64 = 64
|
||||
const CacheLineSize uint64 = RANDOMX_DATASET_ITEM_SIZE
|
||||
const ScratchpadSize uint32 = RANDOMX_SCRATCHPAD_L3
|
||||
|
@ -102,9 +102,8 @@ const staticExponentBits = 4
|
|||
const constExponentBits uint64 = 0x300
|
||||
const dynamicMantissaMask = (uint64(1) << (mantissaSize + dynamicExponentBits)) - 1
|
||||
|
||||
const RANDOMX_FLAG_DEFAULT = 0
|
||||
const RANDOMX_FLAG_JIT = 1
|
||||
const RANDOMX_FLAG_LARGE_PAGES = 2
|
||||
const RANDOMX_FLAG_DEFAULT = uint64(0)
|
||||
const RANDOMX_FLAG_JIT = uint64(1 << iota)
|
||||
|
||||
func isZeroOrPowerOf2(x uint32) bool {
|
||||
return (x & (x - 1)) == 0
|
||||
|
|
|
@ -2,6 +2,6 @@ package randomx
|
|||
|
||||
type Randomx_Dataset interface {
|
||||
InitDataset(startItem, endItem uint64)
|
||||
ReadDataset(address uint64, r *RegisterLine)
|
||||
ReadDataset(address uint64, r, cache *RegisterLine)
|
||||
PrefetchDataset(address uint64)
|
||||
}
|
||||
|
|
|
@ -9,13 +9,11 @@ func (d *Randomx_DatasetLight) PrefetchDataset(address uint64) {
|
|||
|
||||
}
|
||||
|
||||
func (d *Randomx_DatasetLight) ReadDataset(address uint64, r *RegisterLine) {
|
||||
var out RegisterLine
|
||||
|
||||
d.Cache.InitDatasetItem(&out, address/CacheLineSize)
|
||||
func (d *Randomx_DatasetLight) ReadDataset(address uint64, r, cache *RegisterLine) {
|
||||
d.Cache.InitDatasetItem(cache, address/CacheLineSize)
|
||||
|
||||
for i := range r {
|
||||
r[i] ^= out[i]
|
||||
r[i] ^= cache[i]
|
||||
}
|
||||
}
|
||||
|
||||
|
|
11
exec_generic.go
Normal file
11
exec_generic.go
Normal file
|
@ -0,0 +1,11 @@
|
|||
//go:build !unix || disable_jit
|
||||
|
||||
package randomx
|
||||
|
||||
func (f ProgramFunc) Execute(rl *RegisterLine) {
|
||||
|
||||
}
|
||||
|
||||
func (f ProgramFunc) Close() error {
|
||||
|
||||
}
|
41
exec_mmap_unix.go
Normal file
41
exec_mmap_unix.go
Normal file
|
@ -0,0 +1,41 @@
|
|||
//go:build unix && !disable_jit
|
||||
|
||||
package randomx
|
||||
|
||||
import (
|
||||
"golang.org/x/sys/unix"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
func (f ProgramFunc) Execute(rl *RegisterLine) {
|
||||
memoryPtr := &f
|
||||
fun := *(*func(rl *RegisterLine))(unsafe.Pointer(&memoryPtr))
|
||||
|
||||
fun(rl)
|
||||
}
|
||||
|
||||
func (f ProgramFunc) Close() error {
|
||||
return unix.Munmap(f)
|
||||
}
|
||||
|
||||
func mapProgram(program []byte) ProgramFunc {
|
||||
execFunc, err := unix.Mmap(
|
||||
-1,
|
||||
0,
|
||||
len(program),
|
||||
unix.PROT_READ|unix.PROT_WRITE|unix.PROT_EXEC,
|
||||
unix.MAP_PRIVATE|unix.MAP_ANONYMOUS)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
copy(execFunc, program)
|
||||
|
||||
// Remove PROT_WRITE
|
||||
err = unix.Mprotect(execFunc, unix.PROT_READ|unix.PROT_EXEC)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
return execFunc
|
||||
}
|
2
go.mod
2
go.mod
|
@ -4,4 +4,4 @@ go 1.21
|
|||
|
||||
require golang.org/x/crypto v0.22.0
|
||||
|
||||
require golang.org/x/sys v0.19.0 // indirect
|
||||
require golang.org/x/sys v0.19.0
|
||||
|
|
40
go.sum
40
go.sum
|
@ -1,44 +1,4 @@
|
|||
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
|
||||
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
||||
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
|
||||
golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU=
|
||||
golang.org/x/crypto v0.22.0 h1:g1v0xeRhjcugydODzvb3mEM9SQ0HGp9s/nh3COQ/C30=
|
||||
golang.org/x/crypto v0.22.0/go.mod h1:vr6Su+7cTlO45qkww3VDJlzDn0ctJvRgYbC2NvXHt+M=
|
||||
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
|
||||
golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
|
||||
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
|
||||
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
|
||||
golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
|
||||
golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
|
||||
golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44=
|
||||
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
golang.org/x/sys v0.19.0 h1:q5f1RH2jigJ1MoAWp2KTp3gm5zAGFUTarQZ5U386+4o=
|
||||
golang.org/x/sys v0.19.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
|
||||
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
|
||||
golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo=
|
||||
golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk=
|
||||
golang.org/x/term v0.19.0/go.mod h1:2CuTdWZ7KHSQwUzKva0cbMg6q2DMI3Mmxp+gKJbskEk=
|
||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
|
||||
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
|
||||
golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
|
||||
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
|
||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
|
||||
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
|
||||
golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
|
||||
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
|
|
|
@ -55,16 +55,17 @@ func Test_Randomx(t *testing.T) {
|
|||
|
||||
t.Run(string(tt.key)+"_____"+string(tt.input), func(t *testing.T) {
|
||||
c.Init(tt.key)
|
||||
defer func() {
|
||||
err := c.Close()
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
}()
|
||||
|
||||
nonce := uint32(0) //uint32(len(key))
|
||||
gen := Init_Blake2Generator(tt.key, nonce)
|
||||
for i := 0; i < 8; i++ {
|
||||
c.Programs[i] = Build_SuperScalar_Program(gen) // build a superscalar program
|
||||
}
|
||||
vm := c.VM_Initialize()
|
||||
|
||||
var output_hash [32]byte
|
||||
vm.CalculateHash(tt.input, output_hash[:])
|
||||
vm.CalculateHash(tt.input, &output_hash)
|
||||
|
||||
actual := fmt.Sprintf("%x", output_hash)
|
||||
if actual != tt.expected {
|
||||
|
@ -83,18 +84,19 @@ func Benchmark_RandomX(b *testing.B) {
|
|||
c := Randomx_alloc_cache(0)
|
||||
|
||||
c.Init(tt.key)
|
||||
defer func() {
|
||||
err := c.Close()
|
||||
if err != nil {
|
||||
b.Error(err)
|
||||
}
|
||||
}()
|
||||
|
||||
nonce := uint32(0) //uint32(len(key))
|
||||
gen := Init_Blake2Generator(tt.key, nonce)
|
||||
for i := 0; i < 8; i++ {
|
||||
c.Programs[i] = Build_SuperScalar_Program(gen) // build a superscalar program
|
||||
}
|
||||
vm := c.VM_Initialize()
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
var output_hash [32]byte
|
||||
vm.CalculateHash(tt.input, output_hash[:])
|
||||
vm.CalculateHash(tt.input, &output_hash)
|
||||
runtime.KeepAlive(output_hash)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -398,7 +398,8 @@ func create(sins *SuperScalarInstruction, ins *Instruction, gen *Blake2Generator
|
|||
//fmt.Printf("q %s \n", ins.Name)
|
||||
sins.Name = ins.Name
|
||||
sins.Mod = gen.GetByte()
|
||||
sins.Imm32 = uint32((sins.Mod & 0b1100) >> 2) // bits 2-3
|
||||
// set modshift on Imm32
|
||||
sins.Imm32 = uint32((sins.Mod >> 2) % 4) // bits 2-3
|
||||
//sins.Imm32 = 0
|
||||
sins.OpGroup = S_IADD_RS
|
||||
sins.GroupParIsSource = 1
|
||||
|
|
248
superscalar_amd64.go
Normal file
248
superscalar_amd64.go
Normal file
|
@ -0,0 +1,248 @@
|
|||
//go:build unix && amd64 && !disable_jit
|
||||
|
||||
package randomx
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
)
|
||||
|
||||
// Size limits used to dimension the buffers that hold generated x86 code.
const (
	// MaxRandomXInstrCodeSize: FDIV_M requires up to 32 bytes of x86 code.
	MaxRandomXInstrCodeSize = 32
	// MaxSuperscalarInstrSize: IMUL_RCP requires 14 bytes of x86 code.
	MaxSuperscalarInstrSize = 14
	// SuperscalarProgramHeader is the overhead per superscalar program.
	SuperscalarProgramHeader = 128
	// CodeAlign aligns code size to a multiple of 4 KiB.
	CodeAlign = 4096
	// ReserveCodeSize covers function prologue/epilogue plus reserve.
	ReserveCodeSize = CodeAlign
)
|
||||
|
||||
// alignSize rounds pos up to the nearest multiple of align and returns it.
// A pos that is already a multiple of align is returned unchanged.
//
// pos == 0 yields 0 for every supported type. Without the explicit check the
// expression pos-1 truncates towards zero for signed types (giving align) and
// wraps around for unsigned types (giving a value that is only correct when
// align is a power of two).
//
// align must be non-zero; a zero align causes a division-by-zero panic.
func alignSize[T ~uintptr | ~uint32 | ~uint64 | ~int64 | ~int32 | ~int](pos, align T) T {
	if pos == 0 {
		return 0
	}
	return ((pos-1)/align + 1) * align
}
|
||||
|
||||
var RandomXCodeSize = alignSize[uint64](ReserveCodeSize+MaxRandomXInstrCodeSize*RANDOMX_PROGRAM_SIZE, CodeAlign)
|
||||
var SuperscalarSize = alignSize[uint64](ReserveCodeSize+(SuperscalarProgramHeader+MaxSuperscalarInstrSize*SuperscalarMaxSize)*RANDOMX_CACHE_ACCESSES, CodeAlign)
|
||||
|
||||
var CodeSize = uint32(RandomXCodeSize + SuperscalarSize)
|
||||
|
||||
var superScalarHashOffset = int32(RandomXCodeSize)
|
||||
|
||||
// Opcode and prefix byte sequences appended verbatim when emitting x86-64
// machine code. Each name mirrors the instruction form it stands for.
//
// JMP, CALL and JZ_SHORT are declared without an explicit type and are
// therefore of type int, unlike the byte-typed single opcodes.
var (
	REX_ADD_RR   = []byte{0x4d, 0x03}
	REX_ADD_RM   = []byte{0x4c, 0x03}
	REX_SUB_RR   = []byte{0x4d, 0x2b}
	REX_SUB_RM   = []byte{0x4c, 0x2b}
	REX_MOV_RR   = []byte{0x41, 0x8b}
	REX_MOV_RR64 = []byte{0x49, 0x8b}
	REX_MOV_R64R = []byte{0x4c, 0x8b}
	REX_IMUL_RR  = []byte{0x4d, 0x0f, 0xaf}
	REX_IMUL_RRI = []byte{0x4d, 0x69}
	REX_IMUL_RM  = []byte{0x4c, 0x0f, 0xaf}
	REX_MUL_R    = []byte{0x49, 0xf7}
	REX_MUL_M    = []byte{0x48, 0xf7}
	REX_81       = []byte{0x49, 0x81}

	AND_EAX_I byte = 0x25

	MOV_EAX_I byte = 0xb8

	MOV_RAX_I              = []byte{0x48, 0xb8}
	MOV_RCX_I              = []byte{0x48, 0xb9}
	REX_LEA                = []byte{0x4f, 0x8d}
	REX_MUL_MEM            = []byte{0x48, 0xf7, 0x24, 0x0e}
	REX_IMUL_MEM           = []byte{0x48, 0xf7, 0x2c, 0x0e}
	REX_SHR_RAX            = []byte{0x48, 0xc1, 0xe8}
	RAX_ADD_SBB_1          = []byte{0x48, 0x83, 0xC0, 0x01, 0x48, 0x83, 0xD8, 0x00}
	MUL_RCX                = []byte{0x48, 0xf7, 0xe1}
	REX_SHR_RDX            = []byte{0x48, 0xc1, 0xea}
	REX_SH                 = []byte{0x49, 0xc1}
	MOV_RCX_RAX_SAR_RCX_63 = []byte{0x48, 0x89, 0xc1, 0x48, 0xc1, 0xf9, 0x3f}
	AND_ECX_I              = []byte{0x81, 0xe1}
	ADD_RAX_RCX            = []byte{0x48, 0x01, 0xC8}
	SAR_RAX_I8             = []byte{0x48, 0xC1, 0xF8}
	NEG_RAX                = []byte{0x48, 0xF7, 0xD8}
	ADD_R_RAX              = []byte{0x4C, 0x03}
	XOR_EAX_EAX            = []byte{0x33, 0xC0}
	ADD_RDX_R              = []byte{0x4c, 0x01}
	SUB_RDX_R              = []byte{0x4c, 0x29}
	SAR_RDX_I8             = []byte{0x48, 0xC1, 0xFA}
	TEST_RDX_RDX           = []byte{0x48, 0x85, 0xD2}
	SETS_AL_ADD_RDX_RAX    = []byte{0x0F, 0x98, 0xC0, 0x48, 0x03, 0xD0}
	REX_NEG                = []byte{0x49, 0xF7}
	REX_XOR_RR             = []byte{0x4D, 0x33}
	REX_XOR_RI             = []byte{0x49, 0x81}
	REX_XOR_RM             = []byte{0x4c, 0x33}
	REX_ROT_CL             = []byte{0x49, 0xd3}
	REX_ROT_I8             = []byte{0x49, 0xc1}
	SHUFPD                 = []byte{0x66, 0x0f, 0xc6}
	REX_ADDPD              = []byte{0x66, 0x41, 0x0f, 0x58}
	REX_CVTDQ2PD_XMM12     = []byte{0xf3, 0x44, 0x0f, 0xe6, 0x24, 0x06}
	REX_SUBPD              = []byte{0x66, 0x41, 0x0f, 0x5c}
	REX_XORPS              = []byte{0x41, 0x0f, 0x57}
	REX_MULPD              = []byte{0x66, 0x41, 0x0f, 0x59}
	REX_MAXPD              = []byte{0x66, 0x41, 0x0f, 0x5f}
	REX_DIVPD              = []byte{0x66, 0x41, 0x0f, 0x5e}
	SQRTPD                 = []byte{0x66, 0x0f, 0x51}
	AND_OR_MOV_LDMXCSR     = []byte{0x25, 0x00, 0x60, 0x00, 0x00, 0x0D, 0xC0, 0x9F, 0x00, 0x00, 0x50, 0x0F, 0xAE, 0x14, 0x24, 0x58}
	ROL_RAX                = []byte{0x48, 0xc1, 0xc0}
	XOR_ECX_ECX            = []byte{0x33, 0xC9}
	REX_CMP_R32I           = []byte{0x41, 0x81}
	REX_CMP_M32I           = []byte{0x81, 0x3c, 0x06}
	MOVAPD                 = []byte{0x66, 0x0f, 0x29}
	REX_MOV_MR             = []byte{0x4c, 0x89}
	REX_XOR_EAX            = []byte{0x41, 0x33}
	SUB_EBX                = []byte{0x83, 0xEB, 0x01}
	JNZ                    = []byte{0x0f, 0x85}
	JMP                    = 0xe9

	REX_XOR_RAX_R64 = []byte{0x49, 0x33}
	REX_XCHG        = []byte{0x4d, 0x87}
	REX_ANDPS_XMM12 = []byte{0x45, 0x0F, 0x54, 0xE5, 0x45, 0x0F, 0x56, 0xE6}
	REX_PADD        = []byte{0x66, 0x44, 0x0f}
	PADD_OPCODES    = []byte{0xfc, 0xfd, 0xfe, 0xd4}
	CALL            = 0xe8

	REX_ADD_I = []byte{0x49, 0x81}
	REX_TEST  = []byte{0x49, 0xF7}
	JZ        = []byte{0x0f, 0x84}
	JZ_SHORT  = 0x74

	RET byte = 0xc3

	LEA_32    = []byte{0x41, 0x8d}
	MOVNTI    = []byte{0x4c, 0x0f, 0xc3}
	ADD_EBX_I = []byte{0x81, 0xc3}

	// Padding sequences of one to eight bytes.
	NOP1 = []byte{0x90}
	NOP2 = []byte{0x66, 0x90}
	NOP3 = []byte{0x66, 0x66, 0x90}
	NOP4 = []byte{0x0F, 0x1F, 0x40, 0x00}
	NOP5 = []byte{0x0F, 0x1F, 0x44, 0x00, 0x00}
	NOP6 = []byte{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00}
	NOP7 = []byte{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00}
	NOP8 = []byte{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}
)
|
||||
|
||||
// genSIB packs scale, index and base into a single byte: scale is shifted
// left by six bits, index by three, and base occupies the low bits. The
// combined value is truncated to eight bits.
func genSIB(scale, index, base int) byte {
	sib := scale<<6 | index<<3 | base
	return byte(sib)
}
|
||||
|
||||
// codeInitBlock is the prologue placed at the start of every generated
// program. It saves registers, reserves stack space, takes the register-line
// pointer from rax into rsi and loads r8-r15 from the eight 64-bit slots it
// points to. Each line below is one instruction.
var codeInitBlock = []byte{
	0x55,       // push rbp
	0x53,       // push rbx
	0x56,       // push rsi
	0x41, 0x54, // push r12
	0x41, 0x55, // push r13
	0x41, 0x56, // push r14
	0x41, 0x57, // push r15
	0x48, 0x89, 0xE5, // mov rbp,rsp
	0x48, 0x83, 0xEC, 0x38, // sub rsp,(0x8*7)

	0x48, 0x89, 0xC6, // mov rsi, rax; # register dataset

	0x0F, 0x18, 0x06, // prefetchnta byte ptr [rsi]

	0x4C, 0x8B, 0x06, // mov r8, qword ptr [rsi+0]
	0x4C, 0x8B, 0x4E, 0x08, // mov r9, qword ptr [rsi+8]
	0x4C, 0x8B, 0x56, 0x10, // mov r10, qword ptr [rsi+16]
	0x4C, 0x8B, 0x5E, 0x18, // mov r11, qword ptr [rsi+24]
	0x4C, 0x8B, 0x66, 0x20, // mov r12, qword ptr [rsi+32]
	0x4C, 0x8B, 0x6E, 0x28, // mov r13, qword ptr [rsi+40]
	0x4C, 0x8B, 0x76, 0x30, // mov r14, qword ptr [rsi+48]
	0x4C, 0x8B, 0x7E, 0x38, // mov r15, qword ptr [rsi+56]
}
|
||||
|
||||
// codeRetBlock is the epilogue placed at the end of every generated program.
// It stores r8-r15 back into the eight 64-bit slots addressed by rsi, releases
// the stack space reserved by the prologue, restores the saved registers and
// returns. Each line below is one instruction.
var codeRetBlock = []byte{
	0x0F, 0x0D, 0x0E, // prefetchw byte ptr [rsi]

	0x4C, 0x89, 0x06, // mov qword ptr [rsi+0], r8
	0x4C, 0x89, 0x4E, 0x08, // mov qword ptr [rsi+8], r9
	0x4C, 0x89, 0x56, 0x10, // mov qword ptr [rsi+16], r10
	0x4C, 0x89, 0x5E, 0x18, // mov qword ptr [rsi+24], r11
	0x4C, 0x89, 0x66, 0x20, // mov qword ptr [rsi+32], r12
	0x4C, 0x89, 0x6E, 0x28, // mov qword ptr [rsi+40], r13
	0x4C, 0x89, 0x76, 0x30, // mov qword ptr [rsi+48], r14
	0x4C, 0x89, 0x7E, 0x38, // mov qword ptr [rsi+56], r15

	0x48, 0x83, 0xC4, 0x38, // add rsp,(0x8*7)
	0x41, 0x5F, // pop r15
	0x41, 0x5E, // pop r14
	0x41, 0x5D, // pop r13
	0x41, 0x5C, // pop r12
	0x5E, // pop rsi
	0x5B, // pop rbx
	0x5D, // pop rbp
	0xC3, // ret
}
|
||||
|
||||
// generateSuperscalarCode
|
||||
func generateSuperscalarCode(scalarProgram SuperScalarProgram) ProgramFunc {
|
||||
|
||||
var program []byte
|
||||
|
||||
program = append(program, codeInitBlock...)
|
||||
|
||||
p := scalarProgram.Program()
|
||||
for i := range p {
|
||||
instr := &p[i]
|
||||
|
||||
dst := instr.Dst_Reg % REGISTERSCOUNT
|
||||
src := instr.Src_Reg % REGISTERSCOUNT
|
||||
|
||||
switch instr.Opcode {
|
||||
case S_ISUB_R:
|
||||
program = append(program, REX_SUB_RR...)
|
||||
program = append(program, byte(0xc0+8*dst+src))
|
||||
case S_IXOR_R:
|
||||
program = append(program, REX_XOR_RR...)
|
||||
program = append(program, byte(0xc0+8*dst+src))
|
||||
case S_IADD_RS:
|
||||
program = append(program, REX_LEA...)
|
||||
program = append(program,
|
||||
byte(0x04+8*dst),
|
||||
genSIB(int(instr.Imm32), src, dst),
|
||||
)
|
||||
case S_IMUL_R:
|
||||
program = append(program, REX_IMUL_RR...)
|
||||
program = append(program, byte(0xc0+8*dst+src))
|
||||
case S_IROR_C:
|
||||
program = append(program, REX_ROT_I8...)
|
||||
program = append(program,
|
||||
byte(0xc8+dst),
|
||||
byte(instr.Imm32&63),
|
||||
)
|
||||
|
||||
case S_IADD_C7, S_IADD_C8, S_IADD_C9:
|
||||
program = append(program, REX_81...)
|
||||
program = append(program, byte(0xc0+dst))
|
||||
program = binary.LittleEndian.AppendUint32(program, instr.Imm32)
|
||||
//TODO: align NOP on C8/C9
|
||||
case S_IXOR_C7, S_IXOR_C8, S_IXOR_C9:
|
||||
program = append(program, REX_XOR_RI...)
|
||||
program = append(program, byte(0xf0+dst))
|
||||
program = binary.LittleEndian.AppendUint32(program, instr.Imm32)
|
||||
//TODO: align NOP on C8/C9
|
||||
|
||||
case S_IMULH_R:
|
||||
program = append(program, REX_MOV_RR64...)
|
||||
program = append(program, byte(0xc0+dst))
|
||||
program = append(program, REX_MUL_R...)
|
||||
program = append(program, byte(0xe0+src))
|
||||
program = append(program, REX_MOV_R64R...)
|
||||
program = append(program, byte(0xc2+8*dst))
|
||||
case S_ISMULH_R:
|
||||
program = append(program, REX_MOV_RR64...)
|
||||
program = append(program, byte(0xc0+dst))
|
||||
program = append(program, REX_MUL_R...)
|
||||
program = append(program, byte(0xe8+src))
|
||||
program = append(program, REX_MOV_R64R...)
|
||||
program = append(program, byte(0xc2+8*dst))
|
||||
case S_IMUL_RCP:
|
||||
program = append(program, MOV_RAX_I...)
|
||||
program = binary.LittleEndian.AppendUint64(program, randomx_reciprocal(instr.Imm32))
|
||||
program = append(program, REX_IMUL_RM...)
|
||||
program = append(program, byte(0xc0+8*instr.Dst_Reg))
|
||||
default:
|
||||
panic("unreachable")
|
||||
}
|
||||
}
|
||||
|
||||
program = append(program, codeRetBlock...)
|
||||
|
||||
return mapProgram(program)
|
||||
}
|
8
superscalar_noasm.go
Normal file
8
superscalar_noasm.go
Normal file
|
@ -0,0 +1,8 @@
|
|||
//go:build !unix || !amd64 || disable_jit
|
||||
|
||||
package randomx
|
||||
|
||||
// generateSuperscalarCode
|
||||
func generateSuperscalarCode(scalarProgram SuperScalarProgram) ProgramFunc {
|
||||
return nil
|
||||
}
|
14
vm.go
14
vm.go
|
@ -137,6 +137,8 @@ func (vm *VM) Run(input_hash []byte) {
|
|||
spAddr0 := vm.mem.mx
|
||||
spAddr1 := vm.mem.ma
|
||||
|
||||
var rlCache RegisterLine
|
||||
|
||||
for ic := 0; ic < RANDOMX_PROGRAM_ITERATIONS; ic++ {
|
||||
spMix := vm.reg.r[vm.config.readReg0] ^ vm.reg.r[vm.config.readReg1]
|
||||
|
||||
|
@ -169,7 +171,7 @@ func (vm *VM) Run(input_hash []byte) {
|
|||
|
||||
vm.Dataset.PrefetchDataset(vm.datasetOffset + vm.mem.mx)
|
||||
// execute diffuser superscalar program to get dataset 64 bytes
|
||||
vm.Dataset.ReadDataset(vm.datasetOffset+vm.mem.ma, &vm.reg.r)
|
||||
vm.Dataset.ReadDataset(vm.datasetOffset+vm.mem.ma, &vm.reg.r, &rlCache)
|
||||
|
||||
// swap the elements
|
||||
vm.mem.mx, vm.mem.ma = vm.mem.ma, vm.mem.mx
|
||||
|
@ -193,7 +195,7 @@ func (vm *VM) Run(input_hash []byte) {
|
|||
|
||||
}
|
||||
|
||||
func (vm *VM) CalculateHash(input []byte, output []byte) {
|
||||
func (vm *VM) CalculateHash(input []byte, output *[32]byte) {
|
||||
var buf [8]byte
|
||||
|
||||
// Lock thread due to rounding mode flags
|
||||
|
@ -244,7 +246,7 @@ func (vm *VM) CalculateHash(input []byte, output []byte) {
|
|||
hash512.Write(buf[:])
|
||||
}
|
||||
|
||||
temp_hash = hash512.Sum(nil)
|
||||
temp_hash = hash512.Sum(input_hash[:0])
|
||||
//fmt.Printf("%d temphash %x\n", chain, temp_hash)
|
||||
}
|
||||
|
||||
|
@ -280,11 +282,7 @@ func (vm *VM) CalculateHash(input []byte, output []byte) {
|
|||
// copy temp_hash as it first copied to register and then hashed
|
||||
hash256.Write(temp_hash)
|
||||
|
||||
final_hash := hash256.Sum(nil)
|
||||
|
||||
copy(output, final_hash)
|
||||
|
||||
//fmt.Printf("final %x\n", final_hash)
|
||||
hash256.Sum(output[:0])
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
Loading…
Reference in a new issue