Speed up superscalar / argon via fewer pointers and copies

This commit is contained in:
DataHoarder 2024-04-11 18:10:24 +02:00
parent ece28ee558
commit 9214202406
Signed by: DataHoarder
SSH key fingerprint: SHA256:OLTRf6Fl87G52SiR7sWLGNzlJt4WOX+tfI2yxo0z7xk
6 changed files with 52 additions and 42 deletions

View file

@ -1,22 +1,21 @@
package randomx
import (
"slices"
"unsafe"
)
type MemoryBlock [128]uint64
func (m *MemoryBlock) getLine(addr uint64) *registerLine {
func (m *MemoryBlock) GetLine(addr uint64) *RegisterLine {
addr >>= 3
//[addr : addr+8 : addr+8]
return (*registerLine)(unsafe.Add(unsafe.Pointer(m), addr*8))
return (*RegisterLine)(unsafe.Add(unsafe.Pointer(m), addr*8))
}
type Randomx_Cache struct {
Blocks []MemoryBlock
Programs [RANDOMX_PROGRAM_COUNT]*SuperScalarProgram
Programs [RANDOMX_PROGRAM_COUNT]SuperScalarProgram
}
func Randomx_alloc_cache(flags uint64) *Randomx_Cache {
@ -41,21 +40,21 @@ func (cache *Randomx_Cache) Init(key []byte) {
memoryBlocks := unsafe.Slice((*MemoryBlock)(unsafe.Pointer(unsafe.SliceData(argonBlocks))), int(unsafe.Sizeof(argonBlock{}))/int(unsafe.Sizeof(MemoryBlock{}))*len(argonBlocks))
cache.Blocks = slices.Clone(memoryBlocks)
cache.Blocks = memoryBlocks
}
// GetMixBlock fetches a 64-byte block in uint64 form
func (cache *Randomx_Cache) GetMixBlock(addr uint64) *registerLine {
func (cache *Randomx_Cache) GetMixBlock(addr uint64) *RegisterLine {
mask := CacheSize/CacheLineSize - 1
addr = (addr & mask) * CacheLineSize
block := addr / 1024
return cache.Blocks[block].getLine(addr % 1024)
return cache.Blocks[block].GetLine(addr % 1024)
}
func (cache *Randomx_Cache) InitDatasetItem(out *registerLine, itemNumber uint64) {
func (cache *Randomx_Cache) InitDatasetItem(rl *RegisterLine, itemNumber uint64) {
const superscalarMul0 uint64 = 6364136223846793005
const superscalarAdd1 uint64 = 9298411001130361340
const superscalarAdd2 uint64 = 12065312585734608966
@ -65,8 +64,6 @@ func (cache *Randomx_Cache) InitDatasetItem(out *registerLine, itemNumber uint64
const superscalarAdd6 uint64 = 3398623926847679864
const superscalarAdd7 uint64 = 9549104520008361294
var rl registerLine
register_value := itemNumber
_ = register_value
@ -84,22 +81,18 @@ func (cache *Randomx_Cache) InitDatasetItem(out *registerLine, itemNumber uint64
program := cache.Programs[i]
executeSuperscalar(program, &rl)
executeSuperscalar(program.Program(), rl)
for q := range rl {
rl[q] ^= mix[q]
}
register_value = rl[program.AddressRegister]
register_value = rl[program.AddressRegister()]
}
for q := range rl {
out[q] = rl[q]
}
}
func (cache *Randomx_Cache) initDataset(dataset []registerLine, startItem, endItem uint64) {
func (cache *Randomx_Cache) initDataset(dataset []RegisterLine, startItem, endItem uint64) {
for itemNumber := startItem; itemNumber < endItem; itemNumber, dataset = itemNumber+1, dataset[1:] {
cache.InitDatasetItem(&dataset[0], itemNumber)
}

View file

@ -1,6 +1,7 @@
package randomx
type Randomx_Dataset interface {
ReadDataset(address uint64, r *registerLine)
InitDataset(startItem, endItem uint64)
ReadDataset(address uint64, r *RegisterLine)
PrefetchDataset(address uint64)
}

View file

@ -1,15 +1,16 @@
package randomx
type Randomx_DatasetLight struct {
Cache *Randomx_Cache
Cache *Randomx_Cache
Memory []uint64
}
// PrefetchDataset is a no-op for Randomx_DatasetLight: the body is empty and
// the address argument is ignored.
func (d *Randomx_DatasetLight) PrefetchDataset(address uint64) {
}
func (d *Randomx_DatasetLight) ReadDataset(address uint64, r *registerLine) {
var out registerLine
func (d *Randomx_DatasetLight) ReadDataset(address uint64, r *RegisterLine) {
var out RegisterLine
d.Cache.InitDatasetItem(&out, address/CacheLineSize)
@ -17,3 +18,7 @@ func (d *Randomx_DatasetLight) ReadDataset(address uint64, r *registerLine) {
r[i] ^= out[i]
}
}
// InitDataset currently does nothing for Randomx_DatasetLight: its only
// statement is commented out, so startItem and endItem are ignored.
func (d *Randomx_DatasetLight) InitDataset(startItem, endItem uint64) {
//d.Cache.initDataset(d.Cache.Programs)
}

View file

@ -1,3 +1,3 @@
package randomx
type registerLine [8]uint64
type RegisterLine [8]uint64

View file

@ -508,12 +508,20 @@ func CreateSuperScalarInstruction(sins *SuperScalarInstruction, gen *Blake2Gener
}
type SuperScalarProgram struct {
Instructions []SuperScalarInstruction // all instructions of program
AddressRegister int
type SuperScalarProgram []SuperScalarInstruction
// setAddressRegister records addressRegister in the Dst_Reg field of the
// first element of p (p[0]); that slot is used as storage for this value
// rather than as an executable instruction. Although the receiver is a value,
// the write goes through the slice's shared backing array, so the caller's
// program observes it. p must contain at least one element, otherwise the
// index expression panics.
func (p SuperScalarProgram) setAddressRegister(addressRegister int) {
p[0].Dst_Reg = addressRegister
}
func Build_SuperScalar_Program(gen *Blake2Generator) *SuperScalarProgram {
// AddressRegister returns the value held in the Dst_Reg field of the first
// element of p (p[0]), which is where setAddressRegister stores it.
// p must contain at least one element, otherwise the index expression panics.
func (p SuperScalarProgram) AddressRegister() int {
return p[0].Dst_Reg
}
// Program returns every element of p after the first (p[1:]), i.e. the
// instruction list without slot 0, which setAddressRegister uses to hold the
// address register. The returned slice shares its backing array with p.
// p must contain at least one element, otherwise the slice expression panics.
func (p SuperScalarProgram) Program() []SuperScalarInstruction {
return p[1:]
}
func Build_SuperScalar_Program(gen *Blake2Generator) SuperScalarProgram {
cycle := 0
depcycle := 0
//retire_cycle := 0
@ -525,7 +533,7 @@ func Build_SuperScalar_Program(gen *Blake2Generator) *SuperScalarProgram {
macro_op_count := 0
throwAwayCount := 0
code_size := 0
var program SuperScalarProgram
program := make(SuperScalarProgram, 1, 512)
preAllocatedRegisters := gen.allocRegIndex[:]
@ -671,7 +679,7 @@ func Build_SuperScalar_Program(gen *Blake2Generator) *SuperScalarProgram {
// when all uops of the current instruction have been issued, add the instruction to the superscalar program
if macro_op_index >= sins.ins.GetUOPCount() {
sins.FixSrcReg() // fix src register once and for all
program.Instructions = append(program.Instructions, *sins)
program = append(program, *sins)
if sins.ins.Name == "IMUL_R" || sins.ins.Name == "IMULH_R" || sins.ins.Name == "ISMULH_R" || sins.ins.Name == "IMUL_RCP" {
mulcount++
@ -696,14 +704,17 @@ func Build_SuperScalar_Program(gen *Blake2Generator) *SuperScalarProgram {
var asic_latencies [8]int
for i := range program.Instructions {
//fmt.Printf("%d %s\n",i ,program[i].String() )
lastdst := asic_latencies[program.Instructions[i].Dst_Reg] + 1
lastsrc := 0
if program.Instructions[i].Dst_Reg != program.Instructions[i].Src_Reg {
lastsrc = asic_latencies[program.Instructions[i].Src_Reg] + 1
for i := range program {
if i == 0 {
continue
}
asic_latencies[program.Instructions[i].Dst_Reg] = Max(lastdst, lastsrc)
//fmt.Printf("%d %s\n",i ,program[i].String() )
lastdst := asic_latencies[program[i].Dst_Reg] + 1
lastsrc := 0
if program[i].Dst_Reg != program[i].Src_Reg {
lastsrc = asic_latencies[program[i].Src_Reg] + 1
}
asic_latencies[program[i].Dst_Reg] = Max(lastdst, lastsrc)
}
asic_latency_max := 0
@ -717,12 +728,12 @@ func Build_SuperScalar_Program(gen *Blake2Generator) *SuperScalarProgram {
}
}
program.AddressRegister = address_reg
// Hack: store the address register in Dst_Reg of the reserved slot 0 instead of a separate struct field
program.setAddressRegister(address_reg)
//fmt.Printf("address_reg %d\n", address_reg)
return &program
return program
}
const CYCLE_MAP_SIZE int = RANDOMX_SUPERSCALAR_LATENCY + 4
@ -892,10 +903,10 @@ func getMixBlock(register_value uint64, memory []byte) uint64 {
}
// executeSuperscalar execute the superscalar program
func executeSuperscalar(p *SuperScalarProgram, r *registerLine) {
func executeSuperscalar(p []SuperScalarInstruction, r *RegisterLine) {
for i := range p.Instructions {
ins := &p.Instructions[i]
for i := range p {
ins := &p[i]
switch ins.Opcode {
case S_ISUB_R:
r[ins.Dst_Reg] -= r[ins.Src_Reg]

2
vm.go
View file

@ -76,7 +76,7 @@ type Config struct {
}
type REGISTER_FILE struct {
r registerLine
r RegisterLine
f [4][2]float64
e [4][2]float64
a [4][2]float64