Speed up superscalar / argon via fewer pointers and copies
This commit is contained in:
parent
ece28ee558
commit
9214202406
27
cache.go
27
cache.go
|
@ -1,22 +1,21 @@
|
|||
package randomx
|
||||
|
||||
import (
|
||||
"slices"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
type MemoryBlock [128]uint64
|
||||
|
||||
func (m *MemoryBlock) getLine(addr uint64) *registerLine {
|
||||
func (m *MemoryBlock) GetLine(addr uint64) *RegisterLine {
|
||||
addr >>= 3
|
||||
//[addr : addr+8 : addr+8]
|
||||
return (*registerLine)(unsafe.Add(unsafe.Pointer(m), addr*8))
|
||||
return (*RegisterLine)(unsafe.Add(unsafe.Pointer(m), addr*8))
|
||||
}
|
||||
|
||||
type Randomx_Cache struct {
|
||||
Blocks []MemoryBlock
|
||||
|
||||
Programs [RANDOMX_PROGRAM_COUNT]*SuperScalarProgram
|
||||
Programs [RANDOMX_PROGRAM_COUNT]SuperScalarProgram
|
||||
}
|
||||
|
||||
func Randomx_alloc_cache(flags uint64) *Randomx_Cache {
|
||||
|
@ -41,21 +40,21 @@ func (cache *Randomx_Cache) Init(key []byte) {
|
|||
|
||||
memoryBlocks := unsafe.Slice((*MemoryBlock)(unsafe.Pointer(unsafe.SliceData(argonBlocks))), int(unsafe.Sizeof(argonBlock{}))/int(unsafe.Sizeof(MemoryBlock{}))*len(argonBlocks))
|
||||
|
||||
cache.Blocks = slices.Clone(memoryBlocks)
|
||||
cache.Blocks = memoryBlocks
|
||||
}
|
||||
|
||||
// GetMixBlock fetches a 64-byte block in uint64 form
|
||||
func (cache *Randomx_Cache) GetMixBlock(addr uint64) *registerLine {
|
||||
func (cache *Randomx_Cache) GetMixBlock(addr uint64) *RegisterLine {
|
||||
|
||||
mask := CacheSize/CacheLineSize - 1
|
||||
|
||||
addr = (addr & mask) * CacheLineSize
|
||||
|
||||
block := addr / 1024
|
||||
return cache.Blocks[block].getLine(addr % 1024)
|
||||
return cache.Blocks[block].GetLine(addr % 1024)
|
||||
}
|
||||
|
||||
func (cache *Randomx_Cache) InitDatasetItem(out *registerLine, itemNumber uint64) {
|
||||
func (cache *Randomx_Cache) InitDatasetItem(rl *RegisterLine, itemNumber uint64) {
|
||||
const superscalarMul0 uint64 = 6364136223846793005
|
||||
const superscalarAdd1 uint64 = 9298411001130361340
|
||||
const superscalarAdd2 uint64 = 12065312585734608966
|
||||
|
@ -65,8 +64,6 @@ func (cache *Randomx_Cache) InitDatasetItem(out *registerLine, itemNumber uint64
|
|||
const superscalarAdd6 uint64 = 3398623926847679864
|
||||
const superscalarAdd7 uint64 = 9549104520008361294
|
||||
|
||||
var rl registerLine
|
||||
|
||||
register_value := itemNumber
|
||||
_ = register_value
|
||||
|
||||
|
@ -84,22 +81,18 @@ func (cache *Randomx_Cache) InitDatasetItem(out *registerLine, itemNumber uint64
|
|||
|
||||
program := cache.Programs[i]
|
||||
|
||||
executeSuperscalar(program, &rl)
|
||||
executeSuperscalar(program.Program(), rl)
|
||||
|
||||
for q := range rl {
|
||||
rl[q] ^= mix[q]
|
||||
}
|
||||
|
||||
register_value = rl[program.AddressRegister]
|
||||
register_value = rl[program.AddressRegister()]
|
||||
|
||||
}
|
||||
|
||||
for q := range rl {
|
||||
out[q] = rl[q]
|
||||
}
|
||||
}
|
||||
|
||||
func (cache *Randomx_Cache) initDataset(dataset []registerLine, startItem, endItem uint64) {
|
||||
func (cache *Randomx_Cache) initDataset(dataset []RegisterLine, startItem, endItem uint64) {
|
||||
for itemNumber := startItem; itemNumber < endItem; itemNumber, dataset = itemNumber+1, dataset[1:] {
|
||||
cache.InitDatasetItem(&dataset[0], itemNumber)
|
||||
}
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
package randomx
|
||||
|
||||
type Randomx_Dataset interface {
|
||||
ReadDataset(address uint64, r *registerLine)
|
||||
InitDataset(startItem, endItem uint64)
|
||||
ReadDataset(address uint64, r *RegisterLine)
|
||||
PrefetchDataset(address uint64)
|
||||
}
|
||||
|
|
|
@ -1,15 +1,16 @@
|
|||
package randomx
|
||||
|
||||
type Randomx_DatasetLight struct {
|
||||
Cache *Randomx_Cache
|
||||
Cache *Randomx_Cache
|
||||
Memory []uint64
|
||||
}
|
||||
|
||||
func (d *Randomx_DatasetLight) PrefetchDataset(address uint64) {
|
||||
|
||||
}
|
||||
|
||||
func (d *Randomx_DatasetLight) ReadDataset(address uint64, r *registerLine) {
|
||||
var out registerLine
|
||||
func (d *Randomx_DatasetLight) ReadDataset(address uint64, r *RegisterLine) {
|
||||
var out RegisterLine
|
||||
|
||||
d.Cache.InitDatasetItem(&out, address/CacheLineSize)
|
||||
|
||||
|
@ -17,3 +18,7 @@ func (d *Randomx_DatasetLight) ReadDataset(address uint64, r *registerLine) {
|
|||
r[i] ^= out[i]
|
||||
}
|
||||
}
|
||||
|
||||
func (d *Randomx_DatasetLight) InitDataset(startItem, endItem uint64) {
|
||||
//d.Cache.initDataset(d.Cache.Programs)
|
||||
}
|
||||
|
|
|
@ -1,3 +1,3 @@
|
|||
package randomx
|
||||
|
||||
type registerLine [8]uint64
|
||||
type RegisterLine [8]uint64
|
||||
|
|
|
@ -508,12 +508,20 @@ func CreateSuperScalarInstruction(sins *SuperScalarInstruction, gen *Blake2Gener
|
|||
|
||||
}
|
||||
|
||||
type SuperScalarProgram struct {
|
||||
Instructions []SuperScalarInstruction // all instructions of program
|
||||
AddressRegister int
|
||||
type SuperScalarProgram []SuperScalarInstruction
|
||||
|
||||
func (p SuperScalarProgram) setAddressRegister(addressRegister int) {
|
||||
p[0].Dst_Reg = addressRegister
|
||||
}
|
||||
|
||||
func Build_SuperScalar_Program(gen *Blake2Generator) *SuperScalarProgram {
|
||||
func (p SuperScalarProgram) AddressRegister() int {
|
||||
return p[0].Dst_Reg
|
||||
}
|
||||
func (p SuperScalarProgram) Program() []SuperScalarInstruction {
|
||||
return p[1:]
|
||||
}
|
||||
|
||||
func Build_SuperScalar_Program(gen *Blake2Generator) SuperScalarProgram {
|
||||
cycle := 0
|
||||
depcycle := 0
|
||||
//retire_cycle := 0
|
||||
|
@ -525,7 +533,7 @@ func Build_SuperScalar_Program(gen *Blake2Generator) *SuperScalarProgram {
|
|||
macro_op_count := 0
|
||||
throwAwayCount := 0
|
||||
code_size := 0
|
||||
var program SuperScalarProgram
|
||||
program := make(SuperScalarProgram, 1, 512)
|
||||
|
||||
preAllocatedRegisters := gen.allocRegIndex[:]
|
||||
|
||||
|
@ -671,7 +679,7 @@ func Build_SuperScalar_Program(gen *Blake2Generator) *SuperScalarProgram {
|
|||
// when all uops of the current instruction have been issued, add the instruction to the superscalar program
|
||||
if macro_op_index >= sins.ins.GetUOPCount() {
|
||||
sins.FixSrcReg() // fix src register once and for all
|
||||
program.Instructions = append(program.Instructions, *sins)
|
||||
program = append(program, *sins)
|
||||
|
||||
if sins.ins.Name == "IMUL_R" || sins.ins.Name == "IMULH_R" || sins.ins.Name == "ISMULH_R" || sins.ins.Name == "IMUL_RCP" {
|
||||
mulcount++
|
||||
|
@ -696,14 +704,17 @@ func Build_SuperScalar_Program(gen *Blake2Generator) *SuperScalarProgram {
|
|||
|
||||
var asic_latencies [8]int
|
||||
|
||||
for i := range program.Instructions {
|
||||
//fmt.Printf("%d %s\n",i ,program[i].String() )
|
||||
lastdst := asic_latencies[program.Instructions[i].Dst_Reg] + 1
|
||||
lastsrc := 0
|
||||
if program.Instructions[i].Dst_Reg != program.Instructions[i].Src_Reg {
|
||||
lastsrc = asic_latencies[program.Instructions[i].Src_Reg] + 1
|
||||
for i := range program {
|
||||
if i == 0 {
|
||||
continue
|
||||
}
|
||||
asic_latencies[program.Instructions[i].Dst_Reg] = Max(lastdst, lastsrc)
|
||||
//fmt.Printf("%d %s\n",i ,program[i].String() )
|
||||
lastdst := asic_latencies[program[i].Dst_Reg] + 1
|
||||
lastsrc := 0
|
||||
if program[i].Dst_Reg != program[i].Src_Reg {
|
||||
lastsrc = asic_latencies[program[i].Src_Reg] + 1
|
||||
}
|
||||
asic_latencies[program[i].Dst_Reg] = Max(lastdst, lastsrc)
|
||||
}
|
||||
|
||||
asic_latency_max := 0
|
||||
|
@ -717,12 +728,12 @@ func Build_SuperScalar_Program(gen *Blake2Generator) *SuperScalarProgram {
|
|||
}
|
||||
}
|
||||
|
||||
program.AddressRegister = address_reg
|
||||
// Set AddressRegister hack: the register index is stored in Dst_Reg of the reserved element program[0]
|
||||
program.setAddressRegister(address_reg)
|
||||
|
||||
//fmt.Printf("address_reg %d\n", address_reg)
|
||||
|
||||
return &program
|
||||
|
||||
return program
|
||||
}
|
||||
|
||||
const CYCLE_MAP_SIZE int = RANDOMX_SUPERSCALAR_LATENCY + 4
|
||||
|
@ -892,10 +903,10 @@ func getMixBlock(register_value uint64, memory []byte) uint64 {
|
|||
}
|
||||
|
||||
// executeSuperscalar execute the superscalar program
|
||||
func executeSuperscalar(p *SuperScalarProgram, r *registerLine) {
|
||||
func executeSuperscalar(p []SuperScalarInstruction, r *RegisterLine) {
|
||||
|
||||
for i := range p.Instructions {
|
||||
ins := &p.Instructions[i]
|
||||
for i := range p {
|
||||
ins := &p[i]
|
||||
switch ins.Opcode {
|
||||
case S_ISUB_R:
|
||||
r[ins.Dst_Reg] -= r[ins.Src_Reg]
|
||||
|
|
Loading…
Reference in a new issue