Reorganize cache/dataset

This commit is contained in:
DataHoarder 2024-04-11 16:04:59 +02:00
parent 252d9ed9d3
commit 5e7d8ea35f
Signed by: DataHoarder
SSH key fingerprint: SHA256:OLTRf6Fl87G52SiR7sWLGNzlJt4WOX+tfI2yxo0z7xk
7 changed files with 163 additions and 170 deletions

98
cache.go Normal file
View file

@ -0,0 +1,98 @@
package randomx
// Randomx_Cache holds the key-derived state from which dataset items are
// computed.
type Randomx_Cache struct {
// Blocks is the memory returned by buildBlocks in Randomx_init_cache;
// GetMixBlock hands out 8-word windows into it.
Blocks []block
// Programs holds the superscalar programs; InitDatasetItem executes
// Programs[i] in round i of its cache-access loop.
Programs [RANDOMX_PROGRAM_COUNT]*SuperScalarProgram
}
// Randomx_alloc_cache returns a freshly allocated, zero-valued cache.
// The flags argument is accepted but not consulted here; Blocks stays
// empty until Randomx_init_cache is called.
func Randomx_alloc_cache(flags uint64) *Randomx_Cache {
	return new(Randomx_Cache)
}
// VM_Initialize creates a VM whose Dataset is a light dataset backed by
// this cache, i.e. dataset items are computed from the cache when read.
func (cache *Randomx_Cache) VM_Initialize() *VM {
	light := &Randomx_DatasetLight{Cache: cache}
	return &VM{Dataset: light}
}
// Randomx_init_cache fills cache.Blocks by running buildBlocks in argon2d
// mode over key, using the RANDOMX_ARGON_* salt, iteration, memory and lane
// settings. The secret and data inputs are empty and the final argument is 0.
func (cache *Randomx_Cache) Randomx_init_cache(key []byte) {
	// buildBlocks receives a private copy, never the caller's slice.
	keyCopy := append([]byte{}, key...)
	salt := []byte(RANDOMX_ARGON_SALT)
	secret, data := []byte{}, []byte{}

	cache.Blocks = buildBlocks(argon2d, keyCopy, salt, secret, data, RANDOMX_ARGON_ITERATIONS, RANDOMX_ARGON_MEMORY, RANDOMX_ARGON_LANES, 0)
}
// GetMixBlock returns the 8 consecutive uint64 words of cache memory
// selected by addr. addr is wrapped to a cache-line number with a mask of
// CacheSize/CacheLineSize - 1 and scaled to a byte offset; the result is a
// slice into cache.Blocks, not a copy.
func (cache *Randomx_Cache) GetMixBlock(addr uint64) []uint64 {
	const (
		bytesPerBlock = 1024
		bytesPerWord  = 8
		wordsPerLine  = 8
	)

	lineMask := CacheSize/CacheLineSize - 1
	byteOffset := (addr & lineMask) * CacheLineSize

	blockIndex := byteOffset / bytesPerBlock
	firstWord := (byteOffset % bytesPerBlock) / bytesPerWord
	return cache.Blocks[blockIndex][firstWord : firstWord+wordsPerLine]
}
// InitDatasetItem computes dataset item number itemnumber and stores its
// eight words in *out.
//
// Word 0 is seeded with (itemnumber+1)*superscalarMul0 and words 1..7 with
// word 0 XORed with a per-word constant. Then, for each of the
// RANDOMX_CACHE_ACCESSES rounds, a mix block is fetched from the cache, the
// round's superscalar program is run on the words, the mix block is XORed
// in, and the program's address register selects the next cache address.
func (cache *Randomx_Cache) InitDatasetItem(out *registerLine, itemnumber uint64) {
	const (
		superscalarMul0 uint64 = 6364136223846793005
		superscalarAdd1 uint64 = 9298411001130361340
		superscalarAdd2 uint64 = 12065312585734608966
		superscalarAdd3 uint64 = 9306329213124626780
		superscalarAdd4 uint64 = 5281919268842080866
		superscalarAdd5 uint64 = 10536153434571861004
		superscalarAdd6 uint64 = 3398623926847679864
		superscalarAdd7 uint64 = 9549104520008361294
	)

	var regs registerLine
	regs[0] = (itemnumber + 1) * superscalarMul0
	xorSeeds := [...]uint64{
		superscalarAdd1, superscalarAdd2, superscalarAdd3, superscalarAdd4,
		superscalarAdd5, superscalarAdd6, superscalarAdd7,
	}
	for i, seed := range xorSeeds {
		regs[i+1] = regs[0] ^ seed
	}

	// The first cache access is addressed by the item number itself.
	nextAddr := itemnumber
	for round := 0; round < RANDOMX_CACHE_ACCESSES; round++ {
		mixWords := cache.GetMixBlock(nextAddr)
		prog := cache.Programs[round]
		executeSuperscalar(prog, &regs)
		for w := range regs {
			regs[w] ^= mixWords[w]
		}
		nextAddr = regs[prog.AddressRegister]
	}

	*out = regs
}
// initDataset computes the dataset item for start_item when the range
// [start_item, end_item) is non-empty and discards the result.
//
// Only the first item of the range is processed, matching the earlier loop
// that exited after one iteration. The item is written into a local scratch
// line: InitDatasetItem stores through its out pointer, so passing nil (as
// this function used to) panics with a nil dereference.
func (cache *Randomx_Cache) initDataset(start_item, end_item uint64) {
	if start_item >= end_item {
		return
	}
	var scratch registerLine
	cache.InitDatasetItem(&scratch, start_item)
}

View file

@ -182,39 +182,6 @@ func (b *Blake2Generator) GetUint32() uint32 {
return ret
}
// Randomx_Cache holds the key-derived state from which dataset items are
// computed.
type Randomx_Cache struct {
// Blocks is the memory returned by buildBlocks in Randomx_init_cache;
// GetBlock copies words out of it.
Blocks []block
// Programs holds the superscalar programs executed, one per round, when a
// dataset item is computed.
Programs [RANDOMX_PROGRAM_COUNT]*SuperScalarProgram
}
// Randomx_alloc_cache returns a freshly allocated, zero-valued cache.
// The flags argument is accepted but not consulted here; Blocks stays
// empty until Randomx_init_cache is called.
func Randomx_alloc_cache(flags uint64) *Randomx_Cache {
	return new(Randomx_Cache)
}
// Randomx_init_cache fills cache.Blocks by running buildBlocks in argon2d
// mode over key, using the RANDOMX_ARGON_* salt, iteration, memory and lane
// settings. The secret and data inputs are empty and the final argument is 0.
func (cache *Randomx_Cache) Randomx_init_cache(key []byte) {
	// buildBlocks receives a private copy, never the caller's slice.
	keyCopy := append([]byte{}, key...)
	salt := []byte(RANDOMX_ARGON_SALT)
	secret, data := []byte{}, []byte{}

	cache.Blocks = buildBlocks(argon2d, keyCopy, salt, secret, data, RANDOMX_ARGON_ITERATIONS, RANDOMX_ARGON_MEMORY, RANDOMX_ARGON_LANES, 0)
}
// GetBlock copies uint64 words of cache memory into out, starting at the
// cache line selected by addr. addr is wrapped to a cache-line number with a
// mask of CacheSize/CacheLineSize - 1 and scaled to a byte offset. The copy
// is bounded by len(out) and by what remains of the source block.
func (cache *Randomx_Cache) GetBlock(addr uint64, out []uint64) {
	lineMask := CacheSize/CacheLineSize - 1
	byteOffset := (addr & lineMask) * CacheLineSize

	blockIndex, firstWord := byteOffset/1024, (byteOffset%1024)/8
	src := cache.Blocks[blockIndex][firstWord:]
	copy(out, src)
}
// some constants for argon
const (
argon2d = iota

6
dataset.go Normal file
View file

@ -0,0 +1,6 @@
package randomx
// Randomx_Dataset is the dataset abstraction the VM reads through its
// Dataset field.
type Randomx_Dataset interface {
// ReadDataset combines the dataset item located at address into r.
// The light implementation XORs the item's eight words into r in place.
ReadDataset(address uint64, r *registerLine)
// PrefetchDataset is called by VM.Run before the corresponding read.
// NOTE(review): presumably a hint that address will be read soon; the
// light implementation does nothing — confirm intended contract.
PrefetchDataset(address uint64)
}

21
dataset_light.go Normal file
View file

@ -0,0 +1,21 @@
package randomx
// Randomx_DatasetLight is a dataset that stores no items of its own:
// ReadDataset computes each requested item from Cache.
type Randomx_DatasetLight struct {
// Cache is the cache whose InitDatasetItem produces the items.
Cache *Randomx_Cache
}
// PrefetchDataset does nothing for the light dataset; ReadDataset computes
// the item from the cache at the time of the read.
func (d *Randomx_DatasetLight) PrefetchDataset(address uint64) {
}
// ReadDataset computes the dataset item with number address/CacheLineSize
// from the cache and XORs its eight words into r in place.
func (d *Randomx_DatasetLight) ReadDataset(address uint64, r *registerLine) {
	var item registerLine
	d.Cache.InitDatasetItem(&item, address/CacheLineSize)
	for i, word := range item {
		r[i] ^= word
	}
}

3
register.go Normal file
View file

@ -0,0 +1,3 @@
package randomx
// registerLine is a group of eight 64-bit words. It serves as the working
// register set of the superscalar programs in InitDatasetItem and as the
// dataset item that ReadDataset XORs into the caller's registers.
type registerLine [8]uint64

View file

@ -29,7 +29,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package randomx
import "fmt"
import (
"fmt"
)
import "math/bits"
type ExecutionPort byte
@ -220,12 +222,14 @@ var buffer3 = []int{4, 9, 3}
var buffer4 = []int{4, 4, 4, 4}
var buffer5 = []int{3, 3, 10}
var Decoder_To_Instruction_Length = [][]int{{4, 8, 4},
{7, 3, 3, 3},
{3, 7, 3, 3},
{4, 9, 3},
{4, 4, 4, 4},
{3, 3, 10}}
var Decoder_To_Instruction_Length = [][]int{
buffer0,
buffer1,
buffer2,
buffer3,
buffer4,
buffer5,
}
type DecoderType int
@ -505,8 +509,8 @@ func CreateSuperScalarInstruction(sins *SuperScalarInstruction, gen *Blake2Gener
}
type SuperScalarProgram struct {
Ins []SuperScalarInstruction // all instructions of program
AddressReg int
Instructions []SuperScalarInstruction // all instructions of program
AddressRegister int
}
func Build_SuperScalar_Program(gen *Blake2Generator) *SuperScalarProgram {
@ -667,7 +671,7 @@ func Build_SuperScalar_Program(gen *Blake2Generator) *SuperScalarProgram {
// when all uops of the current instruction have been issued, add the instruction to the superscalar program
if macro_op_index >= sins.ins.GetUOPCount() {
sins.FixSrcReg() // fix src register once and for all
program.Ins = append(program.Ins, *sins)
program.Instructions = append(program.Instructions, *sins)
if sins.ins.Name == "IMUL_R" || sins.ins.Name == "IMULH_R" || sins.ins.Name == "ISMULH_R" || sins.ins.Name == "IMUL_RCP" {
mulcount++
@ -684,22 +688,22 @@ func Build_SuperScalar_Program(gen *Blake2Generator) *SuperScalarProgram {
}
/*
for i := range program.Ins {
fmt.Printf("%d %s\n", i, program.Ins[i].String())
for i := range program.Instructions {
fmt.Printf("%d %s\n", i, program.Instructions[i].String())
}
*/
var asic_latencies [8]int
for i := range program.Ins {
for i := range program.Instructions {
//fmt.Printf("%d %s\n",i ,program[i].String() )
lastdst := asic_latencies[program.Ins[i].Dst_Reg] + 1
lastdst := asic_latencies[program.Instructions[i].Dst_Reg] + 1
lastsrc := 0
if program.Ins[i].Dst_Reg != program.Ins[i].Src_Reg {
lastsrc = asic_latencies[program.Ins[i].Src_Reg] + 1
if program.Instructions[i].Dst_Reg != program.Instructions[i].Src_Reg {
lastsrc = asic_latencies[program.Instructions[i].Src_Reg] + 1
}
asic_latencies[program.Ins[i].Dst_Reg] = Max(lastdst, lastsrc)
asic_latencies[program.Instructions[i].Dst_Reg] = Max(lastdst, lastsrc)
}
asic_latency_max := 0
@ -713,7 +717,7 @@ func Build_SuperScalar_Program(gen *Blake2Generator) *SuperScalarProgram {
}
}
program.AddressReg = address_reg
program.AddressRegister = address_reg
//fmt.Printf("address_reg %d\n", address_reg)
@ -887,70 +891,11 @@ func getMixBlock(register_value uint64, memory []byte) uint64 {
return (register_value * Mask) * CacheLineSize
}
// Seed constants for a dataset item: word 0 is (itemnumber+1)*superscalarMul0
// and word k (1..7) is word 0 XORed with superscalarAddk.
const (
	superscalarMul0 uint64 = 6364136223846793005
	superscalarAdd1 uint64 = 9298411001130361340
	superscalarAdd2 uint64 = 12065312585734608966
	superscalarAdd3 uint64 = 9306329213124626780
	superscalarAdd4 uint64 = 5281919268842080866
	superscalarAdd5 uint64 = 10536153434571861004
	superscalarAdd6 uint64 = 3398623926847679864
	superscalarAdd7 uint64 = 9549104520008361294
)
// executeSuperscalar execute the superscalar program
func executeSuperscalar(p *SuperScalarProgram, r *registerLine) {
func (cache *Randomx_Cache) InitDatasetItem(out []uint64, itemnumber uint64) {
var rl_array, mix_array [8]uint64
rl := rl_array
mix_block := mix_array[:]
register_value := itemnumber
_ = register_value
rl[0] = (itemnumber + 1) * superscalarMul0
rl[1] = rl[0] ^ superscalarAdd1
rl[2] = rl[0] ^ superscalarAdd2
rl[3] = rl[0] ^ superscalarAdd3
rl[4] = rl[0] ^ superscalarAdd4
rl[5] = rl[0] ^ superscalarAdd5
rl[6] = rl[0] ^ superscalarAdd6
rl[7] = rl[0] ^ superscalarAdd7
for i := 0; i < RANDOMX_CACHE_ACCESSES; i++ {
//mix_block_index := getMixBlock(register_value,nil)
cache.Programs[i].executeSuperscalar_nocache(rl[:])
cache.GetBlock(register_value, mix_block)
//TODO: this can be optimized with xorBytes
for q := range rl {
// fmt.Printf("%d rl[%d] %16x mix %16x\n",i, q,rl[q], mix_block[q])
rl[q] ^= mix_block[q]
}
register_value = rl[cache.Programs[i].AddressReg]
// fmt.Printf("%d\n",i)
}
for q := range rl {
out[q] = rl[q]
}
}
func (cache *Randomx_Cache) initDataset(start_item, end_item uint64) {
for itemnumber := start_item; itemnumber < end_item; itemnumber++ {
cache.InitDatasetItem(nil, itemnumber)
// dataset_index += CacheLineSize
//fmt.Printf("exiting dataset item\n")
break
}
}
// execute the superscalar program
func (p *SuperScalarProgram) executeSuperscalar_nocache(r []uint64) {
_ = r[7] // bounds check hint to compiler; see golang.org/issue/14808
for i := range p.Ins {
ins := &p.Ins[i]
for i := range p.Instructions {
ins := &p.Instructions[i]
switch ins.Opcode {
case S_ISUB_R:
r[ins.Dst_Reg] -= r[ins.Src_Reg]
@ -979,19 +924,14 @@ func (p *SuperScalarProgram) executeSuperscalar_nocache(r []uint64) {
// smulh returns the upper 64 bits of the 128-bit signed product a*b,
// reinterpreted as a uint64.
func smulh(a, b int64) uint64 {
	// Start from the unsigned high word. Reading a negative operand as
	// unsigned adds 2^64 to it, which adds the other operand to the high
	// word of the product; that excess is subtracted again below.
	hi, _ := bits.Mul64(uint64(a), uint64(b))

	// x >> 63 is all ones for negative x and zero otherwise, so each mask
	// selects the other operand only when x is negative (branch-free).
	corrA := (a >> 63) & b
	corrB := (b >> 63) & a
	return uint64(int64(hi) - corrA - corrB)
}
func randomx_reciprocal(divisor uint32) uint64 {
const p2exp63 uint64 = uint64(1) << 63
const p2exp63 = uint64(1) << 63
quotient := p2exp63 / uint64(divisor)
remainder := p2exp63 % uint64(divisor)
@ -1003,13 +943,4 @@ func randomx_reciprocal(divisor uint32) uint64 {
// signExtend2sCompl interprets x as a 32-bit two's-complement value and
// returns it sign-extended to 64 bits.
func signExtend2sCompl(x uint32) uint64 {
	signed := int32(x)
	widened := int64(signed)
	return uint64(widened)
}

43
vm.go
View file

@ -70,18 +70,13 @@ func MaskRegisterExponentMantissa(f float64, mode uint64) float64 {
return math.Float64frombits((math.Float64bits(f) & dynamicMantissaMask) | mode)
}
// VM_Initialize creates a VM whose Cache field refers to this cache.
func (cache *Randomx_Cache) VM_Initialize() *VM {
	vm := new(VM)
	vm.Cache = cache
	return vm
}
// Config groups values that Run reads through vm.config.
type Config struct {
// eMask is indexed with LOW/HIGH and passed as the mode argument of
// MaskRegisterExponentMantissa when the e registers are loaded.
eMask [2]uint64
// readReg2 and readReg3 index vm.reg.r when Run updates vm.mem.mx.
// NOTE(review): readReg0/readReg1 are not used in the visible code;
// presumably they select registers the same way — confirm.
readReg0, readReg1, readReg2, readReg3 uint64
}
type REGISTER_FILE struct {
r [8]uint64
r registerLine
f [4][2]float64
e [4][2]float64
a [4][2]float64
@ -97,8 +92,6 @@ const HIGH = 1
// calculate hash based on input
func (vm *VM) Run(input_hash []byte) {
var mix_block [8]uint64
//fmt.Printf("%x \n", input_hash)
fillAes4Rx4(input_hash[:], vm.buffer[:])
@ -152,61 +145,37 @@ func (vm *VM) Run(input_hash []byte) {
spAddr1 ^= spMix >> 32
spAddr1 &= ScratchpadL3Mask64
//fmt.Printf("spAddr0 %x %x\n", spAddr0,spAddr1)
for i := uint64(0); i < REGISTERSCOUNT; i++ {
vm.reg.r[i] ^= vm.Load64(spAddr0 + 8*i)
//fmt.Printf("r[%d] %x \n", i,vm.reg.r[i]);
}
for i := uint64(0); i < REGISTERCOUNTFLT; i++ {
vm.reg.f[i][LOW] = vm.Load32F(spAddr1 + 8*i)
vm.reg.f[i][HIGH] = vm.Load32F(spAddr1 + 8*i + 4)
//fmt.Printf("lo %f %f\n", vm.reg.f[i][LOW] , vm.reg.f[i][HIGH] )
}
for i := uint64(0); i < REGISTERCOUNTFLT; i++ {
vm.reg.e[i][LOW] = vm.Load32F(spAddr1 + 8*(i+REGISTERCOUNTFLT))
vm.reg.e[i][HIGH] = vm.Load32F(spAddr1 + 8*(i+REGISTERCOUNTFLT) + 4)
// fmt.Printf("OR %x %x\n", (math.Float64bits(vm.reg.e[i][LOW]) & dynamicMantissaMask) | vm.config.eMask[LOW] , (math.Float64bits(vm.reg.e[i][HIGH]) & dynamicMantissaMask)| vm.config.eMask[HIGH] )
vm.reg.e[i][LOW] = MaskRegisterExponentMantissa(vm.reg.e[i][LOW], vm.config.eMask[LOW])
vm.reg.e[i][HIGH] = MaskRegisterExponentMantissa(vm.reg.e[i][HIGH], vm.config.eMask[HIGH])
//fmt.Printf("lo e %f %f\n", vm.reg.e[i][LOW] , vm.reg.e[i][HIGH] )
}
//for i := uint64(0); i < REGISTERCOUNTFLT; i++{
//fmt.Printf("a low %f high %f\n", vm.reg.a[i][LOW] , vm.reg.a[i][HIGH] )
//}
vm.InterpretByteCode()
vm.mem.mx ^= vm.reg.r[vm.config.readReg2] ^ vm.reg.r[vm.config.readReg3]
vm.mem.mx &= CacheLineAlignMask
//fmt.Printf("mx %x\n",vm.mem.mx )
vm.Dataset.PrefetchDataset(vm.datasetOffset + vm.mem.mx)
// execute diffuser superscalar program to get dataset 64 bytes
{
itemnumber := (vm.datasetOffset + vm.mem.ma) / CacheLineSize
//fmt.Printf("qitem number %x\n", itemnumber)
vm.Dataset.ReadDataset(vm.datasetOffset+vm.mem.ma, &vm.reg.r)
vm.Cache.InitDatasetItem(mix_block[:], itemnumber)
for i := range vm.reg.r {
vm.reg.r[i] ^= mix_block[i]
}
}
vm.mem.mx, vm.mem.ma = vm.mem.ma, vm.mem.mx // swap the elements
// swap the elements
vm.mem.mx, vm.mem.ma = vm.mem.ma, vm.mem.mx
for i := uint64(0); i < REGISTERSCOUNT; i++ {
binary.BigEndian.PutUint64(vm.ScratchPad[spAddr1+(8*i):], bits.RotateLeft64(vm.reg.r[i], 32))
//fmt.Printf("reg r[%d] %x\n", i,vm.reg.r[i])
}
for i := uint64(0); i < REGISTERCOUNTFLT; i++ {
@ -215,8 +184,6 @@ func (vm *VM) Run(input_hash []byte) {
binary.BigEndian.PutUint64(vm.ScratchPad[spAddr0+(16*i):], bits.RotateLeft64(math.Float64bits(vm.reg.f[i][LOW]), 32))
binary.BigEndian.PutUint64(vm.ScratchPad[spAddr0+(16*i)+8:], bits.RotateLeft64(math.Float64bits(vm.reg.f[i][HIGH]), 32))
// fmt.Printf("%d %+v\n", i, vm.reg.f[i])
}
spAddr0 = 0