Cleanup readme, superscalar

This commit is contained in:
DataHoarder 2024-04-20 20:22:05 +02:00
parent d20dd880ce
commit 4903cd7407
Signed by: DataHoarder
SSH key fingerprint: SHA256:OLTRf6Fl87G52SiR7sWLGNzlJt4WOX+tfI2yxo0z7xk
3 changed files with 60 additions and 53 deletions

View file

@@ -1,29 +1,36 @@
 # RandomX (Golang Implementation)
 
----
+RandomX is a proof-of-work (PoW) algorithm that is optimized for general-purpose CPUs.
+RandomX uses random code execution (hence the name) together with several memory-hard techniques to minimize the efficiency advantage of specialized hardware.
 
 Fork from [git.dero.io/DERO_Foundation/RandomX](https://git.dero.io/DERO_Foundation/RandomX). Also related, their [Analysis of RandomX writeup](https://medium.com/deroproject/analysis-of-randomx-dde9dfe9bbc6).
 
 Original code failed RandomX testcases and was implemented using big.Float.
 
-This package implements RandomX without CGO, using only Golang code, pure float64 ops and two small assembly sections to implement CFROUND modes, with optional soft float implementation.
+---
+
+This package implements RandomX without CGO, using only Golang code, native float64 ops, some assembly, but with optional soft float _purego_ implementation.
 
 All test cases pass properly.
 
-JIT is supported on a few platforms but can be hard-disabled via the `disable_jit` build flag, or at runtime.
+For the C++ implementation and design of RandomX, see [github.com/tevador/RandomX](https://github.com/tevador/RandomX)
+
+| Feature                      | 386 | amd64 | arm | arm64 | mips | mips64 | riscv64 | wasm |
+|:----------------------------:|:---:|:-----:|:---:|:-----:|:----:|:------:|:-------:|:----:|
+| purego                       | ✅  | ✅    | ✅  | ✅    | ✅   | ✅     | ✅      | ✅   |
+| Hardware Float Operations    | ✅  | ✅    | ❌  | ✅    | ❌   | ❌     | ❌      | ❌   |
+| Hardware AES Operations      | ❌  | ✅    | ❌  | ❌    | ❌   | ❌     | ❌      | ❌   |
+| Native Superscalar Execution | ✅  | ✅    | ✅  | ✅    | ✅   | ✅     | ✅      | ✅   |
+| Superscalar JIT Execution    | ❌  | ✅*   | ❌  | ❌    | ❌   | ❌     | ❌      | ❌   |
+| Native VM Execution          | ✅  | ✅    | ❌  | ✅    | ❌   | ❌     | ❌      | ❌   |
+| VM JIT Execution             | ❌  | ✅*   | ❌  | ❌    | ❌   | ❌     | ❌      | ❌   |
 
 A pure Golang implementation can be used on platforms without hard float support or via the `purego` build flag manually.
 
-| Platform    | Hard Float | Hard AES | JIT | Native | purego | Notes            |
-|:-----------:|:----------:|:--------:|:---:|:------:|:------:|:----------------:|
-| **386**     | ✅         | ❌       | ❌  | ✅     | ✅     |                  |
-| **amd64**   | ✅         | ✅       | ✅* | ✅     | ✅     | JIT only on Unix |
-| **arm**     | ❌         | ❌       | ❌  | ❌     | ✅     |                  |
-| **arm64**   | ✅         | ❌       | ❌  | ✅     | ✅     |                  |
-| **mips**    | ❌         | ❌       | ❌  | ❌     | ✅     |                  |
-| **mips64**  | ❌         | ❌       | ❌  | ❌     | ✅     |                  |
-| **riscv64** | ❌         | ❌       | ❌  | ❌     | ✅     |                  |
-| **wasm**    | ❌         | ❌       | ❌  | ❌     | ✅     |                  |
-
-Any platform with no hard float support (soft float using [softfloat64](git.gammaspectra.live/P2Pool/softfloat64)) will be vastly slow.
+Any platform with no hard float support or when enabled manually will use soft float, using [softfloat64](https://git.gammaspectra.live/P2Pool/softfloat64). This will be very slow.
 
 Native hard float can be added with supporting rounding mode under _asm_.
+
+JIT only supported under Unix systems (Linux, *BSD, macOS), and can be hard-disabled via the `disable_jit` build flag, or at runtime.
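Note on the build flags mentioned above: selection between the hardware path, the soft-float `purego` path and the JIT is typically wired through Go build constraints plus a runtime switch. The sketch below only illustrates that mechanism; the identifiers (`jitEnabled`, `DisableJIT`) are assumptions, not this package's actual API.

```go
// Hedged sketch of a build-time tag combined with a runtime switch; the names
// here are hypothetical and do not claim to match this repository's code.
package sketch

// A file guarded with `//go:build disable_jit` would flip this default to
// false at build time, mirroring the behaviour described in the README.
var jitEnabled = true

// DisableJIT turns JIT compilation off at runtime for VMs created afterwards.
func DisableJIT() { jitEnabled = false }

// useJIT reports whether a newly created VM should compile programs to native
// code instead of interpreting them.
func useJIT() bool { return jitEnabled }
```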

View file

@@ -307,11 +307,11 @@ var slot10 = []*Instruction{&IMUL_RCP}
 // SuperScalarInstruction superscalar program is built with superscalar instructions
 type SuperScalarInstruction struct {
     Opcode byte
-    Dst_Reg int
-    Src_Reg int
+    Dst int
+    Src int
     Mod byte
     Imm32 uint32
-    Type int
+    Imm64 uint64
     OpGroup int
     OpGroupPar int
     GroupParIsSource int
@@ -320,17 +320,15 @@ type SuperScalarInstruction struct {
 }
 
 func (sins *SuperScalarInstruction) FixSrcReg() {
-    if sins.Src_Reg >= 0 {
-        // do nothing
-    } else {
-        sins.Src_Reg = sins.Dst_Reg
+    if sins.Src == 0xff {
+        sins.Src = sins.Dst
     }
 }
 
 func (sins *SuperScalarInstruction) Reset() {
     sins.Opcode = 99
-    sins.Src_Reg = -1
-    sins.Dst_Reg = -1
+    sins.Src = 0xff
+    sins.Dst = 0xff
     sins.CanReuse = false
     sins.GroupParIsSource = 0
 }
@@ -406,6 +404,8 @@ func create(sins *SuperScalarInstruction, ins *Instruction, gen *Blake2Generator
             }
         }
 
+        sins.Imm64 = randomx_reciprocal(sins.Imm32)
+
         sins.OpGroup = S_IMUL_RCP
 
     default:
@@ -450,11 +450,11 @@ func CreateSuperScalarInstruction(sins *SuperScalarInstruction, gen *Blake2Gener
 type SuperScalarProgram []SuperScalarInstruction
 
 func (p SuperScalarProgram) setAddressRegister(addressRegister int) {
-    p[0].Dst_Reg = addressRegister
+    p[0].Dst = addressRegister
 }
 
 func (p SuperScalarProgram) AddressRegister() int {
-    return p[0].Dst_Reg
+    return p[0].Dst
 }
 
 func (p SuperScalarProgram) Program() []SuperScalarInstruction {
     return p[1:]
@@ -569,9 +569,9 @@ func Build_SuperScalar_Program(gen *Blake2Generator) SuperScalarProgram {
             depcycle = scheduleCycle + mop.GetLatency() // calculate when will the result be ready
 
             if macro_op_index == sins.ins.ResultOP { // fix me
-                registers[sins.Dst_Reg].Latency = depcycle
-                registers[sins.Dst_Reg].LastOpGroup = sins.OpGroup
-                registers[sins.Dst_Reg].LastOpPar = sins.OpGroupPar
+                registers[sins.Dst].Latency = depcycle
+                registers[sins.Dst].LastOpGroup = sins.OpGroup
+                registers[sins.Dst].LastOpPar = sins.OpGroupPar
             }
@@ -609,12 +609,12 @@ func Build_SuperScalar_Program(gen *Blake2Generator) SuperScalarProgram {
         if i == 0 {
             continue
         }
-        lastdst := asic_latencies[program[i].Dst_Reg] + 1
+        lastdst := asic_latencies[program[i].Dst] + 1
         lastsrc := 0
-        if program[i].Dst_Reg != program[i].Src_Reg {
-            lastsrc = asic_latencies[program[i].Src_Reg] + 1
+        if program[i].Dst != program[i].Src {
+            lastsrc = asic_latencies[program[i].Src] + 1
         }
-        asic_latencies[program[i].Dst_Reg] = max(lastdst, lastsrc)
+        asic_latencies[program[i].Dst] = max(lastdst, lastsrc)
     }
 
     asic_latency_max := 0
@@ -719,18 +719,18 @@ func (sins *SuperScalarInstruction) SelectSource(preAllocatedAvailableRegisters
     if len(available_registers) == 2 && sins.Opcode == S_IADD_RS {
         if available_registers[0] == RegisterNeedsDisplacement || available_registers[1] == RegisterNeedsDisplacement {
-            sins.Src_Reg = RegisterNeedsDisplacement
-            sins.OpGroupPar = sins.Src_Reg
+            sins.Src = RegisterNeedsDisplacement
+            sins.OpGroupPar = sins.Src
             return true
         }
     }
 
-    if selectRegister(available_registers, gen, &sins.Src_Reg) {
+    if selectRegister(available_registers, gen, &sins.Src) {
         if sins.GroupParIsSource == 0 {
 
         } else {
-            sins.OpGroupPar = sins.Src_Reg
+            sins.OpGroupPar = sins.Src
         }
         return true
     }
@@ -741,7 +741,7 @@ func (sins *SuperScalarInstruction) SelectDestination(preAllocatedAvailableRegis
     preAllocatedAvailableRegisters = preAllocatedAvailableRegisters[:0]
 
     for i := range Registers {
-        if Registers[i].Latency <= cycle && (sins.CanReuse || i != sins.Src_Reg) &&
+        if Registers[i].Latency <= cycle && (sins.CanReuse || i != sins.Src) &&
             (allowChainedMul || sins.OpGroup != S_IMUL_R || Registers[i].LastOpGroup != S_IMUL_R) &&
             (Registers[i].LastOpGroup != sins.OpGroup || Registers[i].LastOpPar != sins.OpGroupPar) &&
             (sins.Opcode != S_IADD_RS || i != RegisterNeedsDisplacement) {
@@ -749,7 +749,7 @@ func (sins *SuperScalarInstruction) SelectDestination(preAllocatedAvailableRegis
         }
     }
 
-    return selectRegister(preAllocatedAvailableRegisters, gen, &sins.Dst_Reg)
+    return selectRegister(preAllocatedAvailableRegisters, gen, &sins.Dst)
 }
 
 func selectRegister(available_registers []int, gen *Blake2Generator, reg *int) bool {
@@ -776,25 +776,25 @@ func executeSuperscalar(p []SuperScalarInstruction, r *RegisterLine) {
         ins := &p[i]
         switch ins.Opcode {
         case S_ISUB_R:
-            r[ins.Dst_Reg] -= r[ins.Src_Reg]
+            r[ins.Dst] -= r[ins.Src]
         case S_IXOR_R:
-            r[ins.Dst_Reg] ^= r[ins.Src_Reg]
+            r[ins.Dst] ^= r[ins.Src]
         case S_IADD_RS:
-            r[ins.Dst_Reg] += r[ins.Src_Reg] << ins.Imm32
+            r[ins.Dst] += r[ins.Src] << ins.Imm32
         case S_IMUL_R:
-            r[ins.Dst_Reg] *= r[ins.Src_Reg]
+            r[ins.Dst] *= r[ins.Src]
         case S_IROR_C:
-            r[ins.Dst_Reg] = bits.RotateLeft64(r[ins.Dst_Reg], 0-int(ins.Imm32))
+            r[ins.Dst] = bits.RotateLeft64(r[ins.Dst], 0-int(ins.Imm32))
         case S_IADD_C7, S_IADD_C8, S_IADD_C9:
-            r[ins.Dst_Reg] += signExtend2sCompl(ins.Imm32)
+            r[ins.Dst] += signExtend2sCompl(ins.Imm32)
         case S_IXOR_C7, S_IXOR_C8, S_IXOR_C9:
-            r[ins.Dst_Reg] ^= signExtend2sCompl(ins.Imm32)
+            r[ins.Dst] ^= signExtend2sCompl(ins.Imm32)
         case S_IMULH_R:
-            r[ins.Dst_Reg], _ = bits.Mul64(r[ins.Dst_Reg], r[ins.Src_Reg])
+            r[ins.Dst], _ = bits.Mul64(r[ins.Dst], r[ins.Src])
         case S_ISMULH_R:
-            r[ins.Dst_Reg] = smulh(int64(r[ins.Dst_Reg]), int64(r[ins.Src_Reg]))
+            r[ins.Dst] = smulh(int64(r[ins.Dst]), int64(r[ins.Src]))
         case S_IMUL_RCP:
-            r[ins.Dst_Reg] *= randomx_reciprocal(ins.Imm32)
+            r[ins.Dst] *= ins.Imm64
         }
     }
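The new `Imm64` field stores `randomx_reciprocal(sins.Imm32)` once at program-generation time, so the interpreter above (and the JIT below) multiplies by a precomputed constant instead of recomputing the reciprocal on every `IMUL_RCP`. For reference, here is a minimal Go sketch of the reciprocal as the RandomX reference code defines it, floor(2^x / divisor) for the largest x that keeps the result below 2^64; it mirrors the reference algorithm and may differ cosmetically from the repository's own `randomx_reciprocal`:

```go
package main

import (
	"fmt"
	"math/bits"
)

// reciprocal returns floor(2^x / divisor) for the largest x such that the
// result still fits in 64 bits. The superscalar generator never emits
// IMUL_RCP for a divisor of zero or a power of two, so those are excluded.
func reciprocal(divisor uint32) uint64 {
	d := uint64(divisor)
	const p2exp63 = uint64(1) << 63
	quotient, remainder := p2exp63/d, p2exp63%d
	for shift := 0; shift < bits.Len64(d); shift++ {
		if remainder >= d-remainder {
			quotient, remainder = quotient*2+1, remainder*2-d
		} else {
			quotient, remainder = quotient*2, remainder*2
		}
	}
	return quotient
}

func main() {
	// floor(2^65 / 3): multiplying by this constant (keeping the low 64 bits)
	// is how IMUL_RCP emulates division by the 32-bit immediate.
	fmt.Println(reciprocal(3)) // 12297829382473034410
}
```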

View file

@@ -27,8 +27,8 @@ func generateSuperscalarCode(scalarProgram SuperScalarProgram) SuperScalarProgra
 
     for i := range p {
         instr := &p[i]
-        dst := instr.Dst_Reg % RegistersCount
-        src := instr.Src_Reg % RegistersCount
+        dst := instr.Dst % RegistersCount
+        src := instr.Src % RegistersCount
 
         switch instr.Opcode {
         case S_ISUB_R:
@@ -80,9 +80,9 @@ func generateSuperscalarCode(scalarProgram SuperScalarProgram) SuperScalarProgra
             program = append(program, byte(0xc2+8*dst))
         case S_IMUL_RCP:
             program = append(program, MOV_RAX_I...)
-            program = binary.LittleEndian.AppendUint64(program, randomx_reciprocal(instr.Imm32))
+            program = binary.LittleEndian.AppendUint64(program, instr.Imm64)
             program = append(program, REX_IMUL_RM...)
-            program = append(program, byte(0xc0+8*instr.Dst_Reg))
+            program = append(program, byte(0xc0+8*instr.Dst))
         default:
             panic("unreachable")
         }
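For the `S_IMUL_RCP` case above, the emitted machine code is `mov rax, imm64` followed by a register-direct `imul dst, rax`: `byte(0xc0+8*instr.Dst)` is the x86 ModRM byte with mod=11, the destination register in the reg field, and r/m=0 selecting RAX (assuming, as the surrounding code suggests, that `MOV_RAX_I` and `REX_IMUL_RM` hold the corresponding prefix/opcode bytes). A tiny sketch of that byte arithmetic:

```go
package main

import "fmt"

// modRM builds an x86 ModRM byte for register-direct operands (mod = 11).
// reg and rm are 3-bit register indices; indices above 7 need REX bits.
func modRM(reg, rm byte) byte {
	return 0xC0 | reg<<3 | rm
}

func main() {
	dst := byte(5)
	// Matches byte(0xc0+8*dst) in generateSuperscalarCode: destination in the
	// reg field, RAX (index 0) as the r/m operand of IMUL r64, r/m64.
	fmt.Printf("%#x == %#x\n", modRM(dst, 0), 0xc0+8*dst)
}
```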