Cleanup readme, superscalar

This commit is contained in:
DataHoarder 2024-04-20 20:22:05 +02:00
parent d20dd880ce
commit 4903cd7407
Signed by: DataHoarder
SSH key fingerprint: SHA256:OLTRf6Fl87G52SiR7sWLGNzlJt4WOX+tfI2yxo0z7xk
3 changed files with 60 additions and 53 deletions

View file

@ -1,29 +1,36 @@
# RandomX (Golang Implementation)
RandomX is a proof-of-work (PoW) algorithm that is optimized for general-purpose CPUs.
RandomX uses random code execution (hence the name) together with several memory-hard techniques to minimize the efficiency advantage of specialized hardware.
---
Fork from [git.dero.io/DERO_Foundation/RandomX](https://git.dero.io/DERO_Foundation/RandomX). Also related, their [Analysis of RandomX writeup](https://medium.com/deroproject/analysis-of-randomx-dde9dfe9bbc6).
The original code failed the RandomX test cases and was implemented using big.Float.
This package implements RandomX without CGO, using only Golang code, pure float64 ops and two small assembly sections to implement CFROUND modes, with optional soft float implementation.
---
This package implements RandomX without CGO, using only Golang code, native float64 ops and some assembly, with an optional soft float _purego_ implementation.
All test cases pass properly.
JIT is supported on a few platforms but can be hard-disabled via the `disable_jit` build flag, or at runtime.
For the C++ implementation and design of RandomX, see [github.com/tevador/RandomX](https://github.com/tevador/RandomX)
| Feature | 386 | amd64 | arm | arm64 | mips | mips64 | riscv64 | wasm |
|:----------------------------:|:---:|:-----:|:---:|:-----:|:----:|:------:|:-------:|:----:|
| purego | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| Hardware Float Operations | ✅ | ✅ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ |
| Hardware AES Operations | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
| Native Superscalar Execution | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| Superscalar JIT Execution | ❌ | ✅* | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
| Native VM Execution | ✅ | ✅ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ |
| VM JIT Execution | ❌ | ✅* | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
A pure Golang implementation can be used on platforms without hard float support, or selected manually via the `purego` build flag.
| Platform | Hard Float | Hard AES | JIT | Native | purego | Notes |
|:-----------:|:----------:|:--------:|:---:|:------:|:------:|:----------------:|
| **386** | ✅ | ❌ | ❌ | ✅ | ✅ | |
| **amd64** | ✅ | ✅ | ✅* | ✅ | ✅ | JIT only on Unix |
| **arm** | ❌ | ❌ | ❌ | ❌ | ✅ | |
| **arm64** | ✅ | ❌ | ❌ | ✅ | ✅ | |
| **mips** | ❌ | ❌ | ❌ | ❌ | ✅ | |
| **mips64** | ❌ | ❌ | ❌ | ❌ | ✅ | |
| **riscv64** | ❌ | ❌ | ❌ | ❌ | ✅ | |
| **wasm** | ❌ | ❌ | ❌ | ❌ | ✅ | |
Any platform with no hard float support or when enabled manually will use soft float, using [softfloat64](https://git.gammaspectra.live/P2Pool/softfloat64). This will be very slow.
Native hard float support can be added for a platform by implementing rounding mode support under _asm_.
Any platform with no hard float support (soft float using [softfloat64](https://git.gammaspectra.live/P2Pool/softfloat64)) will be very slow.
Native hard float support can be added for a platform by implementing rounding mode support under _asm_.
JIT is only supported on Unix systems (Linux, *BSD, macOS), and can be hard-disabled via the `disable_jit` build flag, or disabled at runtime.

View file

@ -307,11 +307,11 @@ var slot10 = []*Instruction{&IMUL_RCP}
// SuperScalarInstruction superscalar program is built with superscalar instructions
type SuperScalarInstruction struct {
Opcode byte
Dst_Reg int
Src_Reg int
Dst int
Src int
Mod byte
Imm32 uint32
Type int
Imm64 uint64
OpGroup int
OpGroupPar int
GroupParIsSource int
@ -320,17 +320,15 @@ type SuperScalarInstruction struct {
}
func (sins *SuperScalarInstruction) FixSrcReg() {
if sins.Src_Reg >= 0 {
// do nothing
} else {
sins.Src_Reg = sins.Dst_Reg
if sins.Src == 0xff {
sins.Src = sins.Dst
}
}
func (sins *SuperScalarInstruction) Reset() {
sins.Opcode = 99
sins.Src_Reg = -1
sins.Dst_Reg = -1
sins.Src = 0xff
sins.Dst = 0xff
sins.CanReuse = false
sins.GroupParIsSource = 0
}
@ -406,6 +404,8 @@ func create(sins *SuperScalarInstruction, ins *Instruction, gen *Blake2Generator
}
}
sins.Imm64 = randomx_reciprocal(sins.Imm32)
sins.OpGroup = S_IMUL_RCP
default:
@ -450,11 +450,11 @@ func CreateSuperScalarInstruction(sins *SuperScalarInstruction, gen *Blake2Gener
type SuperScalarProgram []SuperScalarInstruction
func (p SuperScalarProgram) setAddressRegister(addressRegister int) {
p[0].Dst_Reg = addressRegister
p[0].Dst = addressRegister
}
func (p SuperScalarProgram) AddressRegister() int {
return p[0].Dst_Reg
return p[0].Dst
}
func (p SuperScalarProgram) Program() []SuperScalarInstruction {
return p[1:]
@ -569,9 +569,9 @@ func Build_SuperScalar_Program(gen *Blake2Generator) SuperScalarProgram {
depcycle = scheduleCycle + mop.GetLatency() // calculate when will the result be ready
if macro_op_index == sins.ins.ResultOP { // fix me
registers[sins.Dst_Reg].Latency = depcycle
registers[sins.Dst_Reg].LastOpGroup = sins.OpGroup
registers[sins.Dst_Reg].LastOpPar = sins.OpGroupPar
registers[sins.Dst].Latency = depcycle
registers[sins.Dst].LastOpGroup = sins.OpGroup
registers[sins.Dst].LastOpPar = sins.OpGroupPar
}
@ -609,12 +609,12 @@ func Build_SuperScalar_Program(gen *Blake2Generator) SuperScalarProgram {
if i == 0 {
continue
}
lastdst := asic_latencies[program[i].Dst_Reg] + 1
lastdst := asic_latencies[program[i].Dst] + 1
lastsrc := 0
if program[i].Dst_Reg != program[i].Src_Reg {
lastsrc = asic_latencies[program[i].Src_Reg] + 1
if program[i].Dst != program[i].Src {
lastsrc = asic_latencies[program[i].Src] + 1
}
asic_latencies[program[i].Dst_Reg] = max(lastdst, lastsrc)
asic_latencies[program[i].Dst] = max(lastdst, lastsrc)
}
asic_latency_max := 0
@ -719,18 +719,18 @@ func (sins *SuperScalarInstruction) SelectSource(preAllocatedAvailableRegisters
if len(available_registers) == 2 && sins.Opcode == S_IADD_RS {
if available_registers[0] == RegisterNeedsDisplacement || available_registers[1] == RegisterNeedsDisplacement {
sins.Src_Reg = RegisterNeedsDisplacement
sins.OpGroupPar = sins.Src_Reg
sins.Src = RegisterNeedsDisplacement
sins.OpGroupPar = sins.Src
return true
}
}
if selectRegister(available_registers, gen, &sins.Src_Reg) {
if selectRegister(available_registers, gen, &sins.Src) {
if sins.GroupParIsSource == 0 {
} else {
sins.OpGroupPar = sins.Src_Reg
sins.OpGroupPar = sins.Src
}
return true
}
@ -741,7 +741,7 @@ func (sins *SuperScalarInstruction) SelectDestination(preAllocatedAvailableRegis
preAllocatedAvailableRegisters = preAllocatedAvailableRegisters[:0]
for i := range Registers {
if Registers[i].Latency <= cycle && (sins.CanReuse || i != sins.Src_Reg) &&
if Registers[i].Latency <= cycle && (sins.CanReuse || i != sins.Src) &&
(allowChainedMul || sins.OpGroup != S_IMUL_R || Registers[i].LastOpGroup != S_IMUL_R) &&
(Registers[i].LastOpGroup != sins.OpGroup || Registers[i].LastOpPar != sins.OpGroupPar) &&
(sins.Opcode != S_IADD_RS || i != RegisterNeedsDisplacement) {
@ -749,7 +749,7 @@ func (sins *SuperScalarInstruction) SelectDestination(preAllocatedAvailableRegis
}
}
return selectRegister(preAllocatedAvailableRegisters, gen, &sins.Dst_Reg)
return selectRegister(preAllocatedAvailableRegisters, gen, &sins.Dst)
}
func selectRegister(available_registers []int, gen *Blake2Generator, reg *int) bool {
@ -776,25 +776,25 @@ func executeSuperscalar(p []SuperScalarInstruction, r *RegisterLine) {
ins := &p[i]
switch ins.Opcode {
case S_ISUB_R:
r[ins.Dst_Reg] -= r[ins.Src_Reg]
r[ins.Dst] -= r[ins.Src]
case S_IXOR_R:
r[ins.Dst_Reg] ^= r[ins.Src_Reg]
r[ins.Dst] ^= r[ins.Src]
case S_IADD_RS:
r[ins.Dst_Reg] += r[ins.Src_Reg] << ins.Imm32
r[ins.Dst] += r[ins.Src] << ins.Imm32
case S_IMUL_R:
r[ins.Dst_Reg] *= r[ins.Src_Reg]
r[ins.Dst] *= r[ins.Src]
case S_IROR_C:
r[ins.Dst_Reg] = bits.RotateLeft64(r[ins.Dst_Reg], 0-int(ins.Imm32))
r[ins.Dst] = bits.RotateLeft64(r[ins.Dst], 0-int(ins.Imm32))
case S_IADD_C7, S_IADD_C8, S_IADD_C9:
r[ins.Dst_Reg] += signExtend2sCompl(ins.Imm32)
r[ins.Dst] += signExtend2sCompl(ins.Imm32)
case S_IXOR_C7, S_IXOR_C8, S_IXOR_C9:
r[ins.Dst_Reg] ^= signExtend2sCompl(ins.Imm32)
r[ins.Dst] ^= signExtend2sCompl(ins.Imm32)
case S_IMULH_R:
r[ins.Dst_Reg], _ = bits.Mul64(r[ins.Dst_Reg], r[ins.Src_Reg])
r[ins.Dst], _ = bits.Mul64(r[ins.Dst], r[ins.Src])
case S_ISMULH_R:
r[ins.Dst_Reg] = smulh(int64(r[ins.Dst_Reg]), int64(r[ins.Src_Reg]))
r[ins.Dst] = smulh(int64(r[ins.Dst]), int64(r[ins.Src]))
case S_IMUL_RCP:
r[ins.Dst_Reg] *= randomx_reciprocal(ins.Imm32)
r[ins.Dst] *= ins.Imm64
}
}

View file

@ -27,8 +27,8 @@ func generateSuperscalarCode(scalarProgram SuperScalarProgram) SuperScalarProgra
for i := range p {
instr := &p[i]
dst := instr.Dst_Reg % RegistersCount
src := instr.Src_Reg % RegistersCount
dst := instr.Dst % RegistersCount
src := instr.Src % RegistersCount
switch instr.Opcode {
case S_ISUB_R:
@ -80,9 +80,9 @@ func generateSuperscalarCode(scalarProgram SuperScalarProgram) SuperScalarProgra
program = append(program, byte(0xc2+8*dst))
case S_IMUL_RCP:
program = append(program, MOV_RAX_I...)
program = binary.LittleEndian.AppendUint64(program, randomx_reciprocal(instr.Imm32))
program = binary.LittleEndian.AppendUint64(program, instr.Imm64)
program = append(program, REX_IMUL_RM...)
program = append(program, byte(0xc0+8*instr.Dst_Reg))
program = append(program, byte(0xc0+8*instr.Dst))
default:
panic("unreachable")
}