Cleanup readme, superscalar
This commit is contained in:
parent
d20dd880ce
commit
4903cd7407
37
README.md
37
README.md
|
@ -1,29 +1,36 @@
|
|||
# RandomX (Golang Implementation)
|
||||
RandomX is a proof-of-work (PoW) algorithm that is optimized for general-purpose CPUs.
|
||||
RandomX uses random code execution (hence the name) together with several memory-hard techniques to minimize the efficiency advantage of specialized hardware.
|
||||
|
||||
---
|
||||
|
||||
Fork from [git.dero.io/DERO_Foundation/RandomX](https://git.dero.io/DERO_Foundation/RandomX). Also related, their [Analysis of RandomX writeup](https://medium.com/deroproject/analysis-of-randomx-dde9dfe9bbc6).
|
||||
|
||||
Original code failed RandomX testcases and was implemented using big.Float.
|
||||
|
||||
This package implements RandomX without CGO, using only Golang code, pure float64 ops and two small assembly sections to implement CFROUND modes, with optional soft float implementation.
|
||||
---
|
||||
|
||||
This package implements RandomX without CGO, using only Golang code, native float64 ops, some assembly, but with optional soft float _purego_ implementation.
|
||||
|
||||
All test cases pass properly.
|
||||
|
||||
JIT is supported on a few platforms but can be hard-disabled via the `disable_jit` build flag, or at runtime.
|
||||
For the C++ implementation and design of RandomX, see [github.com/tevador/RandomX](https://github.com/tevador/RandomX)
|
||||
|
||||
| Feature | 386 | amd64 | arm | arm64 | mips | mips64 | riscv64 | wasm |
|
||||
|:----------------------------:|:---:|:-----:|:---:|:-----:|:----:|:------:|:-------:|:----:|
|
||||
| purego | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| Hardware Float Operations | ✅ | ✅ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ |
|
||||
| Hardware AES Operations | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
||||
| Native Superscalar Execution | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| Superscalar JIT Execution | ❌ | ✅* | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
||||
| Native VM Execution | ✅ | ✅ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ |
|
||||
| VM JIT Execution | ❌ | ✅* | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
||||
|
||||
|
||||
A pure Golang implementation can be used on platforms without hard float support or via the `purego` build flag manually.
|
||||
|
||||
| Platform | Hard Float | Hard AES | JIT | Native | purego | Notes |
|
||||
|:-----------:|:----------:|:--------:|:---:|:------:|:------:|:----------------:|
|
||||
| **386** | ✅ | ❌ | ❌ | ✅ | ✅ | |
|
||||
| **amd64** | ✅ | ✅ | ✅* | ✅ | ✅ | JIT only on Unix |
|
||||
| **arm** | ❌ | ❌ | ❌ | ❌ | ✅ | |
|
||||
| **arm64** | ✅ | ❌ | ❌ | ✅ | ✅ | |
|
||||
| **mips** | ❌ | ❌ | ❌ | ❌ | ✅ | |
|
||||
| **mips64** | ❌ | ❌ | ❌ | ❌ | ✅ | |
|
||||
| **riscv64** | ❌ | ❌ | ❌ | ❌ | ✅ | |
|
||||
| **wasm** | ❌ | ❌ | ❌ | ❌ | ✅ | |
|
||||
Any platform with no hard float support or when enabled manually will use soft float, using [softfloat64](https://git.gammaspectra.live/P2Pool/softfloat64). This will be very slow.
|
||||
|
||||
Native hard float can be added with supporting rounding mode under _asm_.
|
||||
|
||||
Any platform with no hard float support (soft float using [softfloat64](git.gammaspectra.live/P2Pool/softfloat64)) will be vastly slow.
|
||||
|
||||
Native hard float can be added with supporting rounding mode under _asm_.
|
||||
JIT only supported under Unix systems (Linux, *BSD, macOS), and can be hard-disabled via the `disable_jit` build flag, or at runtime.
|
|
@ -307,11 +307,11 @@ var slot10 = []*Instruction{&IMUL_RCP}
|
|||
// SuperScalarInstruction superscalar program is built with superscalar instructions
|
||||
type SuperScalarInstruction struct {
|
||||
Opcode byte
|
||||
Dst_Reg int
|
||||
Src_Reg int
|
||||
Dst int
|
||||
Src int
|
||||
Mod byte
|
||||
Imm32 uint32
|
||||
Type int
|
||||
Imm64 uint64
|
||||
OpGroup int
|
||||
OpGroupPar int
|
||||
GroupParIsSource int
|
||||
|
@ -320,17 +320,15 @@ type SuperScalarInstruction struct {
|
|||
}
|
||||
|
||||
func (sins *SuperScalarInstruction) FixSrcReg() {
|
||||
if sins.Src_Reg >= 0 {
|
||||
// do nothing
|
||||
} else {
|
||||
sins.Src_Reg = sins.Dst_Reg
|
||||
if sins.Src == 0xff {
|
||||
sins.Src = sins.Dst
|
||||
}
|
||||
|
||||
}
|
||||
func (sins *SuperScalarInstruction) Reset() {
|
||||
sins.Opcode = 99
|
||||
sins.Src_Reg = -1
|
||||
sins.Dst_Reg = -1
|
||||
sins.Src = 0xff
|
||||
sins.Dst = 0xff
|
||||
sins.CanReuse = false
|
||||
sins.GroupParIsSource = 0
|
||||
}
|
||||
|
@ -406,6 +404,8 @@ func create(sins *SuperScalarInstruction, ins *Instruction, gen *Blake2Generator
|
|||
}
|
||||
}
|
||||
|
||||
sins.Imm64 = randomx_reciprocal(sins.Imm32)
|
||||
|
||||
sins.OpGroup = S_IMUL_RCP
|
||||
|
||||
default:
|
||||
|
@ -450,11 +450,11 @@ func CreateSuperScalarInstruction(sins *SuperScalarInstruction, gen *Blake2Gener
|
|||
type SuperScalarProgram []SuperScalarInstruction
|
||||
|
||||
func (p SuperScalarProgram) setAddressRegister(addressRegister int) {
|
||||
p[0].Dst_Reg = addressRegister
|
||||
p[0].Dst = addressRegister
|
||||
}
|
||||
|
||||
func (p SuperScalarProgram) AddressRegister() int {
|
||||
return p[0].Dst_Reg
|
||||
return p[0].Dst
|
||||
}
|
||||
func (p SuperScalarProgram) Program() []SuperScalarInstruction {
|
||||
return p[1:]
|
||||
|
@ -569,9 +569,9 @@ func Build_SuperScalar_Program(gen *Blake2Generator) SuperScalarProgram {
|
|||
depcycle = scheduleCycle + mop.GetLatency() // calculate when will the result be ready
|
||||
|
||||
if macro_op_index == sins.ins.ResultOP { // fix me
|
||||
registers[sins.Dst_Reg].Latency = depcycle
|
||||
registers[sins.Dst_Reg].LastOpGroup = sins.OpGroup
|
||||
registers[sins.Dst_Reg].LastOpPar = sins.OpGroupPar
|
||||
registers[sins.Dst].Latency = depcycle
|
||||
registers[sins.Dst].LastOpGroup = sins.OpGroup
|
||||
registers[sins.Dst].LastOpPar = sins.OpGroupPar
|
||||
|
||||
}
|
||||
|
||||
|
@ -609,12 +609,12 @@ func Build_SuperScalar_Program(gen *Blake2Generator) SuperScalarProgram {
|
|||
if i == 0 {
|
||||
continue
|
||||
}
|
||||
lastdst := asic_latencies[program[i].Dst_Reg] + 1
|
||||
lastdst := asic_latencies[program[i].Dst] + 1
|
||||
lastsrc := 0
|
||||
if program[i].Dst_Reg != program[i].Src_Reg {
|
||||
lastsrc = asic_latencies[program[i].Src_Reg] + 1
|
||||
if program[i].Dst != program[i].Src {
|
||||
lastsrc = asic_latencies[program[i].Src] + 1
|
||||
}
|
||||
asic_latencies[program[i].Dst_Reg] = max(lastdst, lastsrc)
|
||||
asic_latencies[program[i].Dst] = max(lastdst, lastsrc)
|
||||
}
|
||||
|
||||
asic_latency_max := 0
|
||||
|
@ -719,18 +719,18 @@ func (sins *SuperScalarInstruction) SelectSource(preAllocatedAvailableRegisters
|
|||
|
||||
if len(available_registers) == 2 && sins.Opcode == S_IADD_RS {
|
||||
if available_registers[0] == RegisterNeedsDisplacement || available_registers[1] == RegisterNeedsDisplacement {
|
||||
sins.Src_Reg = RegisterNeedsDisplacement
|
||||
sins.OpGroupPar = sins.Src_Reg
|
||||
sins.Src = RegisterNeedsDisplacement
|
||||
sins.OpGroupPar = sins.Src
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
if selectRegister(available_registers, gen, &sins.Src_Reg) {
|
||||
if selectRegister(available_registers, gen, &sins.Src) {
|
||||
|
||||
if sins.GroupParIsSource == 0 {
|
||||
|
||||
} else {
|
||||
sins.OpGroupPar = sins.Src_Reg
|
||||
sins.OpGroupPar = sins.Src
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
@ -741,7 +741,7 @@ func (sins *SuperScalarInstruction) SelectDestination(preAllocatedAvailableRegis
|
|||
preAllocatedAvailableRegisters = preAllocatedAvailableRegisters[:0]
|
||||
|
||||
for i := range Registers {
|
||||
if Registers[i].Latency <= cycle && (sins.CanReuse || i != sins.Src_Reg) &&
|
||||
if Registers[i].Latency <= cycle && (sins.CanReuse || i != sins.Src) &&
|
||||
(allowChainedMul || sins.OpGroup != S_IMUL_R || Registers[i].LastOpGroup != S_IMUL_R) &&
|
||||
(Registers[i].LastOpGroup != sins.OpGroup || Registers[i].LastOpPar != sins.OpGroupPar) &&
|
||||
(sins.Opcode != S_IADD_RS || i != RegisterNeedsDisplacement) {
|
||||
|
@ -749,7 +749,7 @@ func (sins *SuperScalarInstruction) SelectDestination(preAllocatedAvailableRegis
|
|||
}
|
||||
}
|
||||
|
||||
return selectRegister(preAllocatedAvailableRegisters, gen, &sins.Dst_Reg)
|
||||
return selectRegister(preAllocatedAvailableRegisters, gen, &sins.Dst)
|
||||
}
|
||||
|
||||
func selectRegister(available_registers []int, gen *Blake2Generator, reg *int) bool {
|
||||
|
@ -776,25 +776,25 @@ func executeSuperscalar(p []SuperScalarInstruction, r *RegisterLine) {
|
|||
ins := &p[i]
|
||||
switch ins.Opcode {
|
||||
case S_ISUB_R:
|
||||
r[ins.Dst_Reg] -= r[ins.Src_Reg]
|
||||
r[ins.Dst] -= r[ins.Src]
|
||||
case S_IXOR_R:
|
||||
r[ins.Dst_Reg] ^= r[ins.Src_Reg]
|
||||
r[ins.Dst] ^= r[ins.Src]
|
||||
case S_IADD_RS:
|
||||
r[ins.Dst_Reg] += r[ins.Src_Reg] << ins.Imm32
|
||||
r[ins.Dst] += r[ins.Src] << ins.Imm32
|
||||
case S_IMUL_R:
|
||||
r[ins.Dst_Reg] *= r[ins.Src_Reg]
|
||||
r[ins.Dst] *= r[ins.Src]
|
||||
case S_IROR_C:
|
||||
r[ins.Dst_Reg] = bits.RotateLeft64(r[ins.Dst_Reg], 0-int(ins.Imm32))
|
||||
r[ins.Dst] = bits.RotateLeft64(r[ins.Dst], 0-int(ins.Imm32))
|
||||
case S_IADD_C7, S_IADD_C8, S_IADD_C9:
|
||||
r[ins.Dst_Reg] += signExtend2sCompl(ins.Imm32)
|
||||
r[ins.Dst] += signExtend2sCompl(ins.Imm32)
|
||||
case S_IXOR_C7, S_IXOR_C8, S_IXOR_C9:
|
||||
r[ins.Dst_Reg] ^= signExtend2sCompl(ins.Imm32)
|
||||
r[ins.Dst] ^= signExtend2sCompl(ins.Imm32)
|
||||
case S_IMULH_R:
|
||||
r[ins.Dst_Reg], _ = bits.Mul64(r[ins.Dst_Reg], r[ins.Src_Reg])
|
||||
r[ins.Dst], _ = bits.Mul64(r[ins.Dst], r[ins.Src])
|
||||
case S_ISMULH_R:
|
||||
r[ins.Dst_Reg] = smulh(int64(r[ins.Dst_Reg]), int64(r[ins.Src_Reg]))
|
||||
r[ins.Dst] = smulh(int64(r[ins.Dst]), int64(r[ins.Src]))
|
||||
case S_IMUL_RCP:
|
||||
r[ins.Dst_Reg] *= randomx_reciprocal(ins.Imm32)
|
||||
r[ins.Dst] *= ins.Imm64
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -27,8 +27,8 @@ func generateSuperscalarCode(scalarProgram SuperScalarProgram) SuperScalarProgra
|
|||
for i := range p {
|
||||
instr := &p[i]
|
||||
|
||||
dst := instr.Dst_Reg % RegistersCount
|
||||
src := instr.Src_Reg % RegistersCount
|
||||
dst := instr.Dst % RegistersCount
|
||||
src := instr.Src % RegistersCount
|
||||
|
||||
switch instr.Opcode {
|
||||
case S_ISUB_R:
|
||||
|
@ -80,9 +80,9 @@ func generateSuperscalarCode(scalarProgram SuperScalarProgram) SuperScalarProgra
|
|||
program = append(program, byte(0xc2+8*dst))
|
||||
case S_IMUL_RCP:
|
||||
program = append(program, MOV_RAX_I...)
|
||||
program = binary.LittleEndian.AppendUint64(program, randomx_reciprocal(instr.Imm32))
|
||||
program = binary.LittleEndian.AppendUint64(program, instr.Imm64)
|
||||
program = append(program, REX_IMUL_RM...)
|
||||
program = append(program, byte(0xc0+8*instr.Dst_Reg))
|
||||
program = append(program, byte(0xc0+8*instr.Dst))
|
||||
default:
|
||||
panic("unreachable")
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue