Version v2.1.0, cleaned up useless printf and string comparisons
This commit is contained in:
parent
1bb1da8bbc
commit
78b0645034
|
@ -38,7 +38,6 @@ func (b *Blake2Generator) checkdata(bytesNeeded int) {
|
|||
func (b *Blake2Generator) GetByte() byte {
|
||||
b.checkdata(1)
|
||||
ret := b.data[b.dataindex]
|
||||
//fmt.Printf("returning byte %02x\n", ret)
|
||||
b.dataindex++
|
||||
return ret
|
||||
}
|
||||
|
|
245
superscalar.go
245
superscalar.go
|
@ -29,9 +29,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
package randomx
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
import "math/bits"
|
||||
|
||||
type ExecutionPort byte
|
||||
|
@ -81,7 +78,6 @@ func (m *MacroOP) IsDependent() bool {
|
|||
}
|
||||
|
||||
// 3 byte instructions
|
||||
var M_NOP = MacroOP{"NOP", 0, 0, Null, Null, false}
|
||||
var M_Add_rr = MacroOP{"add r,r", 3, 1, P015, Null, false}
|
||||
var M_Sub_rr = MacroOP{"sub r,r", 3, 1, P015, Null, false}
|
||||
var M_Xor_rr = MacroOP{"xor r,r", 3, 1, P015, Null, false}
|
||||
|
@ -107,7 +103,6 @@ var M_Mov_ri64 = MacroOP{"mov rax,i64", 10, 1, P015, Null, false}
|
|||
// unused are not implemented
|
||||
|
||||
type Instruction struct {
|
||||
Name string
|
||||
Opcode byte
|
||||
UOP MacroOP
|
||||
SrcOP int
|
||||
|
@ -120,7 +115,8 @@ func (ins *Instruction) GetUOPCount() int {
|
|||
if len(ins.UOP_Array) != 0 {
|
||||
return len(ins.UOP_Array)
|
||||
} else {
|
||||
if ins.Name == "NOP" { // nop is assumed to be zero bytes
|
||||
// NOP
|
||||
if ins.Opcode == S_NOP { // nop is assumed to be zero bytes
|
||||
return 0
|
||||
}
|
||||
return 1
|
||||
|
@ -160,59 +156,42 @@ func (ins *Instruction) GetLatency() int {
|
|||
}
|
||||
|
||||
const (
|
||||
S_INVALID int = -1
|
||||
S_ISUB_R = 0
|
||||
S_IXOR_R = 1
|
||||
S_IADD_RS = 2
|
||||
S_IMUL_R = 3
|
||||
S_IROR_C = 4
|
||||
S_IADD_C7 = 5
|
||||
S_IXOR_C7 = 6
|
||||
S_IADD_C8 = 7
|
||||
S_IXOR_C8 = 8
|
||||
S_IADD_C9 = 9
|
||||
S_IXOR_C9 = 10
|
||||
S_IMULH_R = 11
|
||||
S_ISMULH_R = 12
|
||||
S_IMUL_RCP = 13
|
||||
S_INVALID = 0xFF
|
||||
|
||||
S_NOP = 0xFE
|
||||
S_ISUB_R = 0
|
||||
S_IXOR_R = 1
|
||||
S_IADD_RS = 2
|
||||
S_IMUL_R = 3
|
||||
S_IROR_C = 4
|
||||
S_IADD_C7 = 5
|
||||
S_IXOR_C7 = 6
|
||||
S_IADD_C8 = 7
|
||||
S_IXOR_C8 = 8
|
||||
S_IADD_C9 = 9
|
||||
S_IXOR_C9 = 10
|
||||
S_IMULH_R = 11
|
||||
S_ISMULH_R = 12
|
||||
S_IMUL_RCP = 13
|
||||
)
|
||||
|
||||
var Opcode_To_String = map[int]string{S_INVALID: "INVALID",
|
||||
S_ISUB_R: "ISUB_R",
|
||||
S_IXOR_R: "IXOR_R",
|
||||
S_IADD_RS: "IADD_RS",
|
||||
S_IMUL_R: "IMUL_R",
|
||||
S_IROR_C: "IROR_C",
|
||||
S_IADD_C7: "IADD_C7",
|
||||
S_IXOR_C7: "IXOR_C7",
|
||||
S_IADD_C8: "IADD_C8",
|
||||
S_IXOR_C8: "IXOR_C8",
|
||||
S_IADD_C9: "IADD_C9",
|
||||
S_IXOR_C9: "IXOR_C9",
|
||||
S_IMULH_R: "IMULH_R",
|
||||
S_ISMULH_R: "ISMULH_R",
|
||||
S_IMUL_RCP: "IMUL_RCP",
|
||||
}
|
||||
|
||||
// SrcOP/DstOP are used to select registers
|
||||
var ISUB_R = Instruction{Name: "ISUB_R", Opcode: S_ISUB_R, UOP: M_Sub_rr, SrcOP: 0}
|
||||
var IXOR_R = Instruction{Name: "IXOR_R", Opcode: S_IXOR_R, UOP: M_Xor_rr, SrcOP: 0}
|
||||
var IADD_RS = Instruction{Name: "IADD_RS", Opcode: S_IADD_RS, UOP: M_Lea_SIB, SrcOP: 0}
|
||||
var IMUL_R = Instruction{Name: "IMUL_R", Opcode: S_IMUL_R, UOP: M_Imul_rr, SrcOP: 0}
|
||||
var IROR_C = Instruction{Name: "IROR_C", Opcode: S_IROR_C, UOP: M_Ror_ri, SrcOP: -1}
|
||||
var ISUB_R = Instruction{Opcode: S_ISUB_R, UOP: M_Sub_rr, SrcOP: 0}
|
||||
var IXOR_R = Instruction{Opcode: S_IXOR_R, UOP: M_Xor_rr, SrcOP: 0}
|
||||
var IADD_RS = Instruction{Opcode: S_IADD_RS, UOP: M_Lea_SIB, SrcOP: 0}
|
||||
var IMUL_R = Instruction{Opcode: S_IMUL_R, UOP: M_Imul_rr, SrcOP: 0}
|
||||
var IROR_C = Instruction{Opcode: S_IROR_C, UOP: M_Ror_ri, SrcOP: -1}
|
||||
|
||||
var IADD_C7 = Instruction{Name: "IADD_C7", Opcode: S_IADD_C7, UOP: M_Add_ri, SrcOP: -1}
|
||||
var IXOR_C7 = Instruction{Name: "IXOR_C7", Opcode: S_IXOR_C7, UOP: M_Xor_ri, SrcOP: -1}
|
||||
var IADD_C8 = Instruction{Name: "IADD_C8", Opcode: S_IADD_C8, UOP: M_Add_ri, SrcOP: -1}
|
||||
var IXOR_C8 = Instruction{Name: "IXOR_C8", Opcode: S_IXOR_C8, UOP: M_Xor_ri, SrcOP: -1}
|
||||
var IADD_C9 = Instruction{Name: "IADD_C9", Opcode: S_IADD_C9, UOP: M_Add_ri, SrcOP: -1}
|
||||
var IXOR_C9 = Instruction{Name: "IXOR_C9", Opcode: S_IXOR_C9, UOP: M_Xor_ri, SrcOP: -1}
|
||||
var IADD_C7 = Instruction{Opcode: S_IADD_C7, UOP: M_Add_ri, SrcOP: -1}
|
||||
var IXOR_C7 = Instruction{Opcode: S_IXOR_C7, UOP: M_Xor_ri, SrcOP: -1}
|
||||
var IADD_C8 = Instruction{Opcode: S_IADD_C8, UOP: M_Add_ri, SrcOP: -1}
|
||||
var IXOR_C8 = Instruction{Opcode: S_IXOR_C8, UOP: M_Xor_ri, SrcOP: -1}
|
||||
var IADD_C9 = Instruction{Opcode: S_IADD_C9, UOP: M_Add_ri, SrcOP: -1}
|
||||
var IXOR_C9 = Instruction{Opcode: S_IXOR_C9, UOP: M_Xor_ri, SrcOP: -1}
|
||||
|
||||
var IMULH_R = Instruction{Name: "IMULH_R", Opcode: S_IMULH_R, UOP_Array: []MacroOP{M_Mov_rr, M_Mul_r, M_Mov_rr}, ResultOP: 1, DstOP: 0, SrcOP: 1}
|
||||
var ISMULH_R = Instruction{Name: "ISMULH_R", Opcode: S_ISMULH_R, UOP_Array: []MacroOP{M_Mov_rr, M_Imul_r, M_Mov_rr}, ResultOP: 1, DstOP: 0, SrcOP: 1}
|
||||
var IMUL_RCP = Instruction{Name: "IMUL_RCP", Opcode: S_IMUL_RCP, UOP_Array: []MacroOP{M_Mov_ri64, M_Imul_r_dependent}, ResultOP: 1, DstOP: 1, SrcOP: -1}
|
||||
|
||||
var INOP = Instruction{Name: "NOP", UOP: M_NOP}
|
||||
var IMULH_R = Instruction{Opcode: S_IMULH_R, UOP_Array: []MacroOP{M_Mov_rr, M_Mul_r, M_Mov_rr}, ResultOP: 1, DstOP: 0, SrcOP: 1}
|
||||
var ISMULH_R = Instruction{Opcode: S_ISMULH_R, UOP_Array: []MacroOP{M_Mov_rr, M_Imul_r, M_Mov_rr}, ResultOP: 1, DstOP: 0, SrcOP: 1}
|
||||
var IMUL_RCP = Instruction{Opcode: S_IMUL_RCP, UOP_Array: []MacroOP{M_Mov_ri64, M_Imul_r_dependent}, ResultOP: 1, DstOP: 1, SrcOP: -1}
|
||||
|
||||
// how random 16 bytes are split into instructions
|
||||
var buffer0 = []int{4, 8, 4}
|
||||
|
@ -281,7 +260,7 @@ func (d DecoderType) String() string {
|
|||
|
||||
func FetchNextDecoder(ins *Instruction, cycle int, mulcount int, gen *Blake2Generator) DecoderType {
|
||||
|
||||
if ins.Name == IMULH_R.Name || ins.Name == ISMULH_R.Name {
|
||||
if ins.Opcode == S_IMULH_R || ins.Opcode == S_ISMULH_R {
|
||||
return Decoder3310
|
||||
}
|
||||
|
||||
|
@ -290,7 +269,7 @@ func FetchNextDecoder(ins *Instruction, cycle int, mulcount int, gen *Blake2Gene
|
|||
return Decoder4444
|
||||
}
|
||||
|
||||
if ins.Name == IMUL_RCP.Name {
|
||||
if ins.Opcode == S_IMUL_RCP {
|
||||
if gen.GetByte()&1 == 1 {
|
||||
return Decoder484
|
||||
} else {
|
||||
|
@ -325,7 +304,7 @@ var slot8 = []*Instruction{&IXOR_C8, &IADD_C8}
|
|||
var slot9 = []*Instruction{&IXOR_C9, &IADD_C9}
|
||||
var slot10 = []*Instruction{&IMUL_RCP}
|
||||
|
||||
// superscalar program is built with superscalara instructions
|
||||
// SuperScalarInstruction superscalar program is built with superscalar instructions
|
||||
type SuperScalarInstruction struct {
|
||||
Opcode byte
|
||||
Dst_Reg int
|
||||
|
@ -333,7 +312,6 @@ type SuperScalarInstruction struct {
|
|||
Mod byte
|
||||
Imm32 uint32
|
||||
Type int
|
||||
Name string
|
||||
OpGroup int
|
||||
OpGroupPar int
|
||||
GroupParIsSource int
|
||||
|
@ -341,22 +319,6 @@ type SuperScalarInstruction struct {
|
|||
CanReuse bool
|
||||
}
|
||||
|
||||
func (sins SuperScalarInstruction) String() string {
|
||||
result := fmt.Sprintf("; %10s %2d ", sins.Name, sins.Opcode)
|
||||
result += fmt.Sprintf("dst r%d ", sins.Dst_Reg)
|
||||
|
||||
if sins.Src_Reg >= 0 {
|
||||
result += fmt.Sprintf("src r%d ", sins.Src_Reg)
|
||||
} else {
|
||||
result += fmt.Sprintf("src r%d ", sins.Dst_Reg)
|
||||
}
|
||||
|
||||
result += fmt.Sprintf("Mod %08x ", sins.Mod)
|
||||
result += fmt.Sprintf("Imm %08x ", sins.Imm32)
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
func (sins *SuperScalarInstruction) FixSrcReg() {
|
||||
if sins.Src_Reg >= 0 {
|
||||
// do nothing
|
||||
|
@ -375,44 +337,33 @@ func (sins *SuperScalarInstruction) Reset() {
|
|||
func create(sins *SuperScalarInstruction, ins *Instruction, gen *Blake2Generator) {
|
||||
sins.Reset()
|
||||
sins.ins = ins
|
||||
sins.Name = ins.Name
|
||||
sins.OpGroupPar = -1
|
||||
sins.Opcode = ins.Opcode
|
||||
|
||||
switch ins.Name {
|
||||
case ISUB_R.Name:
|
||||
//fmt.Printf("%s \n", ins.Name)
|
||||
sins.Name = ins.Name
|
||||
switch ins.Opcode {
|
||||
case S_ISUB_R:
|
||||
sins.Mod = 0
|
||||
sins.Imm32 = 0
|
||||
sins.OpGroup = S_IADD_RS
|
||||
sins.GroupParIsSource = 1
|
||||
case IXOR_R.Name:
|
||||
//fmt.Printf("%s \n", ins.Name)
|
||||
sins.Name = ins.Name
|
||||
case S_IXOR_R:
|
||||
sins.Mod = 0
|
||||
sins.Imm32 = 0
|
||||
sins.OpGroup = S_IXOR_R
|
||||
sins.GroupParIsSource = 1
|
||||
case IADD_RS.Name:
|
||||
//fmt.Printf("q %s \n", ins.Name)
|
||||
sins.Name = ins.Name
|
||||
case S_IADD_RS:
|
||||
sins.Mod = gen.GetByte()
|
||||
// set modshift on Imm32
|
||||
sins.Imm32 = uint32((sins.Mod >> 2) % 4) // bits 2-3
|
||||
//sins.Imm32 = 0
|
||||
sins.OpGroup = S_IADD_RS
|
||||
sins.GroupParIsSource = 1
|
||||
case IMUL_R.Name:
|
||||
//fmt.Printf("%s \n", ins.Name)
|
||||
sins.Name = ins.Name
|
||||
case S_IMUL_R:
|
||||
sins.Mod = 0
|
||||
sins.Imm32 = 0
|
||||
sins.OpGroup = S_IMUL_R
|
||||
sins.GroupParIsSource = 1
|
||||
case IROR_C.Name:
|
||||
//fmt.Printf("%s \n", ins.Name)
|
||||
sins.Name = ins.Name
|
||||
case S_IROR_C:
|
||||
sins.Mod = 0
|
||||
|
||||
for sins.Imm32 = 0; sins.Imm32 == 0; {
|
||||
|
@ -421,41 +372,31 @@ func create(sins *SuperScalarInstruction, ins *Instruction, gen *Blake2Generator
|
|||
|
||||
sins.OpGroup = S_IROR_C
|
||||
sins.OpGroupPar = -1
|
||||
case IADD_C7.Name, IADD_C8.Name, IADD_C9.Name:
|
||||
//fmt.Printf("%s \n", ins.Name)
|
||||
sins.Name = ins.Name
|
||||
case S_IADD_C7, S_IADD_C8, S_IADD_C9:
|
||||
sins.Mod = 0
|
||||
sins.Imm32 = gen.GetUint32()
|
||||
sins.OpGroup = S_IADD_C7
|
||||
sins.OpGroupPar = -1
|
||||
case IXOR_C7.Name, IXOR_C8.Name, IXOR_C9.Name:
|
||||
//fmt.Printf("%s \n", ins.Name)
|
||||
sins.Name = ins.Name
|
||||
case S_IXOR_C7, S_IXOR_C8, S_IXOR_C9:
|
||||
sins.Mod = 0
|
||||
sins.Imm32 = gen.GetUint32()
|
||||
sins.OpGroup = S_IXOR_C7
|
||||
sins.OpGroupPar = -1
|
||||
|
||||
case IMULH_R.Name:
|
||||
//fmt.Printf("%s \n", ins.Name)
|
||||
sins.Name = ins.Name
|
||||
case S_IMULH_R:
|
||||
sins.CanReuse = true
|
||||
sins.Mod = 0
|
||||
sins.Imm32 = 0
|
||||
sins.OpGroup = S_IMULH_R
|
||||
sins.OpGroupPar = int(gen.GetUint32())
|
||||
case ISMULH_R.Name:
|
||||
//fmt.Printf("%s \n", ins.Name)
|
||||
sins.Name = ins.Name
|
||||
case S_ISMULH_R:
|
||||
sins.CanReuse = true
|
||||
sins.Mod = 0
|
||||
sins.Imm32 = 0
|
||||
sins.OpGroup = S_ISMULH_R
|
||||
sins.OpGroupPar = int(gen.GetUint32())
|
||||
|
||||
case IMUL_RCP.Name:
|
||||
//fmt.Printf("%s \n", ins.Name)
|
||||
sins.Name = ins.Name
|
||||
case S_IMUL_RCP:
|
||||
|
||||
sins.Mod = 0
|
||||
for {
|
||||
|
@ -468,7 +409,6 @@ func create(sins *SuperScalarInstruction, ins *Instruction, gen *Blake2Generator
|
|||
sins.OpGroup = S_IMUL_RCP
|
||||
|
||||
default:
|
||||
fmt.Printf("%s \n", ins.Name)
|
||||
panic("should not occur")
|
||||
|
||||
}
|
||||
|
@ -476,7 +416,6 @@ func create(sins *SuperScalarInstruction, ins *Instruction, gen *Blake2Generator
|
|||
}
|
||||
func CreateSuperScalarInstruction(sins *SuperScalarInstruction, gen *Blake2Generator, instruction_len int, decoder_type int, islast, isfirst bool) {
|
||||
|
||||
//fmt.Printf("instruction len %d\n", instruction_len)
|
||||
switch instruction_len {
|
||||
case 3:
|
||||
if islast {
|
||||
|
@ -495,7 +434,6 @@ func CreateSuperScalarInstruction(sins *SuperScalarInstruction, gen *Blake2Gener
|
|||
create(sins, slot7[gen.GetByte()&1], gen)
|
||||
|
||||
case 8:
|
||||
//fmt.Printf("creating 8\n")
|
||||
create(sins, slot8[gen.GetByte()&1], gen)
|
||||
|
||||
case 9:
|
||||
|
@ -544,7 +482,7 @@ func Build_SuperScalar_Program(gen *Blake2Generator) SuperScalarProgram {
|
|||
}
|
||||
|
||||
sins := &SuperScalarInstruction{}
|
||||
sins.ins = &Instruction{Name: "NOP"}
|
||||
sins.ins = &Instruction{Opcode: S_NOP}
|
||||
|
||||
portbusy := make([][]int, CYCLE_MAP_SIZE)
|
||||
for i := range portbusy {
|
||||
|
@ -557,28 +495,17 @@ func Build_SuperScalar_Program(gen *Blake2Generator) SuperScalarProgram {
|
|||
|
||||
decoder := FetchNextDecoder(sins.ins, decode_cycle, mulcount, gen)
|
||||
|
||||
//fmt.Printf("; ------------- fetch cycle %d (%s)\n", cycle, decoder)
|
||||
|
||||
if cycle == 51 {
|
||||
// break
|
||||
}
|
||||
|
||||
/* for i := range portbusy {
|
||||
for j := range portbusy[i]{
|
||||
portbusy[i][j]=false
|
||||
}
|
||||
}*/
|
||||
|
||||
buffer_index := 0
|
||||
|
||||
for buffer_index < decoder.GetSize() { // generate instructions for the current decoder
|
||||
top_cycle := cycle
|
||||
|
||||
//fmt.Printf("macro_op_index %d current_instruction %s actual instruction uop %d\n", macro_op_index, current_instruction.Name, sins.ins.GetUOPCount())
|
||||
|
||||
if macro_op_index >= sins.ins.GetUOPCount() {
|
||||
if ports_saturated || program_size >= SuperscalarMaxSize {
|
||||
//panic("breaking off") program built successfully
|
||||
break
|
||||
}
|
||||
CreateSuperScalarInstruction(sins, gen, Decoder_To_Instruction_Length[int(decoder)][buffer_index], int(decoder), len(Decoder_To_Instruction_Length[decoder]) == (buffer_index+1), buffer_index == 0)
|
||||
|
@ -593,23 +520,16 @@ func Build_SuperScalar_Program(gen *Blake2Generator) SuperScalarProgram {
|
|||
mop = sins.ins.UOP_Array[macro_op_index]
|
||||
}
|
||||
|
||||
//fmt.Printf("MOP name %s depcycle %d\n", mop.Name, depcycle)
|
||||
|
||||
//calculate the earliest cycle when this macro-op (all of its uOPs) can be scheduled for execution
|
||||
scheduleCycle := ScheduleMop(&mop, portbusy, cycle, depcycle, false)
|
||||
if scheduleCycle < 0 {
|
||||
//fmt.Printf("Unable to map operation %s to execution port (cycle %d)", mop.Name, cycle)
|
||||
//__debugbreak();
|
||||
ports_saturated = true
|
||||
break
|
||||
}
|
||||
|
||||
//fmt.Printf("scheduleCycle %d\n", scheduleCycle)
|
||||
|
||||
if macro_op_index == sins.ins.SrcOP { // FIXME
|
||||
forward := 0
|
||||
for ; forward < LOOK_FORWARD_CYCLES && !sins.SelectSource(preAllocatedRegisters, scheduleCycle, registers, gen); forward++ {
|
||||
//fmt.Printf(";src STALL at cycle %d\n", cycle)
|
||||
scheduleCycle++
|
||||
cycle++
|
||||
}
|
||||
|
@ -618,21 +538,16 @@ func Build_SuperScalar_Program(gen *Blake2Generator) SuperScalarProgram {
|
|||
if throwAwayCount < MAX_THROWAWAY_COUNT {
|
||||
throwAwayCount++
|
||||
macro_op_index = sins.ins.GetUOPCount()
|
||||
//fmt.Printf(";throwAway %s\n", sins.Name)
|
||||
continue
|
||||
}
|
||||
//fmt.Printf("aborting at cycle %d source registers not available", cycle)
|
||||
break
|
||||
}
|
||||
|
||||
//fmt.Printf("; src = r%d\n", sins.Src_Reg)
|
||||
|
||||
}
|
||||
|
||||
if macro_op_index == sins.ins.DstOP { // FIXME
|
||||
forward := 0
|
||||
for ; forward < LOOK_FORWARD_CYCLES && !sins.SelectDestination(preAllocatedRegisters, scheduleCycle, throwAwayCount > 0, registers, gen); forward++ {
|
||||
//fmt.Printf(";dst STALL at cycle %d\n", cycle)
|
||||
scheduleCycle++
|
||||
cycle++
|
||||
}
|
||||
|
@ -641,15 +556,11 @@ func Build_SuperScalar_Program(gen *Blake2Generator) SuperScalarProgram {
|
|||
if throwAwayCount < MAX_THROWAWAY_COUNT {
|
||||
throwAwayCount++
|
||||
macro_op_index = sins.ins.GetUOPCount()
|
||||
//fmt.Printf(";throwAway %s\n", sins.Name)
|
||||
continue
|
||||
}
|
||||
//fmt.Printf("aborting at cycle %d destination registers not available", cycle)
|
||||
break
|
||||
}
|
||||
|
||||
//fmt.Printf("; dst = r%d\n", sins.Dst_Reg)
|
||||
|
||||
}
|
||||
throwAwayCount = 0
|
||||
// recalculate when the instruction can be scheduled based on operand availability
|
||||
|
@ -658,8 +569,6 @@ func Build_SuperScalar_Program(gen *Blake2Generator) SuperScalarProgram {
|
|||
depcycle = scheduleCycle + mop.GetLatency() // calculate when will the result be ready
|
||||
|
||||
if macro_op_index == sins.ins.ResultOP { // fix me
|
||||
//retire_cycle = depcycle
|
||||
//fmt.Printf("; RETIRED at cycle %d Dst_Reg %d\n", retire_cycle, sins.Dst_Reg)
|
||||
registers[sins.Dst_Reg].Latency = depcycle
|
||||
registers[sins.Dst_Reg].LastOpGroup = sins.OpGroup
|
||||
registers[sins.Dst_Reg].LastOpPar = sins.OpGroupPar
|
||||
|
@ -677,12 +586,12 @@ func Build_SuperScalar_Program(gen *Blake2Generator) SuperScalarProgram {
|
|||
}
|
||||
cycle = top_cycle
|
||||
|
||||
// when all uops of current instruction have been issued, add the instruction to supercalara program
|
||||
// when all uops of current instruction have been issued, add the instruction to superscalar program
|
||||
if macro_op_index >= sins.ins.GetUOPCount() {
|
||||
sins.FixSrcReg() // fix src register once and for all
|
||||
program = append(program, *sins)
|
||||
|
||||
if sins.ins.Name == "IMUL_R" || sins.ins.Name == "IMULH_R" || sins.ins.Name == "ISMULH_R" || sins.ins.Name == "IMUL_RCP" {
|
||||
if sins.ins.Opcode == S_IMUL_R || sins.ins.Opcode == S_IMULH_R || sins.ins.Opcode == S_ISMULH_R || sins.ins.Opcode == S_IMUL_RCP {
|
||||
mulcount++
|
||||
}
|
||||
|
||||
|
@ -690,39 +599,28 @@ func Build_SuperScalar_Program(gen *Blake2Generator) SuperScalarProgram {
|
|||
|
||||
done++
|
||||
|
||||
// if done >= 20 {break}
|
||||
|
||||
}
|
||||
cycle++
|
||||
}
|
||||
|
||||
/*
|
||||
for i := range program.Instructions {
|
||||
fmt.Printf("%d %s\n", i, program.Instructions[i].String())
|
||||
}
|
||||
|
||||
*/
|
||||
|
||||
var asic_latencies [8]int
|
||||
|
||||
for i := range program {
|
||||
if i == 0 {
|
||||
continue
|
||||
}
|
||||
//fmt.Printf("%d %s\n",i ,program[i].String() )
|
||||
lastdst := asic_latencies[program[i].Dst_Reg] + 1
|
||||
lastsrc := 0
|
||||
if program[i].Dst_Reg != program[i].Src_Reg {
|
||||
lastsrc = asic_latencies[program[i].Src_Reg] + 1
|
||||
}
|
||||
asic_latencies[program[i].Dst_Reg] = Max(lastdst, lastsrc)
|
||||
asic_latencies[program[i].Dst_Reg] = max(lastdst, lastsrc)
|
||||
}
|
||||
|
||||
asic_latency_max := 0
|
||||
address_reg := 0
|
||||
|
||||
for i := range asic_latencies {
|
||||
//fmt.Printf("latency[%d] %d\n", i, asic_latencies[i])
|
||||
if asic_latencies[i] > asic_latency_max {
|
||||
asic_latency_max = asic_latencies[i]
|
||||
address_reg = i
|
||||
|
@ -732,8 +630,6 @@ func Build_SuperScalar_Program(gen *Blake2Generator) SuperScalarProgram {
|
|||
// Set AddressRegister hack
|
||||
program.setAddressRegister(address_reg)
|
||||
|
||||
//fmt.Printf("address_reg %d\n", address_reg)
|
||||
|
||||
return program
|
||||
}
|
||||
|
||||
|
@ -741,34 +637,25 @@ const CYCLE_MAP_SIZE int = RANDOMX_SUPERSCALAR_LATENCY + 4
|
|||
const LOOK_FORWARD_CYCLES int = 4
|
||||
const MAX_THROWAWAY_COUNT int = 256
|
||||
|
||||
// schedule the uop as early as possible
|
||||
// ScheduleUop schedule the uop as early as possible
|
||||
func ScheduleUop(uop ExecutionPort, portbusy [][]int, cycle int, commit bool) int {
|
||||
//cycle++
|
||||
for ; cycle < CYCLE_MAP_SIZE; cycle++ { // since cycle is value based, its restored on return
|
||||
//fmt.Printf("port busy %+v\n", portbusy[cycle])
|
||||
//fmt.Printf("current cycle %d portbusy %+v commit %+v\n", cycle, portbusy[cycle], commit)
|
||||
if (uop&P5) != 0 && portbusy[cycle][2] == 0 {
|
||||
if commit {
|
||||
//fmt.Printf("; P5 at cycle %d\n", cycle)
|
||||
portbusy[cycle][2] = int(uop)
|
||||
}
|
||||
//fmt.Printf("P5 available\n")
|
||||
return cycle
|
||||
}
|
||||
if (uop&P0) != 0 && portbusy[cycle][0] == 0 {
|
||||
if commit {
|
||||
//fmt.Printf("; P0 at cycle %d\n", cycle)
|
||||
portbusy[cycle][0] = int(uop)
|
||||
}
|
||||
//fmt.Printf("P0 available\n")
|
||||
return cycle
|
||||
}
|
||||
if (uop&P1) != 0 && portbusy[cycle][1] == 0 {
|
||||
if commit {
|
||||
//fmt.Printf("; P1 at cycle %d\n", cycle)
|
||||
portbusy[cycle][1] = int(uop)
|
||||
}
|
||||
//fmt.Printf("P1 available\n")
|
||||
return cycle
|
||||
}
|
||||
|
||||
|
@ -779,18 +666,12 @@ func ScheduleUop(uop ExecutionPort, portbusy [][]int, cycle int, commit bool) in
|
|||
func ScheduleMop(mop *MacroOP, portbusy [][]int, cycle int, depcycle int, commit bool) int {
|
||||
|
||||
if mop.IsDependent() {
|
||||
//fmt.Printf("dependent\n")
|
||||
cycle = Max(cycle, depcycle)
|
||||
cycle = max(cycle, depcycle)
|
||||
}
|
||||
|
||||
if mop.IsEliminated() {
|
||||
if commit {
|
||||
//fmt.Printf("; (eliminated)\n")
|
||||
}
|
||||
return cycle
|
||||
} else if mop.IsSimple() {
|
||||
//fmt.Printf("simple 1\n")
|
||||
|
||||
return ScheduleUop(mop.GetUOP1(), portbusy, cycle, commit)
|
||||
} else {
|
||||
for ; cycle < CYCLE_MAP_SIZE; cycle++ { // since cycle is value based, its restored on return
|
||||
|
@ -812,14 +693,6 @@ func ScheduleMop(mop *MacroOP, portbusy [][]int, cycle int, depcycle int, commit
|
|||
return -1
|
||||
}
|
||||
|
||||
// Max returns the larger of x or y.
//
// It is a thin wrapper around the built-in max (Go 1.21+), kept so that
// existing callers of the exported name continue to compile.
func Max(x, y int) int {
	return max(x, y)
}
|
||||
|
||||
type Register struct {
|
||||
Value uint64
|
||||
Latency int
|
||||
|
@ -836,14 +709,12 @@ func (sins *SuperScalarInstruction) SelectSource(preAllocatedAvailableRegisters
|
|||
available_registers := preAllocatedAvailableRegisters[:0]
|
||||
|
||||
for i := range Registers {
|
||||
//fmt.Printf("\nchecking s reg %d latency %d cycle %d", i, Registers[i].Latency, cycle)
|
||||
if Registers[i].Latency <= cycle {
|
||||
available_registers = append(available_registers, i)
|
||||
//fmt.Printf("available")
|
||||
}
|
||||
}
|
||||
|
||||
if len(available_registers) == 2 && sins.Name == "IADD_RS" {
|
||||
if len(available_registers) == 2 && sins.Opcode == S_IADD_RS {
|
||||
if available_registers[0] == RegisterNeedsDisplacement || available_registers[1] == RegisterNeedsDisplacement {
|
||||
sins.Src_Reg = RegisterNeedsDisplacement
|
||||
sins.OpGroupPar = sins.Src_Reg
|
||||
|
@ -886,23 +757,17 @@ func selectRegister(available_registers []int, gen *Blake2Generator, reg *int) b
|
|||
|
||||
if len(available_registers) > 1 {
|
||||
tmp := gen.GetUint32()
|
||||
// fmt.Printf("GetUint32 %d len %d \n", tmp,uint32(len(available_registers)))
|
||||
|
||||
index = int(tmp % uint32(len(available_registers)))
|
||||
} else {
|
||||
index = 0
|
||||
}
|
||||
//fmt.Printf("reg index %d\n", index)
|
||||
*reg = available_registers[index] // availableRegisters[index];
|
||||
*reg = available_registers[index]
|
||||
return true
|
||||
}
|
||||
|
||||
const Mask = CacheSize/CacheLineSize - 1
|
||||
|
||||
func getMixBlock(register_value uint64, memory []byte) uint64 {
|
||||
return (register_value * Mask) * CacheLineSize
|
||||
}
|
||||
|
||||
// executeSuperscalar execute the superscalar program
|
||||
func executeSuperscalar(p []SuperScalarInstruction, r *RegisterLine) {
|
||||
|
||||
|
|
5
vm.go
5
vm.go
|
@ -91,8 +91,6 @@ const HIGH = 1
|
|||
// calculate hash based on input
|
||||
func (vm *VM) Run(input_hash [64]byte) {
|
||||
|
||||
//fmt.Printf("%x \n", input_hash)
|
||||
|
||||
aes.FillAes4Rx4(input_hash, vm.buffer[:])
|
||||
|
||||
for i := range vm.entropy {
|
||||
|
@ -122,8 +120,6 @@ func (vm *VM) Run(input_hash [64]byte) {
|
|||
vm.config.eMask[LOW] = getFloatMask(vm.entropy[14])
|
||||
vm.config.eMask[HIGH] = getFloatMask(vm.entropy[15])
|
||||
|
||||
//fmt.Printf("prog %x entropy 0 %x %f \n", vm.buffer[:32], vm.entropy[0], vm.reg.a[0][HIGH])
|
||||
|
||||
vm.Compile_TO_Bytecode()
|
||||
|
||||
spAddr0 := vm.mem.mx
|
||||
|
@ -240,7 +236,6 @@ func (vm *VM) CalculateHash(input []byte, output *[32]byte) {
|
|||
}
|
||||
|
||||
hash512.Sum(tempHash[:0])
|
||||
//fmt.Printf("%d temphash %x\n", chain, tempHash)
|
||||
}
|
||||
|
||||
// final loop executes here
|
||||
|
|
|
@ -149,9 +149,6 @@ func (vm *VM) Compile_TO_Bytecode() {
|
|||
ibc.src = src
|
||||
switch opcode {
|
||||
case 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15: // 16 frequency
|
||||
|
||||
// ibc.Opcode = VM_NOP; break; replace opcode by nop for testing
|
||||
// fmt.Printf("VM_IADD_RS %d\n", opcode)
|
||||
ibc.Opcode = VM_IADD_RS
|
||||
ibc.idst = &vm.reg.r[dst]
|
||||
if dst != RegisterNeedsDisplacement {
|
||||
|
@ -166,7 +163,6 @@ func (vm *VM) Compile_TO_Bytecode() {
|
|||
registerUsage[dst] = i
|
||||
|
||||
case 16, 17, 18, 19, 20, 21, 22: // 7
|
||||
//fmt.Printf("IADD_M opcode %d\n", opcode)
|
||||
ibc.Opcode = VM_IADD_M
|
||||
ibc.idst = &vm.reg.r[dst]
|
||||
ibc.imm = signExtend2sCompl(instr.IMM())
|
||||
|
@ -183,7 +179,6 @@ func (vm *VM) Compile_TO_Bytecode() {
|
|||
}
|
||||
registerUsage[dst] = i
|
||||
case 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38: // 16
|
||||
//fmt.Printf("ISUB_R opcode %d\n", opcode)
|
||||
ibc.Opcode = VM_ISUB_R
|
||||
ibc.idst = &vm.reg.r[dst]
|
||||
|
||||
|
@ -196,7 +191,6 @@ func (vm *VM) Compile_TO_Bytecode() {
|
|||
}
|
||||
registerUsage[dst] = i
|
||||
case 39, 40, 41, 42, 43, 44, 45: // 7
|
||||
//fmt.Printf("ISUB_M opcode %d\n", opcode)
|
||||
ibc.Opcode = VM_ISUB_M
|
||||
ibc.idst = &vm.reg.r[dst]
|
||||
ibc.imm = signExtend2sCompl(instr.IMM())
|
||||
|
@ -213,8 +207,6 @@ func (vm *VM) Compile_TO_Bytecode() {
|
|||
}
|
||||
registerUsage[dst] = i
|
||||
case 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61: // 16
|
||||
|
||||
//fmt.Printf("IMUL_R opcode %d\n", opcode)
|
||||
ibc.Opcode = VM_IMUL_R
|
||||
ibc.idst = &vm.reg.r[dst]
|
||||
|
||||
|
@ -227,8 +219,6 @@ func (vm *VM) Compile_TO_Bytecode() {
|
|||
}
|
||||
registerUsage[dst] = i
|
||||
case 62, 63, 64, 65: //4
|
||||
|
||||
//fmt.Printf("IMUL_M opcode %d\n", opcode)
|
||||
ibc.Opcode = VM_IMUL_M
|
||||
ibc.idst = &vm.reg.r[dst]
|
||||
ibc.imm = signExtend2sCompl(instr.IMM())
|
||||
|
@ -245,14 +235,11 @@ func (vm *VM) Compile_TO_Bytecode() {
|
|||
}
|
||||
registerUsage[dst] = i
|
||||
case 66, 67, 68, 69: //4
|
||||
|
||||
//fmt.Printf("IMULH_R opcode %d\n", opcode)
|
||||
ibc.Opcode = VM_IMULH_R
|
||||
ibc.idst = &vm.reg.r[dst]
|
||||
ibc.isrc = &vm.reg.r[src]
|
||||
registerUsage[dst] = i
|
||||
case 70: //1
|
||||
//fmt.Printf("IMULH_M opcode %d\n", opcode)
|
||||
ibc.Opcode = VM_IMULH_M
|
||||
ibc.idst = &vm.reg.r[dst]
|
||||
ibc.imm = signExtend2sCompl(instr.IMM())
|
||||
|
@ -269,14 +256,11 @@ func (vm *VM) Compile_TO_Bytecode() {
|
|||
}
|
||||
registerUsage[dst] = i
|
||||
case 71, 72, 73, 74: //4
|
||||
//fmt.Printf("ISMULH_R opcode %d\n", opcode)
|
||||
ibc.Opcode = VM_ISMULH_R
|
||||
ibc.idst = &vm.reg.r[dst]
|
||||
ibc.isrc = &vm.reg.r[src]
|
||||
registerUsage[dst] = i
|
||||
case 75: //1
|
||||
//fmt.Printf("ISMULH_M opcode %d\n", opcode)
|
||||
|
||||
ibc.Opcode = VM_ISMULH_M
|
||||
ibc.idst = &vm.reg.r[dst]
|
||||
ibc.imm = signExtend2sCompl(instr.IMM())
|
||||
|
@ -293,8 +277,6 @@ func (vm *VM) Compile_TO_Bytecode() {
|
|||
}
|
||||
registerUsage[dst] = i
|
||||
case 76, 77, 78, 79, 80, 81, 82, 83: // 8
|
||||
|
||||
//fmt.Printf("IMUL_RCP opcode %d\n", opcode)
|
||||
divisor := instr.IMM()
|
||||
if !isZeroOrPowerOf2(divisor) {
|
||||
ibc.Opcode = VM_IMUL_R
|
||||
|
@ -307,14 +289,10 @@ func (vm *VM) Compile_TO_Bytecode() {
|
|||
}
|
||||
|
||||
case 84, 85: //2
|
||||
//fmt.Printf("INEG_R opcode %d\n", opcode)
|
||||
|
||||
ibc.Opcode = VM_INEG_R
|
||||
ibc.idst = &vm.reg.r[dst]
|
||||
registerUsage[dst] = i
|
||||
case 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100: //15
|
||||
|
||||
//fmt.Printf("IXOR_R opcode %d\n", opcode)
|
||||
ibc.Opcode = VM_IXOR_R
|
||||
ibc.idst = &vm.reg.r[dst]
|
||||
|
||||
|
@ -327,7 +305,6 @@ func (vm *VM) Compile_TO_Bytecode() {
|
|||
}
|
||||
registerUsage[dst] = i
|
||||
case 101, 102, 103, 104, 105: //5
|
||||
//fmt.Printf("IXOR_M opcode %d\n", opcode)
|
||||
ibc.Opcode = VM_IXOR_M
|
||||
ibc.idst = &vm.reg.r[dst]
|
||||
ibc.imm = signExtend2sCompl(instr.IMM())
|
||||
|
@ -344,8 +321,6 @@ func (vm *VM) Compile_TO_Bytecode() {
|
|||
}
|
||||
registerUsage[dst] = i
|
||||
case 106, 107, 108, 109, 110, 111, 112, 113: //8
|
||||
|
||||
//fmt.Printf("IROR_R opcode %d\n", opcode)
|
||||
ibc.Opcode = VM_IROR_R
|
||||
ibc.idst = &vm.reg.r[dst]
|
||||
|
||||
|
@ -358,8 +333,6 @@ func (vm *VM) Compile_TO_Bytecode() {
|
|||
}
|
||||
registerUsage[dst] = i
|
||||
case 114, 115: // 2 IROL_R
|
||||
|
||||
//fmt.Printf("IROL_R opcode %d\n", opcode)
|
||||
ibc.Opcode = VM_IROL_R
|
||||
ibc.idst = &vm.reg.r[dst]
|
||||
|
||||
|
@ -373,8 +346,6 @@ func (vm *VM) Compile_TO_Bytecode() {
|
|||
registerUsage[dst] = i
|
||||
|
||||
case 116, 117, 118, 119: //4
|
||||
|
||||
//fmt.Printf("ISWAP_R opcode %d\n", opcode)
|
||||
if src != dst {
|
||||
ibc.Opcode = VM_ISWAP_R
|
||||
ibc.idst = &vm.reg.r[dst]
|
||||
|
@ -388,8 +359,6 @@ func (vm *VM) Compile_TO_Bytecode() {
|
|||
|
||||
// below are floating point instructions
|
||||
case 120, 121, 122, 123: // 4
|
||||
|
||||
//fmt.Printf("FSWAP_R opcode %d\n", opcode)
|
||||
ibc.Opcode = VM_FSWAP_R
|
||||
if dst < REGISTERCOUNTFLT {
|
||||
ibc.fdst = &vm.reg.f[dst]
|
||||
|
@ -397,8 +366,6 @@ func (vm *VM) Compile_TO_Bytecode() {
|
|||
ibc.fdst = &vm.reg.e[dst-REGISTERCOUNTFLT]
|
||||
}
|
||||
case 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139: //16
|
||||
|
||||
//fmt.Printf("FADD_R opcode %d\n", opcode)
|
||||
dst := instr.Dst() % REGISTERCOUNTFLT // bit shift optimization
|
||||
src := instr.Src() % REGISTERCOUNTFLT
|
||||
ibc.Opcode = VM_FADD_R
|
||||
|
@ -406,8 +373,6 @@ func (vm *VM) Compile_TO_Bytecode() {
|
|||
ibc.fsrc = &vm.reg.a[src]
|
||||
|
||||
case 140, 141, 142, 143, 144: //5
|
||||
|
||||
//fmt.Printf("FADD_M opcode %d\n", opcode)
|
||||
dst := instr.Dst() % REGISTERCOUNTFLT // bit shift optimization
|
||||
ibc.Opcode = VM_FADD_M
|
||||
ibc.fdst = &vm.reg.f[dst]
|
||||
|
@ -420,16 +385,12 @@ func (vm *VM) Compile_TO_Bytecode() {
|
|||
ibc.imm = signExtend2sCompl(instr.IMM())
|
||||
|
||||
case 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160: //16
|
||||
|
||||
//fmt.Printf("FSUB_R opcode %d\n", opcode)
|
||||
dst := instr.Dst() % REGISTERCOUNTFLT // bit shift optimization
|
||||
src := instr.Src() % REGISTERCOUNTFLT
|
||||
ibc.Opcode = VM_FSUB_R
|
||||
ibc.fdst = &vm.reg.f[dst]
|
||||
ibc.fsrc = &vm.reg.a[src]
|
||||
case 161, 162, 163, 164, 165: //5
|
||||
|
||||
//fmt.Printf("FSUB_M opcode %d\n", opcode)
|
||||
dst := instr.Dst() % REGISTERCOUNTFLT // bit shift optimization
|
||||
ibc.Opcode = VM_FSUB_M
|
||||
ibc.fdst = &vm.reg.f[dst]
|
||||
|
@ -442,22 +403,16 @@ func (vm *VM) Compile_TO_Bytecode() {
|
|||
ibc.imm = signExtend2sCompl(instr.IMM())
|
||||
|
||||
case 166, 167, 168, 169, 170, 171: //6
|
||||
|
||||
//fmt.Printf("FSCAL_R opcode %d\n", opcode)
|
||||
dst := instr.Dst() % REGISTERCOUNTFLT // bit shift optimization
|
||||
ibc.Opcode = VM_FSCAL_R
|
||||
ibc.fdst = &vm.reg.f[dst]
|
||||
case 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203: //32
|
||||
|
||||
//fmt.Printf("FMUL_R opcode %d\n", opcode)
|
||||
dst := instr.Dst() % REGISTERCOUNTFLT // bit shift optimization
|
||||
src := instr.Src() % REGISTERCOUNTFLT
|
||||
ibc.Opcode = VM_FMUL_R
|
||||
ibc.fdst = &vm.reg.e[dst]
|
||||
ibc.fsrc = &vm.reg.a[src]
|
||||
case 204, 205, 206, 207: //4
|
||||
|
||||
//fmt.Printf("FDIV_M opcode %d\n", opcode)
|
||||
dst := instr.Dst() % REGISTERCOUNTFLT // bit shift optimization
|
||||
ibc.Opcode = VM_FDIV_M
|
||||
ibc.fdst = &vm.reg.e[dst]
|
||||
|
@ -469,14 +424,11 @@ func (vm *VM) Compile_TO_Bytecode() {
|
|||
}
|
||||
ibc.imm = signExtend2sCompl(instr.IMM())
|
||||
case 208, 209, 210, 211, 212, 213: //6
|
||||
//fmt.Printf("FSQRT_R opcode %d\n", opcode)
|
||||
dst := instr.Dst() % REGISTERCOUNTFLT // bit shift optimization
|
||||
ibc.Opcode = VM_FSQRT_R
|
||||
ibc.fdst = &vm.reg.e[dst]
|
||||
|
||||
case 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238: //25 // CBRANCH and CFROUND are interchanged
|
||||
|
||||
//fmt.Printf("CBRANCH opcode %d\n", opcode)
|
||||
ibc.Opcode = VM_CBRANCH
|
||||
reg := instr.Dst() % REGISTERSCOUNT
|
||||
ibc.isrc = &vm.reg.r[reg]
|
||||
|
@ -494,15 +446,11 @@ func (vm *VM) Compile_TO_Bytecode() {
|
|||
}
|
||||
|
||||
case 239: //1
|
||||
// ibc.Opcode = VM_NOP; break; // not supported
|
||||
//fmt.Printf("CFROUND opcode %d\n", opcode)
|
||||
ibc.Opcode = VM_CFROUND
|
||||
ibc.isrc = &vm.reg.r[src]
|
||||
ibc.imm = uint64(instr.IMM() & 63)
|
||||
|
||||
case 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255: //16
|
||||
// ibc.Opcode = VM_NOP; break;
|
||||
//fmt.Printf("ISTORE opcode %d\n", opcode)
|
||||
ibc.Opcode = VM_ISTORE
|
||||
ibc.idst = &vm.reg.r[dst]
|
||||
ibc.isrc = &vm.reg.r[src]
|
||||
|
|
Loading…
Reference in a new issue