General cleanup, improve load32 and dataset execution
This commit is contained in:
parent
244cff31f9
commit
1bb1da8bbc
|
@ -30,7 +30,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
package aes
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"git.gammaspectra.live/P2Pool/go-randomx/v2/keys"
|
||||
"unsafe"
|
||||
)
|
||||
|
@ -48,20 +47,20 @@ import (
|
|||
//
|
||||
// Hashing throughput: >20 GiB/s per CPU core with hardware AES
|
||||
func HashAes1Rx4(input []byte, output *[64]byte) {
|
||||
if len(input)%64 != 0 {
|
||||
panic("unsupported")
|
||||
}
|
||||
|
||||
// states are copied
|
||||
states := keys.AesHash1R_State
|
||||
|
||||
var in [4][4]uint32
|
||||
for input_ptr := 0; input_ptr < len(input); input_ptr += 64 {
|
||||
for i := 0; i < 63; i += 4 { // load 64 bytes
|
||||
in[i/16][(i%16)/4] = binary.LittleEndian.Uint32(input[input_ptr+i:])
|
||||
}
|
||||
in := (*[4][4]uint32)(unsafe.Pointer(unsafe.SliceData(input[input_ptr:])))
|
||||
|
||||
soft_aesenc(&states[0], &in[0])
|
||||
soft_aesdec(&states[1], &in[1])
|
||||
soft_aesenc(&states[2], &in[2])
|
||||
soft_aesdec(&states[3], &in[3])
|
||||
|
||||
}
|
||||
|
||||
soft_aesenc(&states[0], &keys.AesHash1R_XKeys[0])
|
||||
|
@ -74,11 +73,7 @@ func HashAes1Rx4(input []byte, output *[64]byte) {
|
|||
soft_aesenc(&states[2], &keys.AesHash1R_XKeys[1])
|
||||
soft_aesdec(&states[3], &keys.AesHash1R_XKeys[1])
|
||||
|
||||
// write back to state
|
||||
for i := 0; i < 63; i += 4 {
|
||||
binary.LittleEndian.PutUint32(output[i:], states[i/16][(i%16)/4])
|
||||
}
|
||||
|
||||
copy(output[:], (*[64]byte)(unsafe.Pointer(&states))[:])
|
||||
}
|
||||
|
||||
// FillAes1Rx4
|
||||
|
@ -110,15 +105,17 @@ func FillAes1Rx4(state *[64]byte, output []byte) {
|
|||
}
|
||||
|
||||
// FillAes4Rx4 is used to generate the final program
|
||||
func FillAes4Rx4(state *[64]byte, output []byte) {
|
||||
|
||||
var states [4][4]uint32
|
||||
for i := 0; i < 63; i += 4 {
|
||||
states[i/16][(i%16)/4] = binary.LittleEndian.Uint32(state[i:])
|
||||
func FillAes4Rx4(state [64]byte, output []byte) {
|
||||
if len(output)%len(state) != 0 {
|
||||
panic("unsupported")
|
||||
}
|
||||
|
||||
outptr := 0
|
||||
for ; outptr < len(output); outptr += 64 {
|
||||
// state is copied by the caller (it is passed by value)
|
||||
|
||||
// Copy state
|
||||
states := (*[4][4]uint32)(unsafe.Pointer(&state))
|
||||
|
||||
for outptr := 0; outptr < len(output); outptr += len(state) {
|
||||
soft_aesdec(&states[0], &keys.AesGenerator4R_Keys[0])
|
||||
soft_aesenc(&states[1], &keys.AesGenerator4R_Keys[0])
|
||||
soft_aesdec(&states[2], &keys.AesGenerator4R_Keys[4])
|
||||
|
@ -139,11 +136,7 @@ func FillAes4Rx4(state *[64]byte, output []byte) {
|
|||
soft_aesdec(&states[2], &keys.AesGenerator4R_Keys[7])
|
||||
soft_aesenc(&states[3], &keys.AesGenerator4R_Keys[7])
|
||||
|
||||
// store bytes to output buffer
|
||||
for i := 0; i < 63; i += 4 {
|
||||
binary.LittleEndian.PutUint32(output[outptr+i:], states[i/16][(i%16)/4])
|
||||
}
|
||||
|
||||
copy(output[outptr:], state[:])
|
||||
}
|
||||
|
||||
}
|
51
cache.go
51
cache.go
|
@ -34,6 +34,10 @@ func Randomx_alloc_cache(flags uint64) *Randomx_Cache {
|
|||
}
|
||||
}
|
||||
|
||||
// HasJIT reports whether the RANDOMX_FLAG_JIT bit is set in cache.Flags and
// the first entry of cache.JitPrograms is non-nil.
func (cache *Randomx_Cache) HasJIT() bool {
|
||||
return cache.Flags&RANDOMX_FLAG_JIT > 0 && cache.JitPrograms[0] != nil
|
||||
}
|
||||
|
||||
func (cache *Randomx_Cache) VM_Initialize() *VM {
|
||||
|
||||
return &VM{
|
||||
|
@ -102,34 +106,45 @@ func (cache *Randomx_Cache) InitDatasetItem(rl *RegisterLine, itemNumber uint64)
|
|||
rl[6] = rl[0] ^ keys.SuperScalar_Constants[6]
|
||||
rl[7] = rl[0] ^ keys.SuperScalar_Constants[7]
|
||||
|
||||
if cache.JitPrograms[0] != nil {
|
||||
for i := 0; i < RANDOMX_CACHE_ACCESSES; i++ {
|
||||
mix := cache.GetMixBlock(registerValue)
|
||||
for i := 0; i < RANDOMX_CACHE_ACCESSES; i++ {
|
||||
mix := cache.GetMixBlock(registerValue)
|
||||
|
||||
cache.JitPrograms[i].Execute(rl)
|
||||
program := cache.Programs[i]
|
||||
|
||||
for q := range rl {
|
||||
rl[q] ^= mix[q]
|
||||
}
|
||||
|
||||
registerValue = rl[cache.Programs[i].AddressRegister()]
|
||||
executeSuperscalar(program.Program(), rl)
|
||||
|
||||
for q := range rl {
|
||||
rl[q] ^= mix[q]
|
||||
}
|
||||
} else {
|
||||
for i := 0; i < RANDOMX_CACHE_ACCESSES; i++ {
|
||||
mix := cache.GetMixBlock(registerValue)
|
||||
|
||||
program := cache.Programs[i]
|
||||
registerValue = rl[program.AddressRegister()]
|
||||
|
||||
executeSuperscalar(program.Program(), rl)
|
||||
}
|
||||
}
|
||||
|
||||
for q := range rl {
|
||||
rl[q] ^= mix[q]
|
||||
}
|
||||
func (cache *Randomx_Cache) InitDatasetItemJIT(rl *RegisterLine, itemNumber uint64) {
|
||||
registerValue := itemNumber
|
||||
|
||||
registerValue = rl[program.AddressRegister()]
|
||||
rl[0] = (itemNumber + 1) * keys.SuperScalar_Constants[0]
|
||||
rl[1] = rl[0] ^ keys.SuperScalar_Constants[1]
|
||||
rl[2] = rl[0] ^ keys.SuperScalar_Constants[2]
|
||||
rl[3] = rl[0] ^ keys.SuperScalar_Constants[3]
|
||||
rl[4] = rl[0] ^ keys.SuperScalar_Constants[4]
|
||||
rl[5] = rl[0] ^ keys.SuperScalar_Constants[5]
|
||||
rl[6] = rl[0] ^ keys.SuperScalar_Constants[6]
|
||||
rl[7] = rl[0] ^ keys.SuperScalar_Constants[7]
|
||||
|
||||
for i := 0; i < RANDOMX_CACHE_ACCESSES; i++ {
|
||||
mix := cache.GetMixBlock(registerValue)
|
||||
|
||||
cache.JitPrograms[i].Execute(rl)
|
||||
|
||||
for q := range rl {
|
||||
rl[q] ^= mix[q]
|
||||
}
|
||||
|
||||
registerValue = rl[cache.Programs[i].AddressRegister()]
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -85,6 +85,7 @@ const ScratchpadL1Mask16 = (ScratchpadL1/2 - 1) * 16
|
|||
const ScratchpadL2Mask16 = (ScratchpadL2/2 - 1) * 16
|
||||
const ScratchpadL3Mask = (ScratchpadL3 - 1) * 8
|
||||
const ScratchpadL3Mask64 = (ScratchpadL3/8 - 1) * 64
|
||||
|
||||
const CONDITIONOFFSET = RANDOMX_JUMP_OFFSET
|
||||
const CONDITIONMASK = ((1 << RANDOMX_JUMP_BITS) - 1)
|
||||
const STOREL3CONDITION = 14
|
||||
|
|
|
@ -10,7 +10,11 @@ func (d *Randomx_DatasetLight) PrefetchDataset(address uint64) {
|
|||
}
|
||||
|
||||
func (d *Randomx_DatasetLight) ReadDataset(address uint64, r, cache *RegisterLine) {
|
||||
d.Cache.InitDatasetItem(cache, address/CacheLineSize)
|
||||
if d.Cache.HasJIT() {
|
||||
d.Cache.InitDatasetItemJIT(cache, address/CacheLineSize)
|
||||
} else {
|
||||
d.Cache.InitDatasetItem(cache, address/CacheLineSize)
|
||||
}
|
||||
|
||||
for i := range r {
|
||||
r[i] ^= cache[i]
|
||||
|
|
|
@ -8,6 +8,9 @@ import (
|
|||
)
|
||||
|
||||
func (f ProgramFunc) Execute(rl *RegisterLine) {
|
||||
if f == nil {
|
||||
panic("program is nil")
|
||||
}
|
||||
memoryPtr := &f
|
||||
fun := *(*func(rl *RegisterLine))(unsafe.Pointer(&memoryPtr))
|
||||
|
||||
|
|
45
vm.go
45
vm.go
|
@ -44,10 +44,10 @@ type REG struct {
|
|||
}
|
||||
|
||||
type VM struct {
|
||||
State_start [64]byte
|
||||
buffer [RANDOMX_PROGRAM_SIZE*8 + 16*8]byte // first 128 bytes are entropy below rest are program bytes
|
||||
Prog []byte
|
||||
ScratchPad [ScratchpadSize]byte
|
||||
StateStart [64]byte
|
||||
buffer [RANDOMX_PROGRAM_SIZE*8 + 16*8]byte // first 128 bytes are entropy below rest are program bytes
|
||||
Prog []byte
|
||||
ScratchPad [ScratchpadSize]byte
|
||||
|
||||
ByteCode [RANDOMX_PROGRAM_SIZE]InstructionByteCode
|
||||
|
||||
|
@ -71,8 +71,8 @@ func MaskRegisterExponentMantissa(f float64, mode uint64) float64 {
|
|||
}
|
||||
|
||||
type Config struct {
|
||||
eMask [2]uint64
|
||||
readReg0, readReg1, readReg2, readReg3 uint64
|
||||
eMask [2]uint64
|
||||
readReg [4]uint64
|
||||
}
|
||||
|
||||
type REGISTER_FILE struct {
|
||||
|
@ -82,15 +82,14 @@ type REGISTER_FILE struct {
|
|||
a [4][2]float64
|
||||
}
|
||||
type MemoryRegisters struct {
|
||||
mx, ma uint64 //addr_t mx, ma;
|
||||
mempry uint64 // uint8_t* memory = nullptr;
|
||||
mx, ma uint64
|
||||
}
|
||||
|
||||
const LOW = 0
|
||||
const HIGH = 1
|
||||
|
||||
// calculate hash based on input
|
||||
func (vm *VM) Run(input_hash *[64]byte) {
|
||||
func (vm *VM) Run(input_hash [64]byte) {
|
||||
|
||||
//fmt.Printf("%x \n", input_hash)
|
||||
|
||||
|
@ -112,14 +111,13 @@ func (vm *VM) Run(input_hash *[64]byte) {
|
|||
|
||||
vm.mem.ma = vm.entropy[8] & CacheLineAlignMask
|
||||
vm.mem.mx = vm.entropy[10]
|
||||
|
||||
addressRegisters := vm.entropy[12]
|
||||
vm.config.readReg0 = 0 + (addressRegisters & 1)
|
||||
addressRegisters >>= 1
|
||||
vm.config.readReg1 = 2 + (addressRegisters & 1)
|
||||
addressRegisters >>= 1
|
||||
vm.config.readReg2 = 4 + (addressRegisters & 1)
|
||||
addressRegisters >>= 1
|
||||
vm.config.readReg3 = 6 + (addressRegisters & 1)
|
||||
for i := range vm.config.readReg {
|
||||
vm.config.readReg[i] = uint64(i*2) + (addressRegisters & 1)
|
||||
addressRegisters >>= 1
|
||||
}
|
||||
|
||||
vm.datasetOffset = (vm.entropy[13] % (DATASETEXTRAITEMS + 1)) * CacheLineSize
|
||||
vm.config.eMask[LOW] = getFloatMask(vm.entropy[14])
|
||||
vm.config.eMask[HIGH] = getFloatMask(vm.entropy[15])
|
||||
|
@ -134,7 +132,7 @@ func (vm *VM) Run(input_hash *[64]byte) {
|
|||
var rlCache RegisterLine
|
||||
|
||||
for ic := 0; ic < RANDOMX_PROGRAM_ITERATIONS; ic++ {
|
||||
spMix := vm.reg.r[vm.config.readReg0] ^ vm.reg.r[vm.config.readReg1]
|
||||
spMix := vm.reg.r[vm.config.readReg[0]] ^ vm.reg.r[vm.config.readReg[1]]
|
||||
|
||||
spAddr0 ^= spMix
|
||||
spAddr0 &= ScratchpadL3Mask64
|
||||
|
@ -146,21 +144,20 @@ func (vm *VM) Run(input_hash *[64]byte) {
|
|||
}
|
||||
|
||||
for i := uint64(0); i < REGISTERCOUNTFLT; i++ {
|
||||
vm.reg.f[i][LOW] = vm.Load32F(spAddr1 + 8*i)
|
||||
vm.reg.f[i][HIGH] = vm.Load32F(spAddr1 + 8*i + 4)
|
||||
vm.reg.f[i] = vm.Load32FA(spAddr1 + 8*i)
|
||||
}
|
||||
|
||||
for i := uint64(0); i < REGISTERCOUNTFLT; i++ {
|
||||
vm.reg.e[i][LOW] = vm.Load32F(spAddr1 + 8*(i+REGISTERCOUNTFLT))
|
||||
vm.reg.e[i][HIGH] = vm.Load32F(spAddr1 + 8*(i+REGISTERCOUNTFLT) + 4)
|
||||
vm.reg.e[i] = vm.Load32FA(spAddr1 + 8*(i+REGISTERCOUNTFLT))
|
||||
|
||||
vm.reg.e[i][LOW] = MaskRegisterExponentMantissa(vm.reg.e[i][LOW], vm.config.eMask[LOW])
|
||||
vm.reg.e[i][HIGH] = MaskRegisterExponentMantissa(vm.reg.e[i][HIGH], vm.config.eMask[HIGH])
|
||||
}
|
||||
|
||||
// todo: pass register file directly!
|
||||
vm.InterpretByteCode()
|
||||
|
||||
vm.mem.mx ^= vm.reg.r[vm.config.readReg2] ^ vm.reg.r[vm.config.readReg3]
|
||||
vm.mem.mx ^= vm.reg.r[vm.config.readReg[2]] ^ vm.reg.r[vm.config.readReg[3]]
|
||||
vm.mem.mx &= CacheLineAlignMask
|
||||
|
||||
vm.Dataset.PrefetchDataset(vm.datasetOffset + vm.mem.mx)
|
||||
|
@ -214,7 +211,7 @@ func (vm *VM) CalculateHash(input []byte, output *[32]byte) {
|
|||
hash512, _ := blake2b.New512(nil)
|
||||
|
||||
for chain := 0; chain < RANDOMX_PROGRAM_COUNT-1; chain++ {
|
||||
vm.Run(&tempHash)
|
||||
vm.Run(tempHash)
|
||||
|
||||
hash512.Reset()
|
||||
for i := range vm.reg.r {
|
||||
|
@ -247,7 +244,7 @@ func (vm *VM) CalculateHash(input []byte, output *[32]byte) {
|
|||
}
|
||||
|
||||
// final loop executes here
|
||||
vm.Run(&tempHash)
|
||||
vm.Run(tempHash)
|
||||
|
||||
// now hash the scratch pad and place into register a
|
||||
aes.HashAes1Rx4(vm.ScratchPad[:], &tempHash)
|
||||
|
|
|
@ -30,11 +30,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
package randomx
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"git.gammaspectra.live/P2Pool/go-randomx/v2/asm"
|
||||
"math"
|
||||
"math/bits"
|
||||
"unsafe"
|
||||
)
|
||||
import "math"
|
||||
import "math/bits"
|
||||
import "encoding/binary"
|
||||
|
||||
//reference https://github.com/tevador/RandomX/blob/master/doc/specs.md#51-instruction-encoding
|
||||
|
@ -156,11 +156,11 @@ func (vm *VM) Compile_TO_Bytecode() {
|
|||
ibc.idst = &vm.reg.r[dst]
|
||||
if dst != RegisterNeedsDisplacement {
|
||||
ibc.isrc = &vm.reg.r[src]
|
||||
ibc.shift = uint16((instr.Mod() >> 2) % 4)
|
||||
ibc.shift = (instr.Mod() >> 2) % 4
|
||||
ibc.imm = 0
|
||||
} else {
|
||||
ibc.isrc = &vm.reg.r[src]
|
||||
ibc.shift = uint16((instr.Mod() >> 2) % 4)
|
||||
ibc.shift = (instr.Mod() >> 2) % 4
|
||||
ibc.imm = signExtend2sCompl(instr.IMM())
|
||||
}
|
||||
registerUsage[dst] = i
|
||||
|
@ -534,7 +534,7 @@ type InstructionByteCode struct {
|
|||
simm int64
|
||||
Opcode VM_Instruction_Type
|
||||
target int16
|
||||
shift uint16
|
||||
shift uint8
|
||||
memMask uint32
|
||||
/*
|
||||
union {
|
||||
|
@ -563,179 +563,105 @@ func (ibc *InstructionByteCode) getScratchpadAddress() uint64 {
|
|||
return (*ibc.isrc + ibc.imm) & uint64(ibc.memMask)
|
||||
}
|
||||
|
||||
func (vm *VM) Load64(addr uint64) uint64 {
|
||||
return binary.LittleEndian.Uint64(vm.ScratchPad[addr:])
|
||||
}
|
||||
func (vm *VM) Load32(addr uint64) uint32 {
|
||||
return binary.LittleEndian.Uint32(vm.ScratchPad[addr:])
|
||||
func (ibc *InstructionByteCode) getScratchpadDestAddress() uint64 {
|
||||
return (*ibc.idst + ibc.imm) & uint64(ibc.memMask)
|
||||
}
|
||||
|
||||
func (vm *VM) Load32F(addr uint64) float64 {
|
||||
return float64(int32(vm.Load32(addr)))
|
||||
func (vm *VM) Load64(addr uint64) uint64 {
|
||||
return *(*uint64)(unsafe.Pointer(&vm.ScratchPad[addr]))
|
||||
}
|
||||
func (vm *VM) Load32(addr uint64) uint32 {
|
||||
return *(*uint32)(unsafe.Pointer(&vm.ScratchPad[addr]))
|
||||
}
|
||||
|
||||
// Load32F reads two consecutive int32 values from vm.ScratchPad starting at
// byte offset addr and returns them converted to float64: lo is element LOW,
// hi is element HIGH.
//
// The read goes through an unsafe pointer cast, so the bytes are interpreted
// in host byte order, and only the index of the first byte (addr) is
// bounds-checked by the slice expression.
// NOTE(review): presumably callers guarantee addr+8 <= len(vm.ScratchPad) and
// a little-endian host — confirm.
func (vm *VM) Load32F(addr uint64) (lo, hi float64) {
|
||||
a := *(*[2]int32)(unsafe.Pointer(&vm.ScratchPad[addr]))
|
||||
return float64(a[LOW]), float64(a[HIGH])
|
||||
}
|
||||
|
||||
// Load32FA reads two consecutive int32 values from vm.ScratchPad starting at
// byte offset addr and returns them as a [2]float64, with index LOW holding
// the first value and index HIGH the second.
//
// The read goes through an unsafe pointer cast, so the bytes are interpreted
// in host byte order, and only the index of the first byte (addr) is
// bounds-checked by the slice expression.
// NOTE(review): presumably callers guarantee addr+8 <= len(vm.ScratchPad) and
// a little-endian host — confirm.
func (vm *VM) Load32FA(addr uint64) [2]float64 {
|
||||
a := *(*[2]int32)(unsafe.Pointer(&vm.ScratchPad[addr]))
|
||||
return [2]float64{float64(a[LOW]), float64(a[HIGH])}
|
||||
}
|
||||
|
||||
func (vm *VM) InterpretByteCode() {
|
||||
|
||||
for pc := 0; pc < RANDOMX_PROGRAM_SIZE; pc++ {
|
||||
|
||||
ibc := &vm.ByteCode[pc]
|
||||
//fmt.Printf("PCLOOP %d opcode %d %s dst %d src %d\n",pc,ibc.Opcode, Names[ibc.Opcode], ibc.dst, ibc.src)
|
||||
|
||||
switch ibc.Opcode {
|
||||
case VM_IADD_RS:
|
||||
|
||||
*ibc.idst += (*ibc.isrc << ibc.shift) + ibc.imm
|
||||
|
||||
//panic("VM_IADD_RS")
|
||||
case VM_IADD_M:
|
||||
*ibc.idst += vm.Load64(ibc.getScratchpadAddress())
|
||||
|
||||
//panic("VM_IADD_M")
|
||||
case VM_ISUB_R:
|
||||
*ibc.idst -= *ibc.isrc
|
||||
|
||||
//panic("VM_ISUB_R")
|
||||
|
||||
case VM_ISUB_M:
|
||||
|
||||
*ibc.idst -= vm.Load64(ibc.getScratchpadAddress())
|
||||
|
||||
//panic("VM_ISUB_M")
|
||||
case VM_IMUL_R: // also handles imul_rcp
|
||||
|
||||
case VM_IMUL_R:
|
||||
// also handles imul_rcp
|
||||
*ibc.idst *= *ibc.isrc
|
||||
|
||||
//panic("VM_IMUL_R")
|
||||
case VM_IMUL_M:
|
||||
*ibc.idst *= vm.Load64(ibc.getScratchpadAddress())
|
||||
|
||||
//panic("VM_IMUL_M")
|
||||
case VM_IMULH_R:
|
||||
|
||||
*ibc.idst, _ = bits.Mul64(*ibc.idst, *ibc.isrc)
|
||||
|
||||
// panic("VM_IMULH_R")
|
||||
case VM_IMULH_M:
|
||||
*ibc.idst, _ = bits.Mul64(*ibc.idst, vm.Load64(ibc.getScratchpadAddress()))
|
||||
// fmt.Printf("%x \n",*ibc.idst )
|
||||
// panic("VM_IMULH_M")
|
||||
case VM_ISMULH_R:
|
||||
*ibc.idst = uint64(smulh(int64(*ibc.idst), int64(*ibc.isrc)))
|
||||
// fmt.Printf("dst %x\n", *ibc.idst)
|
||||
// panic("VM_ISMULH_R")
|
||||
*ibc.idst = smulh(int64(*ibc.idst), int64(*ibc.isrc))
|
||||
case VM_ISMULH_M:
|
||||
*ibc.idst = uint64(smulh(int64(*ibc.idst), int64(vm.Load64(ibc.getScratchpadAddress()))))
|
||||
//fmt.Printf("%x \n",*ibc.idst )
|
||||
// panic("VM_ISMULH_M")
|
||||
*ibc.idst = smulh(int64(*ibc.idst), int64(vm.Load64(ibc.getScratchpadAddress())))
|
||||
case VM_INEG_R:
|
||||
*ibc.idst = (^(*ibc.idst)) + 1 // 2's complement negative
|
||||
|
||||
//panic("VM_INEG_R")
|
||||
case VM_IXOR_R:
|
||||
*ibc.idst ^= *ibc.isrc
|
||||
|
||||
case VM_IXOR_M:
|
||||
*ibc.idst ^= vm.Load64(ibc.getScratchpadAddress())
|
||||
|
||||
//panic("VM_IXOR_M")
|
||||
case VM_IROR_R:
|
||||
*ibc.idst = bits.RotateLeft64(*ibc.idst, 0-int(*ibc.isrc&63))
|
||||
|
||||
//panic("VM_IROR_R")
|
||||
|
||||
case VM_IROL_R:
|
||||
*ibc.idst = bits.RotateLeft64(*ibc.idst, int(*ibc.isrc&63))
|
||||
|
||||
case VM_ISWAP_R:
|
||||
*ibc.idst, *ibc.isrc = *ibc.isrc, *ibc.idst
|
||||
//fmt.Printf("%x %x\n",*ibc.idst, *ibc.isrc )
|
||||
//panic("VM_ISWAP_R")
|
||||
case VM_FSWAP_R:
|
||||
//TODO: could be F+E
|
||||
|
||||
ibc.fdst[HIGH], ibc.fdst[LOW] = ibc.fdst[LOW], ibc.fdst[HIGH]
|
||||
// fmt.Printf("%+v \n",ibc.fdst )
|
||||
// panic("VM_FSWAP_R")
|
||||
case VM_FADD_R:
|
||||
ibc.fdst[LOW] += ibc.fsrc[LOW]
|
||||
ibc.fdst[HIGH] += ibc.fsrc[HIGH]
|
||||
|
||||
//panic("VM_FADD_R")
|
||||
case VM_FADD_M:
|
||||
ibc.fdst[LOW] += vm.Load32F(ibc.getScratchpadAddress() + 0)
|
||||
ibc.fdst[HIGH] += vm.Load32F(ibc.getScratchpadAddress() + 4)
|
||||
|
||||
//panic("VM_FADD_M")
|
||||
lo, hi := vm.Load32F(ibc.getScratchpadAddress())
|
||||
ibc.fdst[LOW] += lo
|
||||
ibc.fdst[HIGH] += hi
|
||||
case VM_FSUB_R:
|
||||
ibc.fdst[LOW] -= ibc.fsrc[LOW]
|
||||
ibc.fdst[HIGH] -= ibc.fsrc[HIGH]
|
||||
|
||||
//fmt.Printf("fdst float %+v\n", ibc.fdst )
|
||||
//panic("VM_FSUB_R")
|
||||
case VM_FSUB_M:
|
||||
ibc.fdst[LOW] -= vm.Load32F(ibc.getScratchpadAddress() + 0)
|
||||
ibc.fdst[HIGH] -= vm.Load32F(ibc.getScratchpadAddress() + 4)
|
||||
|
||||
//panic("VM_FSUB_M")
|
||||
case VM_FSCAL_R: // no dependent on rounding modes
|
||||
//mask := math.Float64frombits(0x80F0000000000000)
|
||||
lo, hi := vm.Load32F(ibc.getScratchpadAddress())
|
||||
ibc.fdst[LOW] -= lo
|
||||
ibc.fdst[HIGH] -= hi
|
||||
case VM_FSCAL_R:
|
||||
// not dependent on rounding modes
|
||||
ibc.fdst[LOW] = math.Float64frombits(math.Float64bits(ibc.fdst[LOW]) ^ 0x80F0000000000000)
|
||||
ibc.fdst[HIGH] = math.Float64frombits(math.Float64bits(ibc.fdst[HIGH]) ^ 0x80F0000000000000)
|
||||
|
||||
//fmt.Printf("fdst float %+v\n", ibc.fdst )
|
||||
//panic("VM_FSCA_M")
|
||||
case VM_FMUL_R:
|
||||
ibc.fdst[LOW] *= ibc.fsrc[LOW]
|
||||
ibc.fdst[HIGH] *= ibc.fsrc[HIGH]
|
||||
|
||||
//panic("VM_FMUL_R")
|
||||
case VM_FDIV_M:
|
||||
ibc.fdst[LOW] /= MaskRegisterExponentMantissa(vm.Load32F(ibc.getScratchpadAddress()+0), vm.config.eMask[LOW])
|
||||
ibc.fdst[HIGH] /= MaskRegisterExponentMantissa(vm.Load32F(ibc.getScratchpadAddress()+4), vm.config.eMask[HIGH])
|
||||
|
||||
//panic("VM_FDIV_M")
|
||||
lo, hi := vm.Load32F(ibc.getScratchpadAddress())
|
||||
ibc.fdst[LOW] /= MaskRegisterExponentMantissa(lo, vm.config.eMask[LOW])
|
||||
ibc.fdst[HIGH] /= MaskRegisterExponentMantissa(hi, vm.config.eMask[HIGH])
|
||||
case VM_FSQRT_R:
|
||||
ibc.fdst[LOW] = math.Sqrt(ibc.fdst[LOW])
|
||||
ibc.fdst[HIGH] = math.Sqrt(ibc.fdst[HIGH])
|
||||
|
||||
// panic("VM_FSQRT")
|
||||
case VM_CBRANCH:
|
||||
//fmt.Printf("pc %d src %x imm %x\n",pc ,*ibc.isrc, ibc.imm)
|
||||
*ibc.isrc += ibc.imm
|
||||
//fmt.Printf("pc %d\n",pc)
|
||||
if (*ibc.isrc & uint64(ibc.memMask)) == 0 {
|
||||
pc = int(ibc.target)
|
||||
|
||||
}
|
||||
|
||||
// fmt.Printf("pc %d\n",pc)
|
||||
//panic("VM_CBRANCH")
|
||||
case VM_CFROUND:
|
||||
|
||||
tmp := (bits.RotateLeft64(*ibc.isrc, 0-int(ibc.imm))) % 4 // rotate right
|
||||
asm.SetRoundingMode(asm.RoundingMode(tmp))
|
||||
|
||||
//panic("round not implemented")
|
||||
//panic("VM_CFROUND")
|
||||
case VM_ISTORE:
|
||||
binary.LittleEndian.PutUint64(vm.ScratchPad[(*ibc.idst+ibc.imm)&uint64(ibc.memMask):], *ibc.isrc)
|
||||
|
||||
//panic("VM_ISTOREM")
|
||||
|
||||
case VM_NOP: // we do nothing
|
||||
|
||||
default:
|
||||
panic("instruction not implemented")
|
||||
|
||||
}
|
||||
/*fmt.Printf("REGS ")
|
||||
for j := 0; j <7;j++ {
|
||||
fmt.Printf("%16x, " , vm.reg.r[j])
|
||||
}
|
||||
fmt.Printf("\n")
|
||||
*/
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
var umm888_ = fmt.Sprintf("")
|
||||
|
|
Loading…
Reference in a new issue