Compare commits
2 commits
acfff4a4ad
...
9aa3631f37
Author | SHA1 | Date | |
---|---|---|---|
DataHoarder | 9aa3631f37 | ||
DataHoarder | 9826b7beb4 |
7
alignment.go
Normal file
7
alignment.go
Normal file
|
@ -0,0 +1,7 @@
|
|||
package randomx
|
||||
|
||||
// assertAlignedTo16 panics with "not aligned to 16" unless ptr is a multiple
// of 16, i.e. unless its four lowest bits are all zero.
func assertAlignedTo16(ptr uintptr) {
	const alignment = 16

	if ptr%alignment == 0 {
		return
	}
	panic("not aligned to 16")
}
|
|
@ -3,6 +3,7 @@ package randomx
|
|||
import (
|
||||
"errors"
|
||||
"sync"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
const DatasetSize = RANDOMX_DATASET_BASE_SIZE + RANDOMX_DATASET_EXTRA_SIZE
|
||||
|
@ -31,6 +32,7 @@ func NewDataset(flags Flags) (result *Dataset, err error) {
|
|||
|
||||
//todo: implement large pages, align allocation
|
||||
alignedMemory := make([]RegisterLine, DatasetItemCount)
|
||||
assertAlignedTo16(uintptr(unsafe.Pointer(unsafe.SliceData(alignedMemory))))
|
||||
|
||||
//todo: err on not large pages
|
||||
|
||||
|
|
|
@ -23,7 +23,7 @@ func NewHardAES() AES {
|
|||
return nil
|
||||
}
|
||||
|
||||
func (h hardAES) HashAes1Rx4(input []byte, output *[64]byte) {
|
||||
func (aes hardAES) HashAes1Rx4(input []byte, output *[64]byte) {
|
||||
if len(input)%len(output) != 0 {
|
||||
panic("unsupported")
|
||||
}
|
||||
|
@ -31,7 +31,7 @@ func (h hardAES) HashAes1Rx4(input []byte, output *[64]byte) {
|
|||
asm.HashAes1Rx4(&keys.AesHash1R_State, &keys.AesHash1R_XKeys, output, unsafe.SliceData(input), uint64(len(input)))
|
||||
}
|
||||
|
||||
func (h hardAES) FillAes1Rx4(state *[64]byte, output []byte) {
|
||||
func (aes hardAES) FillAes1Rx4(state *[64]byte, output []byte) {
|
||||
if len(output)%len(state) != 0 {
|
||||
panic("unsupported")
|
||||
}
|
||||
|
@ -42,7 +42,7 @@ func (h hardAES) FillAes1Rx4(state *[64]byte, output []byte) {
|
|||
runtime.KeepAlive(state)
|
||||
}
|
||||
|
||||
func (h hardAES) FillAes4Rx4(state [64]byte, output []byte) {
|
||||
func (aes hardAES) FillAes4Rx4(state [64]byte, output []byte) {
|
||||
if len(output)%len(state) != 0 {
|
||||
panic("unsupported")
|
||||
}
|
||||
|
@ -61,3 +61,9 @@ func (h hardAES) FillAes4Rx4(state [64]byte, output []byte) {
|
|||
copy(output[outptr:], state[:])
|
||||
}
|
||||
}
|
||||
|
||||
func (aes hardAES) HashAndFillAes1Rx4(scratchpad []byte, output *[64]byte, fillState *[64]byte) {
|
||||
//TODO
|
||||
aes.HashAes1Rx4(scratchpad, output)
|
||||
aes.FillAes1Rx4(fillState, scratchpad)
|
||||
}
|
||||
|
|
|
@ -28,6 +28,9 @@ type AES interface {
|
|||
// calls to this function.
|
||||
FillAes1Rx4(state *[64]byte, output []byte)
|
||||
|
||||
// HashAndFillAes1Rx4 hashes scratchpad into output, then refills scratchpad from fillState, in one sweep
|
||||
HashAndFillAes1Rx4(scratchpad []byte, output *[64]byte, fillState *[64]byte)
|
||||
|
||||
// FillAes4Rx4 used to generate final program
|
||||
//
|
||||
// 'state' is copied when calling
|
||||
|
|
|
@ -67,3 +67,9 @@ func (aes softAES) FillAes4Rx4(state [64]byte, output []byte) {
|
|||
copy(output[outptr:], state[:])
|
||||
}
|
||||
}
|
||||
|
||||
func (aes softAES) HashAndFillAes1Rx4(scratchpad []byte, output *[64]byte, fillState *[64]byte) {
|
||||
//TODO
|
||||
aes.HashAes1Rx4(scratchpad, output)
|
||||
aes.FillAes1Rx4(fillState, scratchpad)
|
||||
}
|
||||
|
|
|
@ -143,6 +143,60 @@ func Test_RandomXLight(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func Test_RandomXBatch(t *testing.T) {
|
||||
t.Parallel()
|
||||
for _, n := range []string{"softaes", "hardaes"} {
|
||||
t.Run(n, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
tFlags, skip := testFlags(t.Name(), 0)
|
||||
if skip {
|
||||
t.Skip("not supported on this platform")
|
||||
}
|
||||
|
||||
c := NewCache(tFlags)
|
||||
if c == nil {
|
||||
t.Fatal("nil cache")
|
||||
}
|
||||
defer func() {
|
||||
err := c.Close()
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
}()
|
||||
tests := Tests[1:4]
|
||||
|
||||
c.Init(tests[0].key)
|
||||
vm, err := NewVM(tFlags, c, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer func() {
|
||||
err := vm.Close()
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
}()
|
||||
|
||||
var outputHash [3][RANDOMX_HASH_SIZE]byte
|
||||
|
||||
vm.CalculateHashFirst(tests[0].input)
|
||||
vm.CalculateHashNext(tests[1].input, &outputHash[0])
|
||||
vm.CalculateHashNext(tests[2].input, &outputHash[1])
|
||||
vm.CalculateHashLast(&outputHash[2])
|
||||
|
||||
for i, test := range tests {
|
||||
outputHex := hex.EncodeToString(outputHash[i][:])
|
||||
|
||||
if outputHex != test.expected {
|
||||
t.Errorf("key=%v, input=%v", test.key, test.input)
|
||||
t.Errorf("expected=%s, actual=%s", test.expected, outputHex)
|
||||
t.FailNow()
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func Test_RandomXFull(t *testing.T) {
|
||||
if testing.Short() {
|
||||
t.Skip("Skipping full mode with -short")
|
||||
|
|
|
@ -23,9 +23,11 @@ TEXT ·superscalar_run(SB),$0-16
|
|||
CALL AX
|
||||
|
||||
|
||||
// todo: not supported by golang
|
||||
// prefetchw BYTE PTR [rsi]
|
||||
// PREFETCHW 0(SI)
|
||||
PREFETCHT0 0(SI)
|
||||
BYTE $0x0F
|
||||
BYTE $0x0D
|
||||
BYTE $0x0E
|
||||
|
||||
// move registers back to register line
|
||||
MOVQ R8, 0(SI)
|
||||
|
|
67
vm.go
67
vm.go
|
@ -39,7 +39,7 @@ import (
|
|||
import "golang.org/x/crypto/blake2b"
|
||||
|
||||
type VM struct {
|
||||
pad ScratchPad
|
||||
pad *ScratchPad
|
||||
|
||||
flags Flags
|
||||
|
||||
|
@ -48,7 +48,7 @@ type VM struct {
|
|||
|
||||
hashState [blake2b.Size]byte
|
||||
|
||||
registerFile RegisterFile
|
||||
registerFile *RegisterFile
|
||||
|
||||
AES aes.AES
|
||||
|
||||
|
@ -92,11 +92,16 @@ func NewVM(flags Flags, cache *Cache, dataset *Dataset) (*VM, error) {
|
|||
}
|
||||
|
||||
vm := &VM{
|
||||
Cache: cache,
|
||||
Dataset: dataset,
|
||||
flags: flags,
|
||||
Cache: cache,
|
||||
Dataset: dataset,
|
||||
flags: flags,
|
||||
pad: new(ScratchPad),
|
||||
registerFile: new(RegisterFile),
|
||||
}
|
||||
|
||||
assertAlignedTo16(uintptr(unsafe.Pointer(vm.pad)))
|
||||
assertAlignedTo16(uintptr(unsafe.Pointer(vm.registerFile)))
|
||||
|
||||
if flags.Has(RANDOMX_FLAG_HARD_AES) {
|
||||
vm.AES = aes.NewHardAES()
|
||||
}
|
||||
|
@ -128,7 +133,7 @@ func (vm *VM) run() {
|
|||
|
||||
// do more initialization before we run
|
||||
|
||||
reg := &vm.registerFile
|
||||
reg := vm.registerFile
|
||||
reg.Clear()
|
||||
|
||||
// initialize constant registers
|
||||
|
@ -178,7 +183,7 @@ func (vm *VM) run() {
|
|||
jitProgram = vm.program.generateCode(vm.jitProgram, &readReg)
|
||||
}
|
||||
|
||||
vm.jitProgram.ExecuteFull(reg, &vm.pad, &vm.Dataset.Memory()[datasetOffset/CacheLineSize], RANDOMX_PROGRAM_ITERATIONS, ma, mx, eMask)
|
||||
vm.jitProgram.ExecuteFull(reg, vm.pad, &vm.Dataset.Memory()[datasetOffset/CacheLineSize], RANDOMX_PROGRAM_ITERATIONS, ma, mx, eMask)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
@ -215,9 +220,9 @@ func (vm *VM) run() {
|
|||
// run the actual bytecode
|
||||
if jitProgram != nil {
|
||||
// light mode
|
||||
jitProgram.Execute(reg, &vm.pad, eMask)
|
||||
jitProgram.Execute(reg, vm.pad, eMask)
|
||||
} else {
|
||||
vm.program.Execute(reg, &vm.pad, eMask)
|
||||
vm.program.Execute(reg, vm.pad, eMask)
|
||||
}
|
||||
|
||||
mx ^= uint32(reg.R[readReg[2]] ^ reg.R[readReg[3]])
|
||||
|
@ -271,10 +276,10 @@ func (vm *VM) runLoops() {
|
|||
}
|
||||
|
||||
// always force a restore before startup
|
||||
ResetRoundingMode(&vm.registerFile)
|
||||
ResetRoundingMode(vm.registerFile)
|
||||
|
||||
// restore rounding mode at the end
|
||||
defer ResetRoundingMode(&vm.registerFile)
|
||||
defer ResetRoundingMode(vm.registerFile)
|
||||
|
||||
for chain := 0; chain < RANDOMX_PROGRAM_COUNT-1; chain++ {
|
||||
vm.run()
|
||||
|
@ -327,6 +332,46 @@ func (vm *VM) CalculateHash(input []byte, output *[RANDOMX_HASH_SIZE]byte) {
|
|||
*output = blake2b.Sum256(regMem[:])
|
||||
}
|
||||
|
||||
// CalculateHashFirst will begin a hash calculation.
|
||||
func (vm *VM) CalculateHashFirst(input []byte) {
|
||||
vm.hashState = blake2b.Sum512(input)
|
||||
|
||||
vm.initScratchpad(&vm.hashState)
|
||||
}
|
||||
|
||||
// CalculateHashNext will output the hash value of the previous input and begin the calculation of the next hash.
|
||||
func (vm *VM) CalculateHashNext(nextInput []byte, output *[RANDOMX_HASH_SIZE]byte) {
|
||||
vm.runLoops()
|
||||
|
||||
// now hash the scratch pad as it will act as register A
|
||||
vm.AES.HashAes1Rx4(vm.pad[:], &vm.hashState)
|
||||
|
||||
// Finish current hash and fill the scratchpad for the next hash at the same time
|
||||
regMem := vm.registerFile.Memory()
|
||||
vm.hashState = blake2b.Sum512(nextInput)
|
||||
// write hash onto register A
|
||||
vm.AES.HashAndFillAes1Rx4(vm.pad[:], (*[64]byte)(unsafe.Pointer(unsafe.SliceData(regMem[RegisterFileSize-RegistersCountFloat*2*8:]))), &vm.hashState)
|
||||
runtime.KeepAlive(regMem)
|
||||
|
||||
// write R, F, E, A registers
|
||||
*output = blake2b.Sum256(regMem[:])
|
||||
}
|
||||
|
||||
// CalculateHashLast will output the hash value of the previous input.
|
||||
func (vm *VM) CalculateHashLast(output *[RANDOMX_HASH_SIZE]byte) {
|
||||
vm.runLoops()
|
||||
|
||||
// now hash the scratch pad as it will act as register A
|
||||
vm.AES.HashAes1Rx4(vm.pad[:], &vm.hashState)
|
||||
|
||||
regMem := vm.registerFile.Memory()
|
||||
// write hash onto register A
|
||||
copy(regMem[RegisterFileSize-RegistersCountFloat*2*8:], vm.hashState[:])
|
||||
|
||||
// write R, F, E, A registers
|
||||
*output = blake2b.Sum256(regMem[:])
|
||||
}
|
||||
|
||||
// Close Releases all memory occupied by the structure.
|
||||
func (vm *VM) Close() error {
|
||||
if vm.jitProgram != nil {
|
||||
|
|
|
@ -86,17 +86,13 @@ xorpd xmm1, xmm5
|
|||
xorpd xmm2, xmm6
|
||||
xorpd xmm3, xmm7
|
||||
|
||||
movupd xmmword ptr [rcx+0], xmm0
|
||||
movupd xmmword ptr [rcx+16], xmm1
|
||||
movupd xmmword ptr [rcx+32], xmm2
|
||||
movupd xmmword ptr [rcx+48], xmm3
|
||||
;#movapd xmmword ptr [rcx+0], xmm0
|
||||
;#movapd xmmword ptr [rcx+16], xmm1
|
||||
;#movapd xmmword ptr [rcx+32], xmm2
|
||||
;#movapd xmmword ptr [rcx+48], xmm3
|
||||
;# aligned mode
|
||||
movapd xmmword ptr [rcx+0], xmm0
|
||||
movapd xmmword ptr [rcx+16], xmm1
|
||||
movapd xmmword ptr [rcx+32], xmm2
|
||||
movapd xmmword ptr [rcx+48], xmm3
|
||||
*/
|
||||
//var programLoopStore = []byte{0x59, 0x4C, 0x89, 0x01, 0x4C, 0x89, 0x49, 0x08, 0x4C, 0x89, 0x51, 0x10, 0x4C, 0x89, 0x59, 0x18, 0x4C, 0x89, 0x61, 0x20, 0x4C, 0x89, 0x69, 0x28, 0x4C, 0x89, 0x71, 0x30, 0x4C, 0x89, 0x79, 0x38, 0x59, 0x66, 0x0F, 0x57, 0xC4, 0x66, 0x0F, 0x57, 0xCD, 0x66, 0x0F, 0x57, 0xD6, 0x66, 0x0F, 0x57, 0xDF, 0x66, 0x0F, 0x29, 0x01, 0x66, 0x0F, 0x29, 0x49, 0x10, 0x66, 0x0F, 0x29, 0x51, 0x20, 0x66, 0x0F, 0x29, 0x59, 0x30}
|
||||
// programLoopStore is raw x86-64 machine code for the loop's register store
// sequence. Its final four XMM stores use opcode 66 0F 11, the unaligned
// movupd form from the assembly listing above, so the destination needs no
// particular alignment.
var programLoopStore = []byte{0x59, 0x4C, 0x89, 0x01, 0x4C, 0x89, 0x49, 0x08, 0x4C, 0x89, 0x51, 0x10, 0x4C, 0x89, 0x59, 0x18, 0x4C, 0x89, 0x61, 0x20, 0x4C, 0x89, 0x69, 0x28, 0x4C, 0x89, 0x71, 0x30, 0x4C, 0x89, 0x79, 0x38, 0x59, 0x66, 0x0F, 0x57, 0xC4, 0x66, 0x0F, 0x57, 0xCD, 0x66, 0x0F, 0x57, 0xD6, 0x66, 0x0F, 0x57, 0xDF, 0x66, 0x0F, 0x11, 0x01, 0x66, 0x0F, 0x11, 0x49, 0x10, 0x66, 0x0F, 0x11, 0x51, 0x20, 0x66, 0x0F, 0x11, 0x59, 0x30}
|
||||
// programLoopStoreAligned is byte-for-byte the same sequence as
// programLoopStore except that the final four XMM stores use opcode 66 0F 29,
// the movapd ("aligned mode") form from the assembly listing above. movapd
// faults on a memory operand that is not 16-byte aligned.
// NOTE(review): the store target is presumably the register file whose
// alignment is asserted in NewVM — confirm.
var programLoopStoreAligned = []byte{0x59, 0x4C, 0x89, 0x01, 0x4C, 0x89, 0x49, 0x08, 0x4C, 0x89, 0x51, 0x10, 0x4C, 0x89, 0x59, 0x18, 0x4C, 0x89, 0x61, 0x20, 0x4C, 0x89, 0x69, 0x28, 0x4C, 0x89, 0x71, 0x30, 0x4C, 0x89, 0x79, 0x38, 0x59, 0x66, 0x0F, 0x57, 0xC4, 0x66, 0x0F, 0x57, 0xCD, 0x66, 0x0F, 0x57, 0xD6, 0x66, 0x0F, 0x57, 0xDF, 0x66, 0x0F, 0x29, 0x01, 0x66, 0x0F, 0x29, 0x49, 0x10, 0x66, 0x0F, 0x29, 0x51, 0x20, 0x66, 0x0F, 0x29, 0x59, 0x30}
|
||||
|
||||
/*
|
||||
#define RANDOMX_SCRATCHPAD_L3 2097152
|
||||
|
@ -440,7 +436,7 @@ func (c *ByteCode) generateCode(program []byte, readReg *[4]uint64) []byte {
|
|||
program = append(program, 0xc0+byte(readReg[1]))
|
||||
//todo: prefetch scratchpad
|
||||
|
||||
program = append(program, programLoopStore...)
|
||||
program = append(program, programLoopStoreAligned...)
|
||||
|
||||
if BranchesWithin32B {
|
||||
branchBegin := uint32(len(program))
|
||||
|
|
|
@ -19,20 +19,20 @@ TEXT ·vm_run(SB),$8-40
|
|||
MOVQ (7*8)(AX), R15
|
||||
|
||||
// f0-f3
|
||||
VMOVUPD (8*8)(AX), X0
|
||||
VMOVUPD (10*8)(AX), X1
|
||||
VMOVUPD (12*8)(AX), X2
|
||||
VMOVUPD (14*8)(AX), X3
|
||||
VMOVAPD (8*8)(AX), X0
|
||||
VMOVAPD (10*8)(AX), X1
|
||||
VMOVAPD (12*8)(AX), X2
|
||||
VMOVAPD (14*8)(AX), X3
|
||||
// e0-e3
|
||||
VMOVUPD (16*8)(AX), X4
|
||||
VMOVUPD (18*8)(AX), X5
|
||||
VMOVUPD (20*8)(AX), X6
|
||||
VMOVUPD (22*8)(AX), X7
|
||||
VMOVAPD (16*8)(AX), X4
|
||||
VMOVAPD (18*8)(AX), X5
|
||||
VMOVAPD (20*8)(AX), X6
|
||||
VMOVAPD (22*8)(AX), X7
|
||||
// a0-a3
|
||||
VMOVUPD (24*8)(AX), X8
|
||||
VMOVUPD (26*8)(AX), X9
|
||||
VMOVUPD (28*8)(AX), X10
|
||||
VMOVUPD (30*8)(AX), X11
|
||||
VMOVAPD (24*8)(AX), X8
|
||||
VMOVAPD (26*8)(AX), X9
|
||||
VMOVAPD (28*8)(AX), X10
|
||||
VMOVAPD (30*8)(AX), X11
|
||||
|
||||
// mantissa mask
|
||||
//VMOVQ $0x00ffffffffffffff, $0x00ffffffffffffff, X13
|
||||
|
@ -62,7 +62,12 @@ TEXT ·vm_run(SB),$8-40
|
|||
// move register file back to registers
|
||||
MOVQ rf+0(FP), AX
|
||||
|
||||
PREFETCHT0 0(AX)
|
||||
// prefetchw BYTE PTR [rax]
|
||||
// PREFETCHW 0(AX)
|
||||
BYTE $0x0F
|
||||
BYTE $0x0D
|
||||
BYTE $0x08
|
||||
|
||||
// r0-r7
|
||||
MOVQ R8, (0*8)(AX)
|
||||
MOVQ R9, (1*8)(AX)
|
||||
|
@ -74,15 +79,15 @@ TEXT ·vm_run(SB),$8-40
|
|||
MOVQ R15, (7*8)(AX)
|
||||
|
||||
// f0-f3
|
||||
VMOVUPD X0, (8*8)(AX)
|
||||
VMOVUPD X1, (10*8)(AX)
|
||||
VMOVUPD X2, (12*8)(AX)
|
||||
VMOVUPD X3, (14*8)(AX)
|
||||
VMOVAPD X0, (8*8)(AX)
|
||||
VMOVAPD X1, (10*8)(AX)
|
||||
VMOVAPD X2, (12*8)(AX)
|
||||
VMOVAPD X3, (14*8)(AX)
|
||||
// e0-e3
|
||||
VMOVUPD X4, (16*8)(AX)
|
||||
VMOVUPD X5, (18*8)(AX)
|
||||
VMOVUPD X6, (20*8)(AX)
|
||||
VMOVUPD X7, (22*8)(AX)
|
||||
VMOVAPD X4, (16*8)(AX)
|
||||
VMOVAPD X5, (18*8)(AX)
|
||||
VMOVAPD X6, (20*8)(AX)
|
||||
VMOVAPD X7, (22*8)(AX)
|
||||
|
||||
// a0-a3 are constant, no need to move
|
||||
|
||||
|
@ -109,20 +114,20 @@ TEXT ·vm_run_full(SB),$32-64
|
|||
MOVQ (7*8)(AX), R15
|
||||
|
||||
// f0-f3
|
||||
VMOVUPD (8*8)(AX), X0
|
||||
VMOVUPD (10*8)(AX), X1
|
||||
VMOVUPD (12*8)(AX), X2
|
||||
VMOVUPD (14*8)(AX), X3
|
||||
VMOVAPD (8*8)(AX), X0
|
||||
VMOVAPD (10*8)(AX), X1
|
||||
VMOVAPD (12*8)(AX), X2
|
||||
VMOVAPD (14*8)(AX), X3
|
||||
// e0-e3
|
||||
VMOVUPD (16*8)(AX), X4
|
||||
VMOVUPD (18*8)(AX), X5
|
||||
VMOVUPD (20*8)(AX), X6
|
||||
VMOVUPD (22*8)(AX), X7
|
||||
VMOVAPD (16*8)(AX), X4
|
||||
VMOVAPD (18*8)(AX), X5
|
||||
VMOVAPD (20*8)(AX), X6
|
||||
VMOVAPD (22*8)(AX), X7
|
||||
// load constants a0-a3
|
||||
VMOVUPD (24*8)(AX), X8
|
||||
VMOVUPD (26*8)(AX), X9
|
||||
VMOVUPD (28*8)(AX), X10
|
||||
VMOVUPD (30*8)(AX), X11
|
||||
VMOVAPD (24*8)(AX), X8
|
||||
VMOVAPD (26*8)(AX), X9
|
||||
VMOVAPD (28*8)(AX), X10
|
||||
VMOVAPD (30*8)(AX), X11
|
||||
|
||||
//TODO: rest of init
|
||||
|
||||
|
@ -166,7 +171,13 @@ TEXT ·vm_run_full(SB),$32-64
|
|||
// move register file back to registers
|
||||
MOVQ rf+0(FP), AX
|
||||
|
||||
PREFETCHT0 0(AX)
|
||||
|
||||
// prefetchw BYTE PTR [rax]
|
||||
// PREFETCHW 0(AX)
|
||||
BYTE $0x0F
|
||||
BYTE $0x0D
|
||||
BYTE $0x08
|
||||
|
||||
// r0-r7
|
||||
MOVQ R8, (0*8)(AX)
|
||||
MOVQ R9, (1*8)(AX)
|
||||
|
@ -178,15 +189,15 @@ TEXT ·vm_run_full(SB),$32-64
|
|||
MOVQ R15, (7*8)(AX)
|
||||
|
||||
// f0-f3
|
||||
VMOVUPD X0, (8*8)(AX)
|
||||
VMOVUPD X1, (10*8)(AX)
|
||||
VMOVUPD X2, (12*8)(AX)
|
||||
VMOVUPD X3, (14*8)(AX)
|
||||
VMOVAPD X0, (8*8)(AX)
|
||||
VMOVAPD X1, (10*8)(AX)
|
||||
VMOVAPD X2, (12*8)(AX)
|
||||
VMOVAPD X3, (14*8)(AX)
|
||||
// e0-e3
|
||||
VMOVUPD X4, (16*8)(AX)
|
||||
VMOVUPD X5, (18*8)(AX)
|
||||
VMOVUPD X6, (20*8)(AX)
|
||||
VMOVUPD X7, (22*8)(AX)
|
||||
VMOVAPD X4, (16*8)(AX)
|
||||
VMOVAPD X5, (18*8)(AX)
|
||||
VMOVAPD X6, (20*8)(AX)
|
||||
VMOVAPD X7, (22*8)(AX)
|
||||
|
||||
// a0-a3 are constant, no need to move
|
||||
|
||||
|
|
Loading…
Reference in a new issue