DataHoarder
9aa3631f37
All checks were successful
continuous-integration/drone/push Build is passing
205 lines
4.1 KiB
ArmAsm
205 lines
4.1 KiB
ArmAsm
//go:build unix && amd64 && !disable_jit && !purego
|
|
|
|
#include "textflag.h"
|
|
|
|
// vm_run loads the register file into CPU registers, calls the JIT code at
// jmp, and then writes the integer and f/e vector registers back to the
// register file.
//
// Arguments, as addressed through FP below (40 bytes in total):
//   rf+0(FP)      pointer to the register file
//   pad+8(FP)     scratchpad pointer
//   eMask+16(FP)  16-byte eMask value
//   jmp+32(FP)    address of the JIT code
//
// Register file layout used here (byte offsets from rf):
//     0.. 63  r0-r7  (8 x 8 bytes)
//    64..127  f0-f3  (4 x 16 bytes)
//   128..191  e0-e3  (4 x 16 bytes)
//   192..255  a0-a3  (4 x 16 bytes, only read, never written back)
//
// Register assignment when the JIT code is called:
//   R8-R15   r0-r7
//   X0-X3    f0-f3
//   X4-X7    e0-e3
//   X8-X11   a0-a3
//   X13      mantissa mask (both lanes)
//   X14      eMask
//   X15      scale mask (both lanes)
//   SI       scratchpad pointer
//   AX       address of the JIT code itself
//
// Only VEX-encoded (AVX/AVX2) instructions are used, so that the routine
// does not depend on AVX-512 being available.
TEXT ·vm_run(SB),$8-40
	// AX = register file base
	MOVQ rf+0(FP), AX
	PREFETCHNTA 0(AX)

	// r0-r7
	MOVQ (0*8)(AX), R8
	MOVQ (1*8)(AX), R9
	MOVQ (2*8)(AX), R10
	MOVQ (3*8)(AX), R11
	MOVQ (4*8)(AX), R12
	MOVQ (5*8)(AX), R13
	MOVQ (6*8)(AX), R14
	MOVQ (7*8)(AX), R15

	// Vector registers are moved with VMOVUPD: unlike VMOVAPD it does not
	// fault when the register file is not 16-byte aligned.

	// f0-f3
	VMOVUPD (8*8)(AX), X0
	VMOVUPD (10*8)(AX), X1
	VMOVUPD (12*8)(AX), X2
	VMOVUPD (14*8)(AX), X3

	// e0-e3
	VMOVUPD (16*8)(AX), X4
	VMOVUPD (18*8)(AX), X5
	VMOVUPD (20*8)(AX), X6
	VMOVUPD (22*8)(AX), X7

	// a0-a3
	VMOVUPD (24*8)(AX), X8
	VMOVUPD (26*8)(AX), X9
	VMOVUPD (28*8)(AX), X10
	VMOVUPD (30*8)(AX), X11

	// mantissa mask: 0x00ffffffffffffff in both 64-bit lanes of X13
	MOVQ $0x00ffffffffffffff, AX
	VMOVQ AX, X13
	VPBROADCASTQ X13, X13

	// eMask: 16-byte load straight from the argument frame.
	// VMOVDQU is the VEX form; VMOVDQU64 would be assembled as an EVEX
	// (AVX-512) instruction and trap on CPUs without AVX-512.
	VMOVDQU eMask+16(FP), X14

	// scale mask: 0x80F0000000000000 in both 64-bit lanes of X15
	MOVQ $0x80F0000000000000, AX
	VMOVQ AX, X15
	VPBROADCASTQ X15, X15

	// scratchpad pointer
	MOVQ pad+8(FP), SI

	// call the JIT code
	MOVQ jmp+32(FP), AX
	CALL AX

	// AX was reused above, so reload the register file base before storing
	MOVQ rf+0(FP), AX

	// prefetchw BYTE PTR [rax], hand-encoded (PREFETCHW 0(AX))
	BYTE $0x0F; BYTE $0x0D; BYTE $0x08

	// r0-r7
	MOVQ R8, (0*8)(AX)
	MOVQ R9, (1*8)(AX)
	MOVQ R10, (2*8)(AX)
	MOVQ R11, (3*8)(AX)
	MOVQ R12, (4*8)(AX)
	MOVQ R13, (5*8)(AX)
	MOVQ R14, (6*8)(AX)
	MOVQ R15, (7*8)(AX)

	// f0-f3
	VMOVUPD X0, (8*8)(AX)
	VMOVUPD X1, (10*8)(AX)
	VMOVUPD X2, (12*8)(AX)
	VMOVUPD X3, (14*8)(AX)

	// e0-e3
	VMOVUPD X4, (16*8)(AX)
	VMOVUPD X5, (18*8)(AX)
	VMOVUPD X6, (20*8)(AX)
	VMOVUPD X7, (22*8)(AX)

	// a0-a3 are constant, no need to store them back

	RET
|
|
|
|
|
|
// Scratchpad L3 size constant: 2097152 bytes (2 MiB).
#define RANDOMX_SCRATCHPAD_L3 2097152
|
|
// L3 size minus 64 (0x1FFFC0): ANDing a value with it yields a multiple of 64
// below the L3 size. NOTE(review): not referenced by the code visible in this
// file; presumably meant for masking scratchpad addresses - confirm.
#define RANDOMX_SCRATCHPAD_MASK (RANDOMX_SCRATCHPAD_L3-64)
|
|
|
|
// vm_run_full loads the register file into CPU registers, sets up the
// scratchpad/dataset pointers, the iteration count and the memory registers,
// calls the JIT code at jmp, and then writes the integer and f/e vector
// registers back to the register file.
//
// Arguments, as addressed through FP below (64 bytes in total):
//   rf+0(FP)               pointer to the register file
//   pad+8(FP)              scratchpad pointer
//   dataset+16(FP)         dataset pointer
//   iterations+24(FP)      iteration count
//   memoryRegisters+32(FP) 64-bit value holding ma and mx
//   eMask+40(FP)           16-byte eMask value
//   jmp+56(FP)             address of the JIT code
//
// Register file layout used here (byte offsets from rf):
//     0.. 63  r0-r7  (8 x 8 bytes)
//    64..127  f0-f3  (4 x 16 bytes)
//   128..191  e0-e3  (4 x 16 bytes)
//   192..255  a0-a3  (4 x 16 bytes, only read, never written back)
//
// Register assignment when the JIT code is called:
//   R8-R15   r0-r7
//   X0-X3    f0-f3
//   X4-X7    e0-e3
//   X8-X11   a0-a3
//   X13      mantissa mask (both lanes)
//   X14      eMask
//   X15      scale mask (both lanes)
//   SI       scratchpad pointer
//   DI       dataset pointer
//   BX       iteration count
//   BP       memoryRegisters rotated right by 32 bits
//   AX       memoryRegisters, unrotated
//   CX       address of the JIT code itself
//
// Only VEX-encoded (AVX/AVX2) instructions are used, so that the routine
// does not depend on AVX-512 being available.
TEXT ·vm_run_full(SB),$32-64
	// AX = register file base
	MOVQ rf+0(FP), AX
	PREFETCHNTA 0(AX)

	// r0-r7
	MOVQ (0*8)(AX), R8
	MOVQ (1*8)(AX), R9
	MOVQ (2*8)(AX), R10
	MOVQ (3*8)(AX), R11
	MOVQ (4*8)(AX), R12
	MOVQ (5*8)(AX), R13
	MOVQ (6*8)(AX), R14
	MOVQ (7*8)(AX), R15

	// Vector registers are moved with VMOVUPD: unlike VMOVAPD it does not
	// fault when the register file is not 16-byte aligned.

	// f0-f3
	VMOVUPD (8*8)(AX), X0
	VMOVUPD (10*8)(AX), X1
	VMOVUPD (12*8)(AX), X2
	VMOVUPD (14*8)(AX), X3

	// e0-e3
	VMOVUPD (16*8)(AX), X4
	VMOVUPD (18*8)(AX), X5
	VMOVUPD (20*8)(AX), X6
	VMOVUPD (22*8)(AX), X7

	// constants a0-a3
	VMOVUPD (24*8)(AX), X8
	VMOVUPD (26*8)(AX), X9
	VMOVUPD (28*8)(AX), X10
	VMOVUPD (30*8)(AX), X11

	//TODO: rest of init

	// mantissa mask: 0x00ffffffffffffff in both 64-bit lanes of X13
	MOVQ $0x00ffffffffffffff, AX
	VMOVQ AX, X13
	VPBROADCASTQ X13, X13

	// eMask: 16-byte load straight from the argument frame.
	// VMOVDQU is the VEX form; VMOVDQU64 would be assembled as an EVEX
	// (AVX-512) instruction and trap on CPUs without AVX-512.
	VMOVDQU eMask+40(FP), X14

	// scale mask: 0x80F0000000000000 in both 64-bit lanes of X15
	MOVQ $0x80F0000000000000, AX
	VMOVQ AX, X15
	VPBROADCASTQ X15, X15

	// scratchpad pointer on rsi
	MOVQ pad+8(FP), SI
	// dataset pointer on rdi
	MOVQ dataset+16(FP), DI
	// iterations on rbx
	MOVQ iterations+24(FP), BX
	// ma and mx on rbp TODO: change this
	// (BP does not hold a frame pointer from here until the routine returns)
	MOVQ memoryRegisters+32(FP), BP

	// ma/mx set-up: AX keeps the value as loaded, BP gets its two 32-bit
	// halves swapped.
	// NOTE(review): the original comments here labelled AX as spAddr0 and DX
	// as spAddr1, but DX is never written in this routine - presumably the
	// JIT code derives it; confirm against the code generator.
	MOVQ BP, AX
	RORQ $32, BP

	// call the JIT code
	// this handles readReg[0-3] and dataset reading, load, stores
	MOVQ jmp+56(FP), CX
	CALL CX

	// AX was reused above, so reload the register file base before storing
	MOVQ rf+0(FP), AX

	// prefetchw BYTE PTR [rax], hand-encoded (PREFETCHW 0(AX))
	BYTE $0x0F; BYTE $0x0D; BYTE $0x08

	// r0-r7
	MOVQ R8, (0*8)(AX)
	MOVQ R9, (1*8)(AX)
	MOVQ R10, (2*8)(AX)
	MOVQ R11, (3*8)(AX)
	MOVQ R12, (4*8)(AX)
	MOVQ R13, (5*8)(AX)
	MOVQ R14, (6*8)(AX)
	MOVQ R15, (7*8)(AX)

	// f0-f3
	VMOVUPD X0, (8*8)(AX)
	VMOVUPD X1, (10*8)(AX)
	VMOVUPD X2, (12*8)(AX)
	VMOVUPD X3, (14*8)(AX)

	// e0-e3
	VMOVUPD X4, (16*8)(AX)
	VMOVUPD X5, (18*8)(AX)
	VMOVUPD X6, (20*8)(AX)
	VMOVUPD X7, (22*8)(AX)

	// a0-a3 are constant, no need to store them back

	RET
|