// go-randomx/vm_bytecode_jit_amd64.s (snapshot 2024-05-02)

//go:build unix && amd64 && !disable_jit && !purego
#include "textflag.h"
// func vm_run(rf *registerFile, pad *byte, eMask [2]uint64, jmp uintptr)
//
// Bridge into JIT-compiled RandomX code: loads the register file into the
// machine-register layout the generated code expects, CALLs it, then stores
// the integer and float register groups back into the register file.
//
// Register contract handed to the JIT code (as set up below; the JIT
// emitter itself is not visible in this file — confirm it matches):
//   R8-R15  = r0-r7 (integer registers)
//   X0-X3   = f0-f3, X4-X7 = e0-e3, X8-X11 = a0-a3 (a-group is read-only)
//   X13     = mantissa mask, 0x00ffffffffffffff broadcast to both lanes
//   X14     = eMask (16 bytes from the argument area)
//   X15     = scale mask, 0x80F0000000000000 broadcast to both lanes
//   SI      = scratchpad base pointer
// Requires AVX2 (VPBROADCASTQ).
TEXT ·vm_run(SB),$8-40
// move register file to registers
MOVQ rf+0(FP), AX
// non-temporal hint: the register file is streamed in once below
PREFETCHNTA 0(AX)
// integer registers r0-r7 -> R8-R15
MOVQ (0*8)(AX), R8
MOVQ (1*8)(AX), R9
MOVQ (2*8)(AX), R10
MOVQ (3*8)(AX), R11
MOVQ (4*8)(AX), R12
MOVQ (5*8)(AX), R13
MOVQ (6*8)(AX), R14
MOVQ (7*8)(AX), R15
// float group f0-f3 (each 128 bits = two qword slots)
VMOVAPD (8*8)(AX), X0
VMOVAPD (10*8)(AX), X1
VMOVAPD (12*8)(AX), X2
VMOVAPD (14*8)(AX), X3
// float group e0-e3
VMOVAPD (16*8)(AX), X4
VMOVAPD (18*8)(AX), X5
VMOVAPD (20*8)(AX), X6
VMOVAPD (22*8)(AX), X7
// float group a0-a3 (constants; not written back on exit)
VMOVAPD (24*8)(AX), X8
VMOVAPD (26*8)(AX), X9
VMOVAPD (28*8)(AX), X10
VMOVAPD (30*8)(AX), X11
// mantissa mask: build {0x00ffffffffffffff, 0x00ffffffffffffff} in X13
//VMOVQ $0x00ffffffffffffff, $0x00ffffffffffffff, X13
MOVQ $0x00ffffffffffffff, AX
VMOVQ AX, X13
VPBROADCASTQ X13, X13
// eMask: plain VEX VMOVDQU suffices for an unmasked 128-bit load
// (the previous VMOVDQU64 was EVEX-encoded and needlessly required
// AVX-512F+VL; everything else here needs only AVX2)
VMOVDQU eMask+16(FP), X14
// scale mask: build {0x80F0000000000000, 0x80F0000000000000} in X15
//VMOVQ $0x80F0000000000000, $0x80F0000000000000, X15
MOVQ $0x80F0000000000000, AX
VMOVQ AX, X15
VPBROADCASTQ X15, X15
// scratchpad pointer
MOVQ pad+8(FP), SI
// JIT entry point
MOVQ jmp+32(FP), AX
// call into the generated code; it is expected to return with RET
// and to leave the contract registers holding the updated VM state
CALL AX
// reload register-file pointer (AX was clobbered above)
MOVQ rf+0(FP), AX
// hand-encoded PREFETCHW [AX] (0F 0D /1): request the cache line for
// writing ahead of the stores below — presumably hand-assembled because
// the Go assembler lacked the mnemonic; confirm before replacing
// prefetchw BYTE PTR [rax]
// PREFETCHW 0(AX)
BYTE $0x0F
BYTE $0x0D
BYTE $0x08
// store r0-r7 back
MOVQ R8, (0*8)(AX)
MOVQ R9, (1*8)(AX)
MOVQ R10, (2*8)(AX)
MOVQ R11, (3*8)(AX)
MOVQ R12, (4*8)(AX)
MOVQ R13, (5*8)(AX)
MOVQ R14, (6*8)(AX)
MOVQ R15, (7*8)(AX)
// store f0-f3 back
VMOVAPD X0, (8*8)(AX)
VMOVAPD X1, (10*8)(AX)
VMOVAPD X2, (12*8)(AX)
VMOVAPD X3, (14*8)(AX)
// store e0-e3 back
VMOVAPD X4, (16*8)(AX)
VMOVAPD X5, (18*8)(AX)
VMOVAPD X6, (20*8)(AX)
VMOVAPD X7, (22*8)(AX)
// a0-a3 are constant, no need to move
RET
// RandomX L3 scratchpad size (2 MiB) and the mask that reduces an address
// to a 64-byte-aligned (cache-line) offset within it: size minus 64.
// NOTE(review): neither macro is referenced in this file's visible code —
// presumably consumed by the JIT emitter; confirm before removing.
#define RANDOMX_SCRATCHPAD_L3 2097152
#define RANDOMX_SCRATCHPAD_MASK (RANDOMX_SCRATCHPAD_L3-64)
// func vm_run_full(rf *registerFile, pad *byte, dataset uintptr,
//                  iterations uint64, memoryRegisters uint64,
//                  eMask [2]uint64, jmp uintptr)
//
// Full-loop variant of vm_run: besides the register file and masks it also
// hands the JIT code the dataset pointer, the iteration count and the
// packed ma/mx memory registers, so the generated code can drive the whole
// program loop (readReg handling, dataset reads, loads and stores) itself.
//
// Register contract handed to the JIT code (as set up below; confirm
// against the JIT emitter, which is not visible here):
//   R8-R15  = r0-r7,  X0-X3 = f0-f3,  X4-X7 = e0-e3,  X8-X11 = a0-a3
//   X13/X14/X15 = mantissa mask / eMask / scale mask (as in vm_run)
//   SI = scratchpad, DI = dataset, BX = remaining iterations
//   AX = memoryRegisters as loaded, BP = memoryRegisters rotated by 32
//        (halves swapped) — see the ma/mx setup below
// Requires AVX2 (VPBROADCASTQ). BP is repurposed as a data register; the
// Go assembler's generated prologue for a frame this size presumably
// saves/restores it — confirm against the generated code.
TEXT ·vm_run_full(SB),$32-64
// move register file to registers
MOVQ rf+0(FP), AX
// non-temporal hint: the register file is streamed in once below
PREFETCHNTA 0(AX)
// integer registers r0-r7 -> R8-R15
MOVQ (0*8)(AX), R8
MOVQ (1*8)(AX), R9
MOVQ (2*8)(AX), R10
MOVQ (3*8)(AX), R11
MOVQ (4*8)(AX), R12
MOVQ (5*8)(AX), R13
MOVQ (6*8)(AX), R14
MOVQ (7*8)(AX), R15
// float group f0-f3
VMOVAPD (8*8)(AX), X0
VMOVAPD (10*8)(AX), X1
VMOVAPD (12*8)(AX), X2
VMOVAPD (14*8)(AX), X3
// float group e0-e3
VMOVAPD (16*8)(AX), X4
VMOVAPD (18*8)(AX), X5
VMOVAPD (20*8)(AX), X6
VMOVAPD (22*8)(AX), X7
// load constants a0-a3 (not written back on exit)
VMOVAPD (24*8)(AX), X8
VMOVAPD (26*8)(AX), X9
VMOVAPD (28*8)(AX), X10
VMOVAPD (30*8)(AX), X11
//TODO: rest of init
// mantissa mask: build {0x00ffffffffffffff, 0x00ffffffffffffff} in X13
//VMOVQ $0x00ffffffffffffff, $0x00ffffffffffffff, X13
MOVQ $0x00ffffffffffffff, AX
VMOVQ AX, X13
VPBROADCASTQ X13, X13
// eMask: plain VEX VMOVDQU suffices for an unmasked 128-bit load
// (the previous VMOVDQU64 was EVEX-encoded and needlessly required
// AVX-512F+VL; everything else here needs only AVX2)
VMOVDQU eMask+40(FP), X14
// scale mask: build {0x80F0000000000000, 0x80F0000000000000} in X15
//VMOVQ $0x80F0000000000000, $0x80F0000000000000, X15
MOVQ $0x80F0000000000000, AX
VMOVQ AX, X15
VPBROADCASTQ X15, X15
// scratchpad pointer on rsi
MOVQ pad+8(FP), SI
// dataset pointer on rdi
MOVQ dataset+16(FP), DI
// iterations on rbx
MOVQ iterations+24(FP), BX
// ma and mx on rbp TODO: change this
MOVQ memoryRegisters+32(FP), BP
// keep the packed value in AX and a halves-swapped copy in BP
MOVQ BP, AX
RORQ $32, BP
//AX = spAddr0
//DX = spAddr1 (set up by the JIT code, not here)
// JIT entry point
MOVQ jmp+56(FP), CX
// call into the generated code; it is expected to return with RET
// this handles readReg[0-3] and dataset reading, load, stores
CALL CX
// reload register-file pointer (AX was clobbered above)
MOVQ rf+0(FP), AX
// hand-encoded PREFETCHW [AX] (0F 0D /1): request the cache line for
// writing ahead of the stores below — presumably hand-assembled because
// the Go assembler lacked the mnemonic; confirm before replacing
// prefetchw BYTE PTR [rax]
// PREFETCHW 0(AX)
BYTE $0x0F
BYTE $0x0D
BYTE $0x08
// store r0-r7 back
MOVQ R8, (0*8)(AX)
MOVQ R9, (1*8)(AX)
MOVQ R10, (2*8)(AX)
MOVQ R11, (3*8)(AX)
MOVQ R12, (4*8)(AX)
MOVQ R13, (5*8)(AX)
MOVQ R14, (6*8)(AX)
MOVQ R15, (7*8)(AX)
// store f0-f3 back
VMOVAPD X0, (8*8)(AX)
VMOVAPD X1, (10*8)(AX)
VMOVAPD X2, (12*8)(AX)
VMOVAPD X3, (14*8)(AX)
// store e0-e3 back
VMOVAPD X4, (16*8)(AX)
VMOVAPD X5, (18*8)(AX)
VMOVAPD X6, (20*8)(AX)
VMOVAPD X7, (22*8)(AX)
// a0-a3 are constant, no need to move
RET