/* Copyright (c) 2019 DERO Foundation. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

package randomx

import (
	"errors"
	"git.gammaspectra.live/P2Pool/go-randomx/v3/internal/aes"
	"git.gammaspectra.live/P2Pool/go-randomx/v3/internal/memory"
	"math"
	"runtime"
	"unsafe"
)
import "golang.org/x/crypto/blake2b"

// VM holds the state of a single RandomX virtual machine instance: its
// scratchpad, register file, the current program and the hashing state that
// chains successive program executions together.
//
// A VM is created with NewVM and must be released with Close.
type VM struct {
	// pad is the scratchpad. It is filled from the hash state by
	// initScratchpad and read/written by every program iteration in run.
	pad *ScratchPad

	// flags are the flags the VM was created with in NewVM.
	flags Flags

	// buffer first 128 bytes are entropy below rest are program bytes
	// It is refilled from hashState at the start of every run.
	buffer [16*8 + RANDOMX_PROGRAM_SIZE*8]byte

	// hashState is the 64-byte state derived from the input via BLAKE2b-512;
	// runLoops replaces it with the hash of the register file between
	// chained program executions.
	hashState [blake2b.Size]byte

	// registerFile holds the R, F, E and A register groups used by run.
	registerFile *RegisterFile

	// AES is the AES implementation used for filling and hashing. NewVM
	// selects the hardware one when RANDOMX_FLAG_HARD_AES is set and it is
	// available, falling back to the software one otherwise.
	AES aes.AES

	// Cache is consulted by run when Dataset is nil (light mode).
	Cache *Cache
	// Dataset is read by run when it is non-nil (full mode).
	Dataset *Dataset

	// program is the bytecode compiled from buffer on every run.
	program ByteCode
	// jitProgram is the code buffer allocated by NewVM when the flags
	// request JIT; it is nil otherwise.
	jitProgram VMProgramFunc
}

// NewVM Creates and initializes a RandomX virtual machine.
// *
// * @param flags is any combination of these 5 flags (each flag can be set or not set):
// *        RANDOMX_FLAG_LARGE_PAGES - allocate scratchpad memory in large pages
// *        RANDOMX_FLAG_HARD_AES - virtual machine will use hardware accelerated AES
// *        RANDOMX_FLAG_FULL_MEM - virtual machine will use the full dataset
// *        RANDOMX_FLAG_JIT - virtual machine will use a JIT compiler
// *        RANDOMX_FLAG_SECURE - when combined with RANDOMX_FLAG_JIT, the JIT pages are never
// *                              writable and executable at the same time (W^X policy)
// *        The numeric values of the first 4 flags are ordered so that a higher value will provide
// *        faster hash calculation and a lower numeric value will provide higher portability.
// *        Using RANDOMX_FLAG_DEFAULT (all flags not set) works on all platforms, but is the slowest.
// * @param cache is a pointer to an initialized randomx_cache structure. Can be
// *        NULL if RANDOMX_FLAG_FULL_MEM is set.
// * @param dataset is a pointer to a randomx_dataset structure. Can be NULL
// *        if RANDOMX_FLAG_FULL_MEM is not set.
// *
// * @return Pointer to an initialized randomx_vm structure.
// *         Returns NULL if:
// *         (1) Scratchpad memory allocation fails.
// *         (2) The requested initialization flags are not supported on the current platform.
// * (3) cache parameter is NULL and RANDOMX_FLAG_FULL_MEM is not set // * (4) dataset parameter is NULL and RANDOMX_FLAG_FULL_MEM is set // */ func NewVM(flags Flags, cache *Cache, dataset *Dataset) (*VM, error) { if cache == nil && !flags.Has(RANDOMX_FLAG_FULL_MEM) { return nil, errors.New("nil cache in light mode") } if dataset == nil && flags.Has(RANDOMX_FLAG_FULL_MEM) { return nil, errors.New("nil dataset in full mode") } pad, err := memory.Allocate[ScratchPad](cacheLineAlignedAllocator) if err != nil { return nil, err } registerFile, err := memory.Allocate[RegisterFile](cacheLineAlignedAllocator) if err != nil { return nil, err } _ = pad _ = registerFile vm := &VM{ Cache: cache, Dataset: dataset, flags: flags, pad: new(ScratchPad), registerFile: new(RegisterFile), } if flags.Has(RANDOMX_FLAG_HARD_AES) { vm.AES = aes.NewHardAES() } // fallback if vm.AES == nil { vm.AES = aes.NewSoftAES() } if flags.HasJIT() { vm.jitProgram, err = memory.AllocateSlice[byte](pageAllocator, int(RandomXCodeSize)) if err != nil { return nil, err } if !flags.Has(RANDOMX_FLAG_SECURE) { err = memory.PageReadWriteExecute(vm.jitProgram) if err != nil { vm.jitProgram.Close() return nil, err } } } return vm, nil } // run calculate hash based on input. Not thread safe. 
// Warning: Underlying callers will run float64 SetRoundingMode directly // It is the caller's responsibility to set and restore the mode to IEEE 754 roundTiesToEven between full executions // Additionally, runtime.LockOSThread and defer runtime.UnlockOSThread is recommended to prevent other goroutines sharing these changes func (vm *VM) run() { // buffer first 128 bytes are entropy below rest are program bytes vm.AES.FillAes4Rx4(vm.hashState, vm.buffer[:]) entropy := (*[16]uint64)(unsafe.Pointer(&vm.buffer)) // do more initialization before we run reg := vm.registerFile reg.Clear() // initialize constant registers for i := range entropy[:8] { reg.A[i/2][i%2] = SmallPositiveFloatBits(entropy[i]) } // memory registers var ma, mx uint32 ma = uint32(entropy[8] & CacheLineAlignMask) mx = uint32(entropy[10]) addressRegisters := entropy[12] var readReg [4]uint64 for i := range readReg { readReg[i] = uint64(i*2) + (addressRegisters & 1) addressRegisters >>= 1 } datasetOffset := (entropy[13] % (DatasetExtraItems + 1)) * CacheLineSize eMask := [2]uint64{ExponentMask(entropy[14]), ExponentMask(entropy[15])} prog := vm.buffer[len(entropy)*8:] CompileProgramToByteCode(prog, &vm.program) var jitProgram VMProgramFunc if vm.jitProgram != nil { if vm.Dataset == nil { //light mode if vm.flags.Has(RANDOMX_FLAG_SECURE) { err := memory.PageReadWrite(vm.jitProgram) if err != nil { panic(err) } jitProgram = vm.program.generateCode(vm.jitProgram, nil) err = memory.PageReadExecute(vm.jitProgram) if err != nil { panic(err) } } else { jitProgram = vm.program.generateCode(vm.jitProgram, nil) } } else { // full mode and we have JIT if vm.flags.Has(RANDOMX_FLAG_SECURE) { err := memory.PageReadWrite(vm.jitProgram) if err != nil { panic(err) } jitProgram = vm.program.generateCode(vm.jitProgram, &readReg) err = memory.PageReadExecute(vm.jitProgram) if err != nil { panic(err) } } else { jitProgram = vm.program.generateCode(vm.jitProgram, &readReg) } vm.jitProgram.ExecuteFull(reg, vm.pad, 
&vm.Dataset.Memory()[datasetOffset/CacheLineSize], RANDOMX_PROGRAM_ITERATIONS, ma, mx, eMask) return } } spAddr0 := uint64(mx) spAddr1 := uint64(ma) var rlCache RegisterLine for ic := 0; ic < RANDOMX_PROGRAM_ITERATIONS; ic++ { spMix := reg.R[readReg[0]] ^ reg.R[readReg[1]] spAddr0 ^= spMix spAddr0 &= ScratchpadL3Mask64 spAddr1 ^= spMix >> 32 spAddr1 &= ScratchpadL3Mask64 //TODO: optimize these loads! for i := uint64(0); i < RegistersCount; i++ { reg.R[i] ^= vm.pad.Load64(uint32(spAddr0 + 8*i)) } for i := uint64(0); i < RegistersCountFloat; i++ { reg.F[i] = vm.pad.Load32FA(uint32(spAddr1 + 8*i)) } for i := uint64(0); i < RegistersCountFloat; i++ { reg.E[i] = vm.pad.Load32FA(uint32(spAddr1 + 8*(i+RegistersCountFloat))) reg.E[i][LOW] = MaskRegisterExponentMantissa(reg.E[i][LOW], eMask[LOW]) reg.E[i][HIGH] = MaskRegisterExponentMantissa(reg.E[i][HIGH], eMask[HIGH]) } // run the actual bytecode if jitProgram != nil { // light mode jitProgram.Execute(reg, vm.pad, eMask) } else { vm.program.Execute(reg, vm.pad, eMask) } mx ^= uint32(reg.R[readReg[2]] ^ reg.R[readReg[3]]) mx &= uint32(CacheLineAlignMask) if vm.Dataset != nil { // full mode vm.Dataset.prefetchDataset(datasetOffset + uint64(mx)) // load output from superscalar program to get dataset 64 bytes vm.Dataset.readDataset(datasetOffset+uint64(ma), ®.R) } else { // light mode // execute output from superscalar program to get dataset 64 bytes vm.Cache.initDataset(&rlCache, (datasetOffset+uint64(ma))/CacheLineSize) for i := range reg.R { reg.R[i] ^= rlCache[i] } } // swap the elements mx, ma = ma, mx for i := uint64(0); i < RegistersCount; i++ { vm.pad.Store64(uint32(spAddr1+8*i), reg.R[i]) } for i := uint64(0); i < RegistersCountFloat; i++ { reg.F[i][LOW] = Xor(reg.F[i][LOW], reg.E[i][LOW]) reg.F[i][HIGH] = Xor(reg.F[i][HIGH], reg.E[i][HIGH]) vm.pad.Store64(uint32(spAddr0+16*i), math.Float64bits(reg.F[i][LOW])) vm.pad.Store64(uint32(spAddr0+16*i+8), math.Float64bits(reg.F[i][HIGH])) } spAddr0 = 0 spAddr1 = 0 } } func 
(vm *VM) initScratchpad(seed *[64]byte) { clear(vm.pad[:]) vm.AES.FillAes1Rx4(seed, vm.pad[:]) } func (vm *VM) runLoops() { if lockThreadDueToRoundingMode { // Lock thread due to rounding mode flags runtime.LockOSThread() defer runtime.UnlockOSThread() } // always force a restore before startup ResetRoundingMode(vm.registerFile) // restore rounding mode at the end defer ResetRoundingMode(vm.registerFile) for chain := 0; chain < RANDOMX_PROGRAM_COUNT-1; chain++ { vm.run() // write R, F, E, A registers vm.hashState = blake2b.Sum512(vm.registerFile.Memory()[:]) } // final loop executes here vm.run() } // SetCache Reinitializes a virtual machine with a new Cache. // This function should be called anytime the Cache is reinitialized with a new key. // Does nothing if called with a Cache containing the same key value as already set. // VM must be initialized without RANDOMX_FLAG_FULL_MEM. func (vm *VM) SetCache(cache *Cache) { if vm.flags.Has(RANDOMX_FLAG_FULL_MEM) { panic("unsupported") } vm.Cache = cache //todo } // SetDataset Reinitializes a virtual machine with a new Dataset. // VM must be initialized with RANDOMX_FLAG_FULL_MEM. func (vm *VM) SetDataset(dataset *Dataset) { if !vm.flags.Has(RANDOMX_FLAG_FULL_MEM) { panic("unsupported") } vm.Dataset = dataset } // CalculateHash Calculates a RandomX hash value. func (vm *VM) CalculateHash(input []byte, output *[RANDOMX_HASH_SIZE]byte) { vm.hashState = blake2b.Sum512(input) vm.initScratchpad(&vm.hashState) vm.runLoops() // now hash the scratch pad as it will act as register A vm.AES.HashAes1Rx4(vm.pad[:], &vm.hashState) regMem := vm.registerFile.Memory() // write hash onto register A copy(regMem[RegisterFileSize-RegistersCountFloat*2*8:], vm.hashState[:]) // write R, F, E, A registers *output = blake2b.Sum256(regMem[:]) } // CalculateHashFirst will begin a hash calculation. 
func (vm *VM) CalculateHashFirst(input []byte) { vm.hashState = blake2b.Sum512(input) vm.initScratchpad(&vm.hashState) } // CalculateHashNext will output the hash value of the previous input and begin the calculation of the next hash. func (vm *VM) CalculateHashNext(nextInput []byte, output *[RANDOMX_HASH_SIZE]byte) { vm.runLoops() // now hash the scratch pad as it will act as register A vm.AES.HashAes1Rx4(vm.pad[:], &vm.hashState) // Finish current hash and fill the scratchpad for the next hash at the same time regMem := vm.registerFile.Memory() vm.hashState = blake2b.Sum512(nextInput) // write hash onto register A vm.AES.HashAndFillAes1Rx4(vm.pad[:], (*[64]byte)(unsafe.Pointer(unsafe.SliceData(regMem[RegisterFileSize-RegistersCountFloat*2*8:]))), &vm.hashState) runtime.KeepAlive(regMem) // write R, F, E, A registers *output = blake2b.Sum256(regMem[:]) } // CalculateHashLast will output the hash value of the previous input. func (vm *VM) CalculateHashLast(output *[RANDOMX_HASH_SIZE]byte) { vm.runLoops() // now hash the scratch pad as it will act as register A vm.AES.HashAes1Rx4(vm.pad[:], &vm.hashState) regMem := vm.registerFile.Memory() // write hash onto register A copy(regMem[RegisterFileSize-RegistersCountFloat*2*8:], vm.hashState[:]) // write R, F, E, A registers *output = blake2b.Sum256(regMem[:]) } // Close Releases all memory occupied by the structure. func (vm *VM) Close() error { memory.Free(cacheLineAlignedAllocator, vm.pad) memory.Free(cacheLineAlignedAllocator, vm.registerFile) if vm.jitProgram != nil { return vm.jitProgram.Close() } return nil }