Upload files to ''

This commit is contained in:
dank 2019-10-15 19:45:39 +02:00
parent 8983c5408d
commit 906ed8092e
10 changed files with 3049 additions and 2 deletions

View file

@ -1,2 +1 @@
# RandomX
Analysis of RandomX

365
aes_const.go Normal file
View file

@ -0,0 +1,365 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package aes implements AES encryption (formerly Rijndael), as defined in
// U.S. Federal Information Processing Standards Publication 197.
//
// The AES operations in this package are not implemented using constant-time algorithms.
// An exception is when running on systems with enabled hardware support for AES
// that makes these operations constant-time. Examples include amd64 systems using AES-NI
// extensions and s390x systems using Message-Security-Assist extensions.
// On such systems, when the result of NewCipher is passed to cipher.NewGCM,
// the GHASH operation used by GCM is also constant-time.
package randomx
// This file contains AES constants - 8720 bytes of initialized data.
// https://csrc.nist.gov/publications/fips/fips197/fips-197.pdf
// poly is the AES reduction polynomial x⁸ + x⁴ + x³ + x + 1, where bit k of
// the constant stands for the xᵏ term.
//
// AES arithmetic works on binary polynomials (polynomials over GF(2)) taken
// modulo this irreducible polynomial. Adding two such polynomials is a plain
// binary xor, and reducing modulo poly means xoring with poly every time the
// 0x100 bit appears.
const poly = 0x11b // bits 8, 4, 3, 1 and 0 are set
// powx holds x⁰ through x¹⁵ reduced modulo poly in GF(2): each entry is the
// previous one multiplied by x.
var powx = [16]byte{
	0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
	0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f,
}
// FIPS-197 Figure 7. S-box substitution values in hexadecimal format.
var sbox0 = [256]byte{
0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16,
}
// FIPS-197 Figure 14. Inverse S-box substitution values in hexadecimal format.
var sbox1 = [256]byte{
0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d,
}
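// Lookup tables for encryption. te1, te2 and te3 hold the entries of te0
// rotated right by 8, 16 and 24 bits respectively.
// NOTE(review): the tables can be recomputed by adapting the tests in
// aes_test.go — presumably the test file of Go's crypto/aes package, which
// this file's license header points to; confirm it exists in this repository.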
var te0 = [256]uint32{
0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d, 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d, 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87, 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea, 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a, 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108, 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e, 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d, 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e, 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce, 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c, 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b, 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16, 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81, 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a, 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163, 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f, 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47, 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f, 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c, 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e, 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6, 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7, 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25, 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72, 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21, 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa, 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0, 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133, 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920, 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17, 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11, 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a,
}
var te1 = [256]uint32{
0xa5c66363, 0x84f87c7c, 0x99ee7777, 0x8df67b7b, 0x0dfff2f2, 0xbdd66b6b, 0xb1de6f6f, 0x5491c5c5,
0x50603030, 0x03020101, 0xa9ce6767, 0x7d562b2b, 0x19e7fefe, 0x62b5d7d7, 0xe64dabab, 0x9aec7676,
0x458fcaca, 0x9d1f8282, 0x4089c9c9, 0x87fa7d7d, 0x15effafa, 0xebb25959, 0xc98e4747, 0x0bfbf0f0,
0xec41adad, 0x67b3d4d4, 0xfd5fa2a2, 0xea45afaf, 0xbf239c9c, 0xf753a4a4, 0x96e47272, 0x5b9bc0c0,
0xc275b7b7, 0x1ce1fdfd, 0xae3d9393, 0x6a4c2626, 0x5a6c3636, 0x417e3f3f, 0x02f5f7f7, 0x4f83cccc,
0x5c683434, 0xf451a5a5, 0x34d1e5e5, 0x08f9f1f1, 0x93e27171, 0x73abd8d8, 0x53623131, 0x3f2a1515,
0x0c080404, 0x5295c7c7, 0x65462323, 0x5e9dc3c3, 0x28301818, 0xa1379696, 0x0f0a0505, 0xb52f9a9a,
0x090e0707, 0x36241212, 0x9b1b8080, 0x3ddfe2e2, 0x26cdebeb, 0x694e2727, 0xcd7fb2b2, 0x9fea7575,
0x1b120909, 0x9e1d8383, 0x74582c2c, 0x2e341a1a, 0x2d361b1b, 0xb2dc6e6e, 0xeeb45a5a, 0xfb5ba0a0,
0xf6a45252, 0x4d763b3b, 0x61b7d6d6, 0xce7db3b3, 0x7b522929, 0x3edde3e3, 0x715e2f2f, 0x97138484,
0xf5a65353, 0x68b9d1d1, 0x00000000, 0x2cc1eded, 0x60402020, 0x1fe3fcfc, 0xc879b1b1, 0xedb65b5b,
0xbed46a6a, 0x468dcbcb, 0xd967bebe, 0x4b723939, 0xde944a4a, 0xd4984c4c, 0xe8b05858, 0x4a85cfcf,
0x6bbbd0d0, 0x2ac5efef, 0xe54faaaa, 0x16edfbfb, 0xc5864343, 0xd79a4d4d, 0x55663333, 0x94118585,
0xcf8a4545, 0x10e9f9f9, 0x06040202, 0x81fe7f7f, 0xf0a05050, 0x44783c3c, 0xba259f9f, 0xe34ba8a8,
0xf3a25151, 0xfe5da3a3, 0xc0804040, 0x8a058f8f, 0xad3f9292, 0xbc219d9d, 0x48703838, 0x04f1f5f5,
0xdf63bcbc, 0xc177b6b6, 0x75afdada, 0x63422121, 0x30201010, 0x1ae5ffff, 0x0efdf3f3, 0x6dbfd2d2,
0x4c81cdcd, 0x14180c0c, 0x35261313, 0x2fc3ecec, 0xe1be5f5f, 0xa2359797, 0xcc884444, 0x392e1717,
0x5793c4c4, 0xf255a7a7, 0x82fc7e7e, 0x477a3d3d, 0xacc86464, 0xe7ba5d5d, 0x2b321919, 0x95e67373,
0xa0c06060, 0x98198181, 0xd19e4f4f, 0x7fa3dcdc, 0x66442222, 0x7e542a2a, 0xab3b9090, 0x830b8888,
0xca8c4646, 0x29c7eeee, 0xd36bb8b8, 0x3c281414, 0x79a7dede, 0xe2bc5e5e, 0x1d160b0b, 0x76addbdb,
0x3bdbe0e0, 0x56643232, 0x4e743a3a, 0x1e140a0a, 0xdb924949, 0x0a0c0606, 0x6c482424, 0xe4b85c5c,
0x5d9fc2c2, 0x6ebdd3d3, 0xef43acac, 0xa6c46262, 0xa8399191, 0xa4319595, 0x37d3e4e4, 0x8bf27979,
0x32d5e7e7, 0x438bc8c8, 0x596e3737, 0xb7da6d6d, 0x8c018d8d, 0x64b1d5d5, 0xd29c4e4e, 0xe049a9a9,
0xb4d86c6c, 0xfaac5656, 0x07f3f4f4, 0x25cfeaea, 0xafca6565, 0x8ef47a7a, 0xe947aeae, 0x18100808,
0xd56fbaba, 0x88f07878, 0x6f4a2525, 0x725c2e2e, 0x24381c1c, 0xf157a6a6, 0xc773b4b4, 0x5197c6c6,
0x23cbe8e8, 0x7ca1dddd, 0x9ce87474, 0x213e1f1f, 0xdd964b4b, 0xdc61bdbd, 0x860d8b8b, 0x850f8a8a,
0x90e07070, 0x427c3e3e, 0xc471b5b5, 0xaacc6666, 0xd8904848, 0x05060303, 0x01f7f6f6, 0x121c0e0e,
0xa3c26161, 0x5f6a3535, 0xf9ae5757, 0xd069b9b9, 0x91178686, 0x5899c1c1, 0x273a1d1d, 0xb9279e9e,
0x38d9e1e1, 0x13ebf8f8, 0xb32b9898, 0x33221111, 0xbbd26969, 0x70a9d9d9, 0x89078e8e, 0xa7339494,
0xb62d9b9b, 0x223c1e1e, 0x92158787, 0x20c9e9e9, 0x4987cece, 0xffaa5555, 0x78502828, 0x7aa5dfdf,
0x8f038c8c, 0xf859a1a1, 0x80098989, 0x171a0d0d, 0xda65bfbf, 0x31d7e6e6, 0xc6844242, 0xb8d06868,
0xc3824141, 0xb0299999, 0x775a2d2d, 0x111e0f0f, 0xcb7bb0b0, 0xfca85454, 0xd66dbbbb, 0x3a2c1616,
}
var te2 = [256]uint32{
0x63a5c663, 0x7c84f87c, 0x7799ee77, 0x7b8df67b, 0xf20dfff2, 0x6bbdd66b, 0x6fb1de6f, 0xc55491c5,
0x30506030, 0x01030201, 0x67a9ce67, 0x2b7d562b, 0xfe19e7fe, 0xd762b5d7, 0xabe64dab, 0x769aec76,
0xca458fca, 0x829d1f82, 0xc94089c9, 0x7d87fa7d, 0xfa15effa, 0x59ebb259, 0x47c98e47, 0xf00bfbf0,
0xadec41ad, 0xd467b3d4, 0xa2fd5fa2, 0xafea45af, 0x9cbf239c, 0xa4f753a4, 0x7296e472, 0xc05b9bc0,
0xb7c275b7, 0xfd1ce1fd, 0x93ae3d93, 0x266a4c26, 0x365a6c36, 0x3f417e3f, 0xf702f5f7, 0xcc4f83cc,
0x345c6834, 0xa5f451a5, 0xe534d1e5, 0xf108f9f1, 0x7193e271, 0xd873abd8, 0x31536231, 0x153f2a15,
0x040c0804, 0xc75295c7, 0x23654623, 0xc35e9dc3, 0x18283018, 0x96a13796, 0x050f0a05, 0x9ab52f9a,
0x07090e07, 0x12362412, 0x809b1b80, 0xe23ddfe2, 0xeb26cdeb, 0x27694e27, 0xb2cd7fb2, 0x759fea75,
0x091b1209, 0x839e1d83, 0x2c74582c, 0x1a2e341a, 0x1b2d361b, 0x6eb2dc6e, 0x5aeeb45a, 0xa0fb5ba0,
0x52f6a452, 0x3b4d763b, 0xd661b7d6, 0xb3ce7db3, 0x297b5229, 0xe33edde3, 0x2f715e2f, 0x84971384,
0x53f5a653, 0xd168b9d1, 0x00000000, 0xed2cc1ed, 0x20604020, 0xfc1fe3fc, 0xb1c879b1, 0x5bedb65b,
0x6abed46a, 0xcb468dcb, 0xbed967be, 0x394b7239, 0x4ade944a, 0x4cd4984c, 0x58e8b058, 0xcf4a85cf,
0xd06bbbd0, 0xef2ac5ef, 0xaae54faa, 0xfb16edfb, 0x43c58643, 0x4dd79a4d, 0x33556633, 0x85941185,
0x45cf8a45, 0xf910e9f9, 0x02060402, 0x7f81fe7f, 0x50f0a050, 0x3c44783c, 0x9fba259f, 0xa8e34ba8,
0x51f3a251, 0xa3fe5da3, 0x40c08040, 0x8f8a058f, 0x92ad3f92, 0x9dbc219d, 0x38487038, 0xf504f1f5,
0xbcdf63bc, 0xb6c177b6, 0xda75afda, 0x21634221, 0x10302010, 0xff1ae5ff, 0xf30efdf3, 0xd26dbfd2,
0xcd4c81cd, 0x0c14180c, 0x13352613, 0xec2fc3ec, 0x5fe1be5f, 0x97a23597, 0x44cc8844, 0x17392e17,
0xc45793c4, 0xa7f255a7, 0x7e82fc7e, 0x3d477a3d, 0x64acc864, 0x5de7ba5d, 0x192b3219, 0x7395e673,
0x60a0c060, 0x81981981, 0x4fd19e4f, 0xdc7fa3dc, 0x22664422, 0x2a7e542a, 0x90ab3b90, 0x88830b88,
0x46ca8c46, 0xee29c7ee, 0xb8d36bb8, 0x143c2814, 0xde79a7de, 0x5ee2bc5e, 0x0b1d160b, 0xdb76addb,
0xe03bdbe0, 0x32566432, 0x3a4e743a, 0x0a1e140a, 0x49db9249, 0x060a0c06, 0x246c4824, 0x5ce4b85c,
0xc25d9fc2, 0xd36ebdd3, 0xacef43ac, 0x62a6c462, 0x91a83991, 0x95a43195, 0xe437d3e4, 0x798bf279,
0xe732d5e7, 0xc8438bc8, 0x37596e37, 0x6db7da6d, 0x8d8c018d, 0xd564b1d5, 0x4ed29c4e, 0xa9e049a9,
0x6cb4d86c, 0x56faac56, 0xf407f3f4, 0xea25cfea, 0x65afca65, 0x7a8ef47a, 0xaee947ae, 0x08181008,
0xbad56fba, 0x7888f078, 0x256f4a25, 0x2e725c2e, 0x1c24381c, 0xa6f157a6, 0xb4c773b4, 0xc65197c6,
0xe823cbe8, 0xdd7ca1dd, 0x749ce874, 0x1f213e1f, 0x4bdd964b, 0xbddc61bd, 0x8b860d8b, 0x8a850f8a,
0x7090e070, 0x3e427c3e, 0xb5c471b5, 0x66aacc66, 0x48d89048, 0x03050603, 0xf601f7f6, 0x0e121c0e,
0x61a3c261, 0x355f6a35, 0x57f9ae57, 0xb9d069b9, 0x86911786, 0xc15899c1, 0x1d273a1d, 0x9eb9279e,
0xe138d9e1, 0xf813ebf8, 0x98b32b98, 0x11332211, 0x69bbd269, 0xd970a9d9, 0x8e89078e, 0x94a73394,
0x9bb62d9b, 0x1e223c1e, 0x87921587, 0xe920c9e9, 0xce4987ce, 0x55ffaa55, 0x28785028, 0xdf7aa5df,
0x8c8f038c, 0xa1f859a1, 0x89800989, 0x0d171a0d, 0xbfda65bf, 0xe631d7e6, 0x42c68442, 0x68b8d068,
0x41c38241, 0x99b02999, 0x2d775a2d, 0x0f111e0f, 0xb0cb7bb0, 0x54fca854, 0xbbd66dbb, 0x163a2c16,
}
var te3 = [256]uint32{
0x6363a5c6, 0x7c7c84f8, 0x777799ee, 0x7b7b8df6, 0xf2f20dff, 0x6b6bbdd6, 0x6f6fb1de, 0xc5c55491,
0x30305060, 0x01010302, 0x6767a9ce, 0x2b2b7d56, 0xfefe19e7, 0xd7d762b5, 0xababe64d, 0x76769aec,
0xcaca458f, 0x82829d1f, 0xc9c94089, 0x7d7d87fa, 0xfafa15ef, 0x5959ebb2, 0x4747c98e, 0xf0f00bfb,
0xadadec41, 0xd4d467b3, 0xa2a2fd5f, 0xafafea45, 0x9c9cbf23, 0xa4a4f753, 0x727296e4, 0xc0c05b9b,
0xb7b7c275, 0xfdfd1ce1, 0x9393ae3d, 0x26266a4c, 0x36365a6c, 0x3f3f417e, 0xf7f702f5, 0xcccc4f83,
0x34345c68, 0xa5a5f451, 0xe5e534d1, 0xf1f108f9, 0x717193e2, 0xd8d873ab, 0x31315362, 0x15153f2a,
0x04040c08, 0xc7c75295, 0x23236546, 0xc3c35e9d, 0x18182830, 0x9696a137, 0x05050f0a, 0x9a9ab52f,
0x0707090e, 0x12123624, 0x80809b1b, 0xe2e23ddf, 0xebeb26cd, 0x2727694e, 0xb2b2cd7f, 0x75759fea,
0x09091b12, 0x83839e1d, 0x2c2c7458, 0x1a1a2e34, 0x1b1b2d36, 0x6e6eb2dc, 0x5a5aeeb4, 0xa0a0fb5b,
0x5252f6a4, 0x3b3b4d76, 0xd6d661b7, 0xb3b3ce7d, 0x29297b52, 0xe3e33edd, 0x2f2f715e, 0x84849713,
0x5353f5a6, 0xd1d168b9, 0x00000000, 0xeded2cc1, 0x20206040, 0xfcfc1fe3, 0xb1b1c879, 0x5b5bedb6,
0x6a6abed4, 0xcbcb468d, 0xbebed967, 0x39394b72, 0x4a4ade94, 0x4c4cd498, 0x5858e8b0, 0xcfcf4a85,
0xd0d06bbb, 0xefef2ac5, 0xaaaae54f, 0xfbfb16ed, 0x4343c586, 0x4d4dd79a, 0x33335566, 0x85859411,
0x4545cf8a, 0xf9f910e9, 0x02020604, 0x7f7f81fe, 0x5050f0a0, 0x3c3c4478, 0x9f9fba25, 0xa8a8e34b,
0x5151f3a2, 0xa3a3fe5d, 0x4040c080, 0x8f8f8a05, 0x9292ad3f, 0x9d9dbc21, 0x38384870, 0xf5f504f1,
0xbcbcdf63, 0xb6b6c177, 0xdada75af, 0x21216342, 0x10103020, 0xffff1ae5, 0xf3f30efd, 0xd2d26dbf,
0xcdcd4c81, 0x0c0c1418, 0x13133526, 0xecec2fc3, 0x5f5fe1be, 0x9797a235, 0x4444cc88, 0x1717392e,
0xc4c45793, 0xa7a7f255, 0x7e7e82fc, 0x3d3d477a, 0x6464acc8, 0x5d5de7ba, 0x19192b32, 0x737395e6,
0x6060a0c0, 0x81819819, 0x4f4fd19e, 0xdcdc7fa3, 0x22226644, 0x2a2a7e54, 0x9090ab3b, 0x8888830b,
0x4646ca8c, 0xeeee29c7, 0xb8b8d36b, 0x14143c28, 0xdede79a7, 0x5e5ee2bc, 0x0b0b1d16, 0xdbdb76ad,
0xe0e03bdb, 0x32325664, 0x3a3a4e74, 0x0a0a1e14, 0x4949db92, 0x06060a0c, 0x24246c48, 0x5c5ce4b8,
0xc2c25d9f, 0xd3d36ebd, 0xacacef43, 0x6262a6c4, 0x9191a839, 0x9595a431, 0xe4e437d3, 0x79798bf2,
0xe7e732d5, 0xc8c8438b, 0x3737596e, 0x6d6db7da, 0x8d8d8c01, 0xd5d564b1, 0x4e4ed29c, 0xa9a9e049,
0x6c6cb4d8, 0x5656faac, 0xf4f407f3, 0xeaea25cf, 0x6565afca, 0x7a7a8ef4, 0xaeaee947, 0x08081810,
0xbabad56f, 0x787888f0, 0x25256f4a, 0x2e2e725c, 0x1c1c2438, 0xa6a6f157, 0xb4b4c773, 0xc6c65197,
0xe8e823cb, 0xdddd7ca1, 0x74749ce8, 0x1f1f213e, 0x4b4bdd96, 0xbdbddc61, 0x8b8b860d, 0x8a8a850f,
0x707090e0, 0x3e3e427c, 0xb5b5c471, 0x6666aacc, 0x4848d890, 0x03030506, 0xf6f601f7, 0x0e0e121c,
0x6161a3c2, 0x35355f6a, 0x5757f9ae, 0xb9b9d069, 0x86869117, 0xc1c15899, 0x1d1d273a, 0x9e9eb927,
0xe1e138d9, 0xf8f813eb, 0x9898b32b, 0x11113322, 0x6969bbd2, 0xd9d970a9, 0x8e8e8907, 0x9494a733,
0x9b9bb62d, 0x1e1e223c, 0x87879215, 0xe9e920c9, 0xcece4987, 0x5555ffaa, 0x28287850, 0xdfdf7aa5,
0x8c8c8f03, 0xa1a1f859, 0x89898009, 0x0d0d171a, 0xbfbfda65, 0xe6e631d7, 0x4242c684, 0x6868b8d0,
0x4141c382, 0x9999b029, 0x2d2d775a, 0x0f0f111e, 0xb0b0cb7b, 0x5454fca8, 0xbbbbd66d, 0x16163a2c,
}
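// Lookup tables for decryption. td1, td2 and td3 hold the entries of td0
// rotated right by 8, 16 and 24 bits respectively.
// NOTE(review): the tables can be recomputed by adapting the tests in
// aes_test.go — presumably the test file of Go's crypto/aes package, which
// this file's license header points to; confirm it exists in this repository.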
var td0 = [256]uint32{
0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96, 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25, 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1, 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da, 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd, 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45, 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7, 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5, 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1, 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75, 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46, 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77, 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000, 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927, 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e, 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d, 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd, 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163, 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d, 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422, 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36, 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662, 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3, 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8, 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6, 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815, 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df, 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e, 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89, 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf, 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f, 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190, 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742,
}
var td1 = [256]uint32{
0x5051f4a7, 0x537e4165, 0xc31a17a4, 0x963a275e, 0xcb3bab6b, 0xf11f9d45, 0xabacfa58, 0x934be303,
0x552030fa, 0xf6ad766d, 0x9188cc76, 0x25f5024c, 0xfc4fe5d7, 0xd7c52acb, 0x80263544, 0x8fb562a3,
0x49deb15a, 0x6725ba1b, 0x9845ea0e, 0xe15dfec0, 0x02c32f75, 0x12814cf0, 0xa38d4697, 0xc66bd3f9,
0xe7038f5f, 0x9515929c, 0xebbf6d7a, 0xda955259, 0x2dd4be83, 0xd3587421, 0x2949e069, 0x448ec9c8,
0x6a75c289, 0x78f48e79, 0x6b99583e, 0xdd27b971, 0xb6bee14f, 0x17f088ad, 0x66c920ac, 0xb47dce3a,
0x1863df4a, 0x82e51a31, 0x60975133, 0x4562537f, 0xe0b16477, 0x84bb6bae, 0x1cfe81a0, 0x94f9082b,
0x58704868, 0x198f45fd, 0x8794de6c, 0xb7527bf8, 0x23ab73d3, 0xe2724b02, 0x57e31f8f, 0x2a6655ab,
0x07b2eb28, 0x032fb5c2, 0x9a86c57b, 0xa5d33708, 0xf2302887, 0xb223bfa5, 0xba02036a, 0x5ced1682,
0x2b8acf1c, 0x92a779b4, 0xf0f307f2, 0xa14e69e2, 0xcd65daf4, 0xd50605be, 0x1fd13462, 0x8ac4a6fe,
0x9d342e53, 0xa0a2f355, 0x32058ae1, 0x75a4f6eb, 0x390b83ec, 0xaa4060ef, 0x065e719f, 0x51bd6e10,
0xf93e218a, 0x3d96dd06, 0xaedd3e05, 0x464de6bd, 0xb591548d, 0x0571c45d, 0x6f0406d4, 0xff605015,
0x241998fb, 0x97d6bde9, 0xcc894043, 0x7767d99e, 0xbdb0e842, 0x8807898b, 0x38e7195b, 0xdb79c8ee,
0x47a17c0a, 0xe97c420f, 0xc9f8841e, 0x00000000, 0x83098086, 0x48322bed, 0xac1e1170, 0x4e6c5a72,
0xfbfd0eff, 0x560f8538, 0x1e3daed5, 0x27362d39, 0x640a0fd9, 0x21685ca6, 0xd19b5b54, 0x3a24362e,
0xb10c0a67, 0x0f9357e7, 0xd2b4ee96, 0x9e1b9b91, 0x4f80c0c5, 0xa261dc20, 0x695a774b, 0x161c121a,
0x0ae293ba, 0xe5c0a02a, 0x433c22e0, 0x1d121b17, 0x0b0e090d, 0xadf28bc7, 0xb92db6a8, 0xc8141ea9,
0x8557f119, 0x4caf7507, 0xbbee99dd, 0xfda37f60, 0x9ff70126, 0xbc5c72f5, 0xc544663b, 0x345bfb7e,
0x768b4329, 0xdccb23c6, 0x68b6edfc, 0x63b8e4f1, 0xcad731dc, 0x10426385, 0x40139722, 0x2084c611,
0x7d854a24, 0xf8d2bb3d, 0x11aef932, 0x6dc729a1, 0x4b1d9e2f, 0xf3dcb230, 0xec0d8652, 0xd077c1e3,
0x6c2bb316, 0x99a970b9, 0xfa119448, 0x2247e964, 0xc4a8fc8c, 0x1aa0f03f, 0xd8567d2c, 0xef223390,
0xc787494e, 0xc1d938d1, 0xfe8ccaa2, 0x3698d40b, 0xcfa6f581, 0x28a57ade, 0x26dab78e, 0xa43fadbf,
0xe42c3a9d, 0x0d507892, 0x9b6a5fcc, 0x62547e46, 0xc2f68d13, 0xe890d8b8, 0x5e2e39f7, 0xf582c3af,
0xbe9f5d80, 0x7c69d093, 0xa96fd52d, 0xb3cf2512, 0x3bc8ac99, 0xa710187d, 0x6ee89c63, 0x7bdb3bbb,
0x09cd2678, 0xf46e5918, 0x01ec9ab7, 0xa8834f9a, 0x65e6956e, 0x7eaaffe6, 0x0821bccf, 0xe6ef15e8,
0xd9bae79b, 0xce4a6f36, 0xd4ea9f09, 0xd629b07c, 0xaf31a4b2, 0x312a3f23, 0x30c6a594, 0xc035a266,
0x37744ebc, 0xa6fc82ca, 0xb0e090d0, 0x1533a7d8, 0x4af10498, 0xf741ecda, 0x0e7fcd50, 0x2f1791f6,
0x8d764dd6, 0x4d43efb0, 0x54ccaa4d, 0xdfe49604, 0xe39ed1b5, 0x1b4c6a88, 0xb8c12c1f, 0x7f466551,
0x049d5eea, 0x5d018c35, 0x73fa8774, 0x2efb0b41, 0x5ab3671d, 0x5292dbd2, 0x33e91056, 0x136dd647,
0x8c9ad761, 0x7a37a10c, 0x8e59f814, 0x89eb133c, 0xeecea927, 0x35b761c9, 0xede11ce5, 0x3c7a47b1,
0x599cd2df, 0x3f55f273, 0x791814ce, 0xbf73c737, 0xea53f7cd, 0x5b5ffdaa, 0x14df3d6f, 0x867844db,
0x81caaff3, 0x3eb968c4, 0x2c382434, 0x5fc2a340, 0x72161dc3, 0x0cbce225, 0x8b283c49, 0x41ff0d95,
0x7139a801, 0xde080cb3, 0x9cd8b4e4, 0x906456c1, 0x617bcb84, 0x70d532b6, 0x74486c5c, 0x42d0b857,
}
var td2 = [256]uint32{
0xa75051f4, 0x65537e41, 0xa4c31a17, 0x5e963a27, 0x6bcb3bab, 0x45f11f9d, 0x58abacfa, 0x03934be3,
0xfa552030, 0x6df6ad76, 0x769188cc, 0x4c25f502, 0xd7fc4fe5, 0xcbd7c52a, 0x44802635, 0xa38fb562,
0x5a49deb1, 0x1b6725ba, 0x0e9845ea, 0xc0e15dfe, 0x7502c32f, 0xf012814c, 0x97a38d46, 0xf9c66bd3,
0x5fe7038f, 0x9c951592, 0x7aebbf6d, 0x59da9552, 0x832dd4be, 0x21d35874, 0x692949e0, 0xc8448ec9,
0x896a75c2, 0x7978f48e, 0x3e6b9958, 0x71dd27b9, 0x4fb6bee1, 0xad17f088, 0xac66c920, 0x3ab47dce,
0x4a1863df, 0x3182e51a, 0x33609751, 0x7f456253, 0x77e0b164, 0xae84bb6b, 0xa01cfe81, 0x2b94f908,
0x68587048, 0xfd198f45, 0x6c8794de, 0xf8b7527b, 0xd323ab73, 0x02e2724b, 0x8f57e31f, 0xab2a6655,
0x2807b2eb, 0xc2032fb5, 0x7b9a86c5, 0x08a5d337, 0x87f23028, 0xa5b223bf, 0x6aba0203, 0x825ced16,
0x1c2b8acf, 0xb492a779, 0xf2f0f307, 0xe2a14e69, 0xf4cd65da, 0xbed50605, 0x621fd134, 0xfe8ac4a6,
0x539d342e, 0x55a0a2f3, 0xe132058a, 0xeb75a4f6, 0xec390b83, 0xefaa4060, 0x9f065e71, 0x1051bd6e,
0x8af93e21, 0x063d96dd, 0x05aedd3e, 0xbd464de6, 0x8db59154, 0x5d0571c4, 0xd46f0406, 0x15ff6050,
0xfb241998, 0xe997d6bd, 0x43cc8940, 0x9e7767d9, 0x42bdb0e8, 0x8b880789, 0x5b38e719, 0xeedb79c8,
0x0a47a17c, 0x0fe97c42, 0x1ec9f884, 0x00000000, 0x86830980, 0xed48322b, 0x70ac1e11, 0x724e6c5a,
0xfffbfd0e, 0x38560f85, 0xd51e3dae, 0x3927362d, 0xd9640a0f, 0xa621685c, 0x54d19b5b, 0x2e3a2436,
0x67b10c0a, 0xe70f9357, 0x96d2b4ee, 0x919e1b9b, 0xc54f80c0, 0x20a261dc, 0x4b695a77, 0x1a161c12,
0xba0ae293, 0x2ae5c0a0, 0xe0433c22, 0x171d121b, 0x0d0b0e09, 0xc7adf28b, 0xa8b92db6, 0xa9c8141e,
0x198557f1, 0x074caf75, 0xddbbee99, 0x60fda37f, 0x269ff701, 0xf5bc5c72, 0x3bc54466, 0x7e345bfb,
0x29768b43, 0xc6dccb23, 0xfc68b6ed, 0xf163b8e4, 0xdccad731, 0x85104263, 0x22401397, 0x112084c6,
0x247d854a, 0x3df8d2bb, 0x3211aef9, 0xa16dc729, 0x2f4b1d9e, 0x30f3dcb2, 0x52ec0d86, 0xe3d077c1,
0x166c2bb3, 0xb999a970, 0x48fa1194, 0x642247e9, 0x8cc4a8fc, 0x3f1aa0f0, 0x2cd8567d, 0x90ef2233,
0x4ec78749, 0xd1c1d938, 0xa2fe8cca, 0x0b3698d4, 0x81cfa6f5, 0xde28a57a, 0x8e26dab7, 0xbfa43fad,
0x9de42c3a, 0x920d5078, 0xcc9b6a5f, 0x4662547e, 0x13c2f68d, 0xb8e890d8, 0xf75e2e39, 0xaff582c3,
0x80be9f5d, 0x937c69d0, 0x2da96fd5, 0x12b3cf25, 0x993bc8ac, 0x7da71018, 0x636ee89c, 0xbb7bdb3b,
0x7809cd26, 0x18f46e59, 0xb701ec9a, 0x9aa8834f, 0x6e65e695, 0xe67eaaff, 0xcf0821bc, 0xe8e6ef15,
0x9bd9bae7, 0x36ce4a6f, 0x09d4ea9f, 0x7cd629b0, 0xb2af31a4, 0x23312a3f, 0x9430c6a5, 0x66c035a2,
0xbc37744e, 0xcaa6fc82, 0xd0b0e090, 0xd81533a7, 0x984af104, 0xdaf741ec, 0x500e7fcd, 0xf62f1791,
0xd68d764d, 0xb04d43ef, 0x4d54ccaa, 0x04dfe496, 0xb5e39ed1, 0x881b4c6a, 0x1fb8c12c, 0x517f4665,
0xea049d5e, 0x355d018c, 0x7473fa87, 0x412efb0b, 0x1d5ab367, 0xd25292db, 0x5633e910, 0x47136dd6,
0x618c9ad7, 0x0c7a37a1, 0x148e59f8, 0x3c89eb13, 0x27eecea9, 0xc935b761, 0xe5ede11c, 0xb13c7a47,
0xdf599cd2, 0x733f55f2, 0xce791814, 0x37bf73c7, 0xcdea53f7, 0xaa5b5ffd, 0x6f14df3d, 0xdb867844,
0xf381caaf, 0xc43eb968, 0x342c3824, 0x405fc2a3, 0xc372161d, 0x250cbce2, 0x498b283c, 0x9541ff0d,
0x017139a8, 0xb3de080c, 0xe49cd8b4, 0xc1906456, 0x84617bcb, 0xb670d532, 0x5c74486c, 0x5742d0b8,
}
var td3 = [256]uint32{
0xf4a75051, 0x4165537e, 0x17a4c31a, 0x275e963a, 0xab6bcb3b, 0x9d45f11f, 0xfa58abac, 0xe303934b,
0x30fa5520, 0x766df6ad, 0xcc769188, 0x024c25f5, 0xe5d7fc4f, 0x2acbd7c5, 0x35448026, 0x62a38fb5,
0xb15a49de, 0xba1b6725, 0xea0e9845, 0xfec0e15d, 0x2f7502c3, 0x4cf01281, 0x4697a38d, 0xd3f9c66b,
0x8f5fe703, 0x929c9515, 0x6d7aebbf, 0x5259da95, 0xbe832dd4, 0x7421d358, 0xe0692949, 0xc9c8448e,
0xc2896a75, 0x8e7978f4, 0x583e6b99, 0xb971dd27, 0xe14fb6be, 0x88ad17f0, 0x20ac66c9, 0xce3ab47d,
0xdf4a1863, 0x1a3182e5, 0x51336097, 0x537f4562, 0x6477e0b1, 0x6bae84bb, 0x81a01cfe, 0x082b94f9,
0x48685870, 0x45fd198f, 0xde6c8794, 0x7bf8b752, 0x73d323ab, 0x4b02e272, 0x1f8f57e3, 0x55ab2a66,
0xeb2807b2, 0xb5c2032f, 0xc57b9a86, 0x3708a5d3, 0x2887f230, 0xbfa5b223, 0x036aba02, 0x16825ced,
0xcf1c2b8a, 0x79b492a7, 0x07f2f0f3, 0x69e2a14e, 0xdaf4cd65, 0x05bed506, 0x34621fd1, 0xa6fe8ac4,
0x2e539d34, 0xf355a0a2, 0x8ae13205, 0xf6eb75a4, 0x83ec390b, 0x60efaa40, 0x719f065e, 0x6e1051bd,
0x218af93e, 0xdd063d96, 0x3e05aedd, 0xe6bd464d, 0x548db591, 0xc45d0571, 0x06d46f04, 0x5015ff60,
0x98fb2419, 0xbde997d6, 0x4043cc89, 0xd99e7767, 0xe842bdb0, 0x898b8807, 0x195b38e7, 0xc8eedb79,
0x7c0a47a1, 0x420fe97c, 0x841ec9f8, 0x00000000, 0x80868309, 0x2bed4832, 0x1170ac1e, 0x5a724e6c,
0x0efffbfd, 0x8538560f, 0xaed51e3d, 0x2d392736, 0x0fd9640a, 0x5ca62168, 0x5b54d19b, 0x362e3a24,
0x0a67b10c, 0x57e70f93, 0xee96d2b4, 0x9b919e1b, 0xc0c54f80, 0xdc20a261, 0x774b695a, 0x121a161c,
0x93ba0ae2, 0xa02ae5c0, 0x22e0433c, 0x1b171d12, 0x090d0b0e, 0x8bc7adf2, 0xb6a8b92d, 0x1ea9c814,
0xf1198557, 0x75074caf, 0x99ddbbee, 0x7f60fda3, 0x01269ff7, 0x72f5bc5c, 0x663bc544, 0xfb7e345b,
0x4329768b, 0x23c6dccb, 0xedfc68b6, 0xe4f163b8, 0x31dccad7, 0x63851042, 0x97224013, 0xc6112084,
0x4a247d85, 0xbb3df8d2, 0xf93211ae, 0x29a16dc7, 0x9e2f4b1d, 0xb230f3dc, 0x8652ec0d, 0xc1e3d077,
0xb3166c2b, 0x70b999a9, 0x9448fa11, 0xe9642247, 0xfc8cc4a8, 0xf03f1aa0, 0x7d2cd856, 0x3390ef22,
0x494ec787, 0x38d1c1d9, 0xcaa2fe8c, 0xd40b3698, 0xf581cfa6, 0x7ade28a5, 0xb78e26da, 0xadbfa43f,
0x3a9de42c, 0x78920d50, 0x5fcc9b6a, 0x7e466254, 0x8d13c2f6, 0xd8b8e890, 0x39f75e2e, 0xc3aff582,
0x5d80be9f, 0xd0937c69, 0xd52da96f, 0x2512b3cf, 0xac993bc8, 0x187da710, 0x9c636ee8, 0x3bbb7bdb,
0x267809cd, 0x5918f46e, 0x9ab701ec, 0x4f9aa883, 0x956e65e6, 0xffe67eaa, 0xbccf0821, 0x15e8e6ef,
0xe79bd9ba, 0x6f36ce4a, 0x9f09d4ea, 0xb07cd629, 0xa4b2af31, 0x3f23312a, 0xa59430c6, 0xa266c035,
0x4ebc3774, 0x82caa6fc, 0x90d0b0e0, 0xa7d81533, 0x04984af1, 0xecdaf741, 0xcd500e7f, 0x91f62f17,
0x4dd68d76, 0xefb04d43, 0xaa4d54cc, 0x9604dfe4, 0xd1b5e39e, 0x6a881b4c, 0x2c1fb8c1, 0x65517f46,
0x5eea049d, 0x8c355d01, 0x877473fa, 0x0b412efb, 0x671d5ab3, 0xdbd25292, 0x105633e9, 0xd647136d,
0xd7618c9a, 0xa10c7a37, 0xf8148e59, 0x133c89eb, 0xa927eece, 0x61c935b7, 0x1ce5ede1, 0x47b13c7a,
0xd2df599c, 0xf2733f55, 0x14ce7918, 0xc737bf73, 0xf7cdea53, 0xfdaa5b5f, 0x3d6f14df, 0x44db8678,
0xaff381ca, 0x68c43eb9, 0x24342c38, 0xa3405fc2, 0x1dc37216, 0xe2250cbc, 0x3c498b28, 0x0d9541ff,
0xa8017139, 0x0cb3de08, 0xb4e49cd8, 0x56c19064, 0xcb84617b, 0x32b670d5, 0x6c5c7448, 0xb85742d0,
}

176
aes_hash.go Normal file
View file

@ -0,0 +1,176 @@
package randomx
import "fmt"
import "math/bits"
import "encoding/binary"
// tmp_______ is not referenced anywhere in the visible part of this file.
// NOTE(review): presumably a leftover placeholder that keeps the fmt import
// in use — confirm before removing.
var tmp_______ = fmt.Sprintf("dd")
// Initial lane states copied by hashAes1Rx4 before it absorbs any input, one
// per lane. Each literal is passed through ARRAY_TO_BIGENDIAN at package
// initialisation.
var (
	AES_HASH_1R_STATE0 = ARRAY_TO_BIGENDIAN([4]uint32{0xd7983aad, 0xcc82db47, 0x9fa856de, 0x92b52c0d})
	AES_HASH_1R_STATE1 = ARRAY_TO_BIGENDIAN([4]uint32{0xace78057, 0xf59e125a, 0x15c7b798, 0x338d996e})
	AES_HASH_1R_STATE2 = ARRAY_TO_BIGENDIAN([4]uint32{0xe8a07ce4, 0x5079506b, 0xae62c7d0, 0x6a770017})
	AES_HASH_1R_STATE3 = ARRAY_TO_BIGENDIAN([4]uint32{0x7e994948, 0x79a10005, 0x07ad828d, 0x630a240c})
)

// Round keys that hashAes1Rx4 applies to every lane in its two closing
// rounds, XKEY0 first and XKEY1 second.
var (
	AES_HASH_1R_XKEY0 = ARRAY_TO_BIGENDIAN([4]uint32{0x06890201, 0x90dc56bf, 0x8b24949f, 0xf6fa8389})
	AES_HASH_1R_XKEY1 = ARRAY_TO_BIGENDIAN([4]uint32{0xed18f99b, 0xee1043c6, 0x51f4e03c, 0x61b263d1})
)
// hashAes1Rx4 folds input into a 64-byte digest stored in output[0:64]; it is
// used for the final hash calculation.
//
// Four 128-bit lanes start from AES_HASH_1R_STATE0..3. Every 64-byte chunk of
// input is read as sixteen little-endian words, four per lane, and those four
// words serve as the round key of one AES round on that lane: lanes 0 and 2
// go through AES_ENC_ROUND, lanes 1 and 3 through AES_DEC_ROUND. After the
// input is consumed, two more rounds keyed with AES_HASH_1R_XKEY0 and then
// AES_HASH_1R_XKEY1 are applied to all lanes, and the lanes are written to
// output as big-endian words.
//
// The function panics if the final chunk of input holds fewer than 64 bytes
// or if output is shorter than 64 bytes. The digest is also printed to
// standard output.
func hashAes1Rx4(input []byte, output []byte) {
	lanes := [4][4]uint32{
		AES_HASH_1R_STATE0,
		AES_HASH_1R_STATE1,
		AES_HASH_1R_STATE2,
		AES_HASH_1R_STATE3,
	}

	var chunk [4][4]uint32
	for base := 0; base < len(input); base += 64 {
		// Load the next 64 bytes: 16 bytes (four words) per lane.
		for lane := range chunk {
			for word := range chunk[lane] {
				off := base + 16*lane + 4*word
				chunk[lane][word] = binary.LittleEndian.Uint32(input[off:])
			}
		}

		AES_ENC_ROUND(lanes[0][:], chunk[0][:])
		AES_DEC_ROUND(lanes[1][:], chunk[1][:])
		AES_ENC_ROUND(lanes[2][:], chunk[2][:])
		AES_DEC_ROUND(lanes[3][:], chunk[3][:])
	}

	// Two closing rounds; every lane uses the same key within a round.
	for _, xkey := range []*[4]uint32{&AES_HASH_1R_XKEY0, &AES_HASH_1R_XKEY1} {
		AES_ENC_ROUND(lanes[0][:], xkey[:])
		AES_DEC_ROUND(lanes[1][:], xkey[:])
		AES_ENC_ROUND(lanes[2][:], xkey[:])
		AES_DEC_ROUND(lanes[3][:], xkey[:])
	}

	// Serialise the four lanes back to back, big-endian.
	for lane := range lanes {
		for word, v := range lanes[lane] {
			binary.BigEndian.PutUint32(output[16*lane+4*word:], v)
		}
	}

	// NOTE(review): looks like leftover debug output — confirm whether callers rely on it.
	fmt.Printf("aes hash %x\n", output)
}
// Per-lane round keys consumed by fillAes1Rx4 when generating the scratchpad:
// KEY0 drives lane 0, KEY1 lane 1, and so on. Each literal is passed through
// ARRAY_TO_BIGENDIAN at package initialisation.
var (
	AES_GEN_1R_KEY0 = ARRAY_TO_BIGENDIAN([4]uint32{0xb4f44917, 0xdbb5552b, 0x62716609, 0x6daca553})
	AES_GEN_1R_KEY1 = ARRAY_TO_BIGENDIAN([4]uint32{0x0da1dc4e, 0x1725d378, 0x846a710d, 0x6d7caf07})
	AES_GEN_1R_KEY2 = ARRAY_TO_BIGENDIAN([4]uint32{0x3e20e345, 0xf4c0794f, 0x9f947ec6, 0x3f1262f1})
	AES_GEN_1R_KEY3 = ARRAY_TO_BIGENDIAN([4]uint32{0x49169154, 0x16314c88, 0xb1ba317c, 0x6aef8135})
)
// ARRAY_TO_BIGENDIAN returns input with the order of its four words reversed
// and the bytes inside every word reversed as well, i.e. the 16 bytes of the
// array are mirrored end to end.
func ARRAY_TO_BIGENDIAN(input [4]uint32) (output [4]uint32) {
	last := len(input) - 1
	for pos, word := range input {
		output[last-pos] = bits.ReverseBytes32(word)
	}
	return output
}
// fillAes1Rx4 fills output with a stream derived from the 64-byte generator
// state in state_start.
//
// The state is loaded as four 4-word lanes (words read big-endian). For every
// 64 bytes of output each lane receives one round — decrypt for lanes 0 and
// 2, encrypt for lanes 1 and 3 — keyed with AES_GEN_1R_KEY0..3, and the lanes
// are then written to output as little-endian words. When output is full the
// advanced state is stored back into state_start (big-endian) so a later call
// continues the stream.
//
// state_start needs at least 64 bytes; a len(output) that is not a multiple
// of 64 makes the final store panic.
func fillAes1Rx4(state_start []byte, output []byte) {
	var lanes [4][4]uint32
	for w := 0; w < 16; w++ {
		lanes[w/4][w%4] = binary.BigEndian.Uint32(state_start[4*w:])
	}

	for pos := 0; pos < len(output); pos += 64 {
		AES_DEC_ROUND(lanes[0][:], AES_GEN_1R_KEY0[:])
		AES_ENC_ROUND(lanes[1][:], AES_GEN_1R_KEY1[:])
		AES_DEC_ROUND(lanes[2][:], AES_GEN_1R_KEY2[:])
		AES_ENC_ROUND(lanes[3][:], AES_GEN_1R_KEY3[:])

		dst := output[pos:]
		for w := 0; w < 16; w++ {
			binary.LittleEndian.PutUint32(dst[4*w:], lanes[w/4][w%4])
		}
	}

	// persist the advanced state for the caller
	for w := 0; w < 16; w++ {
		binary.BigEndian.PutUint32(state_start[4*w:], lanes[w/4][w%4])
	}
}
// AES_ENC_ROUND replaces the four words in state with the result of one
// table-driven encryption round: every output word is the XOR of key[i] and
// one lookup in each of te0..te3, indexed by successive bytes (most
// significant first) taken from rotating state words.
// state and key must each hold at least four words.
func AES_ENC_ROUND(state []uint32, key []uint32) {
	// snapshot the input words; the stores below must not feed back into the lookups
	a, b, c, d := state[0], state[1], state[2], state[3]

	state[0] = te0[uint8(a>>24)] ^ te1[uint8(b>>16)] ^ te2[uint8(c>>8)] ^ te3[uint8(d)] ^ key[0]
	state[1] = te0[uint8(b>>24)] ^ te1[uint8(c>>16)] ^ te2[uint8(d>>8)] ^ te3[uint8(a)] ^ key[1]
	state[2] = te0[uint8(c>>24)] ^ te1[uint8(d>>16)] ^ te2[uint8(a>>8)] ^ te3[uint8(b)] ^ key[2]
	state[3] = te0[uint8(d>>24)] ^ te1[uint8(a>>16)] ^ te2[uint8(b>>8)] ^ te3[uint8(c)] ^ key[3]
}
// AES_DEC_ROUND replaces the four words in state with the result of one
// table-driven decryption round: every output word is the XOR of key[i] and
// one lookup in each of td0..td3. Compared with AES_ENC_ROUND the state words
// feeding the second and fourth lookup rotate in the opposite direction.
// state and key must each hold at least four words.
func AES_DEC_ROUND(state []uint32, key []uint32) {
	// snapshot the input words; the stores below must not feed back into the lookups
	a, b, c, d := state[0], state[1], state[2], state[3]

	state[0] = td0[uint8(a>>24)] ^ td1[uint8(d>>16)] ^ td2[uint8(c>>8)] ^ td3[uint8(b)] ^ key[0]
	state[1] = td0[uint8(b>>24)] ^ td1[uint8(a>>16)] ^ td2[uint8(d>>8)] ^ td3[uint8(c)] ^ key[1]
	state[2] = td0[uint8(c>>24)] ^ td1[uint8(b>>16)] ^ td2[uint8(a>>8)] ^ td3[uint8(d)] ^ key[2]
	state[3] = td0[uint8(d>>24)] ^ td1[uint8(c>>16)] ^ td2[uint8(b>>8)] ^ td3[uint8(a)] ^ key[3]
}
// Round keys used by fillAes4Rx4 (the original note attributes them to the
// RandomX spec). KEY0..KEY3 are applied, in that order, to lanes 0 and 1;
// KEY4..KEY7 to lanes 2 and 3. Each literal is converted once by
// ARRAY_TO_BIGENDIAN.
var AES_GEN_4R_KEY0 = ARRAY_TO_BIGENDIAN([4]uint32{0x99e5d23f, 0x2f546d2b, 0xd1833ddb, 0x6421aadd})
var AES_GEN_4R_KEY1 = ARRAY_TO_BIGENDIAN([4]uint32{0xa5dfcde5, 0x06f79d53, 0xb6913f55, 0xb20e3450})
var AES_GEN_4R_KEY2 = ARRAY_TO_BIGENDIAN([4]uint32{0x171c02bf, 0x0aa4679f, 0x515e7baf, 0x5c3ed904})
var AES_GEN_4R_KEY3 = ARRAY_TO_BIGENDIAN([4]uint32{0xd8ded291, 0xcd673785, 0xe78f5d08, 0x85623763})
var AES_GEN_4R_KEY4 = ARRAY_TO_BIGENDIAN([4]uint32{0x229effb4, 0x3d518b6d, 0xe3d6a7a6, 0xb5826f73})
var AES_GEN_4R_KEY5 = ARRAY_TO_BIGENDIAN([4]uint32{0xb272b7d2, 0xe9024d4e, 0x9c10b3d9, 0xc7566bf3})
var AES_GEN_4R_KEY6 = ARRAY_TO_BIGENDIAN([4]uint32{0xf63befa7, 0x2ba9660a, 0xf765a38b, 0xf273c9e7})
var AES_GEN_4R_KEY7 = ARRAY_TO_BIGENDIAN([4]uint32{0xc0b0762d, 0x0c06d1fd, 0x915839de, 0x7a7cd609})
// used to generate final program
func fillAes4Rx4(state_start []byte, output []byte) {
var states [4][4]uint32
for i := 0; i < 63; i += 4 {
states[i/16][(i%16)/4] = binary.BigEndian.Uint32(state_start[i:])
}
outptr := 0
for ; outptr < len(output); outptr += 64 {
AES_DEC_ROUND(states[0][:], AES_GEN_4R_KEY0[:])
AES_ENC_ROUND(states[1][:], AES_GEN_4R_KEY0[:])
AES_DEC_ROUND(states[2][:], AES_GEN_4R_KEY4[:])
AES_ENC_ROUND(states[3][:], AES_GEN_4R_KEY4[:])
AES_DEC_ROUND(states[0][:], AES_GEN_4R_KEY1[:])
AES_ENC_ROUND(states[1][:], AES_GEN_4R_KEY1[:])
AES_DEC_ROUND(states[2][:], AES_GEN_4R_KEY5[:])
AES_ENC_ROUND(states[3][:], AES_GEN_4R_KEY5[:])
AES_DEC_ROUND(states[0][:], AES_GEN_4R_KEY2[:])
AES_ENC_ROUND(states[1][:], AES_GEN_4R_KEY2[:])
AES_DEC_ROUND(states[2][:], AES_GEN_4R_KEY6[:])
AES_ENC_ROUND(states[3][:], AES_GEN_4R_KEY6[:])
AES_DEC_ROUND(states[0][:], AES_GEN_4R_KEY3[:])
AES_ENC_ROUND(states[1][:], AES_GEN_4R_KEY3[:])
AES_DEC_ROUND(states[2][:], AES_GEN_4R_KEY7[:])
AES_ENC_ROUND(states[3][:], AES_GEN_4R_KEY7[:])
// store bytes to output buffer
for i := 0; i < 63; i += 4 {
binary.BigEndian.PutUint32(output[outptr+i:], states[i/16][(i%16)/4])
}
}
}

226
config.go Normal file
View file

@ -0,0 +1,226 @@
package randomx
import "fmt"
import "encoding/binary"
import "golang.org/x/crypto/blake2b"
import _ "unsafe"
import _ "golang.org/x/crypto/argon2"
// Tunable parameters; see configuration.h of the reference implementation.
//Cache size in KiB. Must be a power of 2.
const RANDOMX_ARGON_MEMORY = 262144
//Number of Argon2d iterations for Cache initialization.
const RANDOMX_ARGON_ITERATIONS = 3
//Number of parallel lanes for Cache initialization.
const RANDOMX_ARGON_LANES = 1
//Argon2d salt
const RANDOMX_ARGON_SALT = "RandomX\x03"
// Length of RANDOMX_ARGON_SALT in bytes.
const ArgonSaltSize uint32 = 8 //sizeof("" RANDOMX_ARGON_SALT) - 1;
//Number of random Cache accesses per Dataset item. Minimum is 2.
const RANDOMX_CACHE_ACCESSES = 8
//Target latency for SuperscalarHash (in cycles of the reference CPU).
const RANDOMX_SUPERSCALAR_LATENCY = 170
//Dataset base size in bytes. Must be a power of 2.
const RANDOMX_DATASET_BASE_SIZE = 2147483648
//Dataset extra size. Must be divisible by 64.
const RANDOMX_DATASET_EXTRA_SIZE = 33554368
//Number of instructions in a RandomX program. Must be divisible by 8.
const RANDOMX_PROGRAM_SIZE = 256
//Number of iterations during VM execution.
const RANDOMX_PROGRAM_ITERATIONS = 2048
//Number of chained VM executions per hash.
const RANDOMX_PROGRAM_COUNT = 8
//Scratchpad L3 size in bytes. Must be a power of 2.
const RANDOMX_SCRATCHPAD_L3 = 2097152
//Scratchpad L2 size in bytes. Must be a power of two and less than or equal to RANDOMX_SCRATCHPAD_L3.
const RANDOMX_SCRATCHPAD_L2 = 262144
//Scratchpad L1 size in bytes. Must be a power of two (minimum 64) and less than or equal to RANDOMX_SCRATCHPAD_L2.
const RANDOMX_SCRATCHPAD_L1 = 16384
//Jump condition mask size in bits.
const RANDOMX_JUMP_BITS = 8
//Jump condition mask offset in bits. The sum of RANDOMX_JUMP_BITS and RANDOMX_JUMP_OFFSET must not exceed 16.
const RANDOMX_JUMP_OFFSET = 8
// Everything below is derived from the tunables above or is a fixed layout constant.
// Extra dataset size expressed in dataset items.
const DATASETEXTRAITEMS = RANDOMX_DATASET_EXTRA_SIZE / RANDOMX_DATASET_ITEM_SIZE
// Bytes per Argon2 block; matches the 128 x uint64 `block` type of this package.
const ArgonBlockSize uint32 = 1024
// Upper bound on the number of instructions of one superscalar program.
const SuperscalarMaxSize int = 3*RANDOMX_SUPERSCALAR_LATENCY + 2
// Bytes per dataset item; also used as the cache line size.
const RANDOMX_DATASET_ITEM_SIZE uint64 = 64
const CacheLineSize uint64 = RANDOMX_DATASET_ITEM_SIZE
const ScratchpadSize uint32 = RANDOMX_SCRATCHPAD_L3
// Keeps the bits of an offset below the dataset base size and clears the bits inside a cache line.
const CacheLineAlignMask = (RANDOMX_DATASET_BASE_SIZE - 1) & (^(CacheLineSize - 1))
// Cache size in bytes (RANDOMX_ARGON_MEMORY is in KiB, i.e. in Argon2 blocks).
const CacheSize uint64 = RANDOMX_ARGON_MEMORY * uint64(ArgonBlockSize)
// Scratchpad level sizes divided by 8.
const ScratchpadL1 = RANDOMX_SCRATCHPAD_L1 / 8
const ScratchpadL2 = RANDOMX_SCRATCHPAD_L2 / 8
const ScratchpadL3 = RANDOMX_SCRATCHPAD_L3 / 8
// Offset masks for each scratchpad level; the low 3 bits (Mask), 4 bits
// (Mask16) or 6 bits (Mask64) of a masked offset are always zero.
const ScratchpadL1Mask = (ScratchpadL1 - 1) * 8
const ScratchpadL2Mask = (ScratchpadL2 - 1) * 8
const ScratchpadL1Mask16 = (ScratchpadL1/2 - 1) * 16
const ScratchpadL2Mask16 = (ScratchpadL2/2 - 1) * 16
const ScratchpadL3Mask = (ScratchpadL3 - 1) * 8
const ScratchpadL3Mask64 = (ScratchpadL3/8 - 1) * 64
// Jump condition offset and a mask with the low RANDOMX_JUMP_BITS bits set.
const CONDITIONOFFSET = RANDOMX_JUMP_OFFSET
const CONDITIONMASK = ((1 << RANDOMX_JUMP_BITS) - 1)
const STOREL3CONDITION = 14
const REGISTERSCOUNT = 8
const REGISTERCOUNTFLT = 4
// Bit-field widths, masks and bias. The values 52 / 11 / 1023 equal the IEEE-754
// double-precision layout — presumably used for float64 register handling
// elsewhere in the package; confirm against the users of these constants.
const mantissaSize = 52
const exponentSize = 11
const mantissaMask = (uint64(1) << mantissaSize) - 1
const exponentMask = (uint64(1) << exponentSize) - 1
const exponentBias = 1023
const dynamicExponentBits = 4
const staticExponentBits = 4
const constExponentBits uint64 = 0x300
const dynamicMantissaMask = (uint64(1) << (mantissaSize + dynamicExponentBits)) - 1
// Flag values. Randomx_alloc_cache in this file takes a flags argument but does not read it.
const RANDOMX_FLAG_DEFAULT = 0
const RANDOMX_FLAG_JIT = 1
const RANDOMX_FLAG_LARGE_PAGES = 2
// isZeroOrPowerOf2 reports whether x has at most one bit set, i.e. whether x
// is 0 or an exact power of two.
func isZeroOrPowerOf2(x uint64) bool {
	// x-1 flips the lowest set bit and everything below it, so the AND
	// clears exactly that bit; nothing remains when it was the only one.
	withoutLowestBit := x & (x - 1)
	return withoutLowestBit == 0
}
// Blake2Generator is a byte-stream generator backed by a 64-byte buffer.
// dataindex is the position of the next unread byte in data.
type Blake2Generator struct {
	data      [64]byte
	dataindex int
}

// Init_Blake2Generator returns a generator seeded from key and nonce.
//
// The first 60 bytes of the buffer receive key (truncated when longer,
// zero-padded when shorter); the last four bytes receive nonce in
// little-endian order. dataindex starts at the end of the buffer, so the
// seed itself is never handed out: the first read has to refresh the buffer.
func Init_Blake2Generator(key []byte, nonce uint32) *Blake2Generator {
	const seedLen = 60

	gen := new(Blake2Generator)
	gen.dataindex = len(gen.data)
	copy(gen.data[:seedLen], key) // copy stops at the shorter of the two
	binary.LittleEndian.PutUint32(gen.data[seedLen:], nonce)
	return gen
}
// checkdata makes sure bytesNeeded unread bytes are available. When the
// request would run past the end of the buffer, the whole buffer is replaced
// by its own BLAKE2b-512 digest and reading restarts at offset 0; any unread
// tail of the old buffer is discarded.
func (b *Blake2Generator) checkdata(bytesNeeded int) {
	if b.dataindex+bytesNeeded <= len(b.data) {
		return // enough bytes left
	}
	//blake2b(data, sizeof(data), data, sizeof(data), nullptr, 0);
	digest := blake2b.Sum512(b.data[:])
	copy(b.data[:], digest[:])
	b.dataindex = 0
}
// GetByte returns the next byte of the stream and advances the read position
// by one. The value is also printed as a debug trace.
func (b *Blake2Generator) GetByte() byte {
	b.checkdata(1)
	next := b.data[b.dataindex]
	b.dataindex++
	fmt.Printf("returning byte %02x\n", next)
	return next
}
// GetUint32 returns the next four bytes of the stream decoded as a
// little-endian uint32 and advances the read position by four. The value is
// printed twice as a debug trace, once before and once after the advance.
func (b *Blake2Generator) GetUint32() uint32 {
	b.checkdata(4)
	window := b.data[b.dataindex:]
	value := binary.LittleEndian.Uint32(window)
	fmt.Printf("returning int32 %08x %08x\n", value, binary.LittleEndian.Uint32(window))
	b.dataindex += 4
	fmt.Printf("returning int32 %08x\n", value)
	return value
}
// Randomx_Cache holds the Argon2-filled memory blocks and the superscalar
// programs that go with them. Programs is not filled in by anything in this
// block; the example and the test assign its entries after Randomx_init_cache.
type Randomx_Cache struct {
	Blocks   []block
	Programs [RANDOMX_PROGRAM_COUNT]*SuperScalarProgram
}

// Randomx_alloc_cache returns an empty cache. flags is accepted but not used.
func Randomx_alloc_cache(flags uint64) *Randomx_Cache {
	return new(Randomx_Cache)
}

// Randomx_init_cache (re)builds cache.Blocks from key by running buildBlocks
// in argon2d mode with the RANDOMX_ARGON_* parameters and salt, empty secret
// and data, and a key length of 0. The key is copied first, so the caller's
// slice is never handed to the Argon2 code.
func (cache *Randomx_Cache) Randomx_init_cache(key []byte) {
	fmt.Printf("appending null byte is not necessary but only done for testing")

	kkey := make([]byte, len(key))
	copy(kkey, key)
	//kkey = append(kkey,0)
	//cache->initialize(cache, key, keySize);

	salt := []byte(RANDOMX_ARGON_SALT)
	cache.Blocks = buildBlocks(argon2d, kkey, salt, []byte{}, []byte{}, RANDOMX_ARGON_ITERATIONS, RANDOMX_ARGON_MEMORY, RANDOMX_ARGON_LANES, 0)
}

// GetBlock copies cache content into out, starting at the cache line selected
// by addr. addr is a line index; it is reduced modulo the number of cache
// lines and converted to a byte offset before the lookup.
// The copy ends at len(out) or at the end of the 1024-byte block containing
// the line, whichever comes first, so out normally has 8 elements (64 bytes).
func (cache *Randomx_Cache) GetBlock(addr uint64, out []uint64) {
	const blockBytes = 1024 // bytes per element of cache.Blocks
	const wordBytes = 8

	lineCount := CacheSize / CacheLineSize
	offset := (addr & (lineCount - 1)) * CacheLineSize

	blockIdx := offset / blockBytes
	wordIdx := (offset % blockBytes) / wordBytes
	copy(out, cache.Blocks[blockIdx][wordIdx:])
}
// some constants for argon: the mode selector passed as `mode` to the linked
// argon2 functions below. Randomx_init_cache uses argon2d.
const (
	argon2d = iota
	argon2i
	argon2id
)
// block is one 1024-byte unit of cache memory, held as 128 64-bit words.
type block [128]uint64
// syncPoints is used by buildBlocks to round the memory size down to a
// multiple of syncPoints*threads.
const syncPoints = 4
// The three declarations below have no body. Their go:linkname directives bind
// them to unexported functions of golang.org/x/crypto/argon2, which is why that
// package and "unsafe" are blank-imported at the top of the file.
// NOTE(review): this relies on internals of the argon2 package (signatures and
// the layout of its block type) — re-check when the dependency is upgraded.
//go:linkname argon2_initHash golang.org/x/crypto/argon2.initHash
func argon2_initHash(password, salt, key, data []byte, time, memory, threads, keyLen uint32, mode int) [blake2b.Size + 8]byte
//go:linkname argon2_initBlocks golang.org/x/crypto/argon2.initBlocks
func argon2_initBlocks(h0 *[blake2b.Size + 8]byte, memory, threads uint32) []block
//go:linkname argon2_processBlocks golang.org/x/crypto/argon2.processBlocks
func argon2_processBlocks(B []block, time, memory, threads uint32, mode int)
// buildBlocks runs the linked Argon2 initialisation and block-processing
// steps and returns the filled memory blocks themselves instead of
// extracting a key from them.
//
// It panics when time or threads is zero. memory (in blocks) is rounded down
// to a multiple of syncPoints*threads and raised to at least twice that
// value; the initial hash is computed from the unrounded memory figure.
func buildBlocks(mode int, password, salt, secret, data []byte, time, memory uint32, threads uint8, keyLen uint32) []block {
	switch {
	case time < 1:
		panic("argon2: number of rounds too small")
	case threads < 1:
		panic("argon2: parallelism degree too low")
	}

	lanes := uint32(threads)
	seed := argon2_initHash(password, salt, secret, data, time, memory, lanes, keyLen, mode)

	granule := syncPoints * lanes
	memory = memory / granule * granule
	if floor := 2 * granule; memory < floor {
		memory = floor
	}

	blocks := argon2_initBlocks(&seed, memory, lanes)
	argon2_processBlocks(blocks, time, memory, lanes, mode)
	//return extractKey(B, memory, uint32(threads), keyLen)
	return blocks
}

65
example.go Normal file
View file

@ -0,0 +1,65 @@
//+build ignore
package main
import "randomx"
import "fmt"
// main is a small end-to-end demo: it initialises a cache from a fixed key,
// builds one superscalar program per cache slot, creates a VM and prints the
// hash of a fixed input twice.
func main() {
	key := []byte("RandomX example key\x00")
	myinput := []byte("RandomX example input\x00")

	c := randomx.Randomx_alloc_cache(0)
	c.Randomx_init_cache(key)

	// All programs are drawn from one generator seeded with the key and nonce 0.
	gen := randomx.Init_Blake2Generator(key, uint32(0))
	for slot := range c.Programs {
		c.Programs[slot] = randomx.Build_SuperScalar_Program(gen) // build a superscalar program
	}

	vm := c.VM_Initialize()

	// Hash the same input twice with the same VM.
	var output_hash [32]byte
	for run := 0; run < 2; run++ {
		vm.CalculateHash(myinput, output_hash[:])
		fmt.Printf("final output hash %x\n", output_hash)
	}
}

1
randomx.go Normal file
View file

@ -0,0 +1 @@
package randomx

42
randomx_test.go Normal file
View file

@ -0,0 +1,42 @@
package randomx
import "fmt"
import "testing"
// Test_Randomx checks the full pipeline (cache initialisation, superscalar
// program generation, VM hash) against known key/input/hash vectors.
// One cache is reused for all vectors; it is re-initialised for every key.
func Test_Randomx(t *testing.T) {
	var Tests = []struct {
		key      []byte // key
		input    []byte // input
		expected string // expected result
	}{
		{[]byte("RandomX example key\x00"), []byte("RandomX example input\x00"), "8a48e5f9db45ab79d9080574c4d81954fe6ac63842214aff73c244b26330b7c9"},
		{[]byte("test key 000"), []byte("This is a test"), "639183aae1bf4c9a35884cb46b09cad9175f04efd7684e7262a0ac1c2f0b4e3f"}, // test a
		//	{[]byte("test key 000"), []byte("Lorem ipsum dolor sit amet"), "300a0adb47603dedb42228ccb2b211104f4da45af709cd7547cd049e9489c969" }, // test b
		{[]byte("test key 000"), []byte("sed do eiusmod tempor incididunt ut labore et dolore magna aliqua"), "c36d4ed4191e617309867ed66a443be4075014e2b061bcdaf9ce7b721d2b77a8"}, // test c
		{[]byte("test key 001"), []byte("sed do eiusmod tempor incididunt ut labore et dolore magna aliqua"), "e9ff4503201c0c2cca26d285c93ae883f9b1d30c9eb240b820756f2d5a7905fc"}, // test d
	}
	c := Randomx_alloc_cache(0)
	for _, tt := range Tests {
		c.Randomx_init_cache(tt.key)
		nonce := uint32(0) //uint32(len(key))
		gen := Init_Blake2Generator(tt.key, nonce)
		for i := 0; i < 8; i++ {
			c.Programs[i] = Build_SuperScalar_Program(gen) // build a superscalar program
		}
		vm := c.VM_Initialize()
		var output_hash [32]byte
		vm.CalculateHash(tt.input, output_hash[:])
		actual := fmt.Sprintf("%x", output_hash)
		if actual != tt.expected {
			// Name the failing vector readably: %q prints key and input as
			// quoted strings (the old message said "Fib" and dumped the key
			// as a list of byte numbers without showing the input).
			t.Errorf("Randomx(key=%q, input=%q): expected %s, actual %s", tt.key, tt.input, tt.expected, actual)
		}
	}
}

998
superscalar.go Normal file
View file

@ -0,0 +1,998 @@
package randomx
import "fmt"
import "math"
import "math/bits"
// ExecutionPort is a bit set of execution ports; Null means "no port".
type ExecutionPort byte

// Port bits and the combinations used by the macro-op table. Only Null
// carries the ExecutionPort type explicitly; the others are untyped constants.
const (
	Null ExecutionPort = iota
	P0                 = 1
	P1                 = 2
	P5                 = 4
	P01                = P0 | P1
	P05                = P0 | P5
	P015               = P0 | P1 | P5
)

// MacroOP describes one macro operation: its code size, its latency, the
// ports its one or two micro-ops may use, and whether it is flagged as
// dependent (ScheduleMop then also waits for the dependency cycle).
type MacroOP struct {
	Name      string
	Size      int
	Latency   int
	UOP1      ExecutionPort
	UOP2      ExecutionPort
	Dependent bool
}

// GetSize returns the Size field.
func (op *MacroOP) GetSize() int { return op.Size }

// GetLatency returns the Latency field.
func (op *MacroOP) GetLatency() int { return op.Latency }

// GetUOP1 returns the port set of the first micro-op.
func (op *MacroOP) GetUOP1() ExecutionPort { return op.UOP1 }

// GetUOP2 returns the port set of the second micro-op.
func (op *MacroOP) GetUOP2() ExecutionPort { return op.UOP2 }

// IsSimple reports whether the macro-op has no second micro-op.
func (op *MacroOP) IsSimple() bool { return op.UOP2 == Null }

// IsEliminated reports whether the macro-op has no first micro-op, i.e. it
// occupies no execution port at all.
func (op *MacroOP) IsEliminated() bool { return op.UOP1 == Null }

// IsDependent returns the Dependent flag.
func (op *MacroOP) IsDependent() bool { return op.Dependent }
// Macro-op table. Field order of every literal: name, size in bytes, latency,
// port set of the first uop, port set of the second uop, dependent flag.
// M_NOP is a zero-size placeholder; the entries after it are 3 byte instructions.
var M_NOP = MacroOP{"NOP", 0, 0, Null, Null, false}
var M_Add_rr = MacroOP{"add r,r", 3, 1, P015, Null, false}
var M_Sub_rr = MacroOP{"sub r,r", 3, 1, P015, Null, false}
var M_Xor_rr = MacroOP{"xor r,r", 3, 1, P015, Null, false}
var M_Imul_r = MacroOP{"imul r", 3, 4, P1, P5, false}
var M_Mul_r = MacroOP{"mul r", 3, 4, P1, P5, false}
// latency 0 and no ports: ScheduleMop treats this one as eliminated
var M_Mov_rr = MacroOP{"mov r,r", 3, 0, Null, Null, false}
// latency is 1 lower
var M_Imul_r_dependent = MacroOP{"imul r", 3, 3, P1, Null, true} // this is the dependent version where current instruction depends on previous instruction
//Size: 4 bytes
var M_Lea_SIB = MacroOP{"lea r,r+r*s", 4, 1, P01, Null, false}
var M_Imul_rr = MacroOP{"imul r,r", 4, 3, P1, Null, false}
var M_Ror_ri = MacroOP{"ror r,i", 4, 1, P05, Null, false}
//Size: 7 bytes (can be optionally padded with nop to 8 or 9 bytes)
var M_Add_ri = MacroOP{"add r,i", 7, 1, P015, Null, false}
var M_Xor_ri = MacroOP{"xor r,i", 7, 1, P015, Null, false}
//Size: 10 bytes
var M_Mov_ri64 = MacroOP{"mov rax,i64", 10, 1, P015, Null, false}
// macro-ops that are not used are not implemented
// Instruction describes one superscalar instruction template.
//
// A template is backed either by a single macro-op (UOP) or by a sequence
// (UOP_Array); when UOP_Array is non-empty it takes precedence. SrcOP, DstOP
// and ResultOP are macro-op indices at which the program builder selects the
// source register, selects the destination register and records the result;
// -1 means that step never happens.
type Instruction struct {
	Name      string
	Opcode    byte
	UOP       MacroOP
	SrcOP     int
	ResultOP  int
	DstOP     int
	UOP_Array []MacroOP
}

// GetUOPCount returns the number of macro-ops of the instruction: the length
// of UOP_Array when set, otherwise 1 — except for the instruction named
// "NOP", which counts as 0.
func (ins *Instruction) GetUOPCount() int {
	if n := len(ins.UOP_Array); n != 0 {
		return n
	}
	if ins.Name == "NOP" { // nop is assumed to be zero bytes
		return 0
	}
	return 1
}

// GetSize returns the total code size of the instruction's macro-ops.
func (ins *Instruction) GetSize() int {
	if len(ins.UOP_Array) == 0 {
		return ins.UOP.GetSize()
	}
	total := 0
	for idx := range ins.UOP_Array {
		total += ins.UOP_Array[idx].GetSize()
	}
	return total
}

// IsSimple reports whether GetSize() equals 1.
// NOTE(review): this compares the byte size, not the macro-op count, and no
// instruction in the visible table has size 1 — confirm whether
// GetUOPCount() == 1 was intended.
func (ins *Instruction) IsSimple() bool {
	return ins.GetSize() == 1
}

// GetLatency returns the summed latency of the instruction's macro-ops.
func (ins *Instruction) GetLatency() int {
	if len(ins.UOP_Array) == 0 {
		return ins.UOP.GetLatency()
	}
	total := 0
	for idx := range ins.UOP_Array {
		total += ins.UOP_Array[idx].GetLatency()
	}
	return total
}
// Numeric ids of the superscalar instruction types. They are stored in
// Instruction.Opcode and are also used as operation-group ids (OpGroup).
// Only S_INVALID carries an explicit int type; the others are untyped.
const (
	S_INVALID  int = -1
	S_ISUB_R       = 0
	S_IXOR_R       = 1
	S_IADD_RS      = 2
	S_IMUL_R       = 3
	S_IROR_C       = 4
	S_IADD_C7      = 5
	S_IXOR_C7      = 6
	S_IADD_C8      = 7
	S_IXOR_C8      = 8
	S_IADD_C9      = 9
	S_IXOR_C9      = 10
	S_IMULH_R      = 11
	S_ISMULH_R     = 12
	S_IMUL_RCP     = 13
)
// Opcode_To_String maps every id above to its printable name.
var Opcode_To_String = map[int]string{S_INVALID: "INVALID",
	S_ISUB_R:   "ISUB_R",
	S_IXOR_R:   "IXOR_R",
	S_IADD_RS:  "IADD_RS",
	S_IMUL_R:   "IMUL_R",
	S_IROR_C:   "IROR_C",
	S_IADD_C7:  "IADD_C7",
	S_IXOR_C7:  "IXOR_C7",
	S_IADD_C8:  "IADD_C8",
	S_IXOR_C8:  "IXOR_C8",
	S_IADD_C9:  "IADD_C9",
	S_IXOR_C9:  "IXOR_C9",
	S_IMULH_R:  "IMULH_R",
	S_ISMULH_R: "ISMULH_R",
	S_IMUL_RCP: "IMUL_RCP",
}
// Instruction templates.
// SrcOP/DstOP/ResultOP are macro-op indices: the program builder selects the
// source register when it reaches macro-op SrcOP, the destination register at
// DstOP, and records the result at ResultOP. SrcOP -1 means the instruction
// has no register source (it uses an immediate). Fields left out default to 0.
var ISUB_R = Instruction{Name: "ISUB_R", Opcode: S_ISUB_R, UOP: M_Sub_rr, SrcOP: 0}
var IXOR_R = Instruction{Name: "IXOR_R", Opcode: S_IXOR_R, UOP: M_Xor_rr, SrcOP: 0}
var IADD_RS = Instruction{Name: "IADD_RS", Opcode: S_IADD_RS, UOP: M_Lea_SIB, SrcOP: 0}
var IMUL_R = Instruction{Name: "IMUL_R", Opcode: S_IMUL_R, UOP: M_Imul_rr, SrcOP: 0}
var IROR_C = Instruction{Name: "IROR_C", Opcode: S_IROR_C, UOP: M_Ror_ri, SrcOP: -1}
// The C7/C8/C9 variants share the 7-byte add/xor macro-ops; they are picked
// for the 7, 8 and 9 byte decoder slots respectively.
var IADD_C7 = Instruction{Name: "IADD_C7", Opcode: S_IADD_C7, UOP: M_Add_ri, SrcOP: -1}
var IXOR_C7 = Instruction{Name: "IXOR_C7", Opcode: S_IXOR_C7, UOP: M_Xor_ri, SrcOP: -1}
var IADD_C8 = Instruction{Name: "IADD_C8", Opcode: S_IADD_C8, UOP: M_Add_ri, SrcOP: -1}
var IXOR_C8 = Instruction{Name: "IXOR_C8", Opcode: S_IXOR_C8, UOP: M_Xor_ri, SrcOP: -1}
var IADD_C9 = Instruction{Name: "IADD_C9", Opcode: S_IADD_C9, UOP: M_Add_ri, SrcOP: -1}
var IXOR_C9 = Instruction{Name: "IXOR_C9", Opcode: S_IXOR_C9, UOP: M_Xor_ri, SrcOP: -1}
// Multi-macro-op instructions: these use UOP_Array instead of UOP.
var IMULH_R = Instruction{Name: "IMULH_R", Opcode: S_IMULH_R, UOP_Array: []MacroOP{M_Mov_rr, M_Mul_r, M_Mov_rr}, ResultOP: 1, DstOP: 0, SrcOP: 1}
var ISMULH_R = Instruction{Name: "ISMULH_R", Opcode: S_ISMULH_R, UOP_Array: []MacroOP{M_Mov_rr, M_Imul_r, M_Mov_rr}, ResultOP: 1, DstOP: 0, SrcOP: 1}
var IMUL_RCP = Instruction{Name: "IMUL_RCP", Opcode: S_IMUL_RCP, UOP_Array: []MacroOP{M_Mov_ri64, M_Imul_r_dependent}, ResultOP: 1, DstOP: 1, SrcOP: -1}
// INOP is the placeholder instruction; GetUOPCount reports 0 for it.
var INOP = Instruction{Name: "NOP", UOP: M_NOP}
// how random 16 bytes are split into instructions: every layout lists the
// byte lengths of the instruction slots of one decode group and sums to 16.
// NOTE(review): buffer0..buffer5 repeat the rows of Decoder_To_Instruction_Length
// and are not referenced in the visible part of this file — confirm they are
// still needed.
var buffer0 = []int{4, 8, 4}
var buffer1 = []int{7, 3, 3, 3}
var buffer2 = []int{3, 7, 3, 3}
var buffer3 = []int{4, 9, 3}
var buffer4 = []int{4, 4, 4, 4}
var buffer5 = []int{3, 3, 10}
// Decoder_To_Instruction_Length is indexed by DecoderType (Decoder484 = 0 …
// Decoder3310 = 5) and yields the slot lengths of that decoder.
var Decoder_To_Instruction_Length = [][]int{{4, 8, 4},
	{7, 3, 3, 3},
	{3, 7, 3, 3},
	{4, 9, 3},
	{4, 4, 4, 4},
	{3, 3, 10}}
// DecoderType identifies one of the six ways a 16-byte decode group is split
// into instruction slots; the digits in each name are the slot lengths.
type DecoderType int

const Decoder484 DecoderType = 0
const Decoder7333 DecoderType = 1
const Decoder3733 DecoderType = 2
const Decoder493 DecoderType = 3
const Decoder4444 DecoderType = 4
const Decoder3310 DecoderType = 5

// GetSize returns the number of instruction slots of the decoder layout.
// It panics with "unknown decoder" for any value outside the six constants.
func (d DecoderType) GetSize() int {
	slotCounts := [...]int{
		Decoder484:  3,
		Decoder7333: 4,
		Decoder3733: 4,
		Decoder493:  3,
		Decoder4444: 4,
		Decoder3310: 3,
	}
	if d < 0 || int(d) >= len(slotCounts) {
		panic("unknown decoder")
	}
	return slotCounts[d]
}

// String returns the constant's name, e.g. "Decoder484".
// It panics with "unknown decoder" for any value outside the six constants.
func (d DecoderType) String() string {
	names := [...]string{
		Decoder484:  "Decoder484",
		Decoder7333: "Decoder7333",
		Decoder3733: "Decoder3733",
		Decoder493:  "Decoder493",
		Decoder4444: "Decoder4444",
		Decoder3310: "Decoder3310",
	}
	if d < 0 || int(d) >= len(names) {
		panic("unknown decoder")
	}
	return names[d]
}
// FetchNextDecoder chooses the decoder layout for the next decode cycle.
//
// ins is the instruction most recently created, cycle the current decode
// cycle and mulcount the number of multiplications emitted so far. The rules
// are checked in this order:
//  1. after IMULH_R or ISMULH_R the 3-3-10 layout is used;
//  2. while fewer multiplications than cycle+1 have been issued, the 4-4-4-4
//     layout is used to keep the multiplication port saturated;
//  3. after IMUL_RCP one random bit picks between 4-8-4 and 4-9-3;
//  4. otherwise two random bits pick one of 4-8-4, 7-3-3-3, 3-7-3-3, 4-9-3.
//
// Rules 3 and 4 each consume exactly one byte from gen; rules 1 and 2 consume none.
func FetchNextDecoder(ins *Instruction, cycle int, mulcount int, gen *Blake2Generator) DecoderType {
	switch {
	case ins.Name == IMULH_R.Name || ins.Name == ISMULH_R.Name:
		return Decoder3310
	case mulcount < cycle+1:
		return Decoder4444
	case ins.Name == IMUL_RCP.Name:
		if gen.GetByte()&1 == 1 {
			return Decoder484
		}
		return Decoder493
	}

	// no special case applies: select a decoder randomly
	candidates := [4]DecoderType{Decoder484, Decoder7333, Decoder3733, Decoder493}
	return candidates[gen.GetByte()&3]
}
// Candidate instructions per slot length. CreateSuperScalarInstruction indexes
// these with random bits (one bit for the two-entry tables, two bits for slot3L).
var slot3 = []*Instruction{&ISUB_R, &IXOR_R} // 3 length instruction will be filled with these
// slot3L is used instead of slot3 when the 3-byte slot is the last one of its decode group.
var slot3L = []*Instruction{&ISUB_R, &IXOR_R, &IMULH_R, &ISMULH_R}
var slot4 = []*Instruction{&IROR_C, &IADD_RS}
var slot7 = []*Instruction{&IXOR_C7, &IADD_C7}
var slot8 = []*Instruction{&IXOR_C8, &IADD_C8}
var slot9 = []*Instruction{&IXOR_C9, &IADD_C9}
var slot10 = []*Instruction{&IMUL_RCP}
// SuperScalarInstruction is one concrete instruction of a superscalar
// program: a template (ins) together with the registers and immediates chosen
// for it.
//
// Src_Reg and Dst_Reg are -1 until a register has been selected. OpGroup and
// OpGroupPar describe the operation for the "same operation twice on one
// register" check performed during destination selection; when
// GroupParIsSource is non-zero, OpGroupPar is set to the selected source register.
type SuperScalarInstruction struct {
	Opcode           byte
	Dst_Reg          int
	Src_Reg          int
	Mod              byte
	Imm32            uint32
	Type             int
	Name             string
	OpGroup          int
	OpGroupPar       int
	GroupParIsSource int
	ins              *Instruction
	CanReuse         bool
}

// String renders the instruction as one listing line. When no source
// register has been selected, the destination register is printed as the source.
func (sins SuperScalarInstruction) String() string {
	shownSrc := sins.Src_Reg
	if shownSrc < 0 {
		shownSrc = sins.Dst_Reg
	}
	return fmt.Sprintf("; %10s %2d ", sins.Name, sins.Opcode) +
		fmt.Sprintf("dst r%d ", sins.Dst_Reg) +
		fmt.Sprintf("src r%d ", shownSrc) +
		fmt.Sprintf("Mod %08x ", sins.Mod) +
		fmt.Sprintf("Imm %08x ", sins.Imm32)
}

// FixSrcReg gives an instruction without a selected source register its
// destination register as source.
func (sins *SuperScalarInstruction) FixSrcReg() {
	if sins.Src_Reg < 0 {
		sins.Src_Reg = sins.Dst_Reg
	}
}

// Reset puts the selection-related fields back to their "nothing chosen yet"
// values. Mod, Imm32, Type, Name, OpGroup, OpGroupPar and ins are left untouched.
func (sins *SuperScalarInstruction) Reset() {
	sins.Opcode = 99
	sins.Src_Reg, sins.Dst_Reg = -1, -1
	sins.CanReuse = false
	sins.GroupParIsSource = 0
}
// create initialises sins as a fresh instance of the template ins.
//
// sins is reset, linked to the template, and its Mod, Imm32, OpGroup,
// OpGroupPar, GroupParIsSource and CanReuse fields are filled in according to
// the instruction type. Random values are drawn from gen where the type needs them:
//   - IADD_RS:             one byte for Mod
//   - IROR_C:              bytes until (byte & 63) is non-zero (rotate count)
//   - IADD_C*/IXOR_C*:     one uint32 immediate
//   - IMULH_R/ISMULH_R:    one uint32 used as OpGroupPar
//   - IMUL_RCP:            uint32 values until one is neither zero nor a power of two
//
// Every call prints the instruction name as a debug trace. An unknown
// template name panics with "should not occur".
func create(sins *SuperScalarInstruction, ins *Instruction, gen *Blake2Generator) {
	sins.Reset()
	sins.ins = ins
	sins.Name = ins.Name
	sins.OpGroupPar = -1
	sins.Opcode = ins.Opcode
	switch ins.Name {
	case ISUB_R.Name:
		fmt.Printf("%s \n", ins.Name)
		sins.Mod = 0
		sins.Imm32 = 0
		// NOTE(review): ISUB_R is filed under the IADD_RS group — confirm this is intended.
		sins.OpGroup = S_IADD_RS
		sins.GroupParIsSource = 1
	case IXOR_R.Name:
		fmt.Printf("%s \n", ins.Name)
		sins.Mod = 0
		sins.Imm32 = 0
		sins.OpGroup = S_IXOR_R
		sins.GroupParIsSource = 1
	case IADD_RS.Name:
		fmt.Printf("q %s \n", ins.Name)
		sins.Mod = gen.GetByte()
		sins.Imm32 = 0
		sins.OpGroup = S_IADD_RS
		sins.GroupParIsSource = 1
	case IMUL_R.Name:
		fmt.Printf("%s \n", ins.Name)
		sins.Mod = 0
		sins.Imm32 = 0
		sins.OpGroup = S_IMUL_R
		sins.GroupParIsSource = 1
	case IROR_C.Name:
		fmt.Printf("%s \n", ins.Name)
		sins.Mod = 0
		// draw until the rotate count is non-zero
		for sins.Imm32 = 0; sins.Imm32 == 0; {
			sins.Imm32 = uint32(gen.GetByte() & 63)
		}
		sins.OpGroup = S_IROR_C
		sins.OpGroupPar = -1
	case IADD_C7.Name, IADD_C8.Name, IADD_C9.Name:
		fmt.Printf("%s \n", ins.Name)
		sins.Mod = 0
		sins.Imm32 = gen.GetUint32()
		sins.OpGroup = S_IADD_C7
		sins.OpGroupPar = -1
	case IXOR_C7.Name, IXOR_C8.Name, IXOR_C9.Name:
		fmt.Printf("%s \n", ins.Name)
		sins.Mod = 0
		sins.Imm32 = gen.GetUint32()
		sins.OpGroup = S_IXOR_C7
		sins.OpGroupPar = -1
	case IMULH_R.Name:
		fmt.Printf("%s \n", ins.Name)
		sins.CanReuse = true
		sins.Mod = 0
		sins.Imm32 = 0
		sins.OpGroup = S_IMULH_R
		sins.OpGroupPar = int(gen.GetUint32())
	case ISMULH_R.Name:
		fmt.Printf("%s \n", ins.Name)
		sins.CanReuse = true
		sins.Mod = 0
		sins.Imm32 = 0
		sins.OpGroup = S_ISMULH_R
		sins.OpGroupPar = int(gen.GetUint32())
	case IMUL_RCP.Name:
		fmt.Printf("%s \n", ins.Name)
		sins.Mod = 0
		// Redraw while the immediate is zero or a power of two.
		// The parentheses are essential: in Go `&` binds tighter than `-`,
		// so `x&x - 1` means `(x&x) - 1`, which only rejected the value 1
		// and let 0 and powers of two through.
		for {
			sins.Imm32 = gen.GetUint32()
			if sins.Imm32&(sins.Imm32-1) != 0 {
				break
			}
		}
		sins.OpGroup = S_IMUL_RCP
	default:
		fmt.Printf("%s \n", ins.Name)
		panic("should not occur")
	}
}
// CreateSuperScalarInstruction fills sins with a randomly chosen instruction
// that fits a decoder slot of instruction_len bytes.
//
// decoder_type is the DecoderType of the current decode group and islast
// tells whether the slot is the last one of the group; isfirst is accepted
// but not used. Slot rules:
//   - 3 bytes:  ISUB_R/IXOR_R; in the last slot IMULH_R/ISMULH_R are possible too
//   - 4 bytes:  IMUL_R in every non-last slot of the 4-4-4-4 group, otherwise IROR_C/IADD_RS
//   - 7/8/9:    the IXOR_C/IADD_C variant of that length
//   - 10 bytes: IMUL_RCP
//
// Any other length panics with "should not be possible".
func CreateSuperScalarInstruction(sins *SuperScalarInstruction, gen *Blake2Generator, instruction_len int, decoder_type int, islast, isfirst bool) {
	fmt.Printf("instruction len %d\n", instruction_len)

	var template *Instruction
	switch instruction_len {
	case 3:
		if islast {
			template = slot3L[gen.GetByte()&3]
		} else {
			template = slot3[gen.GetByte()&1]
		}
	case 4:
		//if this is the 4-4-4-4 buffer, issue multiplications as the first 3 instructions
		if decoder_type == int(Decoder4444) && !islast {
			template = &IMUL_R
		} else {
			template = slot4[gen.GetByte()&1]
		}
	case 7:
		template = slot7[gen.GetByte()&1]
	case 8:
		fmt.Printf("creating 8\n")
		template = slot8[gen.GetByte()&1]
	case 9:
		template = slot9[gen.GetByte()&1]
	case 10:
		template = slot10[0]
	default:
		panic("should not be possible")
	}
	create(sins, template, gen)
}
// SuperScalarProgram is the result of Build_SuperScalar_Program.
type SuperScalarProgram struct {
	Ins []SuperScalarInstruction // all instructions of program
	// AddressReg is the index of the register with the largest simulated
	// latency; the lowest index wins ties.
	AddressReg int
}
// Build_SuperScalar_Program generates one superscalar program from the random
// stream gen.
//
// The generator simulates a CPU front end and three execution ports. Every
// decode cycle a decoder layout is fetched (FetchNextDecoder) and one macro-op
// is produced per slot. For each macro-op the earliest free port cycle is
// found (ScheduleMop); at the macro-op indices given by the instruction's
// SrcOP / DstOP a source / destination register is selected, looking up to
// LOOK_FORWARD_CYCLES cycles ahead, and at ResultOP the destination register's
// availability and last-operation info are updated. An instruction for which
// no register can be found is thrown away (at most MAX_THROWAWAY_COUNT times
// in a row). Once all macro-ops of an instruction are issued, it is appended
// to the program.
//
// Generation stops when RANDOMX_SUPERSCALAR_LATENCY decode cycles have
// passed, when a macro-op is scheduled at or beyond that latency (or cannot
// be scheduled at all), or when the program holds SuperscalarMaxSize
// instructions. Finally the per-register dependency-chain lengths are
// computed and the register with the longest chain becomes AddressReg.
//
// The function prints an extensive debug trace to stdout.
func Build_SuperScalar_Program(gen *Blake2Generator) *SuperScalarProgram {
	cycle := 0
	depcycle := 0
	retire_cycle := 0
	mulcount := 0
	ports_saturated := false
	program_size := 0
	current_instruction := INOP
	macro_op_index := 0
	macro_op_count := 0
	throwAwayCount := 0
	code_size := 0
	var program SuperScalarProgram
	registers := make([]Register, 8, 8)
	sins := &SuperScalarInstruction{}
	sins.ins = &Instruction{Name: "NOP"}
	// portbusy[cycle][port]: non-zero once a uop occupies that port in that cycle
	portbusy := make([][]int, CYCLE_MAP_SIZE)
	for i := range portbusy {
		portbusy[i] = make([]int, 3)
	}
	done := 0
	for decode_cycle := 0; decode_cycle < RANDOMX_SUPERSCALAR_LATENCY && !ports_saturated && program_size < SuperscalarMaxSize; decode_cycle++ {
		decoder := FetchNextDecoder(sins.ins, decode_cycle, mulcount, gen)
		fmt.Printf("; ------------- fetch cycle %d (%s)\n", cycle, decoder)
		buffer_index := 0
		for buffer_index < decoder.GetSize() { // generate instructions for the current decoder
			top_cycle := cycle
			fmt.Printf("macro_op_index %d current_instruction %s actual instruction uop %d\n", macro_op_index, current_instruction.Name, sins.ins.GetUOPCount())
			// all macro-ops of the previous instruction are issued: create the next one
			if macro_op_index >= sins.ins.GetUOPCount() {
				if ports_saturated || program_size >= SuperscalarMaxSize {
					// program built successfully
					break
				}
				CreateSuperScalarInstruction(sins, gen, Decoder_To_Instruction_Length[int(decoder)][buffer_index], int(decoder), len(Decoder_To_Instruction_Length[decoder]) == (buffer_index+1), buffer_index == 0)
				macro_op_index = 0
			}
			mop := sins.ins.UOP
			if sins.ins.GetUOPCount() == 1 {
			} else {
				mop = sins.ins.UOP_Array[macro_op_index]
			}
			fmt.Printf("MOP name %s depcycle %d\n", mop.Name, depcycle)
			//calculate the earliest cycle when this macro-op (all of its uOPs) can be scheduled for execution
			scheduleCycle := ScheduleMop(&mop, portbusy, cycle, depcycle, false)
			if scheduleCycle < 0 {
				fmt.Printf("Unable to map operation %s to execution port (cycle %d)", mop.Name, cycle)
				//__debugbreak();
				ports_saturated = true
				break
			}
			fmt.Printf("scheduleCycle %d\n", scheduleCycle)
			if macro_op_index == sins.ins.SrcOP { // FIXME
				forward := 0
				for ; forward < LOOK_FORWARD_CYCLES && !sins.SelectSource(scheduleCycle, registers, gen); forward++ {
					fmt.Printf(";src STALL at cycle %d\n", cycle)
					scheduleCycle++
					cycle++
				}
				if forward == LOOK_FORWARD_CYCLES {
					if throwAwayCount < MAX_THROWAWAY_COUNT {
						throwAwayCount++
						// mark the instruction as finished so a new one is created for this slot
						macro_op_index = sins.ins.GetUOPCount()
						fmt.Printf(";throwAway %s\n", sins.Name)
						continue
					}
					fmt.Printf("aborting at cycle %d source registers not available", cycle)
					break
				}
				fmt.Printf("; src = r%d\n", sins.Src_Reg)
			}
			if macro_op_index == sins.ins.DstOP { // FIXME
				forward := 0
				for ; forward < LOOK_FORWARD_CYCLES && !sins.SelectDestination(scheduleCycle, throwAwayCount > 0, registers, gen); forward++ {
					fmt.Printf(";dst STALL at cycle %d\n", cycle)
					scheduleCycle++
					cycle++
				}
				if forward == LOOK_FORWARD_CYCLES {
					if throwAwayCount < MAX_THROWAWAY_COUNT {
						throwAwayCount++
						macro_op_index = sins.ins.GetUOPCount()
						fmt.Printf(";throwAway %s\n", sins.Name)
						continue
					}
					fmt.Printf("aborting at cycle %d destination registers not available", cycle)
					break
				}
				fmt.Printf("; dst = r%d\n", sins.Dst_Reg)
			}
			throwAwayCount = 0
			// recalculate when the instruction can be scheduled based on operand availability
			scheduleCycle = ScheduleMop(&mop, portbusy, scheduleCycle, scheduleCycle, true)
			depcycle = scheduleCycle + mop.GetLatency() // calculate when will the result be ready
			if macro_op_index == sins.ins.ResultOP { // fix me
				retire_cycle = depcycle
				fmt.Printf("; RETIRED at cycle %d Dst_Reg %d\n", retire_cycle, sins.Dst_Reg)
				registers[sins.Dst_Reg].Latency = depcycle
				registers[sins.Dst_Reg].LastOpGroup = sins.OpGroup
				registers[sins.Dst_Reg].LastOpPar = sins.OpGroupPar
			}
			code_size += mop.GetSize()
			buffer_index++
			macro_op_index++
			macro_op_count++
			// terminating condition for 99% case
			if scheduleCycle >= RANDOMX_SUPERSCALAR_LATENCY {
				ports_saturated = true
			}
			// register stalls only delay this macro-op, not the decoder
			cycle = top_cycle
			// when all uops of current instruction have been issued, add the instruction to superscalar program
			if macro_op_index >= sins.ins.GetUOPCount() {
				sins.FixSrcReg() // fix src register once and for all
				program.Ins = append(program.Ins, *sins)
				// Keep the counter in step with the program so that the
				// SuperscalarMaxSize checks above can actually trigger
				// (it used to stay at 0 forever).
				program_size++
				if sins.ins.Name == "IMUL_R" || sins.ins.Name == "IMULH_R" || sins.ins.Name == "ISMULH_R" || sins.ins.Name == "IMUL_RCP" {
					mulcount++
				}
			}
			done++
		}
		cycle++
	}
	for i := range program.Ins {
		fmt.Printf("%d %s\n", i, program.Ins[i].String())
	}
	// Dependency-chain length per register, assuming one cycle per
	// instruction: an instruction extends the longer of its destination and
	// source chains by one.
	var asic_latencies [8]int
	for i := range program.Ins {
		lastdst := asic_latencies[program.Ins[i].Dst_Reg] + 1
		lastsrc := 0
		if program.Ins[i].Dst_Reg != program.Ins[i].Src_Reg {
			lastsrc = asic_latencies[program.Ins[i].Src_Reg] + 1
		}
		asic_latencies[program.Ins[i].Dst_Reg] = Max(lastdst, lastsrc)
	}
	// the address register is the register with the longest chain (lowest index on ties)
	asic_latency_max := 0
	address_reg := 0
	for i := range asic_latencies {
		fmt.Printf("latency[%d] %d\n", i, asic_latencies[i])
		if asic_latencies[i] > asic_latency_max {
			asic_latency_max = asic_latencies[i]
			address_reg = i
		}
	}
	program.AddressReg = address_reg
	fmt.Printf("address_reg %d\n", address_reg)
	return &program
}
// CYCLE_MAP_SIZE is the number of cycles tracked by the port-occupancy table
// (the first dimension of portbusy).
const CYCLE_MAP_SIZE int = RANDOMX_SUPERSCALAR_LATENCY + 4
// LOOK_FORWARD_CYCLES is how many additional cycles the program builder waits
// for a source or destination register before giving up on an instruction.
const LOOK_FORWARD_CYCLES int = 4
// MAX_THROWAWAY_COUNT bounds how many instructions in a row may be discarded
// because no register was available.
const MAX_THROWAWAY_COUNT int = 256
// ScheduleUop finds the earliest cycle >= cycle in which one of the ports
// allowed by uop is free and returns that cycle, or -1 when there is none
// before CYCLE_MAP_SIZE.
//
// Within a cycle the ports are tried in the order P5, P0, P1. portbusy is
// indexed [cycle][port] with port columns 0 = P0, 1 = P1, 2 = P5; a zero
// entry means free. With commit set, the chosen entry is marked busy by
// storing the uop's port mask in it; without commit the table is only read.
// Each probed cycle is printed as a debug trace.
func ScheduleUop(uop ExecutionPort, portbusy [][]int, cycle int, commit bool) int {
	for at := cycle; at < CYCLE_MAP_SIZE; at++ {
		slots := portbusy[at] // shares storage with portbusy, so stores below stick
		fmt.Printf("current cycle %d portbusy %+v commit %+v\n", at, slots, commit)

		switch {
		case uop&P5 != 0 && slots[2] == 0:
			if commit {
				fmt.Printf("; P5 at cycle %d\n", at)
				slots[2] = int(uop)
			}
			fmt.Printf("P5 available\n")
			return at
		case uop&P0 != 0 && slots[0] == 0:
			if commit {
				fmt.Printf("; P0 at cycle %d\n", at)
				slots[0] = int(uop)
			}
			fmt.Printf("P0 available\n")
			return at
		case uop&P1 != 0 && slots[1] == 0:
			if commit {
				fmt.Printf("; P1 at cycle %d\n", at)
				slots[1] = int(uop)
			}
			fmt.Printf("P1 available\n")
			return at
		}
	}
	return -1
}
// ScheduleMop returns the earliest cycle at which the macro-op mop can
// execute, or -1 when no such cycle exists before CYCLE_MAP_SIZE.
//
//   - A dependent macro-op cannot start before depcycle.
//   - An eliminated macro-op (no uops) needs no port and is scheduled at once.
//   - A simple macro-op (one uop) is delegated to ScheduleUop.
//   - A two-uop macro-op needs a cycle in which both uops find a port; the
//     search advances cycle by cycle until both probes agree.
//
// With commit set the chosen ports are marked busy in portbusy; otherwise the
// table is only probed. Debug traces are printed along the way.
func ScheduleMop(mop *MacroOP, portbusy [][]int, cycle int, depcycle int, commit bool) int {
	if mop.IsDependent() {
		fmt.Printf("dependent\n")
		cycle = Max(cycle, depcycle)
	}

	if mop.IsEliminated() {
		if commit {
			fmt.Printf("; (eliminated)\n")
		}
		return cycle
	}

	if mop.IsSimple() {
		fmt.Printf("simple 1\n")
		return ScheduleUop(mop.GetUOP1(), portbusy, cycle, commit)
	}

	// two uops: both have to land in the same cycle
	for start := cycle; start < CYCLE_MAP_SIZE; start++ {
		first := ScheduleUop(mop.GetUOP1(), portbusy, start, false)
		second := ScheduleUop(mop.GetUOP2(), portbusy, start, false)
		if first != second {
			continue
		}
		if commit {
			ScheduleUop(mop.GetUOP1(), portbusy, start, true)
			ScheduleUop(mop.GetUOP2(), portbusy, start, true)
		}
		return first
	}
	return -1
}
// Max returns whichever of x and y is greater (either one when they are equal).
func Max(x, y int) int {
	if x > y {
		return x
	}
	return y
}
// Register holds the per-register bookkeeping used while generating a
// superscalar program.
type Register struct {
	Value uint64
	// Latency is compared against the current cycle by SelectSource and
	// SelectDestination: the register is usable once Latency <= cycle.
	Latency int
	// LastOpGroup and LastOpPar are compared against the candidate instruction's
	// OpGroup and OpGroupPar in SelectDestination.
	LastOpGroup int
	LastOpPar   int // -1 = immediate, 0 to 7 = register
	// Status can be RegisterNeedsDisplacement or RegisterNeedsSib.
	Status int
}

// Indices of the registers that need special x86 encoding.
const (
	RegisterNeedsDisplacement = 5 // x86 r13 register
	RegisterNeedsSib          = 4 // x86 r12 register
)
// SelectSource picks the source register for sins from the registers whose
// Latency is <= cycle and stores it in sins.Src_Reg.
//
// Special case: when exactly two registers are ready for an IADD_RS and one of
// them is RegisterNeedsDisplacement, that register is taken as the source
// (SelectDestination never accepts it as an IADD_RS destination).
// Returns false when no register is ready.
func (sins *SuperScalarInstruction) SelectSource(cycle int, Registers []Register, gen *Blake2Generator) bool {
	var ready []int
	for idx := range Registers {
		latency := Registers[idx].Latency
		fmt.Printf("\nchecking s reg %d latency %d cycle %d", idx, latency, cycle)
		if latency > cycle {
			continue
		}
		ready = append(ready, idx)
		fmt.Printf("available")
	}

	if sins.Name == "IADD_RS" && len(ready) == 2 &&
		(ready[0] == RegisterNeedsDisplacement || ready[1] == RegisterNeedsDisplacement) {
		sins.Src_Reg = RegisterNeedsDisplacement
		sins.OpGroupPar = sins.Src_Reg
		return true
	}

	if !selectRegister(ready, gen, &sins.Src_Reg) {
		return false
	}
	// The source register doubles as the group parameter only for some instructions.
	if sins.GroupParIsSource != 0 {
		sins.OpGroupPar = sins.Src_Reg
	}
	return true
}
// SelectDestination picks the destination register for sins and stores it in
// sins.Dst_Reg. A register qualifies when all of the following hold:
//   - its Latency is <= cycle;
//   - it differs from the source register, unless sins.CanReuse is set;
//   - it was not last written by a multiplication when sins is itself a
//     multiplication, unless allowChainedMul is set;
//   - the last operation applied to it was not the same group with the same parameter;
//   - it is not RegisterNeedsDisplacement when sins is an IADD_RS.
//
// Returns false when no register qualifies.
func (sins *SuperScalarInstruction) SelectDestination(cycle int, allowChainedMul bool, Registers []Register, gen *Blake2Generator) bool {
	var candidates []int
	for i := range Registers {
		reg := &Registers[i]

		ready := reg.Latency <= cycle
		notSource := sins.CanReuse || i != sins.Src_Reg
		mulAllowed := allowChainedMul || sins.OpGroup != S_IMUL_R || reg.LastOpGroup != S_IMUL_R
		notRepeated := reg.LastOpGroup != sins.OpGroup || reg.LastOpPar != sins.OpGroupPar
		encodable := sins.Name != "IADD_RS" || i != RegisterNeedsDisplacement

		fmt.Printf("\nchecking d reg %d cycle %d CanReuse %+v src %d latency %d chained_mul %+v | ", i, cycle, sins.CanReuse, sins.Src_Reg, reg.Latency, allowChainedMul)
		fmt.Printf("%+v %+v %+v %+v %+v ", ready, notSource, mulAllowed, notRepeated, encodable)
		fmt.Printf("yy %+v %+v yy ", reg.LastOpPar, sins.OpGroupPar)

		if ready && notSource && mulAllowed && notRepeated && encodable {
			candidates = append(candidates, i)
			fmt.Printf("available ")
		}
	}
	return selectRegister(candidates, gen, &sins.Dst_Reg)
}
// selectRegister chooses one entry of available_registers and writes it to *reg.
// With a single candidate no randomness is consumed; with several, one 32-bit
// value is drawn from gen and reduced modulo the candidate count.
// Returns false (leaving *reg untouched) when there are no candidates.
func selectRegister(available_registers []int, gen *Blake2Generator, reg *int) bool {
	count := len(available_registers)
	if count == 0 {
		return false
	}

	pick := 0
	if count > 1 {
		pick = int(gen.GetUint32() % uint32(count))
	}
	fmt.Printf("reg index %d\n", pick)
	*reg = available_registers[pick]
	return true
}
// Mask reduces a register value to a cache-line index. It is only a valid bit
// mask if CacheSize/CacheLineSize is a power of two - TODO confirm against the
// definitions of CacheSize and CacheLineSize.
const Mask = CacheSize/CacheLineSize - 1

// getMixBlock returns the byte offset, within the cache, of the
// CacheLineSize-byte block selected by register_value.
//
// The register value is masked (bitwise AND) with Mask so the offset always
// stays inside the cache. The previous code multiplied by Mask instead, which
// produced unbounded offsets. memory is unused and kept only so the signature
// stays unchanged.
func getMixBlock(register_value uint64, memory []byte) uint64 {
	return (register_value & Mask) * CacheLineSize
}
// Constants used by InitDatasetItem to derive the eight initial register
// values of a dataset item from its item number.
const (
	superscalarMul0 uint64 = 6364136223846793005
	superscalarAdd1 uint64 = 9298411001130361340
	superscalarAdd2 uint64 = 12065312585734608966
	superscalarAdd3 uint64 = 9306329213124626780
	superscalarAdd4 uint64 = 5281919268842080866
	superscalarAdd5 uint64 = 10536153434571861004
	superscalarAdd6 uint64 = 3398623926847679864
	superscalarAdd7 uint64 = 9549104520008361294
)
// InitDatasetItem computes dataset item number itemnumber and writes its
// eight 64-bit words to out, which must hold at least 8 elements.
//
// The eight registers are seeded from the item number, then for each of the
// RANDOMX_CACHE_ACCESSES rounds the round's superscalar program is run over
// them, a block fetched with cache.GetBlock is XORed in, and the program's
// address register supplies the value used to fetch the next block.
func (cache *Randomx_Cache) InitDatasetItem(out []uint64, itemnumber uint64) {
	seed := (itemnumber + 1) * superscalarMul0
	regs := [8]uint64{
		seed,
		seed ^ superscalarAdd1,
		seed ^ superscalarAdd2,
		seed ^ superscalarAdd3,
		seed ^ superscalarAdd4,
		seed ^ superscalarAdd5,
		seed ^ superscalarAdd6,
		seed ^ superscalarAdd7,
	}
	var mix [8]uint64

	// The first block is selected by the item number itself.
	selector := itemnumber
	for round := 0; round < RANDOMX_CACHE_ACCESSES; round++ {
		cache.Programs[round].executeSuperscalar_nocache(regs[:])
		cache.GetBlock(selector, mix[:])
		for q := range regs {
			regs[q] ^= mix[q]
		}
		selector = regs[cache.Programs[round].AddressReg]
	}

	for q, word := range regs {
		out[q] = word
	}
}
// initDataset generates dataset items in the range [start_item, end_item).
//
// InitDatasetItem writes eight words into its out argument, so a real buffer
// is passed here; the previous nil argument made the call panic with an
// index-out-of-range error.
//
// NOTE(review): the loop still stops after the first item and the generated
// words are discarded - this looks like unfinished debugging code and is kept
// as it was.
func (cache *Randomx_Cache) initDataset(start_item, end_item uint64) {
	var item [8]uint64 // scratch destination for one dataset item
	for itemnumber := start_item; itemnumber < end_item; itemnumber++ {
		cache.InitDatasetItem(item[:], itemnumber)
		// dataset_index += CacheLineSize
		fmt.Printf("exiting dataset item\n")
		break
	}
}
// executeSuperscalar_nocache runs every instruction of the superscalar
// program p, in order, over the register file r, modifying r in place.
// All arithmetic wraps modulo 2^64. Panics on an unknown opcode.
func (p *SuperScalarProgram) executeSuperscalar_nocache(r []uint64) {
	for _, ins := range p.Ins {
		// Only indices are cached here: Src_Reg is not read as a register for
		// the constant-operand instructions.
		d, s := ins.Dst_Reg, ins.Src_Reg

		switch ins.Opcode {
		case S_ISUB_R:
			r[d] -= r[s]
		case S_IXOR_R:
			r[d] ^= r[s]
		case S_IADD_RS:
			// shift amount is bits 2-3 of Mod
			r[d] += r[s] << ((ins.Mod >> 2) % 4)
		case S_IMUL_R:
			r[d] *= r[s]
		case S_IROR_C:
			// rotate right == rotate left by the negated count
			r[d] = bits.RotateLeft64(r[d], -int(ins.Imm32))
		case S_IADD_C7, S_IADD_C8, S_IADD_C9:
			r[d] += signExtend2sCompl(ins.Imm32)
		case S_IXOR_C7, S_IXOR_C8, S_IXOR_C9:
			r[d] ^= signExtend2sCompl(ins.Imm32)
		case S_IMULH_R:
			// high 64 bits of the unsigned 128-bit product
			hi, _ := bits.Mul64(r[d], r[s])
			r[d] = hi
		case S_ISMULH_R:
			// high 64 bits of the signed 128-bit product
			r[d] = uint64(smulh(int64(r[d]), int64(r[s])))
		case S_IMUL_RCP:
			r[d] *= randomx_reciprocal(uint64(ins.Imm32))
		default:
			panic(fmt.Sprintf("unknown opcode %d", ins.Opcode))
		}
	}
}
// smulh returns the upper 64 bits of the signed 128-bit product a*b, as an
// unsigned bit pattern.
//
// It starts from the unsigned high word and corrects it: interpreting a
// negative operand as unsigned adds 2^64 times the other operand to the
// product, so the other operand is subtracted from the high word (mod 2^64).
func smulh(a, b int64) uint64 {
	hi, _ := bits.Mul64(uint64(a), uint64(b))
	if a < 0 {
		hi -= uint64(b)
	}
	if b < 0 {
		hi -= uint64(a)
	}
	return hi
}
// p2exp63 is 2^63, the initial dividend of randomx_reciprocal.
const p2exp63 uint64 = uint64(1) << 63

// randomx_reciprocal performs 2^63 / divisor and then extends the quotient by
// one binary long-division step per significant bit of divisor. Arithmetic
// wraps modulo 2^64. Panics (integer division by zero) when divisor is 0.
func randomx_reciprocal(divisor uint64) uint64 {
	quotient := p2exp63 / divisor
	remainder := p2exp63 % divisor

	// One iteration per significant bit of divisor.
	for bit := divisor; bit != 0; bit >>= 1 {
		quotient <<= 1
		// "remainder >= divisor-remainder" is "2*remainder >= divisor" without overflow.
		if remainder >= divisor-remainder {
			quotient |= 1 // the low bit is clear after the shift, so this adds 1
			remainder = remainder*2 - divisor
		} else {
			remainder *= 2
		}
	}
	return quotient
}
// signExtend2sCompl interprets x as a two's-complement signed 32-bit value
// and returns it sign-extended to 64 bits (as an unsigned bit pattern).
func signExtend2sCompl(x uint32) uint64 {
	extended := uint64(x)
	if x > math.MaxInt32 {
		// sign bit set: fill the upper 32 bits with ones
		extended |= 0xffffffff00000000
	}
	return extended
}

315
vm.go Normal file
View file

@ -0,0 +1,315 @@
package randomx
import "fmt"
import "math"
import "math/big"
import "math/bits"
import "encoding/binary"
import "golang.org/x/crypto/blake2b"
// REG is a pair of 64-bit words.
// NOTE(review): not referenced in the visible part of the file.
type REG struct {
	Hi uint64
	Lo uint64
}
// VM is the state of one interpreted RandomX virtual machine.
type VM struct {
	State_start [64]byte

	// buffer is filled from the input hash by Run: the first 128 bytes are
	// the entropy words below, the rest are the program bytes.
	buffer [RANDOMX_PROGRAM_SIZE*8 + 16*8]byte
	// Prog is the program part of buffer (8 bytes per instruction).
	Prog []byte
	// ScratchPad is allocated and filled by CalculateHash.
	ScratchPad []byte
	// ByteCode is the decoded program, produced by Compile_TO_Bytecode.
	ByteCode [RANDOMX_PROGRAM_SIZE]InstructionByteCode

	// program configuration, see program.hpp
	entropy [16]uint64

	reg    REGISTER_FILE   // the register file
	mem    MemoryRegisters // the ma/mx memory registers
	config Config          // configuration derived from entropy

	datasetOffset uint64

	// RoundingMode is the rounding mode applied to floating point results.
	RoundingMode big.RoundingMode
	// fresult, fdst and fsrc are scratch values reused by the floating point instructions.
	fresult, fdst, fsrc *big.Float

	Cache *Randomx_Cache // randomx cache
}
// VM_Initialize returns a new VM bound to cache, with round-to-nearest-even
// rounding and freshly allocated floating point scratch values.
func (cache *Randomx_Cache) VM_Initialize() *VM {
	vm := new(VM)
	vm.Cache = cache // setup the cache
	vm.RoundingMode = big.ToNearestEven
	vm.fresult = new(big.Float)
	vm.fdst = new(big.Float)
	vm.fsrc = new(big.Float)
	return vm
}
// Config holds the per-program settings that Run derives from the entropy words.
type Config struct {
	// eMask is ORed into the bits of the e registers when they are loaded
	// (index LOW / HIGH).
	eMask [2]uint64

	// Indices of the integer registers used for address generation:
	// readReg0/readReg1 mix the scratchpad addresses, readReg2/readReg3 update mx.
	readReg0 uint64
	readReg1 uint64
	readReg2 uint64
	readReg3 uint64
}
// REGISTER_FILE is the VM register file: eight integer registers and three
// groups of four floating point registers, each holding a LOW/HIGH pair.
type REGISTER_FILE struct {
	r [8]uint64     // integer registers
	f [4][2]float64 // loaded from the scratchpad every iteration
	e [4][2]float64 // loaded from the scratchpad every iteration, then masked with Config.eMask
	a [4][2]float64 // initialised once per program from the entropy words
}
// MemoryRegisters holds the mx and ma address registers (addr_t mx, ma in the
// C++ reference).
type MemoryRegisters struct {
	mx uint64
	ma uint64
	// mempry stands in for "uint8_t* memory = nullptr" of the C++ reference.
	// NOTE(review): the name looks like a typo for "memory"; it is not used in
	// the visible part of the file.
	mempry uint64
}
// Indices of the two halves of a floating point register pair.
const (
	LOW  = 0
	HIGH = 1
)
// Run generates a program from input_hash and executes it
// RANDOMX_PROGRAM_ITERATIONS times against the scratchpad.
//
// input_hash seeds fillAes4Rx4, which fills vm.buffer: the first 16 words
// become the entropy (register a values, memory registers, address register
// selection, dataset offset, e-register masks) and the rest is the program.
// The final register file is left in vm.reg for the caller to hash.
func (vm *VM) Run(input_hash []byte) {
	var datasetItem [8]uint64

	fmt.Printf("%x \n", input_hash)

	fillAes4Rx4(input_hash[:], vm.buffer[:])
	for i := range vm.entropy {
		vm.entropy[i] = binary.LittleEndian.Uint64(vm.buffer[i*8:])
	}
	vm.Prog = vm.buffer[len(vm.entropy)*8:]

	for i := range vm.reg.r {
		vm.reg.r[i] = 0
	}

	// do more initialization before we run
	// entropy[0..7] -> the four a register pairs
	for i := range vm.reg.a {
		vm.reg.a[i][LOW] = math.Float64frombits(getSmallPositiveFloatBits(vm.entropy[2*i]))
		vm.reg.a[i][HIGH] = math.Float64frombits(getSmallPositiveFloatBits(vm.entropy[2*i+1]))
	}

	vm.mem.ma = vm.entropy[8] & CacheLineAlignMask
	vm.mem.mx = vm.entropy[10]

	// The low four bits of entropy[12] each pick one register out of the
	// pairs (r0,r1), (r2,r3), (r4,r5), (r6,r7).
	selector := vm.entropy[12]
	vm.config.readReg0 = 0 + (selector & 1)
	vm.config.readReg1 = 2 + ((selector >> 1) & 1)
	vm.config.readReg2 = 4 + ((selector >> 2) & 1)
	vm.config.readReg3 = 6 + ((selector >> 3) & 1)

	vm.datasetOffset = (vm.entropy[13] % (DATASETEXTRAITEMS + 1)) * CacheLineSize
	vm.config.eMask[LOW] = getFloatMask(vm.entropy[14])
	vm.config.eMask[HIGH] = getFloatMask(vm.entropy[15])

	fmt.Printf("prog %x entropy 0 %x %f \n", vm.buffer[:32], vm.entropy[0], vm.reg.a[0][HIGH])

	vm.Compile_TO_Bytecode()

	// loadFloatPair converts the two signed 32-bit scratchpad words at addr
	// and addr+4 into a LOW/HIGH pair of floats.
	loadFloatPair := func(addr uint64) [2]float64 {
		var pair [2]float64
		pair[LOW] = float64(unsigned32ToSigned2sCompl(vm.Load32(addr)))
		pair[HIGH] = float64(unsigned32ToSigned2sCompl(vm.Load32(addr + 4)))
		return pair
	}

	spAddr0, spAddr1 := vm.mem.mx, vm.mem.ma

	for ic := 0; ic < RANDOMX_PROGRAM_ITERATIONS; ic++ {
		// Derive the two scratchpad addresses for this iteration.
		spMix := vm.reg.r[vm.config.readReg0] ^ vm.reg.r[vm.config.readReg1]
		spAddr0 = (spAddr0 ^ spMix) & ScratchpadL3Mask64
		spAddr1 = (spAddr1 ^ (spMix >> 32)) & ScratchpadL3Mask64

		// Integer registers are XORed with 64 bytes at spAddr0.
		for i := uint64(0); i < REGISTERSCOUNT; i++ {
			vm.reg.r[i] ^= vm.Load64(spAddr0 + 8*i)
		}

		// f and e registers are loaded from 64 bytes at spAddr1; the e
		// registers additionally get their bits masked and ORed with eMask.
		for i := uint64(0); i < REGISTERCOUNTFLT; i++ {
			vm.reg.f[i] = loadFloatPair(spAddr1 + 8*i)

			raw := loadFloatPair(spAddr1 + 8*(i+REGISTERCOUNTFLT))
			vm.reg.e[i][LOW] = math.Float64frombits((math.Float64bits(raw[LOW]) & dynamicMantissaMask) | vm.config.eMask[LOW])
			vm.reg.e[i][HIGH] = math.Float64frombits((math.Float64bits(raw[HIGH]) & dynamicMantissaMask) | vm.config.eMask[HIGH])
		}

		vm.InterpretByteCode()

		vm.mem.mx = (vm.mem.mx ^ vm.reg.r[vm.config.readReg2] ^ vm.reg.r[vm.config.readReg3]) & CacheLineAlignMask

		// execute diffuser superscalar program to get dataset 64 bytes
		itemnumber := (vm.datasetOffset + vm.mem.ma) / CacheLineSize
		vm.Cache.InitDatasetItem(datasetItem[:], itemnumber)
		for i := range vm.reg.r {
			vm.reg.r[i] ^= datasetItem[i]
		}

		vm.mem.mx, vm.mem.ma = vm.mem.ma, vm.mem.mx // swap the elements

		// Write the integer registers back at spAddr1, using the inverse of
		// the transformation applied by Load64.
		for i := uint64(0); i < REGISTERSCOUNT; i++ {
			binary.BigEndian.PutUint64(vm.ScratchPad[spAddr1+(8*i):], bits.RotateLeft64(vm.reg.r[i], 32))
		}

		// f ^= e (bitwise), then write the f registers back at spAddr0.
		for i := uint64(0); i < REGISTERCOUNTFLT; i++ {
			vm.reg.f[i][LOW] = math.Float64frombits(math.Float64bits(vm.reg.f[i][LOW]) ^ math.Float64bits(vm.reg.e[i][LOW]))
			vm.reg.f[i][HIGH] = math.Float64frombits(math.Float64bits(vm.reg.f[i][HIGH]) ^ math.Float64bits(vm.reg.e[i][HIGH]))

			base := spAddr0 + (16 * i)
			binary.BigEndian.PutUint64(vm.ScratchPad[base:], bits.RotateLeft64(math.Float64bits(vm.reg.f[i][LOW]), 32))
			binary.BigEndian.PutUint64(vm.ScratchPad[base+8:], bits.RotateLeft64(math.Float64bits(vm.reg.f[i][HIGH]), 32))
		}

		// After the first iteration the addresses depend only on spMix.
		spAddr0, spAddr1 = 0, 0
	}
}
// CalculateHash computes the hash of input and copies it into output.
//
// The input is hashed with BLAKE2b-512 and the result seeds the scratchpad.
// RANDOMX_PROGRAM_COUNT programs are then run; after every program except the
// last, the register file (r, f, e, a) is hashed with BLAKE2b-512 to seed the
// next one. The final hash is BLAKE2b-256 over r, f, e and the 64 bytes that
// hashAes1Rx4 writes for the scratchpad (they take the place of register a).
func (vm *VM) CalculateHash(input []byte, output []byte) {
	var word [8]byte

	// absorbWord feeds v to write as eight little-endian bytes.
	absorbWord := func(write func([]byte) (int, error), v uint64) {
		binary.LittleEndian.PutUint64(word[:], v)
		write(word[:])
	}
	// absorbInts feeds all integer registers to write.
	absorbInts := func(write func([]byte) (int, error)) {
		for i := range vm.reg.r {
			absorbWord(write, vm.reg.r[i])
		}
	}
	// absorbFloats feeds one floating point register group to write, LOW before HIGH.
	absorbFloats := func(write func([]byte) (int, error), group *[4][2]float64) {
		for i := range group {
			absorbWord(write, math.Float64bits(group[i][LOW]))
			absorbWord(write, math.Float64bits(group[i][HIGH]))
		}
	}

	vm.RoundingMode = big.ToNearestEven // reset rounding mode if a new hash is being calculated

	input_hash := blake2b.Sum512(input)

	vm.ScratchPad = make([]byte, ScratchpadSize, ScratchpadSize) // calculate and fill scratchpad
	fillAes1Rx4(input_hash[:], vm.ScratchPad)

	hash512, _ := blake2b.New512(nil)
	temp_hash := input_hash[:]

	for chain := 0; chain < RANDOMX_PROGRAM_COUNT-1; chain++ {
		vm.Run(temp_hash)

		hash512.Reset()
		absorbInts(hash512.Write)
		absorbFloats(hash512.Write, &vm.reg.f)
		absorbFloats(hash512.Write, &vm.reg.e)
		absorbFloats(hash512.Write, &vm.reg.a)

		temp_hash = hash512.Sum(nil)
		fmt.Printf("%d temphash %x\n", chain, temp_hash)
	}

	// final loop executes here
	vm.Run(temp_hash)

	// now hash the scratch pad and place into register a
	hashAes1Rx4(vm.ScratchPad, temp_hash)

	hash256, _ := blake2b.New256(nil)
	hash256.Reset()
	absorbInts(hash256.Write)
	absorbFloats(hash256.Write, &vm.reg.f)
	absorbFloats(hash256.Write, &vm.reg.e)
	// temp_hash stands in for register a: it is hashed directly instead of
	// being copied into the register first.
	hash256.Write(temp_hash)

	final_hash := hash256.Sum(nil)
	copy(output, final_hash)
	fmt.Printf("final %x\n", final_hash)
}
/*
const mantissaSize = 52;
const exponentSize = 11;
const mantissaMask = ( (uint64(1)) << mantissaSize) - 1;
const exponentMask = (uint64(1) << exponentSize) - 1;
const exponentBias = 1023;
const dynamicExponentBits = 4;
const staticExponentBits = 4;
const constExponentBits uint64= 0x300;
const dynamicMantissaMask = ( uint64(1) << (mantissaSize + dynamicExponentBits)) - 1;
*/
// mask22bit selects the low 22 bits of a 64-bit value; getFloatMask uses it
// to extract the low bits of an entropy word.
const mask22bit = (uint64(1) << 22) - 1
func getSmallPositiveFloatBits(entropy uint64) uint64 {
exponent := entropy >> 59 //0..31
mantissa := entropy & mantissaMask
exponent += exponentBias
exponent &= exponentMask
exponent = exponent << mantissaSize
return exponent | mantissa
}
func getStaticExponent(entropy uint64) uint64 {
exponent := constExponentBits
exponent |= (entropy >> (64 - staticExponentBits)) << dynamicExponentBits
exponent <<= mantissaSize
return exponent
}
// getFloatMask builds an e-register mask from entropy: its low 22 bits
// combined with the static exponent derived from its top bits.
func getFloatMask(entropy uint64) uint64 {
	lowBits := entropy & mask22bit
	return lowBits | getStaticExponent(entropy)
}

860
vm_instruction.go Normal file
View file

@ -0,0 +1,860 @@
package randomx
import "fmt"
import "math"
import "math/big"
import "math/bits"
import "encoding/binary"
//reference https://github.com/tevador/RandomX/blob/master/doc/specs.md#51-instruction-encoding
// Zero is a shared zero-valued source operand: Compile_TO_Bytecode points
// isrc at it for memory instructions whose src and dst registers coincide,
// so the address is computed from the immediate alone. It must not be modified.
var Zero uint64 = 0
// VM_Instruction is one encoded 8-byte VM instruction. Go has no unions, so
// the instruction is kept as a byte slice and decoded through accessors.
// Layout: byte 0 opcode, byte 1 dst, byte 2 src, byte 3 mod, bytes 4-7 the
// little-endian 32-bit immediate.
type VM_Instruction []byte // it is hardcoded to 8 bytes

// Opcode returns the opcode byte.
func (in VM_Instruction) Opcode() byte {
	return in[0]
}

// Dst returns the raw destination register byte.
func (in VM_Instruction) Dst() byte {
	return in[1]
}

// Src returns the raw source register byte.
func (in VM_Instruction) Src() byte {
	return in[2]
}

// Mod returns the mod byte.
func (in VM_Instruction) Mod() byte {
	return in[3]
}

// IMM returns the 32-bit immediate stored little-endian in bytes 4-7.
func (in VM_Instruction) IMM() uint32 {
	return binary.LittleEndian.Uint32(in[4:])
}
// VM_Instruction_Type identifies a decoded VM instruction.
type VM_Instruction_Type int

// Instruction types, numbered consecutively from 0 (VM_IADD_RS) to 29 (VM_NOP).
const (
	VM_IADD_RS VM_Instruction_Type = iota
	VM_IADD_M
	VM_ISUB_R
	VM_ISUB_M
	VM_IMUL_R
	VM_IMUL_M
	VM_IMULH_R
	VM_IMULH_M
	VM_ISMULH_R
	VM_ISMULH_M
	VM_IMUL_RCP
	VM_INEG_R
	VM_IXOR_R
	VM_IXOR_M
	VM_IROR_R
	VM_IROL_R
	VM_ISWAP_R
	VM_FSWAP_R
	VM_FADD_R
	VM_FADD_M
	VM_FSUB_R
	VM_FSUB_M
	VM_FSCAL_R
	VM_FMUL_R
	VM_FDIV_M
	VM_FSQRT_R
	VM_CBRANCH
	VM_CFROUND
	VM_ISTORE
	VM_NOP
)

// Names maps every instruction type to its identifier, for diagnostics.
var Names = map[VM_Instruction_Type]string{
	VM_IADD_RS:  "VM_IADD_RS",
	VM_IADD_M:   "VM_IADD_M",
	VM_ISUB_R:   "VM_ISUB_R",
	VM_ISUB_M:   "VM_ISUB_M",
	VM_IMUL_R:   "VM_IMUL_R",
	VM_IMUL_M:   "VM_IMUL_M",
	VM_IMULH_R:  "VM_IMULH_R",
	VM_IMULH_M:  "VM_IMULH_M",
	VM_ISMULH_R: "VM_ISMULH_R",
	VM_ISMULH_M: "VM_ISMULH_M",
	VM_IMUL_RCP: "VM_IMUL_RCP",
	VM_INEG_R:   "VM_INEG_R",
	VM_IXOR_R:   "VM_IXOR_R",
	VM_IXOR_M:   "VM_IXOR_M",
	VM_IROR_R:   "VM_IROR_R",
	VM_IROL_R:   "VM_IROL_R",
	VM_ISWAP_R:  "VM_ISWAP_R",
	VM_FSWAP_R:  "VM_FSWAP_R",
	VM_FADD_R:   "VM_FADD_R",
	VM_FADD_M:   "VM_FADD_M",
	VM_FSUB_R:   "VM_FSUB_R",
	VM_FSUB_M:   "VM_FSUB_M",
	VM_FSCAL_R:  "VM_FSCAL_R",
	VM_FMUL_R:   "VM_FMUL_R",
	VM_FDIV_M:   "VM_FDIV_M",
	VM_FSQRT_R:  "VM_FSQRT_R",
	VM_CBRANCH:  "VM_CBRANCH",
	VM_CFROUND:  "VM_CFROUND",
	VM_ISTORE:   "VM_ISTORE",
	VM_NOP:      "VM_NOP",
}
// Compile_TO_Bytecode decodes the RANDOMX_PROGRAM_SIZE encoded instructions in
// vm.Prog into vm.ByteCode, resolving register operands to pointers into the
// register file. The opcode byte selects the instruction by range; the width
// of each range is the instruction's frequency.
// reference https://github.com/tevador/RandomX/blob/master/doc/specs.md#52-integer-instructions
func (vm *VM) Compile_TO_Bytecode() {
	// lastModified[r] is the index of the latest instruction that writes
	// integer register r (-1 when none so far); CBRANCH uses it as jump target.
	var lastModified [REGISTERSCOUNT]int
	for r := range lastModified {
		lastModified[r] = -1
	}

	for i := 0; i < RANDOMX_PROGRAM_SIZE; i++ {
		instr := VM_Instruction(vm.Prog[i*8:])
		ibc := &vm.ByteCode[i]
		opcode := instr.Opcode()

		dst := instr.Dst() % REGISTERSCOUNT // integer register indices
		src := instr.Src() % REGISTERSCOUNT
		fltDst := instr.Dst() % REGISTERCOUNTFLT // floating point register indices
		fltSrc := instr.Src() % REGISTERCOUNTFLT
		ibc.dst = dst
		ibc.src = src

		// useL1OrL2 selects the L1 or L2 scratchpad mask from the low two bits of mod.
		useL1OrL2 := func() {
			if (instr.Mod() % 4) != 0 {
				ibc.memMask = ScratchpadL1Mask
			} else {
				ibc.memMask = ScratchpadL2Mask
			}
		}
		// intRegOrImm: dst op= src, where the operand is the sign-extended
		// immediate (stored inside the bytecode entry) when src == dst.
		intRegOrImm := func(op VM_Instruction_Type) {
			ibc.Opcode = op
			ibc.idst = &vm.reg.r[dst]
			if src != dst {
				ibc.isrc = &vm.reg.r[src]
			} else {
				ibc.imm = signExtend2sCompl(instr.IMM())
				ibc.isrc = &ibc.imm // we are pointing within bytecode
			}
			lastModified[dst] = i
		}
		// intRegReg: dst op= src, always using the source register.
		intRegReg := func(op VM_Instruction_Type) {
			ibc.Opcode = op
			ibc.idst = &vm.reg.r[dst]
			ibc.isrc = &vm.reg.r[src]
			lastModified[dst] = i
		}
		// intMem: dst op= scratchpad[(src + imm) & mask]; when src == dst the
		// address comes from the immediate alone and covers the whole L3 range.
		intMem := func(op VM_Instruction_Type) {
			ibc.Opcode = op
			ibc.idst = &vm.reg.r[dst]
			ibc.imm = signExtend2sCompl(instr.IMM())
			if src != dst {
				ibc.isrc = &vm.reg.r[src]
				useL1OrL2()
			} else {
				ibc.isrc = &Zero
				ibc.memMask = ScratchpadL3Mask
			}
			lastModified[dst] = i
		}
		// fltRegReg: group[dst] op= a[src].
		fltRegReg := func(op VM_Instruction_Type, group *[4][2]float64) {
			ibc.Opcode = op
			ibc.fdst = &group[fltDst]
			ibc.fsrc = &vm.reg.a[fltSrc]
		}
		// fltUnary: operation on group[dst] alone.
		fltUnary := func(op VM_Instruction_Type, group *[4][2]float64) {
			ibc.Opcode = op
			ibc.fdst = &group[fltDst]
		}
		// fltMem: group[dst] op= scratchpad[(r[src] + imm) & mask], L1 or L2 only.
		fltMem := func(op VM_Instruction_Type, group *[4][2]float64) {
			ibc.Opcode = op
			ibc.fdst = &group[fltDst]
			ibc.isrc = &vm.reg.r[src]
			useL1OrL2()
			ibc.imm = signExtend2sCompl(instr.IMM())
		}

		switch {
		case opcode <= 15: // IADD_RS, 16
			ibc.Opcode = VM_IADD_RS
			ibc.idst = &vm.reg.r[dst]
			ibc.isrc = &vm.reg.r[src]
			ibc.shift = uint16((instr.Mod() >> 2) % 4)
			// only the displacement register also adds the immediate
			ibc.imm = 0
			if dst == RegisterNeedsDisplacement {
				ibc.imm = signExtend2sCompl(instr.IMM())
			}
			lastModified[dst] = i

		case opcode <= 22: // IADD_M, 7
			intMem(VM_IADD_M)
		case opcode <= 38: // ISUB_R, 16
			intRegOrImm(VM_ISUB_R)
		case opcode <= 45: // ISUB_M, 7
			intMem(VM_ISUB_M)
		case opcode <= 61: // IMUL_R, 16
			intRegOrImm(VM_IMUL_R)
		case opcode <= 65: // IMUL_M, 4
			intMem(VM_IMUL_M)
		case opcode <= 69: // IMULH_R, 4
			intRegReg(VM_IMULH_R)
		case opcode <= 70: // IMULH_M, 1
			intMem(VM_IMULH_M)
		case opcode <= 74: // ISMULH_R, 4
			intRegReg(VM_ISMULH_R)
		case opcode <= 75: // ISMULH_M, 1
			intMem(VM_ISMULH_M)

		case opcode <= 83: // IMUL_RCP, 8
			// compiled to IMUL_R by the precomputed reciprocal; a zero or
			// power-of-two divisor makes the instruction a no-op
			divisor := uint64(instr.IMM())
			if isZeroOrPowerOf2(divisor) {
				ibc.Opcode = VM_NOP
			} else {
				ibc.Opcode = VM_IMUL_R
				ibc.idst = &vm.reg.r[dst]
				ibc.imm = randomx_reciprocal(divisor)
				ibc.isrc = &ibc.imm
				lastModified[dst] = i
			}

		case opcode <= 85: // INEG_R, 2
			ibc.Opcode = VM_INEG_R
			ibc.idst = &vm.reg.r[dst]
			lastModified[dst] = i

		case opcode <= 100: // IXOR_R, 15
			intRegOrImm(VM_IXOR_R)
		case opcode <= 105: // IXOR_M, 5
			intMem(VM_IXOR_M)
		case opcode <= 113: // IROR_R, 8
			intRegOrImm(VM_IROR_R)
		case opcode <= 115: // IROL_R, 2
			intRegOrImm(VM_IROL_R)

		case opcode <= 119: // ISWAP_R, 4
			if src != dst {
				ibc.Opcode = VM_ISWAP_R
				ibc.idst = &vm.reg.r[dst]
				ibc.isrc = &vm.reg.r[src]
				lastModified[dst] = i
				lastModified[src] = i
			} else {
				ibc.Opcode = VM_NOP
			}

		// below are floating point instructions
		case opcode <= 123: // FSWAP_R, 4
			// dst 0-3 addresses the f registers, 4-7 the e registers
			ibc.Opcode = VM_FSWAP_R
			if dst < REGISTERCOUNTFLT {
				ibc.fdst = &vm.reg.f[dst]
			} else {
				ibc.fdst = &vm.reg.e[dst-REGISTERCOUNTFLT]
			}

		case opcode <= 139: // FADD_R, 16
			fltRegReg(VM_FADD_R, &vm.reg.f)
		case opcode <= 144: // FADD_M, 5
			fltMem(VM_FADD_M, &vm.reg.f)
		case opcode <= 160: // FSUB_R, 16
			fltRegReg(VM_FSUB_R, &vm.reg.f)
		case opcode <= 165: // FSUB_M, 5
			fltMem(VM_FSUB_M, &vm.reg.f)
		case opcode <= 171: // FSCAL_R, 6
			fltUnary(VM_FSCAL_R, &vm.reg.f)
		case opcode <= 203: // FMUL_R, 32
			fltRegReg(VM_FMUL_R, &vm.reg.e)
		case opcode <= 207: // FDIV_M, 4
			fltMem(VM_FDIV_M, &vm.reg.e)
		case opcode <= 213: // FSQRT_R, 6
			fltUnary(VM_FSQRT_R, &vm.reg.e)

		case opcode <= 238: // CBRANCH, 25 (CBRANCH and CFROUND are interchanged)
			ibc.Opcode = VM_CBRANCH
			ibc.isrc = &vm.reg.r[dst]
			// jump target: the last instruction that modified the condition register
			ibc.target = int16(lastModified[dst])
			shift := uint64(instr.Mod()>>4) + CONDITIONOFFSET
			ibc.imm = signExtend2sCompl(instr.IMM()) | (uint64(1) << shift)
			if CONDITIONOFFSET > 0 || shift > 0 {
				// clear the bit just below the condition mask
				ibc.imm &^= uint64(1) << (shift - 1)
			}
			ibc.memMask = CONDITIONMASK << shift
			// a branch marks every register as modified at this instruction
			for r := range lastModified {
				lastModified[r] = i
			}

		case opcode == 239: // CFROUND, 1
			ibc.Opcode = VM_CFROUND
			ibc.isrc = &vm.reg.r[src]
			ibc.imm = uint64(instr.IMM() & 63)

		case opcode >= 240: // ISTORE, 16
			ibc.Opcode = VM_ISTORE
			ibc.idst = &vm.reg.r[dst]
			ibc.isrc = &vm.reg.r[src]
			ibc.imm = signExtend2sCompl(instr.IMM())
			if (instr.Mod() >> 4) < STOREL3CONDITION {
				useL1OrL2()
			} else {
				ibc.memMask = ScratchpadL3Mask
			}

		default:
			panic("unreachable")
		}
	}
}
// InstructionByteCode is one decoded instruction, as produced by
// Compile_TO_Bytecode. In the C++ reference idst/fdst, isrc/fsrc, imm/simm and
// target/shift are unions; Go has no unions, so every member is its own field.
type InstructionByteCode struct {
	dst byte // destination register index (raw byte modulo REGISTERSCOUNT)
	src byte // source register index (raw byte modulo REGISTERSCOUNT)

	idst *uint64 // integer destination register
	isrc *uint64 // integer source: a register, this entry's imm, or Zero

	fdst *[2]float64 // floating point destination register pair
	fsrc *[2]float64 // floating point source register pair

	imm  uint64 // immediate: sign-extended imm32, a reciprocal, or a branch constant
	simm int64  // signed view of the immediate in the C++ reference; not written in the visible code

	Opcode VM_Instruction_Type

	target int16  // CBRANCH: index of the last instruction that modified the condition register
	shift  uint16 // IADD_RS: left shift applied to the source (0-3)

	memMask uint32 // scratchpad address mask, or the condition mask for CBRANCH

	RoundingMode big.RoundingMode
}
// getScratchpadAddress returns the scratchpad offset addressed by a memory
// instruction: source value plus immediate (wrapping), limited by memMask.
func (ibc *InstructionByteCode) getScratchpadAddress() uint64 {
	address := *ibc.isrc + ibc.imm
	return address & uint64(ibc.memMask)
}
// Load64 reads eight scratchpad bytes at addr as a big-endian word and swaps
// its 32-bit halves, i.e. it combines two big-endian 32-bit words with the
// first one in the low half of the result.
func (vm *VM) Load64(addr uint64) uint64 {
	word := binary.BigEndian.Uint64(vm.ScratchPad[addr:])
	return bits.RotateLeft64(word, 32)
}
// Load32 reads four scratchpad bytes at addr as a big-endian 32-bit word.
func (vm *VM) Load32(addr uint64) uint32 {
	word := binary.BigEndian.Uint32(vm.ScratchPad[addr:])
	return word
}
// unsigned32ToSigned2sCompl reinterprets the 32-bit pattern x as a
// two's-complement signed integer.
func unsigned32ToSigned2sCompl(x uint32) int32 {
	if x <= math.MaxInt32 {
		return int32(x)
	}
	// Sign bit set: x represents x - 2^32, computed without leaving int32 range.
	return -int32(math.MaxUint32-x) - 1
}
// unsigned64ToSigned2sCompl reinterprets the 64-bit pattern x as a
// two's-complement signed integer.
func unsigned64ToSigned2sCompl(x uint64) int64 {
	if x <= math.MaxInt64 {
		return int64(x)
	}
	// Sign bit set: x represents x - 2^64, computed without leaving int64 range.
	return -int64(math.MaxUint64-x) - 1
}
func (vm *VM) InterpretByteCode() {
for pc := 0; pc < RANDOMX_PROGRAM_SIZE; pc++ {
ibc := &vm.ByteCode[pc]
//fmt.Printf("PCLOOP %d opcode %d %s dst %d src %d\n",pc,ibc.Opcode, Names[ibc.Opcode], ibc.dst, ibc.src)
switch ibc.Opcode {
case VM_IADD_RS:
*ibc.idst += (*ibc.isrc << ibc.shift) + ibc.imm
//panic("VM_IADD_RS")
case VM_IADD_M:
*ibc.idst += vm.Load64(ibc.getScratchpadAddress())
//panic("VM_IADD_M")
case VM_ISUB_R:
*ibc.idst -= *ibc.isrc
//panic("VM_ISUB_R")
case VM_ISUB_M:
*ibc.idst -= vm.Load64(ibc.getScratchpadAddress())
//panic("VM_ISUB_M")
case VM_IMUL_R: // also handles imul_rcp
*ibc.idst *= *ibc.isrc
//panic("VM_IMUL_R")
case VM_IMUL_M:
*ibc.idst *= vm.Load64(ibc.getScratchpadAddress())
//panic("VM_IMUL_M")
case VM_IMULH_R:
*ibc.idst, _ = bits.Mul64(*ibc.idst, *ibc.isrc)
// panic("VM_IMULH_R")
case VM_IMULH_M:
*ibc.idst, _ = bits.Mul64(*ibc.idst, vm.Load64(ibc.getScratchpadAddress()))
// fmt.Printf("%x \n",*ibc.idst )
// panic("VM_IMULH_M")
case VM_ISMULH_R:
*ibc.idst = uint64(smulh(unsigned64ToSigned2sCompl(*ibc.idst), unsigned64ToSigned2sCompl(*ibc.isrc)))
// fmt.Printf("dst %x\n", *ibc.idst)
// panic("VM_ISMULH_R")
case VM_ISMULH_M:
*ibc.idst = uint64(smulh(unsigned64ToSigned2sCompl(*ibc.idst), unsigned64ToSigned2sCompl(vm.Load64(ibc.getScratchpadAddress()))))
//fmt.Printf("%x \n",*ibc.idst )
// panic("VM_ISMULH_M")
case VM_INEG_R:
*ibc.idst = (^(*ibc.idst)) + 1 // 2's complement negative
//panic("VM_INEG_R")
case VM_IXOR_R:
*ibc.idst ^= *ibc.isrc
case VM_IXOR_M:
*ibc.idst ^= vm.Load64(ibc.getScratchpadAddress())
//panic("VM_IXOR_M")
case VM_IROR_R:
*ibc.idst = bits.RotateLeft64(*ibc.idst, 0-int(*ibc.isrc&63))
//panic("VM_IROR_R")
case VM_IROL_R:
*ibc.idst = bits.RotateLeft64(*ibc.idst, int(*ibc.isrc&63))
case VM_ISWAP_R:
*ibc.idst, *ibc.isrc = *ibc.isrc, *ibc.idst
//fmt.Printf("%x %x\n",*ibc.idst, *ibc.isrc )
//panic("VM_ISWAP_R")
case VM_FSWAP_R:
ibc.fdst[HIGH], ibc.fdst[LOW] = ibc.fdst[LOW], ibc.fdst[HIGH]
// fmt.Printf("%+v \n",ibc.fdst )
// panic("VM_FSWAP_R")
case VM_FADD_R:
//ibc.fdst[LOW] += ibc.fsrc[LOW]
//ibc.fdst[HIGH] += ibc.fsrc[HIGH]
vm.fresult.SetMode(vm.RoundingMode)
vm.fdst.SetPrec(0)
vm.fdst.SetFloat64(ibc.fdst[LOW])
vm.fsrc.SetPrec(0)
vm.fsrc.SetFloat64(ibc.fsrc[LOW])
vm.fresult.Add(vm.fdst, vm.fsrc)
ibc.fdst[LOW], _ = vm.fresult.Float64()
vm.fresult.SetMode(vm.RoundingMode)
vm.fdst.SetPrec(0)
vm.fdst.SetFloat64(ibc.fdst[HIGH])
vm.fsrc.SetPrec(0)
vm.fsrc.SetFloat64(ibc.fsrc[HIGH])
vm.fresult.Add(vm.fdst, vm.fsrc)
ibc.fdst[HIGH], _ = vm.fresult.Float64()
//panic("VM_FADD_R")
case VM_FADD_M:
//ibc.fdst[LOW] += float64(unsigned32ToSigned2sCompl(vm.Load32(ibc.getScratchpadAddress()+0)))
//ibc.fdst[HIGH] += float64(unsigned32ToSigned2sCompl(vm.Load32(ibc.getScratchpadAddress()+4)))
vm.fresult.SetMode(vm.RoundingMode)
vm.fdst.SetPrec(0)
vm.fdst.SetFloat64(ibc.fdst[LOW])
vm.fsrc.SetPrec(0)
vm.fsrc.SetFloat64(float64(unsigned32ToSigned2sCompl(vm.Load32(ibc.getScratchpadAddress() + 0))))
vm.fresult.Add(vm.fdst, vm.fsrc)
ibc.fdst[LOW], _ = vm.fresult.Float64()
vm.fresult.SetMode(vm.RoundingMode)
vm.fdst.SetPrec(0)
vm.fdst.SetFloat64(ibc.fdst[HIGH])
vm.fsrc.SetPrec(0)
vm.fsrc.SetFloat64(float64(unsigned32ToSigned2sCompl(vm.Load32(ibc.getScratchpadAddress() + 4))))
vm.fresult.Add(vm.fdst, vm.fsrc)
ibc.fdst[HIGH], _ = vm.fresult.Float64()
//panic("VM_FADD_M")
case VM_FSUB_R:
//fmt.Printf("Rounding mode %d\n", vm.RoundingMode)
//ibc.fdst[LOW] -= ibc.fsrc[LOW]
//ibc.fdst[HIGH] -= ibc.fsrc[HIGH]
vm.fresult.SetMode(vm.RoundingMode)
vm.fdst.SetPrec(0)
vm.fdst.SetFloat64(ibc.fdst[LOW])
vm.fsrc.SetPrec(0)
vm.fsrc.SetFloat64(ibc.fsrc[LOW])
vm.fresult.Sub(vm.fdst, vm.fsrc)
ibc.fdst[LOW], _ = vm.fresult.Float64()
vm.fresult.SetMode(vm.RoundingMode)
vm.fdst.SetPrec(0)
vm.fdst.SetFloat64(ibc.fdst[HIGH])
vm.fsrc.SetPrec(0)
vm.fsrc.SetFloat64(ibc.fsrc[HIGH])
vm.fresult.Sub(vm.fdst, vm.fsrc)
ibc.fdst[HIGH], _ = vm.fresult.Float64()
//fmt.Printf("fdst float %+v\n", ibc.fdst )
//panic("VM_FSUB_R")
case VM_FSUB_M:
//ibc.fdst[LOW] -= float64(unsigned32ToSigned2sCompl(vm.Load32(ibc.getScratchpadAddress()+0)))
//ibc.fdst[HIGH] -= float64(unsigned32ToSigned2sCompl(vm.Load32(ibc.getScratchpadAddress()+4)))
vm.fresult.SetMode(vm.RoundingMode)
vm.fdst.SetPrec(0)
vm.fdst.SetFloat64(ibc.fdst[LOW])
vm.fsrc.SetPrec(0)
vm.fsrc.SetFloat64(float64(unsigned32ToSigned2sCompl(vm.Load32(ibc.getScratchpadAddress() + 0))))
vm.fresult.Sub(vm.fdst, vm.fsrc)
ibc.fdst[LOW], _ = vm.fresult.Float64()
vm.fresult.SetMode(vm.RoundingMode)
vm.fdst.SetPrec(0)
vm.fdst.SetFloat64(ibc.fdst[HIGH])
vm.fsrc.SetPrec(0)
vm.fsrc.SetFloat64(float64(unsigned32ToSigned2sCompl(vm.Load32(ibc.getScratchpadAddress() + 4))))
vm.fresult.Sub(vm.fdst, vm.fsrc)
ibc.fdst[HIGH], _ = vm.fresult.Float64()
//panic("VM_FSUB_M")
case VM_FSCAL_R: // no dependent on rounding modes
//mask := math.Float64frombits(0x80F0000000000000)
ibc.fdst[LOW] = math.Float64frombits(math.Float64bits(ibc.fdst[LOW]) ^ 0x80F0000000000000)
ibc.fdst[HIGH] = math.Float64frombits(math.Float64bits(ibc.fdst[HIGH]) ^ 0x80F0000000000000)
//fmt.Printf("fdst float %+v\n", ibc.fdst )
//panic("VM_FSCA_M")
case VM_FMUL_R:
// ibc.fdst[LOW] *= ibc.fsrc[LOW]
// ibc.fdst[HIGH] *= ibc.fsrc[HIGH]
vm.fresult.SetMode(vm.RoundingMode)
vm.fdst.SetPrec(0)
vm.fdst.SetFloat64(ibc.fdst[LOW])
vm.fsrc.SetPrec(0)
vm.fsrc.SetFloat64(ibc.fsrc[LOW])
vm.fresult.Mul(vm.fdst, vm.fsrc)
ibc.fdst[LOW], _ = vm.fresult.Float64()
vm.fresult.SetMode(vm.RoundingMode)
vm.fdst.SetPrec(0)
vm.fdst.SetFloat64(ibc.fdst[HIGH])
vm.fsrc.SetPrec(0)
vm.fsrc.SetFloat64(ibc.fsrc[HIGH])
vm.fresult.Mul(vm.fdst, vm.fsrc)
ibc.fdst[HIGH], _ = vm.fresult.Float64()
//panic("VM_FMUK_M")
case VM_FDIV_M:
lo := float64(unsigned32ToSigned2sCompl(vm.Load32(ibc.getScratchpadAddress() + 0)))
high := float64(unsigned32ToSigned2sCompl(vm.Load32(ibc.getScratchpadAddress() + 4)))
lo = math.Float64frombits((math.Float64bits(lo) & dynamicMantissaMask) | vm.config.eMask[LOW])
high = math.Float64frombits((math.Float64bits(high) & dynamicMantissaMask) | vm.config.eMask[HIGH])
//ibc.fdst[LOW] /= lo
//ibc.fdst[HIGH] /= high
vm.fresult.SetMode(vm.RoundingMode)
vm.fdst.SetPrec(0)
vm.fdst.SetFloat64(ibc.fdst[LOW])
vm.fsrc.SetPrec(0)
vm.fsrc.SetFloat64(lo)
vm.fresult.Quo(vm.fdst, vm.fsrc)
ibc.fdst[LOW], _ = vm.fresult.Float64()
vm.fresult.SetMode(vm.RoundingMode)
vm.fdst.SetPrec(0)
vm.fdst.SetFloat64(ibc.fdst[HIGH])
vm.fsrc.SetPrec(0)
vm.fsrc.SetFloat64(high)
vm.fresult.Quo(vm.fdst, vm.fsrc)
ibc.fdst[HIGH], _ = vm.fresult.Float64()
//panic("VM_FDIV_M")
case VM_FSQRT_R:
// ibc.fdst[LOW] = math.Sqrt(ibc.fdst[LOW])
// ibc.fdst[HIGH] = math.Sqrt(ibc.fdst[HIGH])
vm.fresult.SetMode(vm.RoundingMode)
vm.fdst.SetPrec(0)
vm.fdst.SetFloat64(ibc.fdst[LOW])
vm.fdst.SetMode(vm.RoundingMode)
vm.fresult.Sqrt(vm.fdst)
ibc.fdst[LOW], _ = vm.fresult.Float64()
vm.fresult.SetMode(vm.RoundingMode)
vm.fdst.SetPrec(0)
vm.fdst.SetFloat64(ibc.fdst[HIGH])
vm.fdst.SetMode(vm.RoundingMode)
vm.fresult.Sqrt(vm.fdst)
ibc.fdst[HIGH], _ = vm.fresult.Float64()
// panic("VM_FSQRT")
case VM_CBRANCH:
//fmt.Printf("pc %d src %x imm %x\n",pc ,*ibc.isrc, ibc.imm)
*ibc.isrc += ibc.imm
//fmt.Printf("pc %d\n",pc)
if (*ibc.isrc & uint64(ibc.memMask)) == 0 {
pc = int(ibc.target)
}
// fmt.Printf("pc %d\n",pc)
//panic("VM_CBRANCH")
case VM_CFROUND:
tmp := (bits.RotateLeft64(*ibc.isrc, 0-int(ibc.imm))) % 4 // rotate right
switch tmp {
case 0:
vm.RoundingMode = big.ToNearestEven // RoundToNearest
case 1:
vm.RoundingMode = big.ToNegativeInf // RoundDown
case 2:
vm.RoundingMode = big.ToPositiveInf // RoundUp
case 3:
vm.RoundingMode = big.ToZero // RoundToZero
}
//panic("round not implemented")
//panic("VM_CFROUND")
case VM_ISTORE:
binary.BigEndian.PutUint64(vm.ScratchPad[(*ibc.idst+ibc.imm)&uint64(ibc.memMask):], bits.RotateLeft64(*ibc.isrc, 32))
//panic("VM_ISTOREM")
case VM_NOP: // we do nothing
default:
panic("instruction not implemented")
}
/*fmt.Printf("REGS ")
for j := 0; j <7;j++ {
fmt.Printf("%16x, " , vm.reg.r[j])
}
fmt.Printf("\n")
*/
}
}
// umm888_ is always the empty string. It is retained under its original name
// and type only so that any other code in the package that refers to it keeps
// compiling; it no longer needs a runtime fmt call to produce its value.
var umm888_ = ""

// Keep the fmt import referenced while the debugging fmt.Printf calls in this
// file are commented out. Referencing the function through the blank
// identifier costs nothing at init time. Delete this line once fmt is used
// directly or the import is removed.
var _ = fmt.Sprintf