Upload files to ''
This commit is contained in:
parent
8983c5408d
commit
906ed8092e
365
aes_const.go
Normal file
365
aes_const.go
Normal file
|
@ -0,0 +1,365 @@
|
|||
// Copyright 2009 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package randomx includes AES (formerly Rijndael) lookup tables and round primitives, as defined in
|
||||
// U.S. Federal Information Processing Standards Publication 197.
|
||||
//
|
||||
// The AES operations in this package are not implemented using constant-time algorithms.
|
||||
// An exception is when running on systems with enabled hardware support for AES
|
||||
// that makes these operations constant-time. Examples include amd64 systems using AES-NI
|
||||
// extensions and s390x systems using Message-Security-Assist extensions.
|
||||
// On such systems, when the result of NewCipher is passed to cipher.NewGCM,
|
||||
// the GHASH operation used by GCM is also constant-time.
|
||||
package randomx
|
||||
|
||||
// This file contains AES constants - 8720 bytes of initialized data.
|
||||
|
||||
// https://csrc.nist.gov/publications/fips/fips197/fips-197.pdf
|
||||
|
||||
// AES is based on the mathematical behavior of binary polynomials
|
||||
// (polynomials over GF(2)) modulo the irreducible polynomial x⁸ + x⁴ + x³ + x + 1.
|
||||
// Addition of these binary polynomials corresponds to binary xor.
|
||||
// Reducing mod poly corresponds to binary xor with poly every
|
||||
// time a 0x100 bit appears.
|
||||
// poly is the AES reduction polynomial written as a bit mask, one mask per
// term: bit k set means the term xᵏ is present.
const poly = 0x100 | 0x10 | 0x08 | 0x02 | 0x01 // x⁸ + x⁴ + x³ + x + 1
|
||||
|
||||
// powx holds the successive powers of x modulo poly, with coefficients in
// GF(2): powx[i] is xⁱ mod poly. Each entry is the previous one doubled, then
// xor-ed with poly whenever the 0x100 bit appears.
var powx = [16]byte{
	0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
	0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f,
}
|
||||
|
||||
// sbox0 is the AES S-box (FIPS-197 Figure 7): sbox0[b] is the substitution
// value for input byte b. Rows are laid out 16 entries per line, so the high
// nibble of b selects the row and the low nibble selects the column.
var sbox0 = [256]byte{
	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16,
}
|
||||
|
||||
// sbox1 is the AES inverse S-box (FIPS-197 Figure 14). It undoes sbox0:
// sbox1[sbox0[b]] == b for every byte b. Rows are laid out 16 entries per
// line, indexed by the high nibble of the input.
var sbox1 = [256]byte{
	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d,
}
|
||||
|
||||
// Lookup tables for encryption.
|
||||
// These can be recomputed by adapting the tests in aes_test.go.
|
||||
|
||||
// te0 is the first encryption lookup table. For s = sbox0[i], te0[i] holds
// the four bytes (2·s, s, s, 3·s) from most to least significant, with the
// products taken in GF(2⁸) modulo poly. This is the S-box output already
// multiplied by the MixColumns coefficients for one column position.
var te0 = [256]uint32{
	0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d, 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
	0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d, 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
	0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87, 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
	0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea, 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
	0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a, 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
	0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108, 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
	0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e, 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
	0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d, 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
	0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e, 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
	0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce, 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
	0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c, 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
	0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b, 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
	0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16, 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
	0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81, 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
	0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a, 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
	0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163, 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
	0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f, 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
	0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47, 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
	0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f, 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
	0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c, 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
	0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e, 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
	0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6, 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
	0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7, 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
	0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25, 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
	0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72, 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
	0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21, 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
	0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa, 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
	0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0, 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
	0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133, 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
	0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920, 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
	0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17, 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
	0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11, 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a,
}
|
||||
// te1 is the second encryption lookup table. For s = sbox0[i], te1[i] holds
// the four bytes (3·s, 2·s, s, s) from most to least significant, with the
// products taken in GF(2⁸) modulo poly. Each entry equals the matching te0
// entry rotated right by 8 bits.
var te1 = [256]uint32{
	0xa5c66363, 0x84f87c7c, 0x99ee7777, 0x8df67b7b, 0x0dfff2f2, 0xbdd66b6b, 0xb1de6f6f, 0x5491c5c5,
	0x50603030, 0x03020101, 0xa9ce6767, 0x7d562b2b, 0x19e7fefe, 0x62b5d7d7, 0xe64dabab, 0x9aec7676,
	0x458fcaca, 0x9d1f8282, 0x4089c9c9, 0x87fa7d7d, 0x15effafa, 0xebb25959, 0xc98e4747, 0x0bfbf0f0,
	0xec41adad, 0x67b3d4d4, 0xfd5fa2a2, 0xea45afaf, 0xbf239c9c, 0xf753a4a4, 0x96e47272, 0x5b9bc0c0,
	0xc275b7b7, 0x1ce1fdfd, 0xae3d9393, 0x6a4c2626, 0x5a6c3636, 0x417e3f3f, 0x02f5f7f7, 0x4f83cccc,
	0x5c683434, 0xf451a5a5, 0x34d1e5e5, 0x08f9f1f1, 0x93e27171, 0x73abd8d8, 0x53623131, 0x3f2a1515,
	0x0c080404, 0x5295c7c7, 0x65462323, 0x5e9dc3c3, 0x28301818, 0xa1379696, 0x0f0a0505, 0xb52f9a9a,
	0x090e0707, 0x36241212, 0x9b1b8080, 0x3ddfe2e2, 0x26cdebeb, 0x694e2727, 0xcd7fb2b2, 0x9fea7575,
	0x1b120909, 0x9e1d8383, 0x74582c2c, 0x2e341a1a, 0x2d361b1b, 0xb2dc6e6e, 0xeeb45a5a, 0xfb5ba0a0,
	0xf6a45252, 0x4d763b3b, 0x61b7d6d6, 0xce7db3b3, 0x7b522929, 0x3edde3e3, 0x715e2f2f, 0x97138484,
	0xf5a65353, 0x68b9d1d1, 0x00000000, 0x2cc1eded, 0x60402020, 0x1fe3fcfc, 0xc879b1b1, 0xedb65b5b,
	0xbed46a6a, 0x468dcbcb, 0xd967bebe, 0x4b723939, 0xde944a4a, 0xd4984c4c, 0xe8b05858, 0x4a85cfcf,
	0x6bbbd0d0, 0x2ac5efef, 0xe54faaaa, 0x16edfbfb, 0xc5864343, 0xd79a4d4d, 0x55663333, 0x94118585,
	0xcf8a4545, 0x10e9f9f9, 0x06040202, 0x81fe7f7f, 0xf0a05050, 0x44783c3c, 0xba259f9f, 0xe34ba8a8,
	0xf3a25151, 0xfe5da3a3, 0xc0804040, 0x8a058f8f, 0xad3f9292, 0xbc219d9d, 0x48703838, 0x04f1f5f5,
	0xdf63bcbc, 0xc177b6b6, 0x75afdada, 0x63422121, 0x30201010, 0x1ae5ffff, 0x0efdf3f3, 0x6dbfd2d2,
	0x4c81cdcd, 0x14180c0c, 0x35261313, 0x2fc3ecec, 0xe1be5f5f, 0xa2359797, 0xcc884444, 0x392e1717,
	0x5793c4c4, 0xf255a7a7, 0x82fc7e7e, 0x477a3d3d, 0xacc86464, 0xe7ba5d5d, 0x2b321919, 0x95e67373,
	0xa0c06060, 0x98198181, 0xd19e4f4f, 0x7fa3dcdc, 0x66442222, 0x7e542a2a, 0xab3b9090, 0x830b8888,
	0xca8c4646, 0x29c7eeee, 0xd36bb8b8, 0x3c281414, 0x79a7dede, 0xe2bc5e5e, 0x1d160b0b, 0x76addbdb,
	0x3bdbe0e0, 0x56643232, 0x4e743a3a, 0x1e140a0a, 0xdb924949, 0x0a0c0606, 0x6c482424, 0xe4b85c5c,
	0x5d9fc2c2, 0x6ebdd3d3, 0xef43acac, 0xa6c46262, 0xa8399191, 0xa4319595, 0x37d3e4e4, 0x8bf27979,
	0x32d5e7e7, 0x438bc8c8, 0x596e3737, 0xb7da6d6d, 0x8c018d8d, 0x64b1d5d5, 0xd29c4e4e, 0xe049a9a9,
	0xb4d86c6c, 0xfaac5656, 0x07f3f4f4, 0x25cfeaea, 0xafca6565, 0x8ef47a7a, 0xe947aeae, 0x18100808,
	0xd56fbaba, 0x88f07878, 0x6f4a2525, 0x725c2e2e, 0x24381c1c, 0xf157a6a6, 0xc773b4b4, 0x5197c6c6,
	0x23cbe8e8, 0x7ca1dddd, 0x9ce87474, 0x213e1f1f, 0xdd964b4b, 0xdc61bdbd, 0x860d8b8b, 0x850f8a8a,
	0x90e07070, 0x427c3e3e, 0xc471b5b5, 0xaacc6666, 0xd8904848, 0x05060303, 0x01f7f6f6, 0x121c0e0e,
	0xa3c26161, 0x5f6a3535, 0xf9ae5757, 0xd069b9b9, 0x91178686, 0x5899c1c1, 0x273a1d1d, 0xb9279e9e,
	0x38d9e1e1, 0x13ebf8f8, 0xb32b9898, 0x33221111, 0xbbd26969, 0x70a9d9d9, 0x89078e8e, 0xa7339494,
	0xb62d9b9b, 0x223c1e1e, 0x92158787, 0x20c9e9e9, 0x4987cece, 0xffaa5555, 0x78502828, 0x7aa5dfdf,
	0x8f038c8c, 0xf859a1a1, 0x80098989, 0x171a0d0d, 0xda65bfbf, 0x31d7e6e6, 0xc6844242, 0xb8d06868,
	0xc3824141, 0xb0299999, 0x775a2d2d, 0x111e0f0f, 0xcb7bb0b0, 0xfca85454, 0xd66dbbbb, 0x3a2c1616,
}
|
||||
// te2 is the third encryption lookup table. For s = sbox0[i], te2[i] holds
// the four bytes (s, 3·s, 2·s, s) from most to least significant, with the
// products taken in GF(2⁸) modulo poly. Each entry equals the matching te0
// entry rotated right by 16 bits.
var te2 = [256]uint32{
	0x63a5c663, 0x7c84f87c, 0x7799ee77, 0x7b8df67b, 0xf20dfff2, 0x6bbdd66b, 0x6fb1de6f, 0xc55491c5,
	0x30506030, 0x01030201, 0x67a9ce67, 0x2b7d562b, 0xfe19e7fe, 0xd762b5d7, 0xabe64dab, 0x769aec76,
	0xca458fca, 0x829d1f82, 0xc94089c9, 0x7d87fa7d, 0xfa15effa, 0x59ebb259, 0x47c98e47, 0xf00bfbf0,
	0xadec41ad, 0xd467b3d4, 0xa2fd5fa2, 0xafea45af, 0x9cbf239c, 0xa4f753a4, 0x7296e472, 0xc05b9bc0,
	0xb7c275b7, 0xfd1ce1fd, 0x93ae3d93, 0x266a4c26, 0x365a6c36, 0x3f417e3f, 0xf702f5f7, 0xcc4f83cc,
	0x345c6834, 0xa5f451a5, 0xe534d1e5, 0xf108f9f1, 0x7193e271, 0xd873abd8, 0x31536231, 0x153f2a15,
	0x040c0804, 0xc75295c7, 0x23654623, 0xc35e9dc3, 0x18283018, 0x96a13796, 0x050f0a05, 0x9ab52f9a,
	0x07090e07, 0x12362412, 0x809b1b80, 0xe23ddfe2, 0xeb26cdeb, 0x27694e27, 0xb2cd7fb2, 0x759fea75,
	0x091b1209, 0x839e1d83, 0x2c74582c, 0x1a2e341a, 0x1b2d361b, 0x6eb2dc6e, 0x5aeeb45a, 0xa0fb5ba0,
	0x52f6a452, 0x3b4d763b, 0xd661b7d6, 0xb3ce7db3, 0x297b5229, 0xe33edde3, 0x2f715e2f, 0x84971384,
	0x53f5a653, 0xd168b9d1, 0x00000000, 0xed2cc1ed, 0x20604020, 0xfc1fe3fc, 0xb1c879b1, 0x5bedb65b,
	0x6abed46a, 0xcb468dcb, 0xbed967be, 0x394b7239, 0x4ade944a, 0x4cd4984c, 0x58e8b058, 0xcf4a85cf,
	0xd06bbbd0, 0xef2ac5ef, 0xaae54faa, 0xfb16edfb, 0x43c58643, 0x4dd79a4d, 0x33556633, 0x85941185,
	0x45cf8a45, 0xf910e9f9, 0x02060402, 0x7f81fe7f, 0x50f0a050, 0x3c44783c, 0x9fba259f, 0xa8e34ba8,
	0x51f3a251, 0xa3fe5da3, 0x40c08040, 0x8f8a058f, 0x92ad3f92, 0x9dbc219d, 0x38487038, 0xf504f1f5,
	0xbcdf63bc, 0xb6c177b6, 0xda75afda, 0x21634221, 0x10302010, 0xff1ae5ff, 0xf30efdf3, 0xd26dbfd2,
	0xcd4c81cd, 0x0c14180c, 0x13352613, 0xec2fc3ec, 0x5fe1be5f, 0x97a23597, 0x44cc8844, 0x17392e17,
	0xc45793c4, 0xa7f255a7, 0x7e82fc7e, 0x3d477a3d, 0x64acc864, 0x5de7ba5d, 0x192b3219, 0x7395e673,
	0x60a0c060, 0x81981981, 0x4fd19e4f, 0xdc7fa3dc, 0x22664422, 0x2a7e542a, 0x90ab3b90, 0x88830b88,
	0x46ca8c46, 0xee29c7ee, 0xb8d36bb8, 0x143c2814, 0xde79a7de, 0x5ee2bc5e, 0x0b1d160b, 0xdb76addb,
	0xe03bdbe0, 0x32566432, 0x3a4e743a, 0x0a1e140a, 0x49db9249, 0x060a0c06, 0x246c4824, 0x5ce4b85c,
	0xc25d9fc2, 0xd36ebdd3, 0xacef43ac, 0x62a6c462, 0x91a83991, 0x95a43195, 0xe437d3e4, 0x798bf279,
	0xe732d5e7, 0xc8438bc8, 0x37596e37, 0x6db7da6d, 0x8d8c018d, 0xd564b1d5, 0x4ed29c4e, 0xa9e049a9,
	0x6cb4d86c, 0x56faac56, 0xf407f3f4, 0xea25cfea, 0x65afca65, 0x7a8ef47a, 0xaee947ae, 0x08181008,
	0xbad56fba, 0x7888f078, 0x256f4a25, 0x2e725c2e, 0x1c24381c, 0xa6f157a6, 0xb4c773b4, 0xc65197c6,
	0xe823cbe8, 0xdd7ca1dd, 0x749ce874, 0x1f213e1f, 0x4bdd964b, 0xbddc61bd, 0x8b860d8b, 0x8a850f8a,
	0x7090e070, 0x3e427c3e, 0xb5c471b5, 0x66aacc66, 0x48d89048, 0x03050603, 0xf601f7f6, 0x0e121c0e,
	0x61a3c261, 0x355f6a35, 0x57f9ae57, 0xb9d069b9, 0x86911786, 0xc15899c1, 0x1d273a1d, 0x9eb9279e,
	0xe138d9e1, 0xf813ebf8, 0x98b32b98, 0x11332211, 0x69bbd269, 0xd970a9d9, 0x8e89078e, 0x94a73394,
	0x9bb62d9b, 0x1e223c1e, 0x87921587, 0xe920c9e9, 0xce4987ce, 0x55ffaa55, 0x28785028, 0xdf7aa5df,
	0x8c8f038c, 0xa1f859a1, 0x89800989, 0x0d171a0d, 0xbfda65bf, 0xe631d7e6, 0x42c68442, 0x68b8d068,
	0x41c38241, 0x99b02999, 0x2d775a2d, 0x0f111e0f, 0xb0cb7bb0, 0x54fca854, 0xbbd66dbb, 0x163a2c16,
}
|
||||
// te3 is the fourth encryption lookup table. For s = sbox0[i], te3[i] holds
// the four bytes (s, s, 3·s, 2·s) from most to least significant, with the
// products taken in GF(2⁸) modulo poly. Each entry equals the matching te0
// entry rotated right by 24 bits.
var te3 = [256]uint32{
	0x6363a5c6, 0x7c7c84f8, 0x777799ee, 0x7b7b8df6, 0xf2f20dff, 0x6b6bbdd6, 0x6f6fb1de, 0xc5c55491,
	0x30305060, 0x01010302, 0x6767a9ce, 0x2b2b7d56, 0xfefe19e7, 0xd7d762b5, 0xababe64d, 0x76769aec,
	0xcaca458f, 0x82829d1f, 0xc9c94089, 0x7d7d87fa, 0xfafa15ef, 0x5959ebb2, 0x4747c98e, 0xf0f00bfb,
	0xadadec41, 0xd4d467b3, 0xa2a2fd5f, 0xafafea45, 0x9c9cbf23, 0xa4a4f753, 0x727296e4, 0xc0c05b9b,
	0xb7b7c275, 0xfdfd1ce1, 0x9393ae3d, 0x26266a4c, 0x36365a6c, 0x3f3f417e, 0xf7f702f5, 0xcccc4f83,
	0x34345c68, 0xa5a5f451, 0xe5e534d1, 0xf1f108f9, 0x717193e2, 0xd8d873ab, 0x31315362, 0x15153f2a,
	0x04040c08, 0xc7c75295, 0x23236546, 0xc3c35e9d, 0x18182830, 0x9696a137, 0x05050f0a, 0x9a9ab52f,
	0x0707090e, 0x12123624, 0x80809b1b, 0xe2e23ddf, 0xebeb26cd, 0x2727694e, 0xb2b2cd7f, 0x75759fea,
	0x09091b12, 0x83839e1d, 0x2c2c7458, 0x1a1a2e34, 0x1b1b2d36, 0x6e6eb2dc, 0x5a5aeeb4, 0xa0a0fb5b,
	0x5252f6a4, 0x3b3b4d76, 0xd6d661b7, 0xb3b3ce7d, 0x29297b52, 0xe3e33edd, 0x2f2f715e, 0x84849713,
	0x5353f5a6, 0xd1d168b9, 0x00000000, 0xeded2cc1, 0x20206040, 0xfcfc1fe3, 0xb1b1c879, 0x5b5bedb6,
	0x6a6abed4, 0xcbcb468d, 0xbebed967, 0x39394b72, 0x4a4ade94, 0x4c4cd498, 0x5858e8b0, 0xcfcf4a85,
	0xd0d06bbb, 0xefef2ac5, 0xaaaae54f, 0xfbfb16ed, 0x4343c586, 0x4d4dd79a, 0x33335566, 0x85859411,
	0x4545cf8a, 0xf9f910e9, 0x02020604, 0x7f7f81fe, 0x5050f0a0, 0x3c3c4478, 0x9f9fba25, 0xa8a8e34b,
	0x5151f3a2, 0xa3a3fe5d, 0x4040c080, 0x8f8f8a05, 0x9292ad3f, 0x9d9dbc21, 0x38384870, 0xf5f504f1,
	0xbcbcdf63, 0xb6b6c177, 0xdada75af, 0x21216342, 0x10103020, 0xffff1ae5, 0xf3f30efd, 0xd2d26dbf,
	0xcdcd4c81, 0x0c0c1418, 0x13133526, 0xecec2fc3, 0x5f5fe1be, 0x9797a235, 0x4444cc88, 0x1717392e,
	0xc4c45793, 0xa7a7f255, 0x7e7e82fc, 0x3d3d477a, 0x6464acc8, 0x5d5de7ba, 0x19192b32, 0x737395e6,
	0x6060a0c0, 0x81819819, 0x4f4fd19e, 0xdcdc7fa3, 0x22226644, 0x2a2a7e54, 0x9090ab3b, 0x8888830b,
	0x4646ca8c, 0xeeee29c7, 0xb8b8d36b, 0x14143c28, 0xdede79a7, 0x5e5ee2bc, 0x0b0b1d16, 0xdbdb76ad,
	0xe0e03bdb, 0x32325664, 0x3a3a4e74, 0x0a0a1e14, 0x4949db92, 0x06060a0c, 0x24246c48, 0x5c5ce4b8,
	0xc2c25d9f, 0xd3d36ebd, 0xacacef43, 0x6262a6c4, 0x9191a839, 0x9595a431, 0xe4e437d3, 0x79798bf2,
	0xe7e732d5, 0xc8c8438b, 0x3737596e, 0x6d6db7da, 0x8d8d8c01, 0xd5d564b1, 0x4e4ed29c, 0xa9a9e049,
	0x6c6cb4d8, 0x5656faac, 0xf4f407f3, 0xeaea25cf, 0x6565afca, 0x7a7a8ef4, 0xaeaee947, 0x08081810,
	0xbabad56f, 0x787888f0, 0x25256f4a, 0x2e2e725c, 0x1c1c2438, 0xa6a6f157, 0xb4b4c773, 0xc6c65197,
	0xe8e823cb, 0xdddd7ca1, 0x74749ce8, 0x1f1f213e, 0x4b4bdd96, 0xbdbddc61, 0x8b8b860d, 0x8a8a850f,
	0x707090e0, 0x3e3e427c, 0xb5b5c471, 0x6666aacc, 0x4848d890, 0x03030506, 0xf6f601f7, 0x0e0e121c,
	0x6161a3c2, 0x35355f6a, 0x5757f9ae, 0xb9b9d069, 0x86869117, 0xc1c15899, 0x1d1d273a, 0x9e9eb927,
	0xe1e138d9, 0xf8f813eb, 0x9898b32b, 0x11113322, 0x6969bbd2, 0xd9d970a9, 0x8e8e8907, 0x9494a733,
	0x9b9bb62d, 0x1e1e223c, 0x87879215, 0xe9e920c9, 0xcece4987, 0x5555ffaa, 0x28287850, 0xdfdf7aa5,
	0x8c8c8f03, 0xa1a1f859, 0x89898009, 0x0d0d171a, 0xbfbfda65, 0xe6e631d7, 0x4242c684, 0x6868b8d0,
	0x4141c382, 0x9999b029, 0x2d2d775a, 0x0f0f111e, 0xb0b0cb7b, 0x5454fca8, 0xbbbbd66d, 0x16163a2c,
}
|
||||
|
||||
// Lookup tables for decryption.
|
||||
// These can be recomputed by adapting the tests in aes_test.go.
|
||||
|
||||
// td0 is the first decryption lookup table. For s = sbox1[i], td0[i] holds
// the four bytes (0e·s, 09·s, 0d·s, 0b·s) from most to least significant,
// with the products taken in GF(2⁸) modulo poly. This is the inverse S-box
// output already multiplied by the InvMixColumns coefficients for one column
// position.
var td0 = [256]uint32{
	0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96, 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
	0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25, 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
	0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1, 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
	0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da, 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
	0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd, 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
	0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45, 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
	0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7, 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
	0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5, 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
	0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1, 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
	0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75, 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
	0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46, 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
	0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77, 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
	0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000, 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
	0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927, 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
	0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e, 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
	0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d, 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
	0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd, 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
	0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163, 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
	0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d, 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
	0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422, 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
	0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36, 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
	0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662, 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
	0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3, 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
	0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8, 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
	0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6, 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
	0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815, 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
	0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df, 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
	0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e, 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
	0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89, 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
	0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf, 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
	0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f, 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
	0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190, 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742,
}
|
||||
// td1 is the second decryption lookup table. For s = sbox1[i], td1[i] holds
// the four bytes (0b·s, 0e·s, 09·s, 0d·s) from most to least significant,
// with the products taken in GF(2⁸) modulo poly. Each entry equals the
// matching td0 entry rotated right by 8 bits.
var td1 = [256]uint32{
	0x5051f4a7, 0x537e4165, 0xc31a17a4, 0x963a275e, 0xcb3bab6b, 0xf11f9d45, 0xabacfa58, 0x934be303,
	0x552030fa, 0xf6ad766d, 0x9188cc76, 0x25f5024c, 0xfc4fe5d7, 0xd7c52acb, 0x80263544, 0x8fb562a3,
	0x49deb15a, 0x6725ba1b, 0x9845ea0e, 0xe15dfec0, 0x02c32f75, 0x12814cf0, 0xa38d4697, 0xc66bd3f9,
	0xe7038f5f, 0x9515929c, 0xebbf6d7a, 0xda955259, 0x2dd4be83, 0xd3587421, 0x2949e069, 0x448ec9c8,
	0x6a75c289, 0x78f48e79, 0x6b99583e, 0xdd27b971, 0xb6bee14f, 0x17f088ad, 0x66c920ac, 0xb47dce3a,
	0x1863df4a, 0x82e51a31, 0x60975133, 0x4562537f, 0xe0b16477, 0x84bb6bae, 0x1cfe81a0, 0x94f9082b,
	0x58704868, 0x198f45fd, 0x8794de6c, 0xb7527bf8, 0x23ab73d3, 0xe2724b02, 0x57e31f8f, 0x2a6655ab,
	0x07b2eb28, 0x032fb5c2, 0x9a86c57b, 0xa5d33708, 0xf2302887, 0xb223bfa5, 0xba02036a, 0x5ced1682,
	0x2b8acf1c, 0x92a779b4, 0xf0f307f2, 0xa14e69e2, 0xcd65daf4, 0xd50605be, 0x1fd13462, 0x8ac4a6fe,
	0x9d342e53, 0xa0a2f355, 0x32058ae1, 0x75a4f6eb, 0x390b83ec, 0xaa4060ef, 0x065e719f, 0x51bd6e10,
	0xf93e218a, 0x3d96dd06, 0xaedd3e05, 0x464de6bd, 0xb591548d, 0x0571c45d, 0x6f0406d4, 0xff605015,
	0x241998fb, 0x97d6bde9, 0xcc894043, 0x7767d99e, 0xbdb0e842, 0x8807898b, 0x38e7195b, 0xdb79c8ee,
	0x47a17c0a, 0xe97c420f, 0xc9f8841e, 0x00000000, 0x83098086, 0x48322bed, 0xac1e1170, 0x4e6c5a72,
	0xfbfd0eff, 0x560f8538, 0x1e3daed5, 0x27362d39, 0x640a0fd9, 0x21685ca6, 0xd19b5b54, 0x3a24362e,
	0xb10c0a67, 0x0f9357e7, 0xd2b4ee96, 0x9e1b9b91, 0x4f80c0c5, 0xa261dc20, 0x695a774b, 0x161c121a,
	0x0ae293ba, 0xe5c0a02a, 0x433c22e0, 0x1d121b17, 0x0b0e090d, 0xadf28bc7, 0xb92db6a8, 0xc8141ea9,
	0x8557f119, 0x4caf7507, 0xbbee99dd, 0xfda37f60, 0x9ff70126, 0xbc5c72f5, 0xc544663b, 0x345bfb7e,
	0x768b4329, 0xdccb23c6, 0x68b6edfc, 0x63b8e4f1, 0xcad731dc, 0x10426385, 0x40139722, 0x2084c611,
	0x7d854a24, 0xf8d2bb3d, 0x11aef932, 0x6dc729a1, 0x4b1d9e2f, 0xf3dcb230, 0xec0d8652, 0xd077c1e3,
	0x6c2bb316, 0x99a970b9, 0xfa119448, 0x2247e964, 0xc4a8fc8c, 0x1aa0f03f, 0xd8567d2c, 0xef223390,
	0xc787494e, 0xc1d938d1, 0xfe8ccaa2, 0x3698d40b, 0xcfa6f581, 0x28a57ade, 0x26dab78e, 0xa43fadbf,
	0xe42c3a9d, 0x0d507892, 0x9b6a5fcc, 0x62547e46, 0xc2f68d13, 0xe890d8b8, 0x5e2e39f7, 0xf582c3af,
	0xbe9f5d80, 0x7c69d093, 0xa96fd52d, 0xb3cf2512, 0x3bc8ac99, 0xa710187d, 0x6ee89c63, 0x7bdb3bbb,
	0x09cd2678, 0xf46e5918, 0x01ec9ab7, 0xa8834f9a, 0x65e6956e, 0x7eaaffe6, 0x0821bccf, 0xe6ef15e8,
	0xd9bae79b, 0xce4a6f36, 0xd4ea9f09, 0xd629b07c, 0xaf31a4b2, 0x312a3f23, 0x30c6a594, 0xc035a266,
	0x37744ebc, 0xa6fc82ca, 0xb0e090d0, 0x1533a7d8, 0x4af10498, 0xf741ecda, 0x0e7fcd50, 0x2f1791f6,
	0x8d764dd6, 0x4d43efb0, 0x54ccaa4d, 0xdfe49604, 0xe39ed1b5, 0x1b4c6a88, 0xb8c12c1f, 0x7f466551,
	0x049d5eea, 0x5d018c35, 0x73fa8774, 0x2efb0b41, 0x5ab3671d, 0x5292dbd2, 0x33e91056, 0x136dd647,
	0x8c9ad761, 0x7a37a10c, 0x8e59f814, 0x89eb133c, 0xeecea927, 0x35b761c9, 0xede11ce5, 0x3c7a47b1,
	0x599cd2df, 0x3f55f273, 0x791814ce, 0xbf73c737, 0xea53f7cd, 0x5b5ffdaa, 0x14df3d6f, 0x867844db,
	0x81caaff3, 0x3eb968c4, 0x2c382434, 0x5fc2a340, 0x72161dc3, 0x0cbce225, 0x8b283c49, 0x41ff0d95,
	0x7139a801, 0xde080cb3, 0x9cd8b4e4, 0x906456c1, 0x617bcb84, 0x70d532b6, 0x74486c5c, 0x42d0b857,
}
|
||||
// td2 is the third decryption lookup table. For s = sbox1[i], td2[i] holds
// the four bytes (0d·s, 0b·s, 0e·s, 09·s) from most to least significant,
// with the products taken in GF(2⁸) modulo poly. Each entry equals the
// matching td0 entry rotated right by 16 bits.
var td2 = [256]uint32{
	0xa75051f4, 0x65537e41, 0xa4c31a17, 0x5e963a27, 0x6bcb3bab, 0x45f11f9d, 0x58abacfa, 0x03934be3,
	0xfa552030, 0x6df6ad76, 0x769188cc, 0x4c25f502, 0xd7fc4fe5, 0xcbd7c52a, 0x44802635, 0xa38fb562,
	0x5a49deb1, 0x1b6725ba, 0x0e9845ea, 0xc0e15dfe, 0x7502c32f, 0xf012814c, 0x97a38d46, 0xf9c66bd3,
	0x5fe7038f, 0x9c951592, 0x7aebbf6d, 0x59da9552, 0x832dd4be, 0x21d35874, 0x692949e0, 0xc8448ec9,
	0x896a75c2, 0x7978f48e, 0x3e6b9958, 0x71dd27b9, 0x4fb6bee1, 0xad17f088, 0xac66c920, 0x3ab47dce,
	0x4a1863df, 0x3182e51a, 0x33609751, 0x7f456253, 0x77e0b164, 0xae84bb6b, 0xa01cfe81, 0x2b94f908,
	0x68587048, 0xfd198f45, 0x6c8794de, 0xf8b7527b, 0xd323ab73, 0x02e2724b, 0x8f57e31f, 0xab2a6655,
	0x2807b2eb, 0xc2032fb5, 0x7b9a86c5, 0x08a5d337, 0x87f23028, 0xa5b223bf, 0x6aba0203, 0x825ced16,
	0x1c2b8acf, 0xb492a779, 0xf2f0f307, 0xe2a14e69, 0xf4cd65da, 0xbed50605, 0x621fd134, 0xfe8ac4a6,
	0x539d342e, 0x55a0a2f3, 0xe132058a, 0xeb75a4f6, 0xec390b83, 0xefaa4060, 0x9f065e71, 0x1051bd6e,
	0x8af93e21, 0x063d96dd, 0x05aedd3e, 0xbd464de6, 0x8db59154, 0x5d0571c4, 0xd46f0406, 0x15ff6050,
	0xfb241998, 0xe997d6bd, 0x43cc8940, 0x9e7767d9, 0x42bdb0e8, 0x8b880789, 0x5b38e719, 0xeedb79c8,
	0x0a47a17c, 0x0fe97c42, 0x1ec9f884, 0x00000000, 0x86830980, 0xed48322b, 0x70ac1e11, 0x724e6c5a,
	0xfffbfd0e, 0x38560f85, 0xd51e3dae, 0x3927362d, 0xd9640a0f, 0xa621685c, 0x54d19b5b, 0x2e3a2436,
	0x67b10c0a, 0xe70f9357, 0x96d2b4ee, 0x919e1b9b, 0xc54f80c0, 0x20a261dc, 0x4b695a77, 0x1a161c12,
	0xba0ae293, 0x2ae5c0a0, 0xe0433c22, 0x171d121b, 0x0d0b0e09, 0xc7adf28b, 0xa8b92db6, 0xa9c8141e,
	0x198557f1, 0x074caf75, 0xddbbee99, 0x60fda37f, 0x269ff701, 0xf5bc5c72, 0x3bc54466, 0x7e345bfb,
	0x29768b43, 0xc6dccb23, 0xfc68b6ed, 0xf163b8e4, 0xdccad731, 0x85104263, 0x22401397, 0x112084c6,
	0x247d854a, 0x3df8d2bb, 0x3211aef9, 0xa16dc729, 0x2f4b1d9e, 0x30f3dcb2, 0x52ec0d86, 0xe3d077c1,
	0x166c2bb3, 0xb999a970, 0x48fa1194, 0x642247e9, 0x8cc4a8fc, 0x3f1aa0f0, 0x2cd8567d, 0x90ef2233,
	0x4ec78749, 0xd1c1d938, 0xa2fe8cca, 0x0b3698d4, 0x81cfa6f5, 0xde28a57a, 0x8e26dab7, 0xbfa43fad,
	0x9de42c3a, 0x920d5078, 0xcc9b6a5f, 0x4662547e, 0x13c2f68d, 0xb8e890d8, 0xf75e2e39, 0xaff582c3,
	0x80be9f5d, 0x937c69d0, 0x2da96fd5, 0x12b3cf25, 0x993bc8ac, 0x7da71018, 0x636ee89c, 0xbb7bdb3b,
	0x7809cd26, 0x18f46e59, 0xb701ec9a, 0x9aa8834f, 0x6e65e695, 0xe67eaaff, 0xcf0821bc, 0xe8e6ef15,
	0x9bd9bae7, 0x36ce4a6f, 0x09d4ea9f, 0x7cd629b0, 0xb2af31a4, 0x23312a3f, 0x9430c6a5, 0x66c035a2,
	0xbc37744e, 0xcaa6fc82, 0xd0b0e090, 0xd81533a7, 0x984af104, 0xdaf741ec, 0x500e7fcd, 0xf62f1791,
	0xd68d764d, 0xb04d43ef, 0x4d54ccaa, 0x04dfe496, 0xb5e39ed1, 0x881b4c6a, 0x1fb8c12c, 0x517f4665,
	0xea049d5e, 0x355d018c, 0x7473fa87, 0x412efb0b, 0x1d5ab367, 0xd25292db, 0x5633e910, 0x47136dd6,
	0x618c9ad7, 0x0c7a37a1, 0x148e59f8, 0x3c89eb13, 0x27eecea9, 0xc935b761, 0xe5ede11c, 0xb13c7a47,
	0xdf599cd2, 0x733f55f2, 0xce791814, 0x37bf73c7, 0xcdea53f7, 0xaa5b5ffd, 0x6f14df3d, 0xdb867844,
	0xf381caaf, 0xc43eb968, 0x342c3824, 0x405fc2a3, 0xc372161d, 0x250cbce2, 0x498b283c, 0x9541ff0d,
	0x017139a8, 0xb3de080c, 0xe49cd8b4, 0xc1906456, 0x84617bcb, 0xb670d532, 0x5c74486c, 0x5742d0b8,
}
|
||||
// td3 is the fourth decryption lookup table. For s = sbox1[i], td3[i] holds
// the four bytes (09·s, 0d·s, 0b·s, 0e·s) from most to least significant,
// with the products taken in GF(2⁸) modulo poly. Each entry equals the
// matching td0 entry rotated right by 24 bits.
var td3 = [256]uint32{
	0xf4a75051, 0x4165537e, 0x17a4c31a, 0x275e963a, 0xab6bcb3b, 0x9d45f11f, 0xfa58abac, 0xe303934b,
	0x30fa5520, 0x766df6ad, 0xcc769188, 0x024c25f5, 0xe5d7fc4f, 0x2acbd7c5, 0x35448026, 0x62a38fb5,
	0xb15a49de, 0xba1b6725, 0xea0e9845, 0xfec0e15d, 0x2f7502c3, 0x4cf01281, 0x4697a38d, 0xd3f9c66b,
	0x8f5fe703, 0x929c9515, 0x6d7aebbf, 0x5259da95, 0xbe832dd4, 0x7421d358, 0xe0692949, 0xc9c8448e,
	0xc2896a75, 0x8e7978f4, 0x583e6b99, 0xb971dd27, 0xe14fb6be, 0x88ad17f0, 0x20ac66c9, 0xce3ab47d,
	0xdf4a1863, 0x1a3182e5, 0x51336097, 0x537f4562, 0x6477e0b1, 0x6bae84bb, 0x81a01cfe, 0x082b94f9,
	0x48685870, 0x45fd198f, 0xde6c8794, 0x7bf8b752, 0x73d323ab, 0x4b02e272, 0x1f8f57e3, 0x55ab2a66,
	0xeb2807b2, 0xb5c2032f, 0xc57b9a86, 0x3708a5d3, 0x2887f230, 0xbfa5b223, 0x036aba02, 0x16825ced,
	0xcf1c2b8a, 0x79b492a7, 0x07f2f0f3, 0x69e2a14e, 0xdaf4cd65, 0x05bed506, 0x34621fd1, 0xa6fe8ac4,
	0x2e539d34, 0xf355a0a2, 0x8ae13205, 0xf6eb75a4, 0x83ec390b, 0x60efaa40, 0x719f065e, 0x6e1051bd,
	0x218af93e, 0xdd063d96, 0x3e05aedd, 0xe6bd464d, 0x548db591, 0xc45d0571, 0x06d46f04, 0x5015ff60,
	0x98fb2419, 0xbde997d6, 0x4043cc89, 0xd99e7767, 0xe842bdb0, 0x898b8807, 0x195b38e7, 0xc8eedb79,
	0x7c0a47a1, 0x420fe97c, 0x841ec9f8, 0x00000000, 0x80868309, 0x2bed4832, 0x1170ac1e, 0x5a724e6c,
	0x0efffbfd, 0x8538560f, 0xaed51e3d, 0x2d392736, 0x0fd9640a, 0x5ca62168, 0x5b54d19b, 0x362e3a24,
	0x0a67b10c, 0x57e70f93, 0xee96d2b4, 0x9b919e1b, 0xc0c54f80, 0xdc20a261, 0x774b695a, 0x121a161c,
	0x93ba0ae2, 0xa02ae5c0, 0x22e0433c, 0x1b171d12, 0x090d0b0e, 0x8bc7adf2, 0xb6a8b92d, 0x1ea9c814,
	0xf1198557, 0x75074caf, 0x99ddbbee, 0x7f60fda3, 0x01269ff7, 0x72f5bc5c, 0x663bc544, 0xfb7e345b,
	0x4329768b, 0x23c6dccb, 0xedfc68b6, 0xe4f163b8, 0x31dccad7, 0x63851042, 0x97224013, 0xc6112084,
	0x4a247d85, 0xbb3df8d2, 0xf93211ae, 0x29a16dc7, 0x9e2f4b1d, 0xb230f3dc, 0x8652ec0d, 0xc1e3d077,
	0xb3166c2b, 0x70b999a9, 0x9448fa11, 0xe9642247, 0xfc8cc4a8, 0xf03f1aa0, 0x7d2cd856, 0x3390ef22,
	0x494ec787, 0x38d1c1d9, 0xcaa2fe8c, 0xd40b3698, 0xf581cfa6, 0x7ade28a5, 0xb78e26da, 0xadbfa43f,
	0x3a9de42c, 0x78920d50, 0x5fcc9b6a, 0x7e466254, 0x8d13c2f6, 0xd8b8e890, 0x39f75e2e, 0xc3aff582,
	0x5d80be9f, 0xd0937c69, 0xd52da96f, 0x2512b3cf, 0xac993bc8, 0x187da710, 0x9c636ee8, 0x3bbb7bdb,
	0x267809cd, 0x5918f46e, 0x9ab701ec, 0x4f9aa883, 0x956e65e6, 0xffe67eaa, 0xbccf0821, 0x15e8e6ef,
	0xe79bd9ba, 0x6f36ce4a, 0x9f09d4ea, 0xb07cd629, 0xa4b2af31, 0x3f23312a, 0xa59430c6, 0xa266c035,
	0x4ebc3774, 0x82caa6fc, 0x90d0b0e0, 0xa7d81533, 0x04984af1, 0xecdaf741, 0xcd500e7f, 0x91f62f17,
	0x4dd68d76, 0xefb04d43, 0xaa4d54cc, 0x9604dfe4, 0xd1b5e39e, 0x6a881b4c, 0x2c1fb8c1, 0x65517f46,
	0x5eea049d, 0x8c355d01, 0x877473fa, 0x0b412efb, 0x671d5ab3, 0xdbd25292, 0x105633e9, 0xd647136d,
	0xd7618c9a, 0xa10c7a37, 0xf8148e59, 0x133c89eb, 0xa927eece, 0x61c935b7, 0x1ce5ede1, 0x47b13c7a,
	0xd2df599c, 0xf2733f55, 0x14ce7918, 0xc737bf73, 0xf7cdea53, 0xfdaa5b5f, 0x3d6f14df, 0x44db8678,
	0xaff381ca, 0x68c43eb9, 0x24342c38, 0xa3405fc2, 0x1dc37216, 0xe2250cbc, 0x3c498b28, 0x0d9541ff,
	0xa8017139, 0x0cb3de08, 0xb4e49cd8, 0x56c19064, 0xcb84617b, 0x32b670d5, 0x6c5c7448, 0xb85742d0,
}
|
176
aes_hash.go
Normal file
176
aes_hash.go
Normal file
|
@ -0,0 +1,176 @@
|
|||
package randomx
|
||||
|
||||
import "fmt"
|
||||
import "math/bits"
|
||||
import "encoding/binary"
|
||||
|
||||
var tmp_______ = fmt.Sprintf("dd")
|
||||
|
||||
var AES_HASH_1R_STATE0 = ARRAY_TO_BIGENDIAN([4]uint32{0xd7983aad, 0xcc82db47, 0x9fa856de, 0x92b52c0d})
|
||||
var AES_HASH_1R_STATE1 = ARRAY_TO_BIGENDIAN([4]uint32{0xace78057, 0xf59e125a, 0x15c7b798, 0x338d996e})
|
||||
var AES_HASH_1R_STATE2 = ARRAY_TO_BIGENDIAN([4]uint32{0xe8a07ce4, 0x5079506b, 0xae62c7d0, 0x6a770017})
|
||||
var AES_HASH_1R_STATE3 = ARRAY_TO_BIGENDIAN([4]uint32{0x7e994948, 0x79a10005, 0x07ad828d, 0x630a240c})
|
||||
|
||||
var AES_HASH_1R_XKEY0 = ARRAY_TO_BIGENDIAN([4]uint32{0x06890201, 0x90dc56bf, 0x8b24949f, 0xf6fa8389})
|
||||
var AES_HASH_1R_XKEY1 = ARRAY_TO_BIGENDIAN([4]uint32{0xed18f99b, 0xee1043c6, 0x51f4e03c, 0x61b263d1})
|
||||
|
||||
// used for final hash calculation
|
||||
func hashAes1Rx4(input []byte, output []byte) {
|
||||
|
||||
var states [4][4]uint32
|
||||
for i := range states {
|
||||
states[0][i] = AES_HASH_1R_STATE0[i]
|
||||
states[1][i] = AES_HASH_1R_STATE1[i]
|
||||
states[2][i] = AES_HASH_1R_STATE2[i]
|
||||
states[3][i] = AES_HASH_1R_STATE3[i]
|
||||
}
|
||||
|
||||
var in [4][4]uint32
|
||||
for input_ptr := 0; input_ptr < len(input); input_ptr += 64 {
|
||||
for i := 0; i < 63; i += 4 { // load 64 bytes
|
||||
in[i/16][(i%16)/4] = binary.LittleEndian.Uint32(input[input_ptr+i:])
|
||||
}
|
||||
|
||||
AES_ENC_ROUND(states[0][:], in[0][:])
|
||||
AES_DEC_ROUND(states[1][:], in[1][:])
|
||||
AES_ENC_ROUND(states[2][:], in[2][:])
|
||||
AES_DEC_ROUND(states[3][:], in[3][:])
|
||||
|
||||
}
|
||||
|
||||
AES_ENC_ROUND(states[0][:], AES_HASH_1R_XKEY0[:])
|
||||
AES_DEC_ROUND(states[1][:], AES_HASH_1R_XKEY0[:])
|
||||
AES_ENC_ROUND(states[2][:], AES_HASH_1R_XKEY0[:])
|
||||
AES_DEC_ROUND(states[3][:], AES_HASH_1R_XKEY0[:])
|
||||
|
||||
AES_ENC_ROUND(states[0][:], AES_HASH_1R_XKEY1[:])
|
||||
AES_DEC_ROUND(states[1][:], AES_HASH_1R_XKEY1[:])
|
||||
AES_ENC_ROUND(states[2][:], AES_HASH_1R_XKEY1[:])
|
||||
AES_DEC_ROUND(states[3][:], AES_HASH_1R_XKEY1[:])
|
||||
|
||||
// write back to state
|
||||
for i := 0; i < 63; i += 4 {
|
||||
binary.BigEndian.PutUint32(output[i:], states[i/16][(i%16)/4])
|
||||
}
|
||||
|
||||
fmt.Printf("aes hash %x\n", output)
|
||||
|
||||
}
|
||||
|
||||
// these keys are used to generate scratchpad
|
||||
var AES_GEN_1R_KEY0 = ARRAY_TO_BIGENDIAN([4]uint32{0xb4f44917, 0xdbb5552b, 0x62716609, 0x6daca553})
|
||||
var AES_GEN_1R_KEY1 = ARRAY_TO_BIGENDIAN([4]uint32{0x0da1dc4e, 0x1725d378, 0x846a710d, 0x6d7caf07})
|
||||
var AES_GEN_1R_KEY2 = ARRAY_TO_BIGENDIAN([4]uint32{0x3e20e345, 0xf4c0794f, 0x9f947ec6, 0x3f1262f1})
|
||||
var AES_GEN_1R_KEY3 = ARRAY_TO_BIGENDIAN([4]uint32{0x49169154, 0x16314c88, 0xb1ba317c, 0x6aef8135})
|
||||
|
||||
// ARRAY_TO_BIGENDIAN returns input with the order of its four words reversed
// and the bytes of every word reversed as well, i.e. the 16 bytes of the
// value are mirrored end to end.
func ARRAY_TO_BIGENDIAN(input [4]uint32) (output [4]uint32) {
	last := len(input) - 1
	for i, word := range input {
		// Word i lands in the mirrored slot with its bytes swapped.
		output[last-i] = bits.ReverseBytes32(word)
	}
	return output
}
|
||||
|
||||
func fillAes1Rx4(state_start []byte, output []byte) {
|
||||
|
||||
var states [4][4]uint32
|
||||
for i := 0; i < 63; i += 4 {
|
||||
states[i/16][(i%16)/4] = binary.BigEndian.Uint32(state_start[i:])
|
||||
}
|
||||
|
||||
outptr := 0
|
||||
for ; outptr < len(output); outptr += 64 {
|
||||
AES_DEC_ROUND(states[0][:], AES_GEN_1R_KEY0[:])
|
||||
AES_ENC_ROUND(states[1][:], AES_GEN_1R_KEY1[:])
|
||||
AES_DEC_ROUND(states[2][:], AES_GEN_1R_KEY2[:])
|
||||
AES_ENC_ROUND(states[3][:], AES_GEN_1R_KEY3[:])
|
||||
|
||||
for i := 0; i < 63; i += 4 {
|
||||
binary.LittleEndian.PutUint32(output[outptr+i:], states[i/16][(i%16)/4])
|
||||
}
|
||||
|
||||
}
|
||||
// write back to state
|
||||
for i := 0; i < 63; i += 4 {
|
||||
|
||||
binary.BigEndian.PutUint32(state_start[i:], states[i/16][(i%16)/4])
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func AES_ENC_ROUND(state []uint32, key []uint32) {
|
||||
|
||||
s0 := state[0]
|
||||
s1 := state[1]
|
||||
s2 := state[2]
|
||||
s3 := state[3]
|
||||
state[0] = key[0] ^ te0[uint8(s0>>24)] ^ te1[uint8(s1>>16)] ^ te2[uint8(s2>>8)] ^ te3[uint8(s3)]
|
||||
state[1] = key[1] ^ te0[uint8(s1>>24)] ^ te1[uint8(s2>>16)] ^ te2[uint8(s3>>8)] ^ te3[uint8(s0)]
|
||||
state[2] = key[2] ^ te0[uint8(s2>>24)] ^ te1[uint8(s3>>16)] ^ te2[uint8(s0>>8)] ^ te3[uint8(s1)]
|
||||
state[3] = key[3] ^ te0[uint8(s3>>24)] ^ te1[uint8(s0>>16)] ^ te2[uint8(s1>>8)] ^ te3[uint8(s2)]
|
||||
}
|
||||
|
||||
func AES_DEC_ROUND(state []uint32, key []uint32) {
|
||||
|
||||
s0 := state[0]
|
||||
s1 := state[1]
|
||||
s2 := state[2]
|
||||
s3 := state[3]
|
||||
|
||||
state[0] = key[0] ^ td0[uint8(s0>>24)] ^ td1[uint8(s3>>16)] ^ td2[uint8(s2>>8)] ^ td3[uint8(s1)]
|
||||
state[1] = key[1] ^ td0[uint8(s1>>24)] ^ td1[uint8(s0>>16)] ^ td2[uint8(s3>>8)] ^ td3[uint8(s2)]
|
||||
state[2] = key[2] ^ td0[uint8(s2>>24)] ^ td1[uint8(s1>>16)] ^ td2[uint8(s0>>8)] ^ td3[uint8(s3)]
|
||||
state[3] = key[3] ^ td0[uint8(s3>>24)] ^ td1[uint8(s2>>16)] ^ td2[uint8(s1>>8)] ^ td3[uint8(s0)]
|
||||
|
||||
}
|
||||
|
||||
// these keys are used to used as per RandomX spec
|
||||
var AES_GEN_4R_KEY0 = ARRAY_TO_BIGENDIAN([4]uint32{0x99e5d23f, 0x2f546d2b, 0xd1833ddb, 0x6421aadd})
|
||||
var AES_GEN_4R_KEY1 = ARRAY_TO_BIGENDIAN([4]uint32{0xa5dfcde5, 0x06f79d53, 0xb6913f55, 0xb20e3450})
|
||||
var AES_GEN_4R_KEY2 = ARRAY_TO_BIGENDIAN([4]uint32{0x171c02bf, 0x0aa4679f, 0x515e7baf, 0x5c3ed904})
|
||||
var AES_GEN_4R_KEY3 = ARRAY_TO_BIGENDIAN([4]uint32{0xd8ded291, 0xcd673785, 0xe78f5d08, 0x85623763})
|
||||
var AES_GEN_4R_KEY4 = ARRAY_TO_BIGENDIAN([4]uint32{0x229effb4, 0x3d518b6d, 0xe3d6a7a6, 0xb5826f73})
|
||||
var AES_GEN_4R_KEY5 = ARRAY_TO_BIGENDIAN([4]uint32{0xb272b7d2, 0xe9024d4e, 0x9c10b3d9, 0xc7566bf3})
|
||||
var AES_GEN_4R_KEY6 = ARRAY_TO_BIGENDIAN([4]uint32{0xf63befa7, 0x2ba9660a, 0xf765a38b, 0xf273c9e7})
|
||||
var AES_GEN_4R_KEY7 = ARRAY_TO_BIGENDIAN([4]uint32{0xc0b0762d, 0x0c06d1fd, 0x915839de, 0x7a7cd609})
|
||||
|
||||
// used to generate final program
|
||||
func fillAes4Rx4(state_start []byte, output []byte) {
|
||||
|
||||
var states [4][4]uint32
|
||||
for i := 0; i < 63; i += 4 {
|
||||
states[i/16][(i%16)/4] = binary.BigEndian.Uint32(state_start[i:])
|
||||
}
|
||||
|
||||
outptr := 0
|
||||
for ; outptr < len(output); outptr += 64 {
|
||||
AES_DEC_ROUND(states[0][:], AES_GEN_4R_KEY0[:])
|
||||
AES_ENC_ROUND(states[1][:], AES_GEN_4R_KEY0[:])
|
||||
AES_DEC_ROUND(states[2][:], AES_GEN_4R_KEY4[:])
|
||||
AES_ENC_ROUND(states[3][:], AES_GEN_4R_KEY4[:])
|
||||
|
||||
AES_DEC_ROUND(states[0][:], AES_GEN_4R_KEY1[:])
|
||||
AES_ENC_ROUND(states[1][:], AES_GEN_4R_KEY1[:])
|
||||
AES_DEC_ROUND(states[2][:], AES_GEN_4R_KEY5[:])
|
||||
AES_ENC_ROUND(states[3][:], AES_GEN_4R_KEY5[:])
|
||||
|
||||
AES_DEC_ROUND(states[0][:], AES_GEN_4R_KEY2[:])
|
||||
AES_ENC_ROUND(states[1][:], AES_GEN_4R_KEY2[:])
|
||||
AES_DEC_ROUND(states[2][:], AES_GEN_4R_KEY6[:])
|
||||
AES_ENC_ROUND(states[3][:], AES_GEN_4R_KEY6[:])
|
||||
|
||||
AES_DEC_ROUND(states[0][:], AES_GEN_4R_KEY3[:])
|
||||
AES_ENC_ROUND(states[1][:], AES_GEN_4R_KEY3[:])
|
||||
AES_DEC_ROUND(states[2][:], AES_GEN_4R_KEY7[:])
|
||||
AES_ENC_ROUND(states[3][:], AES_GEN_4R_KEY7[:])
|
||||
|
||||
// store bytes to output buffer
|
||||
for i := 0; i < 63; i += 4 {
|
||||
binary.BigEndian.PutUint32(output[outptr+i:], states[i/16][(i%16)/4])
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
226
config.go
Normal file
226
config.go
Normal file
|
@ -0,0 +1,226 @@
|
|||
package randomx
|
||||
|
||||
import "fmt"
|
||||
import "encoding/binary"
|
||||
import "golang.org/x/crypto/blake2b"
|
||||
|
||||
import _ "unsafe"
|
||||
import _ "golang.org/x/crypto/argon2"
|
||||
|
||||
// Tunable parameters; see configuration.h of the reference implementation.
const (
	// Cache size in KiB. Must be a power of 2.
	RANDOMX_ARGON_MEMORY = 262144

	// Number of Argon2d iterations for Cache initialization.
	RANDOMX_ARGON_ITERATIONS = 3

	// Number of parallel lanes for Cache initialization.
	RANDOMX_ARGON_LANES = 1

	// Argon2d salt.
	RANDOMX_ARGON_SALT = "RandomX\x03"

	// Number of random Cache accesses per Dataset item. Minimum is 2.
	RANDOMX_CACHE_ACCESSES = 8

	// Target latency for SuperscalarHash (in cycles of the reference CPU).
	RANDOMX_SUPERSCALAR_LATENCY = 170

	// Dataset base size in bytes. Must be a power of 2.
	RANDOMX_DATASET_BASE_SIZE = 2147483648

	// Dataset extra size. Must be divisible by 64.
	RANDOMX_DATASET_EXTRA_SIZE = 33554368

	// Number of instructions in a RandomX program. Must be divisible by 8.
	RANDOMX_PROGRAM_SIZE = 256

	// Number of iterations during VM execution.
	RANDOMX_PROGRAM_ITERATIONS = 2048

	// Number of chained VM executions per hash.
	RANDOMX_PROGRAM_COUNT = 8

	// Scratchpad L3 size in bytes. Must be a power of 2.
	RANDOMX_SCRATCHPAD_L3 = 2097152

	// Scratchpad L2 size in bytes. Must be a power of two and less than or
	// equal to RANDOMX_SCRATCHPAD_L3.
	RANDOMX_SCRATCHPAD_L2 = 262144

	// Scratchpad L1 size in bytes. Must be a power of two (minimum 64) and
	// less than or equal to RANDOMX_SCRATCHPAD_L2.
	RANDOMX_SCRATCHPAD_L1 = 16384

	// Jump condition mask size in bits.
	RANDOMX_JUMP_BITS = 8

	// Jump condition mask offset in bits. The sum of RANDOMX_JUMP_BITS and
	// RANDOMX_JUMP_OFFSET must not exceed 16.
	RANDOMX_JUMP_OFFSET = 8
)

// Length of RANDOMX_ARGON_SALT in bytes (sizeof("" RANDOMX_ARGON_SALT) - 1).
const ArgonSaltSize uint32 = 8

// Sizes derived from the tunables above.
const (
	RANDOMX_DATASET_ITEM_SIZE uint64 = 64
	DATASETEXTRAITEMS                = RANDOMX_DATASET_EXTRA_SIZE / RANDOMX_DATASET_ITEM_SIZE

	ArgonBlockSize     uint32 = 1024
	SuperscalarMaxSize int    = 3*RANDOMX_SUPERSCALAR_LATENCY + 2
	CacheLineSize      uint64 = RANDOMX_DATASET_ITEM_SIZE
	ScratchpadSize     uint32 = RANDOMX_SCRATCHPAD_L3

	// Keeps the address bits below the dataset base size and clears the bits
	// that address bytes inside one cache line.
	CacheLineAlignMask = (RANDOMX_DATASET_BASE_SIZE - 1) &^ (CacheLineSize - 1)

	// Cache size in bytes.
	CacheSize uint64 = RANDOMX_ARGON_MEMORY * uint64(ArgonBlockSize)
)

// Scratchpad level sizes counted in 8-byte words, and the address masks
// derived from them (8-, 16- and 64-byte aligned variants).
const (
	ScratchpadL1 = RANDOMX_SCRATCHPAD_L1 / 8
	ScratchpadL2 = RANDOMX_SCRATCHPAD_L2 / 8
	ScratchpadL3 = RANDOMX_SCRATCHPAD_L3 / 8

	ScratchpadL1Mask   = (ScratchpadL1 - 1) * 8
	ScratchpadL2Mask   = (ScratchpadL2 - 1) * 8
	ScratchpadL1Mask16 = (ScratchpadL1/2 - 1) * 16
	ScratchpadL2Mask16 = (ScratchpadL2/2 - 1) * 16
	ScratchpadL3Mask   = (ScratchpadL3 - 1) * 8
	ScratchpadL3Mask64 = (ScratchpadL3/8 - 1) * 64
)

// Jump condition parameters and register file sizes.
const (
	CONDITIONOFFSET  = RANDOMX_JUMP_OFFSET
	CONDITIONMASK    = 1<<RANDOMX_JUMP_BITS - 1
	STOREL3CONDITION = 14

	REGISTERSCOUNT   = 8
	REGISTERCOUNTFLT = 4
)

// Bit-layout parameters for 64-bit floating point values (52-bit mantissa,
// 11-bit exponent) and the masks built from them.
const (
	mantissaSize = 52
	exponentSize = 11
	mantissaMask = uint64(1)<<mantissaSize - 1
	exponentMask = uint64(1)<<exponentSize - 1
	exponentBias = 1023

	dynamicExponentBits        = 4
	staticExponentBits         = 4
	constExponentBits   uint64 = 0x300
	dynamicMantissaMask        = uint64(1)<<(mantissaSize+dynamicExponentBits) - 1
)

// Flag values accepted by the allocation functions.
const (
	RANDOMX_FLAG_DEFAULT     = 0
	RANDOMX_FLAG_JIT         = 1
	RANDOMX_FLAG_LARGE_PAGES = 2
)
|
||||
|
||||
// isZeroOrPowerOf2 reports whether x has at most one bit set, i.e. whether x
// is zero or an exact power of two.
func isZeroOrPowerOf2(x uint64) bool {
	// x-1 clears the lowest set bit of x and sets every bit below it, so the
	// AND is zero exactly when no higher bit was set. For x == 0 the
	// subtraction wraps around and the AND is zero as well.
	withoutLowestBit := x & (x - 1)
	return withoutLowestBit == 0
}
|
||||
|
||||
// Blake2Generator is a byte generator backed by a 64-byte buffer.
type Blake2Generator struct {
	data      [64]byte // current buffer contents
	dataindex int      // offset of the next unread byte in data
}

// Init_Blake2Generator returns a generator seeded with key and nonce.
//
// At most the first 60 bytes of key are copied to the start of the buffer
// (shorter keys are zero padded) and nonce is stored little-endian in the
// last four bytes. The read offset starts at the end of the buffer, so the
// seed itself is never handed out: the first request rehashes the buffer.
func Init_Blake2Generator(key []byte, nonce uint32) *Blake2Generator {
	const maxSeedSize = 60

	gen := new(Blake2Generator)
	gen.dataindex = len(gen.data)

	seed := key
	if len(seed) > maxSeedSize {
		seed = seed[:maxSeedSize]
	}
	copy(gen.data[:], seed)
	binary.LittleEndian.PutUint32(gen.data[maxSeedSize:], nonce)

	return gen
}
|
||||
|
||||
func (b *Blake2Generator) checkdata(bytesNeeded int) {
|
||||
if b.dataindex+bytesNeeded > cap(b.data) {
|
||||
//blake2b(data, sizeof(data), data, sizeof(data), nullptr, 0);
|
||||
h := blake2b.Sum512(b.data[:])
|
||||
copy(b.data[:], h[:])
|
||||
b.dataindex = 0
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func (b *Blake2Generator) GetByte() byte {
|
||||
b.checkdata(1)
|
||||
ret := b.data[b.dataindex]
|
||||
fmt.Printf("returning byte %02x\n", ret)
|
||||
b.dataindex++
|
||||
return ret
|
||||
}
|
||||
func (b *Blake2Generator) GetUint32() uint32 {
|
||||
b.checkdata(4)
|
||||
ret := uint32(binary.LittleEndian.Uint32(b.data[b.dataindex:]))
|
||||
fmt.Printf("returning int32 %08x %08x\n", ret, binary.LittleEndian.Uint32(b.data[b.dataindex:]))
|
||||
b.dataindex += 4
|
||||
fmt.Printf("returning int32 %08x\n", ret)
|
||||
|
||||
if ret == 0xc5dac17e {
|
||||
// panic("exiting")
|
||||
}
|
||||
|
||||
return ret
|
||||
}
|
||||
|
||||
type Randomx_Cache struct {
|
||||
Blocks []block
|
||||
|
||||
Programs [RANDOMX_PROGRAM_COUNT]*SuperScalarProgram
|
||||
}
|
||||
|
||||
func Randomx_alloc_cache(flags uint64) *Randomx_Cache {
|
||||
|
||||
return &Randomx_Cache{}
|
||||
}
|
||||
|
||||
func (cache *Randomx_Cache) Randomx_init_cache(key []byte) {
|
||||
fmt.Printf("appending null byte is not necessary but only done for testing")
|
||||
kkey := append([]byte{}, key...)
|
||||
//kkey = append(kkey,0)
|
||||
//cache->initialize(cache, key, keySize);
|
||||
cache.Blocks = buildBlocks(argon2d, kkey, []byte(RANDOMX_ARGON_SALT), []byte{}, []byte{}, RANDOMX_ARGON_ITERATIONS, RANDOMX_ARGON_MEMORY, RANDOMX_ARGON_LANES, 0)
|
||||
|
||||
}
|
||||
|
||||
// fetch a 64 byte block in uint64 form
|
||||
func (cache *Randomx_Cache) GetBlock(addr uint64, out []uint64) {
|
||||
|
||||
mask := CacheSize/CacheLineSize - 1
|
||||
|
||||
addr = (addr & mask) * CacheLineSize
|
||||
|
||||
block := addr / 1024
|
||||
index_within_block := (addr % 1024) / 8
|
||||
|
||||
copy(out, cache.Blocks[block][index_within_block:])
|
||||
}
|
||||
|
||||
// some constants for argon
|
||||
const (
|
||||
argon2d = iota
|
||||
argon2i
|
||||
argon2id
|
||||
)
|
||||
|
||||
type block [128]uint64
|
||||
|
||||
const syncPoints = 4
|
||||
|
||||
//go:linkname argon2_initHash golang.org/x/crypto/argon2.initHash
|
||||
func argon2_initHash(password, salt, key, data []byte, time, memory, threads, keyLen uint32, mode int) [blake2b.Size + 8]byte
|
||||
|
||||
//go:linkname argon2_initBlocks golang.org/x/crypto/argon2.initBlocks
|
||||
func argon2_initBlocks(h0 *[blake2b.Size + 8]byte, memory, threads uint32) []block
|
||||
|
||||
//go:linkname argon2_processBlocks golang.org/x/crypto/argon2.processBlocks
|
||||
func argon2_processBlocks(B []block, time, memory, threads uint32, mode int)
|
||||
|
||||
func buildBlocks(mode int, password, salt, secret, data []byte, time, memory uint32, threads uint8, keyLen uint32) []block {
|
||||
if time < 1 {
|
||||
panic("argon2: number of rounds too small")
|
||||
}
|
||||
if threads < 1 {
|
||||
panic("argon2: parallelism degree too low")
|
||||
}
|
||||
h0 := argon2_initHash(password, salt, secret, data, time, memory, uint32(threads), keyLen, mode)
|
||||
|
||||
memory = memory / (syncPoints * uint32(threads)) * (syncPoints * uint32(threads))
|
||||
if memory < 2*syncPoints*uint32(threads) {
|
||||
memory = 2 * syncPoints * uint32(threads)
|
||||
}
|
||||
B := argon2_initBlocks(&h0, memory, uint32(threads))
|
||||
argon2_processBlocks(B, time, memory, uint32(threads), mode)
|
||||
|
||||
return B
|
||||
//return extractKey(B, memory, uint32(threads), keyLen)
|
||||
}
|
65
example.go
Normal file
65
example.go
Normal file
|
@ -0,0 +1,65 @@
|
|||
//+build ignore
|
||||
|
||||
package main
|
||||
|
||||
import "randomx"
|
||||
import "fmt"
|
||||
|
||||
func main() {
|
||||
c := randomx.Randomx_alloc_cache(0)
|
||||
|
||||
key := []byte("RandomX example key\x00")
|
||||
myinput := []byte("RandomX example input\x00")
|
||||
|
||||
c.Randomx_init_cache(key)
|
||||
|
||||
nonce := uint32(0) //uint32(len(key))
|
||||
gen := randomx.Init_Blake2Generator(key, nonce)
|
||||
for i := 0; i < 8; i++ {
|
||||
c.Programs[i] = randomx.Build_SuperScalar_Program(gen) // build a superscalar program
|
||||
}
|
||||
|
||||
vm := c.VM_Initialize()
|
||||
|
||||
_ = fmt.Sprintf("t")
|
||||
|
||||
var output_hash [32]byte
|
||||
vm.CalculateHash(myinput, output_hash[:])
|
||||
|
||||
fmt.Printf("final output hash %x\n", output_hash)
|
||||
|
||||
vm.CalculateHash(myinput, output_hash[:])
|
||||
|
||||
fmt.Printf("final output hash %x\n", output_hash)
|
||||
|
||||
/*
|
||||
fmt.Printf("cache blocks %d block size %d %+v\n", len(c.Blocks), len(c.Blocks[0]), c.Blocks[0])
|
||||
|
||||
register_value := uint64(0x70c13c)
|
||||
mask := randomx.CacheSize / randomx.CacheLineSize - 1;
|
||||
|
||||
address := (register_value&mask)* randomx.CacheLineSize
|
||||
|
||||
|
||||
var block [8]uint64
|
||||
|
||||
c.GetBlock(address,block[:])
|
||||
|
||||
for i := range block{
|
||||
fmt.Printf("%d %16x\n", i, block[i])
|
||||
}
|
||||
|
||||
//block := address / 1024
|
||||
|
||||
//index_within_block := (address % 1024) / 8
|
||||
|
||||
//fmt.Printf("mask %x address %x block %d index_within_block %d data %16x\n",mask, address, block, index_within_block,c.Blocks[block][index_within_block])
|
||||
|
||||
/*
|
||||
for i := range c.Blocks[block]{
|
||||
fmt.Printf("%3d %16x\n", i,c.Blocks[block][i])
|
||||
}
|
||||
*/
|
||||
//c.InitDatasetItem(nil,0x70c13c)
|
||||
|
||||
}
|
1
randomx.go
Normal file
1
randomx.go
Normal file
|
@ -0,0 +1 @@
|
|||
package randomx
|
42
randomx_test.go
Normal file
42
randomx_test.go
Normal file
|
@ -0,0 +1,42 @@
|
|||
package randomx
|
||||
|
||||
import "fmt"
|
||||
import "testing"
|
||||
|
||||
func Test_Randomx(t *testing.T) {
|
||||
|
||||
var Tests = []struct {
|
||||
key []byte // key
|
||||
input []byte // input
|
||||
expected string // expected result
|
||||
}{
|
||||
{[]byte("RandomX example key\x00"), []byte("RandomX example input\x00"), "8a48e5f9db45ab79d9080574c4d81954fe6ac63842214aff73c244b26330b7c9"},
|
||||
{[]byte("test key 000"), []byte("This is a test"), "639183aae1bf4c9a35884cb46b09cad9175f04efd7684e7262a0ac1c2f0b4e3f"}, // test a
|
||||
// {[]byte("test key 000"), []byte("Lorem ipsum dolor sit amet"), "300a0adb47603dedb42228ccb2b211104f4da45af709cd7547cd049e9489c969" }, // test b
|
||||
{[]byte("test key 000"), []byte("sed do eiusmod tempor incididunt ut labore et dolore magna aliqua"), "c36d4ed4191e617309867ed66a443be4075014e2b061bcdaf9ce7b721d2b77a8"}, // test c
|
||||
{[]byte("test key 001"), []byte("sed do eiusmod tempor incididunt ut labore et dolore magna aliqua"), "e9ff4503201c0c2cca26d285c93ae883f9b1d30c9eb240b820756f2d5a7905fc"}, // test d
|
||||
}
|
||||
|
||||
c := Randomx_alloc_cache(0)
|
||||
|
||||
for _, tt := range Tests {
|
||||
|
||||
c.Randomx_init_cache(tt.key)
|
||||
|
||||
nonce := uint32(0) //uint32(len(key))
|
||||
gen := Init_Blake2Generator(tt.key, nonce)
|
||||
for i := 0; i < 8; i++ {
|
||||
c.Programs[i] = Build_SuperScalar_Program(gen) // build a superscalar program
|
||||
}
|
||||
vm := c.VM_Initialize()
|
||||
|
||||
var output_hash [32]byte
|
||||
vm.CalculateHash(tt.input, output_hash[:])
|
||||
|
||||
actual := fmt.Sprintf("%x", output_hash)
|
||||
if actual != tt.expected {
|
||||
t.Errorf("Fib(%d): expected %s, actual %s", tt.key, tt.expected, actual)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
998
superscalar.go
Normal file
998
superscalar.go
Normal file
|
@ -0,0 +1,998 @@
|
|||
package randomx
|
||||
|
||||
import "fmt"
|
||||
import "math"
|
||||
import "math/bits"
|
||||
|
||||
// ExecutionPort is a bit set of execution ports a micro-op can be issued to.
type ExecutionPort byte

// Execution port masks. Null is a typed ExecutionPort; the remaining names
// are untyped constants so they can be used wherever a port mask or a plain
// integer is expected.
const (
	Null ExecutionPort = iota
	P0                 = 1
	P1                 = 2
	P5                 = 4
	P01                = P0 | P1
	P05                = P0 | P5
	P015               = P0 | P1 | P5
)

// MacroOP describes one macro-operation: its mnemonic, encoded size in bytes,
// latency, the ports of its (up to two) micro-ops, and whether it depends on
// the result of the previous macro-op.
type MacroOP struct {
	Name      string
	Size      int
	Latency   int
	UOP1      ExecutionPort
	UOP2      ExecutionPort
	Dependent bool
}

// GetSize returns the encoded size in bytes.
func (m *MacroOP) GetSize() int { return m.Size }

// GetLatency returns the latency.
func (m *MacroOP) GetLatency() int { return m.Latency }

// GetUOP1 returns the port mask of the first micro-op.
func (m *MacroOP) GetUOP1() ExecutionPort { return m.UOP1 }

// GetUOP2 returns the port mask of the second micro-op.
func (m *MacroOP) GetUOP2() ExecutionPort { return m.UOP2 }

// IsSimple reports whether the macro-op has no second micro-op.
func (m *MacroOP) IsSimple() bool { return m.UOP2 == Null }

// IsEliminated reports whether the macro-op has no first micro-op, i.e. it
// occupies no execution port.
func (m *MacroOP) IsEliminated() bool { return m.UOP1 == Null }

// IsDependent reports whether the macro-op depends on the previous one.
func (m *MacroOP) IsDependent() bool { return m.Dependent }

// Macro-op table. Fields that are left out keep their zero value
// (Null port, not dependent).
var (
	// 3 byte instructions
	M_NOP    = MacroOP{Name: "NOP"}
	M_Add_rr = MacroOP{Name: "add r,r", Size: 3, Latency: 1, UOP1: P015}
	M_Sub_rr = MacroOP{Name: "sub r,r", Size: 3, Latency: 1, UOP1: P015}
	M_Xor_rr = MacroOP{Name: "xor r,r", Size: 3, Latency: 1, UOP1: P015}
	M_Imul_r = MacroOP{Name: "imul r", Size: 3, Latency: 4, UOP1: P1, UOP2: P5}
	M_Mul_r  = MacroOP{Name: "mul r", Size: 3, Latency: 4, UOP1: P1, UOP2: P5}
	M_Mov_rr = MacroOP{Name: "mov r,r", Size: 3}

	// Dependent version of "imul r": the instruction depends on the previous
	// one and its latency is 1 lower.
	M_Imul_r_dependent = MacroOP{Name: "imul r", Size: 3, Latency: 3, UOP1: P1, Dependent: true}

	// Size: 4 bytes
	M_Lea_SIB = MacroOP{Name: "lea r,r+r*s", Size: 4, Latency: 1, UOP1: P01}
	M_Imul_rr = MacroOP{Name: "imul r,r", Size: 4, Latency: 3, UOP1: P1}
	M_Ror_ri  = MacroOP{Name: "ror r,i", Size: 4, Latency: 1, UOP1: P05}

	// Size: 7 bytes (can be optionally padded with nop to 8 or 9 bytes)
	M_Add_ri = MacroOP{Name: "add r,i", Size: 7, Latency: 1, UOP1: P015}
	M_Xor_ri = MacroOP{Name: "xor r,i", Size: 7, Latency: 1, UOP1: P015}

	// Size: 10 bytes
	M_Mov_ri64 = MacroOP{Name: "mov rax,i64", Size: 10, Latency: 1, UOP1: P015}
)
|
||||
|
||||
// unused are not implemented
|
||||
|
||||
type Instruction struct {
|
||||
Name string
|
||||
Opcode byte
|
||||
UOP MacroOP
|
||||
SrcOP int
|
||||
ResultOP int
|
||||
DstOP int
|
||||
UOP_Array []MacroOP
|
||||
}
|
||||
|
||||
func (ins *Instruction) GetUOPCount() int {
|
||||
if len(ins.UOP_Array) != 0 {
|
||||
return len(ins.UOP_Array)
|
||||
} else {
|
||||
if ins.Name == "NOP" { // nop is assumed to be zero bytes
|
||||
return 0
|
||||
}
|
||||
return 1
|
||||
}
|
||||
}
|
||||
|
||||
func (ins *Instruction) GetSize() int {
|
||||
|
||||
if len(ins.UOP_Array) != 0 {
|
||||
sum_size := 0
|
||||
for i := range ins.UOP_Array {
|
||||
sum_size += ins.UOP_Array[i].GetSize()
|
||||
}
|
||||
return sum_size
|
||||
} else {
|
||||
return ins.UOP.GetSize()
|
||||
}
|
||||
}
|
||||
|
||||
func (ins *Instruction) IsSimple() bool {
|
||||
if ins.GetSize() == 1 {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (ins *Instruction) GetLatency() int {
|
||||
if len(ins.UOP_Array) != 0 {
|
||||
sum := 0
|
||||
for i := range ins.UOP_Array {
|
||||
sum += ins.UOP_Array[i].GetLatency()
|
||||
}
|
||||
return sum
|
||||
} else {
|
||||
return ins.UOP.GetLatency()
|
||||
}
|
||||
}
|
||||
|
||||
// Superscalar opcodes. S_INVALID is a typed int with value -1; the real
// opcodes follow as untyped constants numbered 0 (S_ISUB_R) through
// 13 (S_IMUL_RCP), so they can be assigned to byte and int fields alike.
const (
	S_INVALID  int = -1
	S_ISUB_R       = iota - 1 // 0
	S_IXOR_R                  // 1
	S_IADD_RS                 // 2
	S_IMUL_R                  // 3
	S_IROR_C                  // 4
	S_IADD_C7                 // 5
	S_IXOR_C7                 // 6
	S_IADD_C8                 // 7
	S_IXOR_C8                 // 8
	S_IADD_C9                 // 9
	S_IXOR_C9                 // 10
	S_IMULH_R                 // 11
	S_ISMULH_R                // 12
	S_IMUL_RCP                // 13
)

// Opcode_To_String maps every opcode, including S_INVALID, to its name.
var Opcode_To_String = map[int]string{
	S_INVALID:  "INVALID",
	S_ISUB_R:   "ISUB_R",
	S_IXOR_R:   "IXOR_R",
	S_IADD_RS:  "IADD_RS",
	S_IMUL_R:   "IMUL_R",
	S_IROR_C:   "IROR_C",
	S_IADD_C7:  "IADD_C7",
	S_IXOR_C7:  "IXOR_C7",
	S_IADD_C8:  "IADD_C8",
	S_IXOR_C8:  "IXOR_C8",
	S_IADD_C9:  "IADD_C9",
	S_IXOR_C9:  "IXOR_C9",
	S_IMULH_R:  "IMULH_R",
	S_ISMULH_R: "ISMULH_R",
	S_IMUL_RCP: "IMUL_RCP",
}
|
||||
|
||||
// SrcOP/DstOp are used to selected registers
|
||||
var ISUB_R = Instruction{Name: "ISUB_R", Opcode: S_ISUB_R, UOP: M_Sub_rr, SrcOP: 0}
|
||||
var IXOR_R = Instruction{Name: "IXOR_R", Opcode: S_IXOR_R, UOP: M_Xor_rr, SrcOP: 0}
|
||||
var IADD_RS = Instruction{Name: "IADD_RS", Opcode: S_IADD_RS, UOP: M_Lea_SIB, SrcOP: 0}
|
||||
var IMUL_R = Instruction{Name: "IMUL_R", Opcode: S_IMUL_R, UOP: M_Imul_rr, SrcOP: 0}
|
||||
var IROR_C = Instruction{Name: "IROR_C", Opcode: S_IROR_C, UOP: M_Ror_ri, SrcOP: -1}
|
||||
|
||||
var IADD_C7 = Instruction{Name: "IADD_C7", Opcode: S_IADD_C7, UOP: M_Add_ri, SrcOP: -1}
|
||||
var IXOR_C7 = Instruction{Name: "IXOR_C7", Opcode: S_IXOR_C7, UOP: M_Xor_ri, SrcOP: -1}
|
||||
var IADD_C8 = Instruction{Name: "IADD_C8", Opcode: S_IADD_C8, UOP: M_Add_ri, SrcOP: -1}
|
||||
var IXOR_C8 = Instruction{Name: "IXOR_C8", Opcode: S_IXOR_C8, UOP: M_Xor_ri, SrcOP: -1}
|
||||
var IADD_C9 = Instruction{Name: "IADD_C9", Opcode: S_IADD_C9, UOP: M_Add_ri, SrcOP: -1}
|
||||
var IXOR_C9 = Instruction{Name: "IXOR_C9", Opcode: S_IXOR_C9, UOP: M_Xor_ri, SrcOP: -1}
|
||||
|
||||
var IMULH_R = Instruction{Name: "IMULH_R", Opcode: S_IMULH_R, UOP_Array: []MacroOP{M_Mov_rr, M_Mul_r, M_Mov_rr}, ResultOP: 1, DstOP: 0, SrcOP: 1}
|
||||
var ISMULH_R = Instruction{Name: "ISMULH_R", Opcode: S_ISMULH_R, UOP_Array: []MacroOP{M_Mov_rr, M_Imul_r, M_Mov_rr}, ResultOP: 1, DstOP: 0, SrcOP: 1}
|
||||
var IMUL_RCP = Instruction{Name: "IMUL_RCP", Opcode: S_IMUL_RCP, UOP_Array: []MacroOP{M_Mov_ri64, M_Imul_r_dependent}, ResultOP: 1, DstOP: 1, SrcOP: -1}
|
||||
|
||||
var INOP = Instruction{Name: "NOP", UOP: M_NOP}
|
||||
|
||||
// Decode buffer layouts: how a 16-byte fetch window is split into
// instruction slots. Every layout sums to 16 bytes.
var (
	buffer0 = []int{4, 8, 4}
	buffer1 = []int{7, 3, 3, 3}
	buffer2 = []int{3, 7, 3, 3}
	buffer3 = []int{4, 9, 3}
	buffer4 = []int{4, 4, 4, 4}
	buffer5 = []int{3, 3, 10}
)

// Decoder_To_Instruction_Length holds the same layouts indexed by decoder
// number. The rows are independent slices, not aliases of buffer0..buffer5.
var Decoder_To_Instruction_Length = [][]int{
	{4, 8, 4},
	{7, 3, 3, 3},
	{3, 7, 3, 3},
	{4, 9, 3},
	{4, 4, 4, 4},
	{3, 3, 10},
}
|
||||
|
||||
// DecoderType identifies one of the six decode buffer layouts. The digits in
// each name spell the slot sizes of the layout (e.g. Decoder7333 is 7-3-3-3).
type DecoderType int

const (
	Decoder484 DecoderType = iota
	Decoder7333
	Decoder3733
	Decoder493
	Decoder4444
	Decoder3310
)

// GetSize returns the number of instruction slots in the layout. It panics
// for values outside the six defined decoders.
func (d DecoderType) GetSize() int {
	switch d {
	case Decoder484, Decoder493, Decoder3310:
		return 3
	case Decoder7333, Decoder3733, Decoder4444:
		return 4
	}
	panic("unknown decoder")
}

// String returns the constant's name. It panics for values outside the six
// defined decoders.
func (d DecoderType) String() string {
	names := [...]string{
		Decoder484:  "Decoder484",
		Decoder7333: "Decoder7333",
		Decoder3733: "Decoder3733",
		Decoder493:  "Decoder493",
		Decoder4444: "Decoder4444",
		Decoder3310: "Decoder3310",
	}
	if d < 0 || int(d) >= len(names) {
		panic("unknown decoder")
	}
	return names[d]
}
|
||||
|
||||
func FetchNextDecoder(ins *Instruction, cycle int, mulcount int, gen *Blake2Generator) DecoderType {
|
||||
|
||||
if ins.Name == IMULH_R.Name || ins.Name == ISMULH_R.Name {
|
||||
return Decoder3310
|
||||
}
|
||||
|
||||
// make sure multiplication port is satured, if number of multiplications les less than number of cycles, a 4444 is returned
|
||||
if mulcount < (cycle + 1) {
|
||||
return Decoder4444
|
||||
}
|
||||
|
||||
if ins.Name == IMUL_RCP.Name {
|
||||
if gen.GetByte()&1 == 1 {
|
||||
return Decoder484
|
||||
} else {
|
||||
return Decoder493
|
||||
}
|
||||
}
|
||||
|
||||
// we are here means selecta decoded randomly
|
||||
rnd_byte := gen.GetByte()
|
||||
|
||||
switch rnd_byte & 3 {
|
||||
case 0:
|
||||
return Decoder484
|
||||
case 1:
|
||||
return Decoder7333
|
||||
case 2:
|
||||
return Decoder3733
|
||||
case 3:
|
||||
return Decoder493
|
||||
}
|
||||
|
||||
panic("can never reach")
|
||||
return Decoder484
|
||||
}
|
||||
|
||||
var slot3 = []*Instruction{&ISUB_R, &IXOR_R} // 3 length instruction will be filled with these
|
||||
var slot3L = []*Instruction{&ISUB_R, &IXOR_R, &IMULH_R, &ISMULH_R}
|
||||
|
||||
var slot4 = []*Instruction{&IROR_C, &IADD_RS}
|
||||
var slot7 = []*Instruction{&IXOR_C7, &IADD_C7}
|
||||
var slot8 = []*Instruction{&IXOR_C8, &IADD_C8}
|
||||
var slot9 = []*Instruction{&IXOR_C9, &IADD_C9}
|
||||
var slot10 = []*Instruction{&IMUL_RCP}
|
||||
|
||||
// superscalar program is built with superscalara instructions
|
||||
type SuperScalarInstruction struct {
|
||||
Opcode byte
|
||||
Dst_Reg int
|
||||
Src_Reg int
|
||||
Mod byte
|
||||
Imm32 uint32
|
||||
Type int
|
||||
Name string
|
||||
OpGroup int
|
||||
OpGroupPar int
|
||||
GroupParIsSource int
|
||||
ins *Instruction
|
||||
CanReuse bool
|
||||
}
|
||||
|
||||
func (sins SuperScalarInstruction) String() string {
|
||||
result := fmt.Sprintf("; %10s %2d ", sins.Name, sins.Opcode)
|
||||
result += fmt.Sprintf("dst r%d ", sins.Dst_Reg)
|
||||
|
||||
if sins.Src_Reg >= 0 {
|
||||
result += fmt.Sprintf("src r%d ", sins.Src_Reg)
|
||||
} else {
|
||||
result += fmt.Sprintf("src r%d ", sins.Dst_Reg)
|
||||
}
|
||||
|
||||
result += fmt.Sprintf("Mod %08x ", sins.Mod)
|
||||
result += fmt.Sprintf("Imm %08x ", sins.Imm32)
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
func (sins *SuperScalarInstruction) FixSrcReg() {
|
||||
if sins.Src_Reg >= 0 {
|
||||
// do nothing
|
||||
} else {
|
||||
sins.Src_Reg = sins.Dst_Reg
|
||||
}
|
||||
|
||||
}
|
||||
func (sins *SuperScalarInstruction) Reset() {
|
||||
sins.Opcode = 99
|
||||
sins.Src_Reg = -1
|
||||
sins.Dst_Reg = -1
|
||||
sins.CanReuse = false
|
||||
sins.GroupParIsSource = 0
|
||||
}
|
||||
func create(sins *SuperScalarInstruction, ins *Instruction, gen *Blake2Generator) {
|
||||
sins.Reset()
|
||||
sins.ins = ins
|
||||
sins.Name = ins.Name
|
||||
sins.OpGroupPar = -1
|
||||
sins.Opcode = ins.Opcode
|
||||
|
||||
switch ins.Name {
|
||||
case ISUB_R.Name:
|
||||
fmt.Printf("%s \n", ins.Name)
|
||||
sins.Name = ins.Name
|
||||
sins.Mod = 0
|
||||
sins.Imm32 = 0
|
||||
sins.OpGroup = S_IADD_RS
|
||||
sins.GroupParIsSource = 1
|
||||
case IXOR_R.Name:
|
||||
fmt.Printf("%s \n", ins.Name)
|
||||
sins.Name = ins.Name
|
||||
sins.Mod = 0
|
||||
sins.Imm32 = 0
|
||||
sins.OpGroup = S_IXOR_R
|
||||
sins.GroupParIsSource = 1
|
||||
case IADD_RS.Name:
|
||||
fmt.Printf("q %s \n", ins.Name)
|
||||
sins.Name = ins.Name
|
||||
sins.Mod = gen.GetByte()
|
||||
sins.Imm32 = 0
|
||||
sins.OpGroup = S_IADD_RS
|
||||
sins.GroupParIsSource = 1
|
||||
case IMUL_R.Name:
|
||||
fmt.Printf("%s \n", ins.Name)
|
||||
sins.Name = ins.Name
|
||||
sins.Mod = 0
|
||||
sins.Imm32 = 0
|
||||
sins.OpGroup = S_IMUL_R
|
||||
sins.GroupParIsSource = 1
|
||||
case IROR_C.Name:
|
||||
fmt.Printf("%s \n", ins.Name)
|
||||
sins.Name = ins.Name
|
||||
sins.Mod = 0
|
||||
|
||||
for sins.Imm32 = 0; sins.Imm32 == 0; {
|
||||
sins.Imm32 = uint32(gen.GetByte() & 63)
|
||||
}
|
||||
|
||||
sins.OpGroup = S_IROR_C
|
||||
sins.OpGroupPar = -1
|
||||
case IADD_C7.Name, IADD_C8.Name, IADD_C9.Name:
|
||||
fmt.Printf("%s \n", ins.Name)
|
||||
sins.Name = ins.Name
|
||||
sins.Mod = 0
|
||||
sins.Imm32 = gen.GetUint32()
|
||||
sins.OpGroup = S_IADD_C7
|
||||
sins.OpGroupPar = -1
|
||||
case IXOR_C7.Name, IXOR_C8.Name, IXOR_C9.Name:
|
||||
fmt.Printf("%s \n", ins.Name)
|
||||
sins.Name = ins.Name
|
||||
sins.Mod = 0
|
||||
sins.Imm32 = gen.GetUint32()
|
||||
sins.OpGroup = S_IXOR_C7
|
||||
sins.OpGroupPar = -1
|
||||
|
||||
case IMULH_R.Name:
|
||||
fmt.Printf("%s \n", ins.Name)
|
||||
sins.Name = ins.Name
|
||||
sins.CanReuse = true
|
||||
sins.Mod = 0
|
||||
sins.Imm32 = 0
|
||||
sins.OpGroup = S_IMULH_R
|
||||
sins.OpGroupPar = int(gen.GetUint32())
|
||||
case ISMULH_R.Name:
|
||||
fmt.Printf("%s \n", ins.Name)
|
||||
sins.Name = ins.Name
|
||||
sins.CanReuse = true
|
||||
sins.Mod = 0
|
||||
sins.Imm32 = 0
|
||||
sins.OpGroup = S_ISMULH_R
|
||||
sins.OpGroupPar = int(gen.GetUint32())
|
||||
|
||||
case IMUL_RCP.Name:
|
||||
fmt.Printf("%s \n", ins.Name)
|
||||
sins.Name = ins.Name
|
||||
|
||||
sins.Mod = 0
|
||||
for {
|
||||
sins.Imm32 = gen.GetUint32()
|
||||
if (sins.Imm32&sins.Imm32 - 1) != 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
sins.OpGroup = S_IMUL_RCP
|
||||
|
||||
default:
|
||||
fmt.Printf("%s \n", ins.Name)
|
||||
panic("should not occur")
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
func CreateSuperScalarInstruction(sins *SuperScalarInstruction, gen *Blake2Generator, instruction_len int, decoder_type int, islast, isfirst bool) {
|
||||
|
||||
fmt.Printf("instruction len %d\n", instruction_len)
|
||||
switch instruction_len {
|
||||
case 3:
|
||||
if islast {
|
||||
create(sins, slot3L[gen.GetByte()&3], gen)
|
||||
} else {
|
||||
create(sins, slot3[gen.GetByte()&1], gen)
|
||||
}
|
||||
case 4:
|
||||
//if this is the 4-4-4-4 buffer, issue multiplications as the first 3 instructions
|
||||
if decoder_type == int(Decoder4444) && !islast {
|
||||
create(sins, &IMUL_R, gen)
|
||||
} else {
|
||||
create(sins, slot4[gen.GetByte()&1], gen)
|
||||
}
|
||||
case 7:
|
||||
create(sins, slot7[gen.GetByte()&1], gen)
|
||||
|
||||
case 8:
|
||||
fmt.Printf("creating 8\n")
|
||||
create(sins, slot8[gen.GetByte()&1], gen)
|
||||
|
||||
case 9:
|
||||
create(sins, slot9[gen.GetByte()&1], gen)
|
||||
case 10:
|
||||
create(sins, slot10[0], gen)
|
||||
|
||||
default:
|
||||
panic("should not be possible")
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
type SuperScalarProgram struct {
|
||||
Ins []SuperScalarInstruction // all instructions of program
|
||||
AddressReg int
|
||||
}
|
||||
|
||||
func Build_SuperScalar_Program(gen *Blake2Generator) *SuperScalarProgram {
|
||||
cycle := 0
|
||||
depcycle := 0
|
||||
retire_cycle := 0
|
||||
mulcount := 0
|
||||
ports_saturated := false
|
||||
program_size := 0
|
||||
current_instruction := INOP
|
||||
macro_op_index := 0
|
||||
macro_op_count := 0
|
||||
throwAwayCount := 0
|
||||
code_size := 0
|
||||
var program SuperScalarProgram
|
||||
|
||||
registers := make([]Register, 8, 8)
|
||||
|
||||
sins := &SuperScalarInstruction{}
|
||||
sins.ins = &Instruction{Name: "NOP"}
|
||||
|
||||
portbusy := make([][]int, CYCLE_MAP_SIZE)
|
||||
for i := range portbusy {
|
||||
portbusy[i] = make([]int, 3)
|
||||
}
|
||||
|
||||
done := 0
|
||||
|
||||
for decode_cycle := 0; decode_cycle < RANDOMX_SUPERSCALAR_LATENCY && !ports_saturated && program_size < SuperscalarMaxSize; decode_cycle++ {
|
||||
|
||||
decoder := FetchNextDecoder(sins.ins, decode_cycle, mulcount, gen)
|
||||
|
||||
fmt.Printf("; ------------- fetch cycle %d (%s)\n", cycle, decoder)
|
||||
|
||||
if cycle == 51 {
|
||||
// break
|
||||
}
|
||||
|
||||
/* for i := range portbusy {
|
||||
for j := range portbusy[i]{
|
||||
portbusy[i][j]=false
|
||||
}
|
||||
}*/
|
||||
|
||||
buffer_index := 0
|
||||
|
||||
for buffer_index < decoder.GetSize() { // generate instructions for the current decoder
|
||||
top_cycle := cycle
|
||||
|
||||
fmt.Printf("macro_op_index %d current_instruction %s actual instruction uop %d\n", macro_op_index, current_instruction.Name, sins.ins.GetUOPCount())
|
||||
|
||||
if macro_op_index >= sins.ins.GetUOPCount() {
|
||||
if ports_saturated || program_size >= SuperscalarMaxSize {
|
||||
//panic("breaking off") program built successfully
|
||||
break
|
||||
}
|
||||
CreateSuperScalarInstruction(sins, gen, Decoder_To_Instruction_Length[int(decoder)][buffer_index], int(decoder), len(Decoder_To_Instruction_Length[decoder]) == (buffer_index+1), buffer_index == 0)
|
||||
macro_op_index = 0
|
||||
|
||||
}
|
||||
|
||||
mop := sins.ins.UOP
|
||||
if sins.ins.GetUOPCount() == 1 {
|
||||
|
||||
} else {
|
||||
mop = sins.ins.UOP_Array[macro_op_index]
|
||||
}
|
||||
|
||||
fmt.Printf("MOP name %s depcycle %d\n", mop.Name, depcycle)
|
||||
|
||||
//calculate the earliest cycle when this macro-op (all of its uOPs) can be scheduled for execution
|
||||
scheduleCycle := ScheduleMop(&mop, portbusy, cycle, depcycle, false)
|
||||
if scheduleCycle < 0 {
|
||||
fmt.Printf("Unable to map operation %s to execution port (cycle %d)", mop.Name, cycle)
|
||||
//__debugbreak();
|
||||
ports_saturated = true
|
||||
break
|
||||
}
|
||||
|
||||
fmt.Printf("scheduleCycle %d\n", scheduleCycle)
|
||||
|
||||
if macro_op_index == sins.ins.SrcOP { // FIXME
|
||||
forward := 0
|
||||
for ; forward < LOOK_FORWARD_CYCLES && !sins.SelectSource(scheduleCycle, registers, gen); forward++ {
|
||||
fmt.Printf(";src STALL at cycle %d\n", cycle)
|
||||
scheduleCycle++
|
||||
cycle++
|
||||
}
|
||||
|
||||
if forward == LOOK_FORWARD_CYCLES {
|
||||
if throwAwayCount < MAX_THROWAWAY_COUNT {
|
||||
throwAwayCount++
|
||||
macro_op_index = sins.ins.GetUOPCount()
|
||||
fmt.Printf(";throwAway %s\n", sins.Name)
|
||||
continue
|
||||
}
|
||||
fmt.Printf("aborting at cycle %d source registers not available", cycle)
|
||||
break
|
||||
}
|
||||
|
||||
fmt.Printf("; src = r%d\n", sins.Src_Reg)
|
||||
|
||||
}
|
||||
|
||||
if macro_op_index == sins.ins.DstOP { // FIXME
|
||||
forward := 0
|
||||
for ; forward < LOOK_FORWARD_CYCLES && !sins.SelectDestination(scheduleCycle, throwAwayCount > 0, registers, gen); forward++ {
|
||||
fmt.Printf(";dst STALL at cycle %d\n", cycle)
|
||||
scheduleCycle++
|
||||
cycle++
|
||||
}
|
||||
|
||||
if forward == LOOK_FORWARD_CYCLES {
|
||||
if throwAwayCount < MAX_THROWAWAY_COUNT {
|
||||
throwAwayCount++
|
||||
macro_op_index = sins.ins.GetUOPCount()
|
||||
fmt.Printf(";throwAway %s\n", sins.Name)
|
||||
continue
|
||||
}
|
||||
fmt.Printf("aborting at cycle %d destination registers not available", cycle)
|
||||
break
|
||||
}
|
||||
|
||||
fmt.Printf("; dst = r%d\n", sins.Dst_Reg)
|
||||
|
||||
}
|
||||
throwAwayCount = 0
|
||||
// recalculate when the instruction can be scheduled based on operand availability
|
||||
scheduleCycle = ScheduleMop(&mop, portbusy, scheduleCycle, scheduleCycle, true)
|
||||
|
||||
depcycle = scheduleCycle + mop.GetLatency() // calculate when will the result be ready
|
||||
|
||||
if macro_op_index == sins.ins.ResultOP { // fix me
|
||||
retire_cycle = depcycle
|
||||
fmt.Printf("; RETIRED at cycle %d Dst_Reg %d\n", retire_cycle, sins.Dst_Reg)
|
||||
registers[sins.Dst_Reg].Latency = depcycle
|
||||
registers[sins.Dst_Reg].LastOpGroup = sins.OpGroup
|
||||
registers[sins.Dst_Reg].LastOpPar = sins.OpGroupPar
|
||||
|
||||
}
|
||||
|
||||
code_size += mop.GetSize()
|
||||
buffer_index++
|
||||
macro_op_index++
|
||||
macro_op_count++
|
||||
|
||||
// terminating condition for 99% case
|
||||
if scheduleCycle >= RANDOMX_SUPERSCALAR_LATENCY {
|
||||
ports_saturated = true
|
||||
}
|
||||
cycle = top_cycle
|
||||
|
||||
// when all uops of current instruction have been issued, add the instruction to supercalara program
|
||||
if macro_op_index >= sins.ins.GetUOPCount() {
|
||||
sins.FixSrcReg() // fix src register once and for all
|
||||
program.Ins = append(program.Ins, *sins)
|
||||
|
||||
if sins.ins.Name == "IMUL_R" || sins.ins.Name == "IMULH_R" || sins.ins.Name == "ISMULH_R" || sins.ins.Name == "IMUL_RCP" {
|
||||
mulcount++
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
done++
|
||||
|
||||
// if done >= 20 {break}
|
||||
|
||||
}
|
||||
cycle++
|
||||
}
|
||||
|
||||
for i := range program.Ins {
|
||||
fmt.Printf("%d %s\n", i, program.Ins[i].String())
|
||||
}
|
||||
|
||||
var asic_latencies [8]int
|
||||
|
||||
for i := range program.Ins {
|
||||
//fmt.Printf("%d %s\n",i ,program[i].String() )
|
||||
lastdst := asic_latencies[program.Ins[i].Dst_Reg] + 1
|
||||
lastsrc := 0
|
||||
if program.Ins[i].Dst_Reg != program.Ins[i].Src_Reg {
|
||||
lastsrc = asic_latencies[program.Ins[i].Src_Reg] + 1
|
||||
}
|
||||
asic_latencies[program.Ins[i].Dst_Reg] = Max(lastdst, lastsrc)
|
||||
}
|
||||
|
||||
asic_latency_max := 0
|
||||
address_reg := 0
|
||||
|
||||
for i := range asic_latencies {
|
||||
fmt.Printf("latency[%d] %d\n", i, asic_latencies[i])
|
||||
if asic_latencies[i] > asic_latency_max {
|
||||
asic_latency_max = asic_latencies[i]
|
||||
address_reg = i
|
||||
}
|
||||
}
|
||||
|
||||
program.AddressReg = address_reg
|
||||
|
||||
fmt.Printf("address_reg %d\n", address_reg)
|
||||
|
||||
return &program
|
||||
|
||||
}
|
||||
|
||||
const CYCLE_MAP_SIZE int = RANDOMX_SUPERSCALAR_LATENCY + 4
|
||||
const LOOK_FORWARD_CYCLES int = 4
|
||||
const MAX_THROWAWAY_COUNT int = 256
|
||||
|
||||
// schedule the uop as early as possible
|
||||
func ScheduleUop(uop ExecutionPort, portbusy [][]int, cycle int, commit bool) int {
|
||||
//cycle++
|
||||
for ; cycle < CYCLE_MAP_SIZE; cycle++ { // since cycle is value based, its restored on return
|
||||
//fmt.Printf("port busy %+v\n", portbusy[cycle])
|
||||
fmt.Printf("current cycle %d portbusy %+v commit %+v\n", cycle, portbusy[cycle], commit)
|
||||
if (uop&P5) != 0 && portbusy[cycle][2] == 0 {
|
||||
if commit {
|
||||
fmt.Printf("; P5 at cycle %d\n", cycle)
|
||||
portbusy[cycle][2] = int(uop)
|
||||
}
|
||||
fmt.Printf("P5 available\n")
|
||||
return cycle
|
||||
}
|
||||
if (uop&P0) != 0 && portbusy[cycle][0] == 0 {
|
||||
if commit {
|
||||
fmt.Printf("; P0 at cycle %d\n", cycle)
|
||||
portbusy[cycle][0] = int(uop)
|
||||
}
|
||||
fmt.Printf("P0 available\n")
|
||||
return cycle
|
||||
}
|
||||
if (uop&P1) != 0 && portbusy[cycle][1] == 0 {
|
||||
if commit {
|
||||
fmt.Printf("; P1 at cycle %d\n", cycle)
|
||||
portbusy[cycle][1] = int(uop)
|
||||
}
|
||||
fmt.Printf("P1 available\n")
|
||||
return cycle
|
||||
}
|
||||
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
func ScheduleMop(mop *MacroOP, portbusy [][]int, cycle int, depcycle int, commit bool) int {
|
||||
|
||||
if mop.IsDependent() {
|
||||
fmt.Printf("dependent\n")
|
||||
cycle = Max(cycle, depcycle)
|
||||
}
|
||||
|
||||
if mop.IsEliminated() {
|
||||
if commit {
|
||||
fmt.Printf("; (eliminated)\n")
|
||||
}
|
||||
return cycle
|
||||
} else if mop.IsSimple() {
|
||||
fmt.Printf("simple 1\n")
|
||||
|
||||
return ScheduleUop(mop.GetUOP1(), portbusy, cycle, commit)
|
||||
} else {
|
||||
for ; cycle < CYCLE_MAP_SIZE; cycle++ { // since cycle is value based, its restored on return
|
||||
cycle1 := ScheduleUop(mop.GetUOP1(), portbusy, cycle, false)
|
||||
cycle2 := ScheduleUop(mop.GetUOP2(), portbusy, cycle, false)
|
||||
|
||||
if cycle1 == cycle2 {
|
||||
if commit {
|
||||
ScheduleUop(mop.GetUOP1(), portbusy, cycle, true)
|
||||
ScheduleUop(mop.GetUOP2(), portbusy, cycle, true)
|
||||
}
|
||||
return cycle1
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return -1
|
||||
}
|
||||
|
||||
// Max returns the larger of x and y; when they are equal, that common value.
func Max(x, y int) int {
	larger := x
	if y > larger {
		larger = y
	}
	return larger
}
|
||||
|
||||
// Register is the generator's bookkeeping record for one integer register.
type Register struct {
	Value uint64

	// Latency is the cycle at which the register's latest result becomes
	// available; selection compares it against the scheduling cycle.
	Latency int

	// LastOpGroup and LastOpPar record the operation group and group
	// parameter of the last instruction that wrote this register.
	LastOpGroup int
	LastOpPar   int // -1 = immediate, 0 to 7 = register

	// Status can be RegisterNeedsDisplacement = 5 (x86 r13 register)
	// or RegisterNeedsSib = 4 (x86 r12 register).
	Status int
}
|
||||
|
||||
// Register indices that receive special treatment during register selection.
const (
	RegisterNeedsDisplacement = 5 // x86 r13 register
	RegisterNeedsSib          = 4 // x86 r12 register
)
|
||||
|
||||
func (sins *SuperScalarInstruction) SelectSource(cycle int, Registers []Register, gen *Blake2Generator) bool {
|
||||
var available_registers []int
|
||||
|
||||
for i := range Registers {
|
||||
fmt.Printf("\nchecking s reg %d latency %d cycle %d", i, Registers[i].Latency, cycle)
|
||||
if Registers[i].Latency <= cycle {
|
||||
available_registers = append(available_registers, i)
|
||||
fmt.Printf("available")
|
||||
}
|
||||
}
|
||||
|
||||
if len(available_registers) == 2 && sins.Name == "IADD_RS" {
|
||||
if available_registers[0] == RegisterNeedsDisplacement || available_registers[1] == RegisterNeedsDisplacement {
|
||||
sins.Src_Reg = RegisterNeedsDisplacement
|
||||
sins.OpGroupPar = sins.Src_Reg
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
if selectRegister(available_registers, gen, &sins.Src_Reg) {
|
||||
|
||||
if sins.GroupParIsSource == 0 {
|
||||
|
||||
} else {
|
||||
sins.OpGroupPar = sins.Src_Reg
|
||||
}
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (sins *SuperScalarInstruction) SelectDestination(cycle int, allowChainedMul bool, Registers []Register, gen *Blake2Generator) bool {
|
||||
var available_registers []int
|
||||
|
||||
for i := range Registers {
|
||||
fmt.Printf("\nchecking d reg %d cycle %d CanReuse %+v src %d latency %d chained_mul %+v | ", i, cycle, sins.CanReuse, sins.Src_Reg, Registers[i].Latency, allowChainedMul)
|
||||
fmt.Printf("%+v %+v %+v %+v %+v ", Registers[i].Latency <= cycle,
|
||||
(sins.CanReuse || i != sins.Src_Reg),
|
||||
(allowChainedMul || sins.OpGroup != S_IMUL_R || Registers[i].LastOpGroup != S_IMUL_R),
|
||||
(Registers[i].LastOpGroup != sins.OpGroup || Registers[i].LastOpPar != sins.OpGroupPar),
|
||||
(sins.Name != "IADD_RS" || i != RegisterNeedsDisplacement))
|
||||
//fmt.Printf("qq %+v %+v %+v qq",allowChainedMul, sins.OpGroup != S_IMUL_R, Registers[i].LastOpGroup != S_IMUL_R )
|
||||
fmt.Printf("yy %+v %+v yy ", Registers[i].LastOpPar, sins.OpGroupPar)
|
||||
|
||||
if Registers[i].Latency <= cycle && (sins.CanReuse || i != sins.Src_Reg) &&
|
||||
(allowChainedMul || sins.OpGroup != S_IMUL_R || Registers[i].LastOpGroup != S_IMUL_R) &&
|
||||
(Registers[i].LastOpGroup != sins.OpGroup || Registers[i].LastOpPar != sins.OpGroupPar) &&
|
||||
(sins.Name != "IADD_RS" || i != RegisterNeedsDisplacement) {
|
||||
available_registers = append(available_registers, i)
|
||||
fmt.Printf("available ")
|
||||
}
|
||||
}
|
||||
|
||||
return selectRegister(available_registers, gen, &sins.Dst_Reg)
|
||||
}
|
||||
|
||||
func selectRegister(available_registers []int, gen *Blake2Generator, reg *int) bool {
|
||||
index := 0
|
||||
if len(available_registers) == 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
if len(available_registers) > 1 {
|
||||
tmp := gen.GetUint32()
|
||||
// fmt.Printf("GetUint32 %d len %d \n", tmp,uint32(len(available_registers)))
|
||||
|
||||
index = int(tmp % uint32(len(available_registers)))
|
||||
} else {
|
||||
index = 0
|
||||
}
|
||||
fmt.Printf("reg index %d\n", index)
|
||||
*reg = available_registers[index] // availableRegisters[index];
|
||||
return true
|
||||
}
|
||||
|
||||
const Mask = CacheSize/CacheLineSize - 1
|
||||
|
||||
func getMixBlock(register_value uint64, memory []byte) uint64 {
|
||||
return (register_value * Mask) * CacheLineSize
|
||||
}
|
||||
|
||||
// Constants used by InitDatasetItem to derive the eight initial register
// values from an item number: register 0 is (item+1)*superscalarMul0 and
// register i (1..7) is register 0 XORed with superscalarAdd<i>.
const (
	superscalarMul0 uint64 = 6364136223846793005
	superscalarAdd1 uint64 = 9298411001130361340
	superscalarAdd2 uint64 = 12065312585734608966
	superscalarAdd3 uint64 = 9306329213124626780
	superscalarAdd4 uint64 = 5281919268842080866
	superscalarAdd5 uint64 = 10536153434571861004
	superscalarAdd6 uint64 = 3398623926847679864
	superscalarAdd7 uint64 = 9549104520008361294
)
|
||||
|
||||
func (cache *Randomx_Cache) InitDatasetItem(out []uint64, itemnumber uint64) {
|
||||
var rl_array, mix_array [8]uint64
|
||||
rl := rl_array[:]
|
||||
mix_block := mix_array[:]
|
||||
register_value := itemnumber
|
||||
_ = register_value
|
||||
|
||||
rl[0] = (itemnumber + 1) * superscalarMul0
|
||||
rl[1] = rl[0] ^ superscalarAdd1
|
||||
rl[2] = rl[0] ^ superscalarAdd2
|
||||
rl[3] = rl[0] ^ superscalarAdd3
|
||||
rl[4] = rl[0] ^ superscalarAdd4
|
||||
rl[5] = rl[0] ^ superscalarAdd5
|
||||
rl[6] = rl[0] ^ superscalarAdd6
|
||||
rl[7] = rl[0] ^ superscalarAdd7
|
||||
|
||||
for i := 0; i < RANDOMX_CACHE_ACCESSES; i++ {
|
||||
//mix_block_index := getMixBlock(register_value,nil)
|
||||
cache.Programs[i].executeSuperscalar_nocache(rl)
|
||||
|
||||
cache.GetBlock(register_value, mix_block)
|
||||
for q := range rl {
|
||||
// fmt.Printf("%d rl[%d] %16x mix %16x\n",i, q,rl[q], mix_block[q])
|
||||
rl[q] ^= mix_block[q]
|
||||
}
|
||||
|
||||
register_value = rl[cache.Programs[i].AddressReg]
|
||||
// fmt.Printf("%d\n",i)
|
||||
|
||||
}
|
||||
|
||||
for q := range rl {
|
||||
out[q] = rl[q]
|
||||
}
|
||||
}
|
||||
|
||||
func (cache *Randomx_Cache) initDataset(start_item, end_item uint64) {
|
||||
for itemnumber := start_item; itemnumber < end_item; itemnumber++ {
|
||||
|
||||
cache.InitDatasetItem(nil, itemnumber)
|
||||
|
||||
// dataset_index += CacheLineSize
|
||||
fmt.Printf("exiting dataset item\n")
|
||||
break
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
// execute the superscalar program
|
||||
func (p *SuperScalarProgram) executeSuperscalar_nocache(r []uint64) {
|
||||
for _, ins := range p.Ins {
|
||||
//fmt.Printf("%d %s\n",i ,program[i].String() )
|
||||
switch ins.Opcode {
|
||||
case S_ISUB_R:
|
||||
r[ins.Dst_Reg] -= r[ins.Src_Reg]
|
||||
case S_IXOR_R:
|
||||
r[ins.Dst_Reg] ^= r[ins.Src_Reg]
|
||||
case S_IADD_RS:
|
||||
mod_shift := (ins.Mod >> 2) % 4 // bits 2-3
|
||||
r[ins.Dst_Reg] += (r[ins.Src_Reg] << mod_shift)
|
||||
case S_IMUL_R:
|
||||
r[ins.Dst_Reg] *= r[ins.Src_Reg]
|
||||
case S_IROR_C:
|
||||
r[ins.Dst_Reg] = bits.RotateLeft64(r[ins.Dst_Reg], 0-int(ins.Imm32))
|
||||
// panic("check rotate right is working fine")
|
||||
case S_IADD_C7, S_IADD_C8, S_IADD_C9:
|
||||
r[ins.Dst_Reg] += signExtend2sCompl(ins.Imm32)
|
||||
case S_IXOR_C7, S_IXOR_C8, S_IXOR_C9:
|
||||
r[ins.Dst_Reg] ^= signExtend2sCompl(ins.Imm32)
|
||||
case S_IMULH_R:
|
||||
r[ins.Dst_Reg], _ = bits.Mul64(r[ins.Dst_Reg], r[ins.Src_Reg])
|
||||
case S_ISMULH_R:
|
||||
r[ins.Dst_Reg] = uint64(smulh(int64(r[ins.Dst_Reg]), int64(r[ins.Src_Reg])))
|
||||
case S_IMUL_RCP:
|
||||
r[ins.Dst_Reg] *= randomx_reciprocal(uint64(ins.Imm32))
|
||||
|
||||
default:
|
||||
panic(fmt.Sprintf("unknown opcode %d", ins.Opcode))
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// smulh returns the high 64 bits of the signed 128-bit product a*b,
// reinterpreted as a uint64.
//
// It starts from the high word of the unsigned product of the operands' bit
// patterns and subtracts b when a is negative and a when b is negative, with
// wrap-around arithmetic.
func smulh(a, b int64) uint64 {
	high, _ := bits.Mul64(uint64(a), uint64(b))

	// x>>63 is all ones for a negative x and zero otherwise, so the AND
	// yields the other operand or zero.
	high -= uint64((a >> 63) & b)
	high -= uint64((b >> 63) & a)
	return high
}
|
||||
|
||||
// p2exp63 is 2^63.
const p2exp63 uint64 = uint64(1) << 63

// randomx_reciprocal returns floor(2^(63+k) / divisor) truncated to 64 bits,
// where k is the bit length of divisor. It starts from the quotient and
// remainder of 2^63 / divisor and performs k further steps of binary long
// division. A zero divisor causes a division-by-zero panic.
func randomx_reciprocal(divisor uint64) uint64 {
	quotient, remainder := p2exp63/divisor, p2exp63%divisor

	// One extra long-division step per significant bit of the divisor.
	for steps := bits.Len64(divisor); steps > 0; steps-- {
		if remainder >= divisor-remainder {
			quotient = quotient*2 + 1
			remainder = remainder*2 - divisor
			continue
		}
		quotient = quotient * 2
		remainder = remainder * 2
	}
	return quotient
}
|
||||
|
||||
// signExtend2sCompl sign-extends the 32-bit value x to 64 bits: bit 31 of x is
// copied into bits 32..63 of the result.
//
// The first case tests whether -1 equals ^0, i.e. a two's-complement
// representation, and converts through int32. The remaining cases perform the
// same extension by hand.
func signExtend2sCompl(x uint32) uint64 {
	switch {
	case -1 == (^0):
		return uint64(int64(int32(x)))
	case x > math.MaxInt32:
		return uint64(x) | 0xffffffff00000000
	default:
		return uint64(x)
	}
}
|
315
vm.go
Normal file
315
vm.go
Normal file
|
@ -0,0 +1,315 @@
|
|||
package randomx
|
||||
|
||||
import "fmt"
|
||||
import "math"
|
||||
import "math/big"
|
||||
import "math/bits"
|
||||
import "encoding/binary"
|
||||
import "golang.org/x/crypto/blake2b"
|
||||
|
||||
// REG is a pair of 64-bit words named Hi and Lo.
type REG struct {
	Hi uint64
	Lo uint64
}
|
||||
|
||||
type VM struct {
|
||||
State_start [64]byte
|
||||
buffer [RANDOMX_PROGRAM_SIZE*8 + 16*8]byte // first 128 bytes are entropy below rest are program bytes
|
||||
Prog []byte
|
||||
ScratchPad []byte
|
||||
|
||||
ByteCode [RANDOMX_PROGRAM_SIZE]InstructionByteCode
|
||||
|
||||
// program configuration see program.hpp
|
||||
|
||||
entropy [16]uint64
|
||||
|
||||
reg REGISTER_FILE // the register file
|
||||
mem MemoryRegisters
|
||||
config Config // configuration
|
||||
datasetOffset uint64
|
||||
|
||||
RoundingMode big.RoundingMode
|
||||
|
||||
fresult, fdst, fsrc *big.Float
|
||||
|
||||
Cache *Randomx_Cache // randomx cache
|
||||
|
||||
}
|
||||
|
||||
func (cache *Randomx_Cache) VM_Initialize() *VM {
|
||||
|
||||
return &VM{Cache: cache, RoundingMode: big.ToNearestEven, fresult: &big.Float{}, fdst: &big.Float{}, fsrc: &big.Float{}} //// setup the cache
|
||||
}
|
||||
|
||||
// Config is the per-program configuration that Run derives from the entropy.
type Config struct {
	// eMask holds the two masks ORed into the low and high halves of the
	// e registers when they are loaded.
	eMask [2]uint64

	// readReg0..readReg3 are indices into the integer register file; Run
	// uses 0/1 to mix the scratchpad addresses and 2/3 to update mx.
	readReg0, readReg1, readReg2, readReg3 uint64
}
|
||||
|
||||
// REGISTER_FILE is the VM register file: eight integer registers and three
// groups (f, e, a) of four floating-point registers, each a pair of float64
// values indexed by LOW and HIGH.
type REGISTER_FILE struct {
	r [8]uint64
	f [4][2]float64
	e [4][2]float64
	a [4][2]float64
}
|
||||
// MemoryRegisters holds the VM's memory address registers.
type MemoryRegisters struct {
	// mx and ma are the two address registers (addr_t mx, ma in the C++
	// original); Run swaps them once per iteration.
	mx, ma uint64

	mempry uint64 // uint8_t* memory = nullptr;
}
|
||||
|
||||
// Indices of the two halves of a floating-point register pair.
const (
	LOW  = 0
	HIGH = 1
)
|
||||
|
||||
// calculate hash based on input
|
||||
func (vm *VM) Run(input_hash []byte) {
|
||||
|
||||
var mix_block [8]uint64
|
||||
|
||||
fmt.Printf("%x \n", input_hash)
|
||||
|
||||
fillAes4Rx4(input_hash[:], vm.buffer[:])
|
||||
|
||||
for i := range vm.entropy {
|
||||
vm.entropy[i] = binary.LittleEndian.Uint64(vm.buffer[i*8:])
|
||||
}
|
||||
|
||||
vm.Prog = vm.buffer[len(vm.entropy)*8:]
|
||||
|
||||
for i := range vm.reg.r {
|
||||
vm.reg.r[i] = 0
|
||||
}
|
||||
|
||||
// do more initialization before we run
|
||||
|
||||
vm.reg.a[0][LOW] = math.Float64frombits(getSmallPositiveFloatBits(vm.entropy[0]))
|
||||
vm.reg.a[0][HIGH] = math.Float64frombits(getSmallPositiveFloatBits(vm.entropy[1]))
|
||||
vm.reg.a[1][LOW] = math.Float64frombits(getSmallPositiveFloatBits(vm.entropy[2]))
|
||||
vm.reg.a[1][HIGH] = math.Float64frombits(getSmallPositiveFloatBits(vm.entropy[3]))
|
||||
vm.reg.a[2][LOW] = math.Float64frombits(getSmallPositiveFloatBits(vm.entropy[4]))
|
||||
vm.reg.a[2][HIGH] = math.Float64frombits(getSmallPositiveFloatBits(vm.entropy[5]))
|
||||
vm.reg.a[3][LOW] = math.Float64frombits(getSmallPositiveFloatBits(vm.entropy[6]))
|
||||
vm.reg.a[3][HIGH] = math.Float64frombits(getSmallPositiveFloatBits(vm.entropy[7]))
|
||||
vm.mem.ma = vm.entropy[8] & CacheLineAlignMask
|
||||
vm.mem.mx = vm.entropy[10]
|
||||
addressRegisters := vm.entropy[12]
|
||||
vm.config.readReg0 = 0 + (addressRegisters & 1)
|
||||
addressRegisters >>= 1
|
||||
vm.config.readReg1 = 2 + (addressRegisters & 1)
|
||||
addressRegisters >>= 1
|
||||
vm.config.readReg2 = 4 + (addressRegisters & 1)
|
||||
addressRegisters >>= 1
|
||||
vm.config.readReg3 = 6 + (addressRegisters & 1)
|
||||
vm.datasetOffset = (vm.entropy[13] % (DATASETEXTRAITEMS + 1)) * CacheLineSize
|
||||
vm.config.eMask[0] = getFloatMask(vm.entropy[14])
|
||||
vm.config.eMask[1] = getFloatMask(vm.entropy[15])
|
||||
|
||||
fmt.Printf("prog %x entropy 0 %x %f \n", vm.buffer[:32], vm.entropy[0], vm.reg.a[0][HIGH])
|
||||
|
||||
vm.Compile_TO_Bytecode()
|
||||
|
||||
spAddr0 := vm.mem.mx
|
||||
spAddr1 := vm.mem.ma
|
||||
|
||||
for ic := 0; ic < RANDOMX_PROGRAM_ITERATIONS; ic++ {
|
||||
spMix := vm.reg.r[vm.config.readReg0] ^ vm.reg.r[vm.config.readReg1]
|
||||
|
||||
spAddr0 ^= spMix
|
||||
spAddr0 &= ScratchpadL3Mask64
|
||||
spAddr1 ^= spMix >> 32
|
||||
spAddr1 &= ScratchpadL3Mask64
|
||||
|
||||
//fmt.Printf("spAddr0 %x %x\n", spAddr0,spAddr1)
|
||||
|
||||
for i := uint64(0); i < REGISTERSCOUNT; i++ {
|
||||
vm.reg.r[i] ^= vm.Load64(spAddr0 + 8*i)
|
||||
//fmt.Printf("r[%d] %x \n", i,vm.reg.r[i]);
|
||||
}
|
||||
|
||||
for i := uint64(0); i < REGISTERCOUNTFLT; i++ {
|
||||
vm.reg.f[i][LOW] = float64(unsigned32ToSigned2sCompl(vm.Load32(spAddr1 + 8*i)))
|
||||
vm.reg.f[i][HIGH] = float64(unsigned32ToSigned2sCompl(vm.Load32(spAddr1 + 8*i + 4)))
|
||||
//fmt.Printf("lo %f %f\n", vm.reg.f[i][LOW] , vm.reg.f[i][HIGH] )
|
||||
}
|
||||
|
||||
for i := uint64(0); i < REGISTERCOUNTFLT; i++ {
|
||||
vm.reg.e[i][LOW] = float64(unsigned32ToSigned2sCompl(vm.Load32(spAddr1 + 8*(i+REGISTERCOUNTFLT))))
|
||||
vm.reg.e[i][HIGH] = float64(unsigned32ToSigned2sCompl(vm.Load32(spAddr1 + 8*(i+REGISTERCOUNTFLT) + 4)))
|
||||
|
||||
// fmt.Printf("OR %x %x\n", (math.Float64bits(vm.reg.e[i][LOW]) & dynamicMantissaMask) | vm.config.eMask[LOW] , (math.Float64bits(vm.reg.e[i][HIGH]) & dynamicMantissaMask)| vm.config.eMask[HIGH] )
|
||||
|
||||
vm.reg.e[i][LOW] = math.Float64frombits((math.Float64bits(vm.reg.e[i][LOW]) & dynamicMantissaMask) | vm.config.eMask[LOW])
|
||||
vm.reg.e[i][HIGH] = math.Float64frombits((math.Float64bits(vm.reg.e[i][HIGH]) & dynamicMantissaMask) | vm.config.eMask[HIGH])
|
||||
|
||||
//fmt.Printf("lo e %f %f\n", vm.reg.e[i][LOW] , vm.reg.e[i][HIGH] )
|
||||
}
|
||||
|
||||
//for i := uint64(0); i < REGISTERCOUNTFLT; i++{
|
||||
//fmt.Printf("a low %f high %f\n", vm.reg.a[i][LOW] , vm.reg.a[i][HIGH] )
|
||||
//}
|
||||
|
||||
vm.InterpretByteCode()
|
||||
|
||||
vm.mem.mx ^= vm.reg.r[vm.config.readReg2] ^ vm.reg.r[vm.config.readReg3]
|
||||
vm.mem.mx &= CacheLineAlignMask
|
||||
|
||||
//fmt.Printf("mx %x\n",vm.mem.mx )
|
||||
|
||||
// execute diffuser superscalar program to get dataset 64 bytes
|
||||
{
|
||||
itemnumber := (vm.datasetOffset + vm.mem.ma) / CacheLineSize
|
||||
//fmt.Printf("qitem number %x\n", itemnumber)
|
||||
|
||||
vm.Cache.InitDatasetItem(mix_block[:], itemnumber)
|
||||
|
||||
for i := range vm.reg.r {
|
||||
vm.reg.r[i] ^= mix_block[i]
|
||||
}
|
||||
|
||||
}
|
||||
vm.mem.mx, vm.mem.ma = vm.mem.ma, vm.mem.mx // swap the elements
|
||||
|
||||
for i := uint64(0); i < REGISTERSCOUNT; i++ {
|
||||
binary.BigEndian.PutUint64(vm.ScratchPad[spAddr1+(8*i):], bits.RotateLeft64(vm.reg.r[i], 32))
|
||||
|
||||
//fmt.Printf("reg r[%d] %x\n", i,vm.reg.r[i])
|
||||
|
||||
}
|
||||
|
||||
for i := uint64(0); i < REGISTERCOUNTFLT; i++ {
|
||||
vm.reg.f[i][LOW] = math.Float64frombits(math.Float64bits(vm.reg.f[i][LOW]) ^ math.Float64bits(vm.reg.e[i][LOW]))
|
||||
vm.reg.f[i][HIGH] = math.Float64frombits(math.Float64bits(vm.reg.f[i][HIGH]) ^ math.Float64bits(vm.reg.e[i][HIGH]))
|
||||
|
||||
binary.BigEndian.PutUint64(vm.ScratchPad[spAddr0+(16*i):], bits.RotateLeft64(math.Float64bits(vm.reg.f[i][LOW]), 32))
|
||||
binary.BigEndian.PutUint64(vm.ScratchPad[spAddr0+(16*i)+8:], bits.RotateLeft64(math.Float64bits(vm.reg.f[i][HIGH]), 32))
|
||||
|
||||
// fmt.Printf("%d %+v\n", i, vm.reg.f[i])
|
||||
}
|
||||
|
||||
spAddr0 = 0
|
||||
spAddr1 = 0
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func (vm *VM) CalculateHash(input []byte, output []byte) {
|
||||
var buf [8]byte
|
||||
|
||||
vm.RoundingMode = big.ToNearestEven // reset rounding mode if new hash eing calculated
|
||||
|
||||
input_hash := blake2b.Sum512(input)
|
||||
|
||||
vm.ScratchPad = make([]byte, ScratchpadSize, ScratchpadSize) // calculate and fill scratchpad
|
||||
fillAes1Rx4(input_hash[:], vm.ScratchPad)
|
||||
|
||||
hash512, _ := blake2b.New512(nil)
|
||||
|
||||
temp_hash := input_hash[:]
|
||||
|
||||
for chain := 0; chain < RANDOMX_PROGRAM_COUNT-1; chain++ {
|
||||
vm.Run(temp_hash)
|
||||
|
||||
hash512.Reset()
|
||||
for i := range vm.reg.r {
|
||||
binary.LittleEndian.PutUint64(buf[:], vm.reg.r[i])
|
||||
hash512.Write(buf[:])
|
||||
}
|
||||
for i := range vm.reg.f {
|
||||
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(vm.reg.f[i][LOW]))
|
||||
hash512.Write(buf[:])
|
||||
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(vm.reg.f[i][HIGH]))
|
||||
hash512.Write(buf[:])
|
||||
}
|
||||
|
||||
for i := range vm.reg.e {
|
||||
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(vm.reg.e[i][LOW]))
|
||||
hash512.Write(buf[:])
|
||||
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(vm.reg.e[i][HIGH]))
|
||||
hash512.Write(buf[:])
|
||||
}
|
||||
|
||||
for i := range vm.reg.a {
|
||||
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(vm.reg.a[i][LOW]))
|
||||
hash512.Write(buf[:])
|
||||
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(vm.reg.a[i][HIGH]))
|
||||
|
||||
hash512.Write(buf[:])
|
||||
}
|
||||
|
||||
temp_hash = hash512.Sum(nil)
|
||||
fmt.Printf("%d temphash %x\n", chain, temp_hash)
|
||||
}
|
||||
|
||||
// final loop executes here
|
||||
vm.Run(temp_hash)
|
||||
|
||||
// now hash the scratch pad and place into register a
|
||||
hashAes1Rx4(vm.ScratchPad, temp_hash)
|
||||
|
||||
hash256, _ := blake2b.New256(nil)
|
||||
|
||||
hash256.Reset()
|
||||
|
||||
for i := range vm.reg.r {
|
||||
binary.LittleEndian.PutUint64(buf[:], vm.reg.r[i])
|
||||
hash256.Write(buf[:])
|
||||
}
|
||||
|
||||
for i := range vm.reg.f {
|
||||
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(vm.reg.f[i][LOW]))
|
||||
hash256.Write(buf[:])
|
||||
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(vm.reg.f[i][HIGH]))
|
||||
hash256.Write(buf[:])
|
||||
}
|
||||
|
||||
for i := range vm.reg.e {
|
||||
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(vm.reg.e[i][LOW]))
|
||||
hash256.Write(buf[:])
|
||||
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(vm.reg.e[i][HIGH]))
|
||||
hash256.Write(buf[:])
|
||||
}
|
||||
|
||||
// copy temp_hash as it first copied to register and then hashed
|
||||
hash256.Write(temp_hash)
|
||||
|
||||
final_hash := hash256.Sum(nil)
|
||||
|
||||
copy(output, final_hash)
|
||||
|
||||
fmt.Printf("final %x\n", final_hash)
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
const mantissaSize = 52;
|
||||
const exponentSize = 11;
|
||||
const mantissaMask = ( (uint64(1)) << mantissaSize) - 1;
|
||||
const exponentMask = (uint64(1) << exponentSize) - 1;
|
||||
const exponentBias = 1023;
|
||||
const dynamicExponentBits = 4;
|
||||
const staticExponentBits = 4;
|
||||
const constExponentBits uint64= 0x300;
|
||||
const dynamicMantissaMask = ( uint64(1) << (mantissaSize + dynamicExponentBits)) - 1;
|
||||
*/
|
||||
// mask22bit selects the low 22 bits of a 64-bit word.
const mask22bit uint64 = 1<<22 - 1
|
||||
|
||||
func getSmallPositiveFloatBits(entropy uint64) uint64 {
|
||||
exponent := entropy >> 59 //0..31
|
||||
mantissa := entropy & mantissaMask
|
||||
exponent += exponentBias
|
||||
exponent &= exponentMask
|
||||
exponent = exponent << mantissaSize
|
||||
return exponent | mantissa
|
||||
}
|
||||
|
||||
func getStaticExponent(entropy uint64) uint64 {
|
||||
exponent := constExponentBits
|
||||
exponent |= (entropy >> (64 - staticExponentBits)) << dynamicExponentBits
|
||||
exponent <<= mantissaSize
|
||||
return exponent
|
||||
}
|
||||
|
||||
func getFloatMask(entropy uint64) uint64 {
|
||||
return (entropy & mask22bit) | getStaticExponent(entropy)
|
||||
}
|
860
vm_instruction.go
Normal file
860
vm_instruction.go
Normal file
|
@ -0,0 +1,860 @@
|
|||
package randomx
|
||||
|
||||
import "fmt"
|
||||
import "math"
|
||||
import "math/big"
|
||||
import "math/bits"
|
||||
import "encoding/binary"
|
||||
|
||||
//reference https://github.com/tevador/RandomX/blob/master/doc/specs.md#51-instruction-encoding
|
||||
|
||||
// Zero is a package-level uint64 holding 0.
// NOTE(review): presumably used where a pointer to a constant zero operand is
// needed — confirm at the use sites.
var Zero uint64
|
||||
|
||||
// VM_Instruction is a view over the raw bytes of one encoded instruction.
// Go has no unions, so a byte slice is used; every instruction is exactly
// 8 bytes, laid out as opcode, dst, src, mod, then a 32-bit little-endian
// immediate.
type VM_Instruction []byte

// IMM returns the 32-bit immediate stored little-endian in bytes 4..7.
func (ins VM_Instruction) IMM() uint32 {
	immediate := ins[4:]
	return binary.LittleEndian.Uint32(immediate)
}

// Mod returns the mod byte (byte 3).
func (ins VM_Instruction) Mod() byte { return ins[3] }

// Src returns the source byte (byte 2).
func (ins VM_Instruction) Src() byte { return ins[2] }

// Dst returns the destination byte (byte 1).
func (ins VM_Instruction) Dst() byte { return ins[1] }

// Opcode returns the opcode byte (byte 0).
func (ins VM_Instruction) Opcode() byte { return ins[0] }
|
||||
|
||||
// VM_Instruction_Type identifies a decoded VM instruction.
type VM_Instruction_Type int

// Instruction types, numbered consecutively from 0 (VM_IADD_RS) to 29 (VM_NOP).
const (
	VM_IADD_RS  VM_Instruction_Type = iota // 0
	VM_IADD_M                              // 1
	VM_ISUB_R                              // 2
	VM_ISUB_M                              // 3
	VM_IMUL_R                              // 4
	VM_IMUL_M                              // 5
	VM_IMULH_R                             // 6
	VM_IMULH_M                             // 7
	VM_ISMULH_R                            // 8
	VM_ISMULH_M                            // 9
	VM_IMUL_RCP                            // 10
	VM_INEG_R                              // 11
	VM_IXOR_R                              // 12
	VM_IXOR_M                              // 13
	VM_IROR_R                              // 14
	VM_IROL_R                              // 15
	VM_ISWAP_R                             // 16
	VM_FSWAP_R                             // 17
	VM_FADD_R                              // 18
	VM_FADD_M                              // 19
	VM_FSUB_R                              // 20
	VM_FSUB_M                              // 21
	VM_FSCAL_R                             // 22
	VM_FMUL_R                              // 23
	VM_FDIV_M                              // 24
	VM_FSQRT_R                             // 25
	VM_CBRANCH                             // 26
	VM_CFROUND                             // 27
	VM_ISTORE                              // 28
	VM_NOP                                 // 29
)
|
||||
|
||||
var Names = map[VM_Instruction_Type]string{
|
||||
|
||||
VM_IADD_RS: "VM_IADD_RS",
|
||||
VM_IADD_M: "VM_IADD_M",
|
||||
VM_ISUB_R: "VM_ISUB_R",
|
||||
VM_ISUB_M: "VM_ISUB_M",
|
||||
VM_IMUL_R: "VM_IMUL_R",
|
||||
VM_IMUL_M: "VM_IMUL_M",
|
||||
VM_IMULH_R: "VM_IMULH_R",
|
||||
VM_IMULH_M: "VM_IMULH_M",
|
||||
VM_ISMULH_R: "VM_ISMULH_R",
|
||||
VM_ISMULH_M: "VM_ISMULH_M",
|
||||
VM_IMUL_RCP: "VM_IMUL_RCP",
|
||||
VM_INEG_R: "VM_INEG_R",
|
||||
VM_IXOR_R: "VM_IXOR_R",
|
||||
VM_IXOR_M: "VM_IXOR_M",
|
||||
VM_IROR_R: "VM_IROR_R",
|
||||
VM_IROL_R: "VM_IROL_R",
|
||||
VM_ISWAP_R: "VM_ISWAP_R",
|
||||
VM_FSWAP_R: "VM_FSWAP_R",
|
||||
VM_FADD_R: "VM_FADD_R",
|
||||
VM_FADD_M: "VM_FADD_M",
|
||||
VM_FSUB_R: "VM_FSUB_R",
|
||||
VM_FSUB_M: "VM_FSUB_M",
|
||||
VM_FSCAL_R: "VM_FSCAL_R",
|
||||
VM_FMUL_R: "VM_FMUL_R",
|
||||
VM_FDIV_M: "VM_FDIV_M",
|
||||
VM_FSQRT_R: "VM_FSQRT_R",
|
||||
VM_CBRANCH: "VM_CBRANCH",
|
||||
VM_CFROUND: "VM_CFROUND",
|
||||
VM_ISTORE: "VM_ISTORE",
|
||||
VM_NOP: "VM_NOP",
|
||||
}
|
||||
|
||||
// this will interpret single vm instruction
|
||||
// reference https://github.com/tevador/RandomX/blob/master/doc/specs.md#52-integer-instructions
|
||||
func (vm *VM) Compile_TO_Bytecode() {
|
||||
|
||||
var registerUsage [REGISTERSCOUNT]int
|
||||
for i := range registerUsage {
|
||||
registerUsage[i] = -1
|
||||
}
|
||||
|
||||
for i := 0; i < RANDOMX_PROGRAM_SIZE; i++ {
|
||||
instr := VM_Instruction(vm.Prog[i*8:])
|
||||
ibc := &vm.ByteCode[i]
|
||||
|
||||
opcode := instr.Opcode()
|
||||
dst := instr.Dst() % REGISTERSCOUNT // bit shift optimization
|
||||
src := instr.Src() % REGISTERSCOUNT
|
||||
ibc.dst = dst
|
||||
ibc.src = src
|
||||
switch opcode {
|
||||
case 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15: // 16 frequency
|
||||
|
||||
// ibc.Opcode = VM_NOP; break; replace opcode by nop for testing
|
||||
// fmt.Printf("VM_IADD_RS %d\n", opcode)
|
||||
ibc.Opcode = VM_IADD_RS
|
||||
ibc.idst = &vm.reg.r[dst]
|
||||
if dst != RegisterNeedsDisplacement {
|
||||
ibc.isrc = &vm.reg.r[src]
|
||||
ibc.shift = uint16((instr.Mod() >> 2) % 4)
|
||||
ibc.imm = 0
|
||||
} else {
|
||||
ibc.isrc = &vm.reg.r[src]
|
||||
ibc.shift = uint16((instr.Mod() >> 2) % 4)
|
||||
ibc.imm = signExtend2sCompl(instr.IMM())
|
||||
}
|
||||
registerUsage[dst] = i
|
||||
|
||||
case 16, 17, 18, 19, 20, 21, 22: // 7
|
||||
//fmt.Printf("IADD_M opcode %d\n", opcode)
|
||||
ibc.Opcode = VM_IADD_M
|
||||
ibc.idst = &vm.reg.r[dst]
|
||||
ibc.imm = signExtend2sCompl(instr.IMM())
|
||||
if src != dst {
|
||||
ibc.isrc = &vm.reg.r[src]
|
||||
if (instr.Mod() % 4) != 0 {
|
||||
ibc.memMask = ScratchpadL1Mask
|
||||
} else {
|
||||
ibc.memMask = ScratchpadL2Mask
|
||||
}
|
||||
} else {
|
||||
ibc.isrc = &Zero
|
||||
ibc.memMask = ScratchpadL3Mask
|
||||
}
|
||||
registerUsage[dst] = i
|
||||
case 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38: // 16
|
||||
//fmt.Printf("ISUB_R opcode %d\n", opcode)
|
||||
ibc.Opcode = VM_ISUB_R
|
||||
ibc.idst = &vm.reg.r[dst]
|
||||
|
||||
if src != dst {
|
||||
ibc.isrc = &vm.reg.r[src]
|
||||
} else {
|
||||
ibc.imm = signExtend2sCompl(instr.IMM())
|
||||
ibc.isrc = &ibc.imm // we are pointing within bytecode
|
||||
|
||||
}
|
||||
registerUsage[dst] = i
|
||||
case 39, 40, 41, 42, 43, 44, 45: // 7
|
||||
//fmt.Printf("ISUB_M opcode %d\n", opcode)
|
||||
ibc.Opcode = VM_ISUB_M
|
||||
ibc.idst = &vm.reg.r[dst]
|
||||
ibc.imm = signExtend2sCompl(instr.IMM())
|
||||
if src != dst {
|
||||
ibc.isrc = &vm.reg.r[src]
|
||||
if (instr.Mod() % 4) != 0 {
|
||||
ibc.memMask = ScratchpadL1Mask
|
||||
} else {
|
||||
ibc.memMask = ScratchpadL2Mask
|
||||
}
|
||||
} else {
|
||||
ibc.isrc = &Zero
|
||||
ibc.memMask = ScratchpadL3Mask
|
||||
}
|
||||
registerUsage[dst] = i
|
||||
case 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61: // 16
|
||||
|
||||
//fmt.Printf("IMUL_R opcode %d\n", opcode)
|
||||
ibc.Opcode = VM_IMUL_R
|
||||
ibc.idst = &vm.reg.r[dst]
|
||||
|
||||
if src != dst {
|
||||
ibc.isrc = &vm.reg.r[src]
|
||||
} else {
|
||||
ibc.imm = signExtend2sCompl(instr.IMM())
|
||||
ibc.isrc = &ibc.imm // we are pointing within bytecode
|
||||
|
||||
}
|
||||
registerUsage[dst] = i
|
||||
case 62, 63, 64, 65: //4
|
||||
|
||||
//fmt.Printf("IMUL_M opcode %d\n", opcode)
|
||||
ibc.Opcode = VM_IMUL_M
|
||||
ibc.idst = &vm.reg.r[dst]
|
||||
ibc.imm = signExtend2sCompl(instr.IMM())
|
||||
if src != dst {
|
||||
ibc.isrc = &vm.reg.r[src]
|
||||
if (instr.Mod() % 4) != 0 {
|
||||
ibc.memMask = ScratchpadL1Mask
|
||||
} else {
|
||||
ibc.memMask = ScratchpadL2Mask
|
||||
}
|
||||
} else {
|
||||
ibc.isrc = &Zero
|
||||
ibc.memMask = ScratchpadL3Mask
|
||||
}
|
||||
registerUsage[dst] = i
|
||||
case 66, 67, 68, 69: //4
|
||||
|
||||
//fmt.Printf("IMULH_R opcode %d\n", opcode)
|
||||
ibc.Opcode = VM_IMULH_R
|
||||
ibc.idst = &vm.reg.r[dst]
|
||||
ibc.isrc = &vm.reg.r[src]
|
||||
registerUsage[dst] = i
|
||||
case 70: //1
|
||||
//fmt.Printf("IMULH_M opcode %d\n", opcode)
|
||||
ibc.Opcode = VM_IMULH_M
|
||||
ibc.idst = &vm.reg.r[dst]
|
||||
ibc.imm = signExtend2sCompl(instr.IMM())
|
||||
if src != dst {
|
||||
ibc.isrc = &vm.reg.r[src]
|
||||
if (instr.Mod() % 4) != 0 {
|
||||
ibc.memMask = ScratchpadL1Mask
|
||||
} else {
|
||||
ibc.memMask = ScratchpadL2Mask
|
||||
}
|
||||
} else {
|
||||
ibc.isrc = &Zero
|
||||
ibc.memMask = ScratchpadL3Mask
|
||||
}
|
||||
registerUsage[dst] = i
|
||||
case 71, 72, 73, 74: //4
|
||||
//fmt.Printf("ISMULH_R opcode %d\n", opcode)
|
||||
ibc.Opcode = VM_ISMULH_R
|
||||
ibc.idst = &vm.reg.r[dst]
|
||||
ibc.isrc = &vm.reg.r[src]
|
||||
registerUsage[dst] = i
|
||||
case 75: //1
|
||||
//fmt.Printf("ISMULH_M opcode %d\n", opcode)
|
||||
|
||||
ibc.Opcode = VM_ISMULH_M
|
||||
ibc.idst = &vm.reg.r[dst]
|
||||
ibc.imm = signExtend2sCompl(instr.IMM())
|
||||
if src != dst {
|
||||
ibc.isrc = &vm.reg.r[src]
|
||||
if (instr.Mod() % 4) != 0 {
|
||||
ibc.memMask = ScratchpadL1Mask
|
||||
} else {
|
||||
ibc.memMask = ScratchpadL2Mask
|
||||
}
|
||||
} else {
|
||||
ibc.isrc = &Zero
|
||||
ibc.memMask = ScratchpadL3Mask
|
||||
}
|
||||
registerUsage[dst] = i
|
||||
case 76, 77, 78, 79, 80, 81, 82, 83: // 8
|
||||
|
||||
//fmt.Printf("IMUL_RCP opcode %d\n", opcode)
|
||||
divisor := uint64(instr.IMM())
|
||||
if !isZeroOrPowerOf2(divisor) {
|
||||
ibc.Opcode = VM_IMUL_R
|
||||
ibc.idst = &vm.reg.r[dst]
|
||||
ibc.imm = randomx_reciprocal(divisor)
|
||||
ibc.isrc = &ibc.imm
|
||||
registerUsage[dst] = i
|
||||
} else {
|
||||
ibc.Opcode = VM_NOP
|
||||
}
|
||||
|
||||
case 84, 85: //2
|
||||
//fmt.Printf("INEG_R opcode %d\n", opcode)
|
||||
|
||||
ibc.Opcode = VM_INEG_R
|
||||
ibc.idst = &vm.reg.r[dst]
|
||||
registerUsage[dst] = i
|
||||
case 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100: //15
|
||||
|
||||
//fmt.Printf("IXOR_R opcode %d\n", opcode)
|
||||
ibc.Opcode = VM_IXOR_R
|
||||
ibc.idst = &vm.reg.r[dst]
|
||||
|
||||
if src != dst {
|
||||
ibc.isrc = &vm.reg.r[src]
|
||||
} else {
|
||||
ibc.imm = signExtend2sCompl(instr.IMM())
|
||||
ibc.isrc = &ibc.imm // we are pointing within bytecode
|
||||
|
||||
}
|
||||
registerUsage[dst] = i
|
||||
case 101, 102, 103, 104, 105: //5
|
||||
//fmt.Printf("IXOR_M opcode %d\n", opcode)
|
||||
ibc.Opcode = VM_IXOR_M
|
||||
ibc.idst = &vm.reg.r[dst]
|
||||
ibc.imm = signExtend2sCompl(instr.IMM())
|
||||
if src != dst {
|
||||
ibc.isrc = &vm.reg.r[src]
|
||||
if (instr.Mod() % 4) != 0 {
|
||||
ibc.memMask = ScratchpadL1Mask
|
||||
} else {
|
||||
ibc.memMask = ScratchpadL2Mask
|
||||
}
|
||||
} else {
|
||||
ibc.isrc = &Zero
|
||||
ibc.memMask = ScratchpadL3Mask
|
||||
}
|
||||
registerUsage[dst] = i
|
||||
case 106, 107, 108, 109, 110, 111, 112, 113: //8
|
||||
|
||||
//fmt.Printf("IROR_R opcode %d\n", opcode)
|
||||
ibc.Opcode = VM_IROR_R
|
||||
ibc.idst = &vm.reg.r[dst]
|
||||
|
||||
if src != dst {
|
||||
ibc.isrc = &vm.reg.r[src]
|
||||
} else {
|
||||
ibc.imm = signExtend2sCompl(instr.IMM())
|
||||
ibc.isrc = &ibc.imm // we are pointing within bytecode
|
||||
|
||||
}
|
||||
registerUsage[dst] = i
|
||||
case 114, 115: // 2 IROL_R
|
||||
|
||||
//fmt.Printf("IROL_R opcode %d\n", opcode)
|
||||
ibc.Opcode = VM_IROL_R
|
||||
ibc.idst = &vm.reg.r[dst]
|
||||
|
||||
if src != dst {
|
||||
ibc.isrc = &vm.reg.r[src]
|
||||
} else {
|
||||
ibc.imm = signExtend2sCompl(instr.IMM())
|
||||
ibc.isrc = &ibc.imm // we are pointing within bytecode
|
||||
|
||||
}
|
||||
registerUsage[dst] = i
|
||||
|
||||
case 116, 117, 118, 119: //4
|
||||
|
||||
//fmt.Printf("ISWAP_R opcode %d\n", opcode)
|
||||
if src != dst {
|
||||
ibc.Opcode = VM_ISWAP_R
|
||||
ibc.idst = &vm.reg.r[dst]
|
||||
ibc.isrc = &vm.reg.r[src]
|
||||
registerUsage[dst] = i
|
||||
registerUsage[src] = i
|
||||
} else {
|
||||
ibc.Opcode = VM_NOP
|
||||
|
||||
}
|
||||
|
||||
// below are floating point instructions
|
||||
case 120, 121, 122, 123: // 4
|
||||
|
||||
//fmt.Printf("FSWAP_R opcode %d\n", opcode)
|
||||
ibc.Opcode = VM_FSWAP_R
|
||||
if dst < REGISTERCOUNTFLT {
|
||||
ibc.fdst = &vm.reg.f[dst]
|
||||
} else {
|
||||
ibc.fdst = &vm.reg.e[dst-REGISTERCOUNTFLT]
|
||||
}
|
||||
case 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139: //16
|
||||
|
||||
//fmt.Printf("FADD_R opcode %d\n", opcode)
|
||||
dst := instr.Dst() % REGISTERCOUNTFLT // bit shift optimization
|
||||
src := instr.Src() % REGISTERCOUNTFLT
|
||||
ibc.Opcode = VM_FADD_R
|
||||
ibc.fdst = &vm.reg.f[dst]
|
||||
ibc.fsrc = &vm.reg.a[src]
|
||||
|
||||
case 140, 141, 142, 143, 144: //5
|
||||
|
||||
//fmt.Printf("FADD_M opcode %d\n", opcode)
|
||||
dst := instr.Dst() % REGISTERCOUNTFLT // bit shift optimization
|
||||
ibc.Opcode = VM_FADD_M
|
||||
ibc.fdst = &vm.reg.f[dst]
|
||||
ibc.isrc = &vm.reg.r[src]
|
||||
if (instr.Mod() % 4) != 0 {
|
||||
ibc.memMask = ScratchpadL1Mask
|
||||
} else {
|
||||
ibc.memMask = ScratchpadL2Mask
|
||||
}
|
||||
ibc.imm = signExtend2sCompl(instr.IMM())
|
||||
|
||||
case 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160: //16
|
||||
|
||||
//fmt.Printf("FSUB_R opcode %d\n", opcode)
|
||||
dst := instr.Dst() % REGISTERCOUNTFLT // bit shift optimization
|
||||
src := instr.Src() % REGISTERCOUNTFLT
|
||||
ibc.Opcode = VM_FSUB_R
|
||||
ibc.fdst = &vm.reg.f[dst]
|
||||
ibc.fsrc = &vm.reg.a[src]
|
||||
case 161, 162, 163, 164, 165: //5
|
||||
|
||||
//fmt.Printf("FSUB_M opcode %d\n", opcode)
|
||||
dst := instr.Dst() % REGISTERCOUNTFLT // bit shift optimization
|
||||
ibc.Opcode = VM_FSUB_M
|
||||
ibc.fdst = &vm.reg.f[dst]
|
||||
ibc.isrc = &vm.reg.r[src]
|
||||
if (instr.Mod() % 4) != 0 {
|
||||
ibc.memMask = ScratchpadL1Mask
|
||||
} else {
|
||||
ibc.memMask = ScratchpadL2Mask
|
||||
}
|
||||
ibc.imm = signExtend2sCompl(instr.IMM())
|
||||
|
||||
case 166, 167, 168, 169, 170, 171: //6
|
||||
|
||||
//fmt.Printf("FSCAL_R opcode %d\n", opcode)
|
||||
dst := instr.Dst() % REGISTERCOUNTFLT // bit shift optimization
|
||||
ibc.Opcode = VM_FSCAL_R
|
||||
ibc.fdst = &vm.reg.f[dst]
|
||||
case 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203: //32
|
||||
|
||||
//fmt.Printf("FMUL_R opcode %d\n", opcode)
|
||||
dst := instr.Dst() % REGISTERCOUNTFLT // bit shift optimization
|
||||
src := instr.Src() % REGISTERCOUNTFLT
|
||||
ibc.Opcode = VM_FMUL_R
|
||||
ibc.fdst = &vm.reg.e[dst]
|
||||
ibc.fsrc = &vm.reg.a[src]
|
||||
case 204, 205, 206, 207: //4
|
||||
|
||||
//fmt.Printf("FDIV_M opcode %d\n", opcode)
|
||||
dst := instr.Dst() % REGISTERCOUNTFLT // bit shift optimization
|
||||
ibc.Opcode = VM_FDIV_M
|
||||
ibc.fdst = &vm.reg.e[dst]
|
||||
ibc.isrc = &vm.reg.r[src]
|
||||
if (instr.Mod() % 4) != 0 {
|
||||
ibc.memMask = ScratchpadL1Mask
|
||||
} else {
|
||||
ibc.memMask = ScratchpadL2Mask
|
||||
}
|
||||
ibc.imm = signExtend2sCompl(instr.IMM())
|
||||
case 208, 209, 210, 211, 212, 213: //6
|
||||
//fmt.Printf("FSQRT_R opcode %d\n", opcode)
|
||||
dst := instr.Dst() % REGISTERCOUNTFLT // bit shift optimization
|
||||
ibc.Opcode = VM_FSQRT_R
|
||||
ibc.fdst = &vm.reg.e[dst]
|
||||
|
||||
case 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238: //25 // CBRANCH and CFROUND are interchanged
|
||||
|
||||
//fmt.Printf("CBRANCH opcode %d\n", opcode)
|
||||
ibc.Opcode = VM_CBRANCH
|
||||
reg := instr.Dst() % REGISTERSCOUNT
|
||||
ibc.isrc = &vm.reg.r[reg]
|
||||
ibc.target = int16(registerUsage[reg])
|
||||
shift := uint64(instr.Mod()>>4) + CONDITIONOFFSET
|
||||
//conditionmask := CONDITIONMASK << shift
|
||||
ibc.imm = signExtend2sCompl(instr.IMM()) | (uint64(1) << shift)
|
||||
if CONDITIONOFFSET > 0 || shift > 0 {
|
||||
ibc.imm &= (^(uint64(1) << (shift - 1)))
|
||||
}
|
||||
ibc.memMask = CONDITIONMASK << shift
|
||||
|
||||
for j := 0; j < REGISTERSCOUNT; j++ {
|
||||
registerUsage[j] = i
|
||||
}
|
||||
|
||||
case 239: //1
|
||||
// ibc.Opcode = VM_NOP; break; // not supported
|
||||
//fmt.Printf("CFROUND opcode %d\n", opcode)
|
||||
ibc.Opcode = VM_CFROUND
|
||||
ibc.isrc = &vm.reg.r[src]
|
||||
ibc.imm = uint64(instr.IMM() & 63)
|
||||
|
||||
case 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255: //16
|
||||
// ibc.Opcode = VM_NOP; break;
|
||||
//fmt.Printf("ISTORE opcode %d\n", opcode)
|
||||
ibc.Opcode = VM_ISTORE
|
||||
ibc.idst = &vm.reg.r[dst]
|
||||
ibc.isrc = &vm.reg.r[src]
|
||||
ibc.imm = signExtend2sCompl(instr.IMM())
|
||||
if (instr.Mod() >> 4) < STOREL3CONDITION {
|
||||
if (instr.Mod() % 4) != 0 {
|
||||
ibc.memMask = ScratchpadL1Mask
|
||||
} else {
|
||||
ibc.memMask = ScratchpadL2Mask
|
||||
}
|
||||
|
||||
} else {
|
||||
ibc.memMask = ScratchpadL3Mask
|
||||
}
|
||||
|
||||
default:
|
||||
panic("unreachable")
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
type InstructionByteCode struct {
|
||||
dst, src byte
|
||||
idst, isrc *uint64
|
||||
fdst, fsrc *[2]float64
|
||||
imm uint64
|
||||
simm int64
|
||||
Opcode VM_Instruction_Type
|
||||
target int16
|
||||
shift uint16
|
||||
memMask uint32
|
||||
|
||||
RoundingMode big.RoundingMode
|
||||
/*
|
||||
union {
|
||||
int_reg_t* idst;
|
||||
rx_vec_f128* fdst;
|
||||
};
|
||||
union {
|
||||
int_reg_t* isrc;
|
||||
rx_vec_f128* fsrc;
|
||||
};
|
||||
union {
|
||||
uint64_t imm;
|
||||
int64_t simm;
|
||||
};
|
||||
InstructionType type;
|
||||
union {
|
||||
int16_t target;
|
||||
uint16_t shift;
|
||||
};
|
||||
uint32_t memMask;
|
||||
*/
|
||||
|
||||
}
|
||||
|
||||
func (ibc *InstructionByteCode) getScratchpadAddress() uint64 {
|
||||
return (*ibc.isrc + ibc.imm) & uint64(ibc.memMask)
|
||||
}
|
||||
|
||||
func (vm *VM) Load64(addr uint64) uint64 {
|
||||
//return uint64(binary.BigEndian.Uint32(vm.ScratchPad[addr:]))| (uint64(binary.BigEndian.Uint32(vm.ScratchPad[addr+4:])) <<32)
|
||||
return bits.RotateLeft64(binary.BigEndian.Uint64(vm.ScratchPad[addr:]), 32)
|
||||
}
|
||||
func (vm *VM) Load32(addr uint64) uint32 {
|
||||
return binary.BigEndian.Uint32(vm.ScratchPad[addr:])
|
||||
}
|
||||
|
||||
// unsigned32ToSigned2sCompl reinterprets the bits of x as a two's complement
// signed 32-bit integer (e.g. 0xFFFFFFFF becomes -1).
//
// Go defines integer conversions to wrap this way, so a plain conversion is
// sufficient. The C-style "-1 == ^0" representation check in the original was
// a constant-true expression that left its fallback branch unreachable.
func unsigned32ToSigned2sCompl(x uint32) int32 {
	return int32(x)
}
|
||||
|
||||
// unsigned64ToSigned2sCompl reinterprets the bits of x as a two's complement
// signed 64-bit integer (e.g. 0xFFFFFFFFFFFFFFFF becomes -1).
//
// Go defines integer conversions to wrap this way, so a plain conversion is
// sufficient. The C-style "-1 == ^0" representation check in the original was
// a constant-true expression that left its fallback branch unreachable.
func unsigned64ToSigned2sCompl(x uint64) int64 {
	return int64(x)
}
|
||||
|
||||
func (vm *VM) InterpretByteCode() {
|
||||
|
||||
for pc := 0; pc < RANDOMX_PROGRAM_SIZE; pc++ {
|
||||
|
||||
ibc := &vm.ByteCode[pc]
|
||||
//fmt.Printf("PCLOOP %d opcode %d %s dst %d src %d\n",pc,ibc.Opcode, Names[ibc.Opcode], ibc.dst, ibc.src)
|
||||
|
||||
switch ibc.Opcode {
|
||||
case VM_IADD_RS:
|
||||
|
||||
*ibc.idst += (*ibc.isrc << ibc.shift) + ibc.imm
|
||||
|
||||
//panic("VM_IADD_RS")
|
||||
case VM_IADD_M:
|
||||
*ibc.idst += vm.Load64(ibc.getScratchpadAddress())
|
||||
|
||||
//panic("VM_IADD_M")
|
||||
case VM_ISUB_R:
|
||||
*ibc.idst -= *ibc.isrc
|
||||
|
||||
//panic("VM_ISUB_R")
|
||||
|
||||
case VM_ISUB_M:
|
||||
|
||||
*ibc.idst -= vm.Load64(ibc.getScratchpadAddress())
|
||||
|
||||
//panic("VM_ISUB_M")
|
||||
case VM_IMUL_R: // also handles imul_rcp
|
||||
|
||||
*ibc.idst *= *ibc.isrc
|
||||
|
||||
//panic("VM_IMUL_R")
|
||||
case VM_IMUL_M:
|
||||
*ibc.idst *= vm.Load64(ibc.getScratchpadAddress())
|
||||
|
||||
//panic("VM_IMUL_M")
|
||||
case VM_IMULH_R:
|
||||
|
||||
*ibc.idst, _ = bits.Mul64(*ibc.idst, *ibc.isrc)
|
||||
|
||||
// panic("VM_IMULH_R")
|
||||
case VM_IMULH_M:
|
||||
*ibc.idst, _ = bits.Mul64(*ibc.idst, vm.Load64(ibc.getScratchpadAddress()))
|
||||
// fmt.Printf("%x \n",*ibc.idst )
|
||||
// panic("VM_IMULH_M")
|
||||
case VM_ISMULH_R:
|
||||
*ibc.idst = uint64(smulh(unsigned64ToSigned2sCompl(*ibc.idst), unsigned64ToSigned2sCompl(*ibc.isrc)))
|
||||
// fmt.Printf("dst %x\n", *ibc.idst)
|
||||
// panic("VM_ISMULH_R")
|
||||
case VM_ISMULH_M:
|
||||
*ibc.idst = uint64(smulh(unsigned64ToSigned2sCompl(*ibc.idst), unsigned64ToSigned2sCompl(vm.Load64(ibc.getScratchpadAddress()))))
|
||||
//fmt.Printf("%x \n",*ibc.idst )
|
||||
// panic("VM_ISMULH_M")
|
||||
case VM_INEG_R:
|
||||
*ibc.idst = (^(*ibc.idst)) + 1 // 2's complement negative
|
||||
|
||||
//panic("VM_INEG_R")
|
||||
case VM_IXOR_R:
|
||||
*ibc.idst ^= *ibc.isrc
|
||||
|
||||
case VM_IXOR_M:
|
||||
*ibc.idst ^= vm.Load64(ibc.getScratchpadAddress())
|
||||
|
||||
//panic("VM_IXOR_M")
|
||||
case VM_IROR_R:
|
||||
*ibc.idst = bits.RotateLeft64(*ibc.idst, 0-int(*ibc.isrc&63))
|
||||
|
||||
//panic("VM_IROR_R")
|
||||
|
||||
case VM_IROL_R:
|
||||
*ibc.idst = bits.RotateLeft64(*ibc.idst, int(*ibc.isrc&63))
|
||||
|
||||
case VM_ISWAP_R:
|
||||
*ibc.idst, *ibc.isrc = *ibc.isrc, *ibc.idst
|
||||
//fmt.Printf("%x %x\n",*ibc.idst, *ibc.isrc )
|
||||
//panic("VM_ISWAP_R")
|
||||
case VM_FSWAP_R:
|
||||
|
||||
ibc.fdst[HIGH], ibc.fdst[LOW] = ibc.fdst[LOW], ibc.fdst[HIGH]
|
||||
// fmt.Printf("%+v \n",ibc.fdst )
|
||||
// panic("VM_FSWAP_R")
|
||||
case VM_FADD_R:
|
||||
//ibc.fdst[LOW] += ibc.fsrc[LOW]
|
||||
//ibc.fdst[HIGH] += ibc.fsrc[HIGH]
|
||||
|
||||
vm.fresult.SetMode(vm.RoundingMode)
|
||||
vm.fdst.SetPrec(0)
|
||||
vm.fdst.SetFloat64(ibc.fdst[LOW])
|
||||
vm.fsrc.SetPrec(0)
|
||||
vm.fsrc.SetFloat64(ibc.fsrc[LOW])
|
||||
vm.fresult.Add(vm.fdst, vm.fsrc)
|
||||
ibc.fdst[LOW], _ = vm.fresult.Float64()
|
||||
|
||||
vm.fresult.SetMode(vm.RoundingMode)
|
||||
vm.fdst.SetPrec(0)
|
||||
vm.fdst.SetFloat64(ibc.fdst[HIGH])
|
||||
vm.fsrc.SetPrec(0)
|
||||
vm.fsrc.SetFloat64(ibc.fsrc[HIGH])
|
||||
vm.fresult.Add(vm.fdst, vm.fsrc)
|
||||
ibc.fdst[HIGH], _ = vm.fresult.Float64()
|
||||
|
||||
//panic("VM_FADD_R")
|
||||
case VM_FADD_M:
|
||||
//ibc.fdst[LOW] += float64(unsigned32ToSigned2sCompl(vm.Load32(ibc.getScratchpadAddress()+0)))
|
||||
//ibc.fdst[HIGH] += float64(unsigned32ToSigned2sCompl(vm.Load32(ibc.getScratchpadAddress()+4)))
|
||||
|
||||
vm.fresult.SetMode(vm.RoundingMode)
|
||||
vm.fdst.SetPrec(0)
|
||||
vm.fdst.SetFloat64(ibc.fdst[LOW])
|
||||
vm.fsrc.SetPrec(0)
|
||||
vm.fsrc.SetFloat64(float64(unsigned32ToSigned2sCompl(vm.Load32(ibc.getScratchpadAddress() + 0))))
|
||||
vm.fresult.Add(vm.fdst, vm.fsrc)
|
||||
ibc.fdst[LOW], _ = vm.fresult.Float64()
|
||||
|
||||
vm.fresult.SetMode(vm.RoundingMode)
|
||||
vm.fdst.SetPrec(0)
|
||||
vm.fdst.SetFloat64(ibc.fdst[HIGH])
|
||||
vm.fsrc.SetPrec(0)
|
||||
vm.fsrc.SetFloat64(float64(unsigned32ToSigned2sCompl(vm.Load32(ibc.getScratchpadAddress() + 4))))
|
||||
vm.fresult.Add(vm.fdst, vm.fsrc)
|
||||
ibc.fdst[HIGH], _ = vm.fresult.Float64()
|
||||
|
||||
//panic("VM_FADD_M")
|
||||
case VM_FSUB_R:
|
||||
//fmt.Printf("Rounding mode %d\n", vm.RoundingMode)
|
||||
//ibc.fdst[LOW] -= ibc.fsrc[LOW]
|
||||
//ibc.fdst[HIGH] -= ibc.fsrc[HIGH]
|
||||
|
||||
vm.fresult.SetMode(vm.RoundingMode)
|
||||
vm.fdst.SetPrec(0)
|
||||
vm.fdst.SetFloat64(ibc.fdst[LOW])
|
||||
vm.fsrc.SetPrec(0)
|
||||
vm.fsrc.SetFloat64(ibc.fsrc[LOW])
|
||||
vm.fresult.Sub(vm.fdst, vm.fsrc)
|
||||
ibc.fdst[LOW], _ = vm.fresult.Float64()
|
||||
|
||||
vm.fresult.SetMode(vm.RoundingMode)
|
||||
vm.fdst.SetPrec(0)
|
||||
vm.fdst.SetFloat64(ibc.fdst[HIGH])
|
||||
vm.fsrc.SetPrec(0)
|
||||
vm.fsrc.SetFloat64(ibc.fsrc[HIGH])
|
||||
vm.fresult.Sub(vm.fdst, vm.fsrc)
|
||||
ibc.fdst[HIGH], _ = vm.fresult.Float64()
|
||||
|
||||
//fmt.Printf("fdst float %+v\n", ibc.fdst )
|
||||
//panic("VM_FSUB_R")
|
||||
case VM_FSUB_M:
|
||||
//ibc.fdst[LOW] -= float64(unsigned32ToSigned2sCompl(vm.Load32(ibc.getScratchpadAddress()+0)))
|
||||
//ibc.fdst[HIGH] -= float64(unsigned32ToSigned2sCompl(vm.Load32(ibc.getScratchpadAddress()+4)))
|
||||
|
||||
vm.fresult.SetMode(vm.RoundingMode)
|
||||
vm.fdst.SetPrec(0)
|
||||
vm.fdst.SetFloat64(ibc.fdst[LOW])
|
||||
vm.fsrc.SetPrec(0)
|
||||
vm.fsrc.SetFloat64(float64(unsigned32ToSigned2sCompl(vm.Load32(ibc.getScratchpadAddress() + 0))))
|
||||
vm.fresult.Sub(vm.fdst, vm.fsrc)
|
||||
ibc.fdst[LOW], _ = vm.fresult.Float64()
|
||||
|
||||
vm.fresult.SetMode(vm.RoundingMode)
|
||||
vm.fdst.SetPrec(0)
|
||||
vm.fdst.SetFloat64(ibc.fdst[HIGH])
|
||||
vm.fsrc.SetPrec(0)
|
||||
vm.fsrc.SetFloat64(float64(unsigned32ToSigned2sCompl(vm.Load32(ibc.getScratchpadAddress() + 4))))
|
||||
vm.fresult.Sub(vm.fdst, vm.fsrc)
|
||||
ibc.fdst[HIGH], _ = vm.fresult.Float64()
|
||||
|
||||
//panic("VM_FSUB_M")
|
||||
case VM_FSCAL_R: // no dependent on rounding modes
|
||||
//mask := math.Float64frombits(0x80F0000000000000)
|
||||
ibc.fdst[LOW] = math.Float64frombits(math.Float64bits(ibc.fdst[LOW]) ^ 0x80F0000000000000)
|
||||
ibc.fdst[HIGH] = math.Float64frombits(math.Float64bits(ibc.fdst[HIGH]) ^ 0x80F0000000000000)
|
||||
|
||||
//fmt.Printf("fdst float %+v\n", ibc.fdst )
|
||||
//panic("VM_FSCA_M")
|
||||
case VM_FMUL_R:
|
||||
|
||||
// ibc.fdst[LOW] *= ibc.fsrc[LOW]
|
||||
// ibc.fdst[HIGH] *= ibc.fsrc[HIGH]
|
||||
|
||||
vm.fresult.SetMode(vm.RoundingMode)
|
||||
vm.fdst.SetPrec(0)
|
||||
vm.fdst.SetFloat64(ibc.fdst[LOW])
|
||||
vm.fsrc.SetPrec(0)
|
||||
vm.fsrc.SetFloat64(ibc.fsrc[LOW])
|
||||
vm.fresult.Mul(vm.fdst, vm.fsrc)
|
||||
ibc.fdst[LOW], _ = vm.fresult.Float64()
|
||||
|
||||
vm.fresult.SetMode(vm.RoundingMode)
|
||||
vm.fdst.SetPrec(0)
|
||||
vm.fdst.SetFloat64(ibc.fdst[HIGH])
|
||||
vm.fsrc.SetPrec(0)
|
||||
vm.fsrc.SetFloat64(ibc.fsrc[HIGH])
|
||||
vm.fresult.Mul(vm.fdst, vm.fsrc)
|
||||
ibc.fdst[HIGH], _ = vm.fresult.Float64()
|
||||
|
||||
//panic("VM_FMUK_M")
|
||||
case VM_FDIV_M:
|
||||
lo := float64(unsigned32ToSigned2sCompl(vm.Load32(ibc.getScratchpadAddress() + 0)))
|
||||
high := float64(unsigned32ToSigned2sCompl(vm.Load32(ibc.getScratchpadAddress() + 4)))
|
||||
|
||||
lo = math.Float64frombits((math.Float64bits(lo) & dynamicMantissaMask) | vm.config.eMask[LOW])
|
||||
high = math.Float64frombits((math.Float64bits(high) & dynamicMantissaMask) | vm.config.eMask[HIGH])
|
||||
|
||||
//ibc.fdst[LOW] /= lo
|
||||
//ibc.fdst[HIGH] /= high
|
||||
|
||||
vm.fresult.SetMode(vm.RoundingMode)
|
||||
vm.fdst.SetPrec(0)
|
||||
vm.fdst.SetFloat64(ibc.fdst[LOW])
|
||||
vm.fsrc.SetPrec(0)
|
||||
vm.fsrc.SetFloat64(lo)
|
||||
vm.fresult.Quo(vm.fdst, vm.fsrc)
|
||||
ibc.fdst[LOW], _ = vm.fresult.Float64()
|
||||
|
||||
vm.fresult.SetMode(vm.RoundingMode)
|
||||
vm.fdst.SetPrec(0)
|
||||
vm.fdst.SetFloat64(ibc.fdst[HIGH])
|
||||
vm.fsrc.SetPrec(0)
|
||||
vm.fsrc.SetFloat64(high)
|
||||
vm.fresult.Quo(vm.fdst, vm.fsrc)
|
||||
ibc.fdst[HIGH], _ = vm.fresult.Float64()
|
||||
|
||||
//panic("VM_FDIV_M")
|
||||
case VM_FSQRT_R:
|
||||
// ibc.fdst[LOW] = math.Sqrt(ibc.fdst[LOW])
|
||||
// ibc.fdst[HIGH] = math.Sqrt(ibc.fdst[HIGH])
|
||||
|
||||
vm.fresult.SetMode(vm.RoundingMode)
|
||||
vm.fdst.SetPrec(0)
|
||||
vm.fdst.SetFloat64(ibc.fdst[LOW])
|
||||
vm.fdst.SetMode(vm.RoundingMode)
|
||||
vm.fresult.Sqrt(vm.fdst)
|
||||
ibc.fdst[LOW], _ = vm.fresult.Float64()
|
||||
|
||||
vm.fresult.SetMode(vm.RoundingMode)
|
||||
vm.fdst.SetPrec(0)
|
||||
vm.fdst.SetFloat64(ibc.fdst[HIGH])
|
||||
vm.fdst.SetMode(vm.RoundingMode)
|
||||
vm.fresult.Sqrt(vm.fdst)
|
||||
ibc.fdst[HIGH], _ = vm.fresult.Float64()
|
||||
|
||||
// panic("VM_FSQRT")
|
||||
case VM_CBRANCH:
|
||||
//fmt.Printf("pc %d src %x imm %x\n",pc ,*ibc.isrc, ibc.imm)
|
||||
*ibc.isrc += ibc.imm
|
||||
//fmt.Printf("pc %d\n",pc)
|
||||
if (*ibc.isrc & uint64(ibc.memMask)) == 0 {
|
||||
pc = int(ibc.target)
|
||||
|
||||
}
|
||||
|
||||
// fmt.Printf("pc %d\n",pc)
|
||||
//panic("VM_CBRANCH")
|
||||
case VM_CFROUND:
|
||||
|
||||
tmp := (bits.RotateLeft64(*ibc.isrc, 0-int(ibc.imm))) % 4 // rotate right
|
||||
switch tmp {
|
||||
case 0:
|
||||
vm.RoundingMode = big.ToNearestEven // RoundToNearest
|
||||
case 1:
|
||||
vm.RoundingMode = big.ToNegativeInf // RoundDown
|
||||
case 2:
|
||||
vm.RoundingMode = big.ToPositiveInf // RoundUp
|
||||
case 3:
|
||||
vm.RoundingMode = big.ToZero // RoundToZero
|
||||
|
||||
}
|
||||
|
||||
//panic("round not implemented")
|
||||
//panic("VM_CFROUND")
|
||||
case VM_ISTORE:
|
||||
binary.BigEndian.PutUint64(vm.ScratchPad[(*ibc.idst+ibc.imm)&uint64(ibc.memMask):], bits.RotateLeft64(*ibc.isrc, 32))
|
||||
|
||||
//panic("VM_ISTOREM")
|
||||
|
||||
case VM_NOP: // we do nothing
|
||||
|
||||
default:
|
||||
panic("instruction not implemented")
|
||||
|
||||
}
|
||||
/*fmt.Printf("REGS ")
|
||||
for j := 0; j <7;j++ {
|
||||
fmt.Printf("%16x, " , vm.reg.r[j])
|
||||
}
|
||||
fmt.Printf("\n")
|
||||
*/
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
// umm888_ is never read; it only references package fmt. NOTE(review):
// presumably this keeps the fmt import in use so debug prints can be enabled
// without editing the import list — confirm before removing.
var umm888_ = fmt.Sprintf("")
|
Loading…
Reference in a new issue