diff --git a/extra_test.go b/extra_test.go index 7dcb188..e968a28 100644 --- a/extra_test.go +++ b/extra_test.go @@ -74,7 +74,9 @@ func TestMultByCofactor(t *testing.T) { checkOnCurve(t, p8) // 8 * p == (8 * s) * B - s.Multiply(s, &Scalar{[32]byte{8}}) + reprEight := [32]byte{8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} + scEight, _ := (&Scalar{}).SetCanonicalBytes(reprEight[:]) + s.Multiply(s, scEight) pp := (&Point{}).ScalarBaseMult(s) if p8.Equal(pp) != 1 { return false @@ -104,13 +106,23 @@ func TestScalarInvert(t *testing.T) { xInv.Invert((*Scalar)(&x)) var check Scalar check.Multiply((*Scalar)(&x), &xInv) - return check == scOne && isReduced(&xInv) + + return check.Equal(&scOne) == 1 && isReduced(xInv.Bytes()) } if err := quick.Check(invertWorks, quickCheckConfig32); err != nil { t.Error(err) } + randomScalar := *dalekScalar + randomInverse := NewScalar().Invert(&randomScalar) + var check Scalar + check.Multiply(&randomScalar, randomInverse) + + if check.Equal(&scOne) == 0 || !isReduced(randomInverse.Bytes()) { + t.Error("inversion did not work") + } + zero := NewScalar() if xx := NewScalar().Invert(zero); xx.Equal(zero) != 1 { t.Errorf("inverting zero did not return zero") @@ -162,7 +174,7 @@ func BenchmarkMultiScalarMultSize8(t *testing.B) { x := dalekScalar for i := 0; i < t.N; i++ { - p.MultiScalarMult([]*Scalar{&x, &x, &x, &x, &x, &x, &x, &x}, + p.MultiScalarMult([]*Scalar{x, x, x, x, x, x, x, x}, []*Point{B, B, B, B, B, B, B, B}) } } diff --git a/fiat_scalar.go b/fiat_scalar.go new file mode 100644 index 0000000..4146d5e --- /dev/null +++ b/fiat_scalar.go @@ -0,0 +1,1777 @@ +/* Autogenerated: ./src/ExtractionOCaml/word_by_word_montgomery --lang Go sc255 64 '2^252 + 27742317777372353535851937790883648493' */ +/* curve description: sc255 */ +/* machine_wordsize = 64 (from "64") */ +/* requested operations: (all) */ +/* m = 0x1000000000000000000000000000000014def9dea2f79cd65812631a5cf5d3ed (from "2^252 + 
27742317777372353535851937790883648493") */ +/* */ +/* NOTE: In addition to the bounds specified above each function, all */ +/* functions synthesized for this Montgomery arithmetic require the */ +/* input to be strictly less than the prime modulus (m), and also */ +/* require the input to be in the unique saturated representation. */ +/* All functions also ensure that these two properties are true of */ +/* return values. */ +/* */ +/* Computed values: */ +/* eval z = z[0] + (z[1] << 64) + (z[2] << 128) + (z[3] << 192) */ +/* bytes_eval z = z[0] + (z[1] << 8) + (z[2] << 16) + (z[3] << 24) + (z[4] << 32) + (z[5] << 40) + (z[6] << 48) + (z[7] << 56) + (z[8] << 64) + (z[9] << 72) + (z[10] << 80) + (z[11] << 88) + (z[12] << 96) + (z[13] << 104) + (z[14] << 112) + (z[15] << 120) + (z[16] << 128) + (z[17] << 136) + (z[18] << 144) + (z[19] << 152) + (z[20] << 160) + (z[21] << 168) + (z[22] << 176) + (z[23] << 184) + (z[24] << 192) + (z[25] << 200) + (z[26] << 208) + (z[27] << 216) + (z[28] << 224) + (z[29] << 232) + (z[30] << 240) + (z[31] << 248) */ + +package edwards25519 + +import "math/bits" + +type fiat_sc255_uint1 uint8 +type fiat_sc255_int1 int8 + +/* The function fiat_sc255_addcarryx_u64 is a thin wrapper around bits.Add64 that uses fiat_sc255_uint1 rather than uint64 */ +func fiat_sc255_addcarryx_u64(x uint64, y uint64, carry fiat_sc255_uint1) (uint64, fiat_sc255_uint1) { + var sum uint64 + var carryOut uint64 + sum, carryOut = bits.Add64(x, y, uint64(carry)) + return sum, fiat_sc255_uint1(carryOut) +} + +/* The function fiat_sc255_subborrowx_u64 is a thin wrapper around bits.Sub64 that uses fiat_sc255_uint1 rather than uint64 */ +func fiat_sc255_subborrowx_u64(x uint64, y uint64, carry fiat_sc255_uint1) (uint64, fiat_sc255_uint1) { + var sum uint64 + var carryOut uint64 + sum, carryOut = bits.Sub64(x, y, uint64(carry)) + return sum, fiat_sc255_uint1(carryOut) +} + +/* + * The function fiat_sc255_cmovznz_u64 is a single-word conditional move. 
+ * Postconditions: + * out1 = (if arg1 = 0 then arg2 else arg3) + * + * Input Bounds: + * arg1: [0x0 ~> 0x1] + * arg2: [0x0 ~> 0xffffffffffffffff] + * arg3: [0x0 ~> 0xffffffffffffffff] + * Output Bounds: + * out1: [0x0 ~> 0xffffffffffffffff] + */ +/*inline*/ +func fiat_sc255_cmovznz_u64(out1 *uint64, arg1 fiat_sc255_uint1, arg2 uint64, arg3 uint64) { + // var x1 fiat_sc255_uint1 = (! /* TODO: FIX ME */ (! /* TODO: FIX ME */ arg1)) + // var x2 uint64 = (uint64((fiat_sc255_int1(0x0) - fiat_sc255_int1(x1))) & 0xffffffffffffffff) + // var x3 uint64 = ((x2 & arg3) | ((^x2) & arg2)) + // call with --cmovznz-by-mul for Go + var x1 uint64 = (uint64(arg1) * 0xffffffffffffffff) + var x2 uint64 = ((x1 & arg3) | ((^x1) & arg2)) + *out1 = x2 +} + +/* + * The function fiat_sc255_mul multiplies two field elements in the Montgomery domain. + * Preconditions: + * 0 ≤ eval arg1 < m + * 0 ≤ eval arg2 < m + * Postconditions: + * eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) * eval (from_montgomery arg2)) mod m + * 0 ≤ eval out1 < m + * + * Input Bounds: + * arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] + * arg2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] + * Output Bounds: + * out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] + */ +/*inline*/ +func fiat_sc255_mul(out1 *[4]uint64, arg1 *[4]uint64, arg2 *[4]uint64) { + var x1 uint64 = (arg1[1]) + var x2 uint64 = (arg1[2]) + var x3 uint64 = (arg1[3]) + var x4 uint64 = (arg1[0]) + var x5 uint64 + var x6 uint64 + x6, x5 = bits.Mul64(x4, (arg2[3])) + var x7 uint64 + var x8 uint64 + x8, x7 = bits.Mul64(x4, (arg2[2])) + var x9 uint64 + var x10 uint64 + x10, x9 = bits.Mul64(x4, (arg2[1])) + var x11 uint64 + var x12 uint64 + x12, x11 = bits.Mul64(x4, (arg2[0])) + var x13 uint64 + var x14 fiat_sc255_uint1 
+ x13, x14 = fiat_sc255_addcarryx_u64(x12, x9, 0x0) + var x15 uint64 + var x16 fiat_sc255_uint1 + x15, x16 = fiat_sc255_addcarryx_u64(x10, x7, x14) + var x17 uint64 + var x18 fiat_sc255_uint1 + x17, x18 = fiat_sc255_addcarryx_u64(x8, x5, x16) + var x19 uint64 = (uint64(x18) + x6) + var x20 uint64 + _, x20 = bits.Mul64(x11, 0xd2b51da312547e1b) + var x22 uint64 + var x23 uint64 + x23, x22 = bits.Mul64(x20, 0x1000000000000000) + var x24 uint64 + var x25 uint64 + x25, x24 = bits.Mul64(x20, 0x14def9dea2f79cd6) + var x26 uint64 + var x27 uint64 + x27, x26 = bits.Mul64(x20, 0x5812631a5cf5d3ed) + var x28 uint64 + var x29 fiat_sc255_uint1 + x28, x29 = fiat_sc255_addcarryx_u64(x27, x24, 0x0) + var x30 uint64 = (uint64(x29) + x25) + var x32 fiat_sc255_uint1 + _, x32 = fiat_sc255_addcarryx_u64(x11, x26, 0x0) + var x33 uint64 + var x34 fiat_sc255_uint1 + x33, x34 = fiat_sc255_addcarryx_u64(x13, x28, x32) + var x35 uint64 + var x36 fiat_sc255_uint1 + x35, x36 = fiat_sc255_addcarryx_u64(x15, x30, x34) + var x37 uint64 + var x38 fiat_sc255_uint1 + x37, x38 = fiat_sc255_addcarryx_u64(x17, x22, x36) + var x39 uint64 + var x40 fiat_sc255_uint1 + x39, x40 = fiat_sc255_addcarryx_u64(x19, x23, x38) + var x41 uint64 + var x42 uint64 + x42, x41 = bits.Mul64(x1, (arg2[3])) + var x43 uint64 + var x44 uint64 + x44, x43 = bits.Mul64(x1, (arg2[2])) + var x45 uint64 + var x46 uint64 + x46, x45 = bits.Mul64(x1, (arg2[1])) + var x47 uint64 + var x48 uint64 + x48, x47 = bits.Mul64(x1, (arg2[0])) + var x49 uint64 + var x50 fiat_sc255_uint1 + x49, x50 = fiat_sc255_addcarryx_u64(x48, x45, 0x0) + var x51 uint64 + var x52 fiat_sc255_uint1 + x51, x52 = fiat_sc255_addcarryx_u64(x46, x43, x50) + var x53 uint64 + var x54 fiat_sc255_uint1 + x53, x54 = fiat_sc255_addcarryx_u64(x44, x41, x52) + var x55 uint64 = (uint64(x54) + x42) + var x56 uint64 + var x57 fiat_sc255_uint1 + x56, x57 = fiat_sc255_addcarryx_u64(x33, x47, 0x0) + var x58 uint64 + var x59 fiat_sc255_uint1 + x58, x59 = 
fiat_sc255_addcarryx_u64(x35, x49, x57) + var x60 uint64 + var x61 fiat_sc255_uint1 + x60, x61 = fiat_sc255_addcarryx_u64(x37, x51, x59) + var x62 uint64 + var x63 fiat_sc255_uint1 + x62, x63 = fiat_sc255_addcarryx_u64(x39, x53, x61) + var x64 uint64 + var x65 fiat_sc255_uint1 + x64, x65 = fiat_sc255_addcarryx_u64(uint64(x40), x55, x63) + var x66 uint64 + _, x66 = bits.Mul64(x56, 0xd2b51da312547e1b) + var x68 uint64 + var x69 uint64 + x69, x68 = bits.Mul64(x66, 0x1000000000000000) + var x70 uint64 + var x71 uint64 + x71, x70 = bits.Mul64(x66, 0x14def9dea2f79cd6) + var x72 uint64 + var x73 uint64 + x73, x72 = bits.Mul64(x66, 0x5812631a5cf5d3ed) + var x74 uint64 + var x75 fiat_sc255_uint1 + x74, x75 = fiat_sc255_addcarryx_u64(x73, x70, 0x0) + var x76 uint64 = (uint64(x75) + x71) + var x78 fiat_sc255_uint1 + _, x78 = fiat_sc255_addcarryx_u64(x56, x72, 0x0) + var x79 uint64 + var x80 fiat_sc255_uint1 + x79, x80 = fiat_sc255_addcarryx_u64(x58, x74, x78) + var x81 uint64 + var x82 fiat_sc255_uint1 + x81, x82 = fiat_sc255_addcarryx_u64(x60, x76, x80) + var x83 uint64 + var x84 fiat_sc255_uint1 + x83, x84 = fiat_sc255_addcarryx_u64(x62, x68, x82) + var x85 uint64 + var x86 fiat_sc255_uint1 + x85, x86 = fiat_sc255_addcarryx_u64(x64, x69, x84) + var x87 uint64 = (uint64(x86) + uint64(x65)) + var x88 uint64 + var x89 uint64 + x89, x88 = bits.Mul64(x2, (arg2[3])) + var x90 uint64 + var x91 uint64 + x91, x90 = bits.Mul64(x2, (arg2[2])) + var x92 uint64 + var x93 uint64 + x93, x92 = bits.Mul64(x2, (arg2[1])) + var x94 uint64 + var x95 uint64 + x95, x94 = bits.Mul64(x2, (arg2[0])) + var x96 uint64 + var x97 fiat_sc255_uint1 + x96, x97 = fiat_sc255_addcarryx_u64(x95, x92, 0x0) + var x98 uint64 + var x99 fiat_sc255_uint1 + x98, x99 = fiat_sc255_addcarryx_u64(x93, x90, x97) + var x100 uint64 + var x101 fiat_sc255_uint1 + x100, x101 = fiat_sc255_addcarryx_u64(x91, x88, x99) + var x102 uint64 = (uint64(x101) + x89) + var x103 uint64 + var x104 fiat_sc255_uint1 + x103, x104 = 
fiat_sc255_addcarryx_u64(x79, x94, 0x0) + var x105 uint64 + var x106 fiat_sc255_uint1 + x105, x106 = fiat_sc255_addcarryx_u64(x81, x96, x104) + var x107 uint64 + var x108 fiat_sc255_uint1 + x107, x108 = fiat_sc255_addcarryx_u64(x83, x98, x106) + var x109 uint64 + var x110 fiat_sc255_uint1 + x109, x110 = fiat_sc255_addcarryx_u64(x85, x100, x108) + var x111 uint64 + var x112 fiat_sc255_uint1 + x111, x112 = fiat_sc255_addcarryx_u64(x87, x102, x110) + var x113 uint64 + _, x113 = bits.Mul64(x103, 0xd2b51da312547e1b) + var x115 uint64 + var x116 uint64 + x116, x115 = bits.Mul64(x113, 0x1000000000000000) + var x117 uint64 + var x118 uint64 + x118, x117 = bits.Mul64(x113, 0x14def9dea2f79cd6) + var x119 uint64 + var x120 uint64 + x120, x119 = bits.Mul64(x113, 0x5812631a5cf5d3ed) + var x121 uint64 + var x122 fiat_sc255_uint1 + x121, x122 = fiat_sc255_addcarryx_u64(x120, x117, 0x0) + var x123 uint64 = (uint64(x122) + x118) + var x125 fiat_sc255_uint1 + _, x125 = fiat_sc255_addcarryx_u64(x103, x119, 0x0) + var x126 uint64 + var x127 fiat_sc255_uint1 + x126, x127 = fiat_sc255_addcarryx_u64(x105, x121, x125) + var x128 uint64 + var x129 fiat_sc255_uint1 + x128, x129 = fiat_sc255_addcarryx_u64(x107, x123, x127) + var x130 uint64 + var x131 fiat_sc255_uint1 + x130, x131 = fiat_sc255_addcarryx_u64(x109, x115, x129) + var x132 uint64 + var x133 fiat_sc255_uint1 + x132, x133 = fiat_sc255_addcarryx_u64(x111, x116, x131) + var x134 uint64 = (uint64(x133) + uint64(x112)) + var x135 uint64 + var x136 uint64 + x136, x135 = bits.Mul64(x3, (arg2[3])) + var x137 uint64 + var x138 uint64 + x138, x137 = bits.Mul64(x3, (arg2[2])) + var x139 uint64 + var x140 uint64 + x140, x139 = bits.Mul64(x3, (arg2[1])) + var x141 uint64 + var x142 uint64 + x142, x141 = bits.Mul64(x3, (arg2[0])) + var x143 uint64 + var x144 fiat_sc255_uint1 + x143, x144 = fiat_sc255_addcarryx_u64(x142, x139, 0x0) + var x145 uint64 + var x146 fiat_sc255_uint1 + x145, x146 = fiat_sc255_addcarryx_u64(x140, x137, x144) + var x147 
uint64 + var x148 fiat_sc255_uint1 + x147, x148 = fiat_sc255_addcarryx_u64(x138, x135, x146) + var x149 uint64 = (uint64(x148) + x136) + var x150 uint64 + var x151 fiat_sc255_uint1 + x150, x151 = fiat_sc255_addcarryx_u64(x126, x141, 0x0) + var x152 uint64 + var x153 fiat_sc255_uint1 + x152, x153 = fiat_sc255_addcarryx_u64(x128, x143, x151) + var x154 uint64 + var x155 fiat_sc255_uint1 + x154, x155 = fiat_sc255_addcarryx_u64(x130, x145, x153) + var x156 uint64 + var x157 fiat_sc255_uint1 + x156, x157 = fiat_sc255_addcarryx_u64(x132, x147, x155) + var x158 uint64 + var x159 fiat_sc255_uint1 + x158, x159 = fiat_sc255_addcarryx_u64(x134, x149, x157) + var x160 uint64 + _, x160 = bits.Mul64(x150, 0xd2b51da312547e1b) + var x162 uint64 + var x163 uint64 + x163, x162 = bits.Mul64(x160, 0x1000000000000000) + var x164 uint64 + var x165 uint64 + x165, x164 = bits.Mul64(x160, 0x14def9dea2f79cd6) + var x166 uint64 + var x167 uint64 + x167, x166 = bits.Mul64(x160, 0x5812631a5cf5d3ed) + var x168 uint64 + var x169 fiat_sc255_uint1 + x168, x169 = fiat_sc255_addcarryx_u64(x167, x164, 0x0) + var x170 uint64 = (uint64(x169) + x165) + var x172 fiat_sc255_uint1 + _, x172 = fiat_sc255_addcarryx_u64(x150, x166, 0x0) + var x173 uint64 + var x174 fiat_sc255_uint1 + x173, x174 = fiat_sc255_addcarryx_u64(x152, x168, x172) + var x175 uint64 + var x176 fiat_sc255_uint1 + x175, x176 = fiat_sc255_addcarryx_u64(x154, x170, x174) + var x177 uint64 + var x178 fiat_sc255_uint1 + x177, x178 = fiat_sc255_addcarryx_u64(x156, x162, x176) + var x179 uint64 + var x180 fiat_sc255_uint1 + x179, x180 = fiat_sc255_addcarryx_u64(x158, x163, x178) + var x181 uint64 = (uint64(x180) + uint64(x159)) + var x182 uint64 + var x183 fiat_sc255_uint1 + x182, x183 = fiat_sc255_subborrowx_u64(x173, 0x5812631a5cf5d3ed, 0x0) + var x184 uint64 + var x185 fiat_sc255_uint1 + x184, x185 = fiat_sc255_subborrowx_u64(x175, 0x14def9dea2f79cd6, x183) + var x186 uint64 + var x187 fiat_sc255_uint1 + x186, x187 = 
fiat_sc255_subborrowx_u64(x177, uint64(0x0), x185) + var x188 uint64 + var x189 fiat_sc255_uint1 + x188, x189 = fiat_sc255_subborrowx_u64(x179, 0x1000000000000000, x187) + var x191 fiat_sc255_uint1 + _, x191 = fiat_sc255_subborrowx_u64(x181, uint64(0x0), x189) + var x192 uint64 + fiat_sc255_cmovznz_u64(&x192, x191, x182, x173) + var x193 uint64 + fiat_sc255_cmovznz_u64(&x193, x191, x184, x175) + var x194 uint64 + fiat_sc255_cmovznz_u64(&x194, x191, x186, x177) + var x195 uint64 + fiat_sc255_cmovznz_u64(&x195, x191, x188, x179) + out1[0] = x192 + out1[1] = x193 + out1[2] = x194 + out1[3] = x195 +} + +/* + * The function fiat_sc255_square squares a field element in the Montgomery domain. + * Preconditions: + * 0 ≤ eval arg1 < m + * Postconditions: + * eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) * eval (from_montgomery arg1)) mod m + * 0 ≤ eval out1 < m + * + * Input Bounds: + * arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] + * Output Bounds: + * out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] + */ +/*inline*/ +func fiat_sc255_square(out1 *[4]uint64, arg1 *[4]uint64) { + var x1 uint64 = (arg1[1]) + var x2 uint64 = (arg1[2]) + var x3 uint64 = (arg1[3]) + var x4 uint64 = (arg1[0]) + var x5 uint64 + var x6 uint64 + x6, x5 = bits.Mul64(x4, (arg1[3])) + var x7 uint64 + var x8 uint64 + x8, x7 = bits.Mul64(x4, (arg1[2])) + var x9 uint64 + var x10 uint64 + x10, x9 = bits.Mul64(x4, (arg1[1])) + var x11 uint64 + var x12 uint64 + x12, x11 = bits.Mul64(x4, (arg1[0])) + var x13 uint64 + var x14 fiat_sc255_uint1 + x13, x14 = fiat_sc255_addcarryx_u64(x12, x9, 0x0) + var x15 uint64 + var x16 fiat_sc255_uint1 + x15, x16 = fiat_sc255_addcarryx_u64(x10, x7, x14) + var x17 uint64 + var x18 fiat_sc255_uint1 + x17, x18 = fiat_sc255_addcarryx_u64(x8, x5, x16) + var x19 uint64 = (uint64(x18) + x6) + var x20 uint64 + _, 
x20 = bits.Mul64(x11, 0xd2b51da312547e1b) + var x22 uint64 + var x23 uint64 + x23, x22 = bits.Mul64(x20, 0x1000000000000000) + var x24 uint64 + var x25 uint64 + x25, x24 = bits.Mul64(x20, 0x14def9dea2f79cd6) + var x26 uint64 + var x27 uint64 + x27, x26 = bits.Mul64(x20, 0x5812631a5cf5d3ed) + var x28 uint64 + var x29 fiat_sc255_uint1 + x28, x29 = fiat_sc255_addcarryx_u64(x27, x24, 0x0) + var x30 uint64 = (uint64(x29) + x25) + var x32 fiat_sc255_uint1 + _, x32 = fiat_sc255_addcarryx_u64(x11, x26, 0x0) + var x33 uint64 + var x34 fiat_sc255_uint1 + x33, x34 = fiat_sc255_addcarryx_u64(x13, x28, x32) + var x35 uint64 + var x36 fiat_sc255_uint1 + x35, x36 = fiat_sc255_addcarryx_u64(x15, x30, x34) + var x37 uint64 + var x38 fiat_sc255_uint1 + x37, x38 = fiat_sc255_addcarryx_u64(x17, x22, x36) + var x39 uint64 + var x40 fiat_sc255_uint1 + x39, x40 = fiat_sc255_addcarryx_u64(x19, x23, x38) + var x41 uint64 + var x42 uint64 + x42, x41 = bits.Mul64(x1, (arg1[3])) + var x43 uint64 + var x44 uint64 + x44, x43 = bits.Mul64(x1, (arg1[2])) + var x45 uint64 + var x46 uint64 + x46, x45 = bits.Mul64(x1, (arg1[1])) + var x47 uint64 + var x48 uint64 + x48, x47 = bits.Mul64(x1, (arg1[0])) + var x49 uint64 + var x50 fiat_sc255_uint1 + x49, x50 = fiat_sc255_addcarryx_u64(x48, x45, 0x0) + var x51 uint64 + var x52 fiat_sc255_uint1 + x51, x52 = fiat_sc255_addcarryx_u64(x46, x43, x50) + var x53 uint64 + var x54 fiat_sc255_uint1 + x53, x54 = fiat_sc255_addcarryx_u64(x44, x41, x52) + var x55 uint64 = (uint64(x54) + x42) + var x56 uint64 + var x57 fiat_sc255_uint1 + x56, x57 = fiat_sc255_addcarryx_u64(x33, x47, 0x0) + var x58 uint64 + var x59 fiat_sc255_uint1 + x58, x59 = fiat_sc255_addcarryx_u64(x35, x49, x57) + var x60 uint64 + var x61 fiat_sc255_uint1 + x60, x61 = fiat_sc255_addcarryx_u64(x37, x51, x59) + var x62 uint64 + var x63 fiat_sc255_uint1 + x62, x63 = fiat_sc255_addcarryx_u64(x39, x53, x61) + var x64 uint64 + var x65 fiat_sc255_uint1 + x64, x65 = fiat_sc255_addcarryx_u64(uint64(x40), 
x55, x63) + var x66 uint64 + _, x66 = bits.Mul64(x56, 0xd2b51da312547e1b) + var x68 uint64 + var x69 uint64 + x69, x68 = bits.Mul64(x66, 0x1000000000000000) + var x70 uint64 + var x71 uint64 + x71, x70 = bits.Mul64(x66, 0x14def9dea2f79cd6) + var x72 uint64 + var x73 uint64 + x73, x72 = bits.Mul64(x66, 0x5812631a5cf5d3ed) + var x74 uint64 + var x75 fiat_sc255_uint1 + x74, x75 = fiat_sc255_addcarryx_u64(x73, x70, 0x0) + var x76 uint64 = (uint64(x75) + x71) + var x78 fiat_sc255_uint1 + _, x78 = fiat_sc255_addcarryx_u64(x56, x72, 0x0) + var x79 uint64 + var x80 fiat_sc255_uint1 + x79, x80 = fiat_sc255_addcarryx_u64(x58, x74, x78) + var x81 uint64 + var x82 fiat_sc255_uint1 + x81, x82 = fiat_sc255_addcarryx_u64(x60, x76, x80) + var x83 uint64 + var x84 fiat_sc255_uint1 + x83, x84 = fiat_sc255_addcarryx_u64(x62, x68, x82) + var x85 uint64 + var x86 fiat_sc255_uint1 + x85, x86 = fiat_sc255_addcarryx_u64(x64, x69, x84) + var x87 uint64 = (uint64(x86) + uint64(x65)) + var x88 uint64 + var x89 uint64 + x89, x88 = bits.Mul64(x2, (arg1[3])) + var x90 uint64 + var x91 uint64 + x91, x90 = bits.Mul64(x2, (arg1[2])) + var x92 uint64 + var x93 uint64 + x93, x92 = bits.Mul64(x2, (arg1[1])) + var x94 uint64 + var x95 uint64 + x95, x94 = bits.Mul64(x2, (arg1[0])) + var x96 uint64 + var x97 fiat_sc255_uint1 + x96, x97 = fiat_sc255_addcarryx_u64(x95, x92, 0x0) + var x98 uint64 + var x99 fiat_sc255_uint1 + x98, x99 = fiat_sc255_addcarryx_u64(x93, x90, x97) + var x100 uint64 + var x101 fiat_sc255_uint1 + x100, x101 = fiat_sc255_addcarryx_u64(x91, x88, x99) + var x102 uint64 = (uint64(x101) + x89) + var x103 uint64 + var x104 fiat_sc255_uint1 + x103, x104 = fiat_sc255_addcarryx_u64(x79, x94, 0x0) + var x105 uint64 + var x106 fiat_sc255_uint1 + x105, x106 = fiat_sc255_addcarryx_u64(x81, x96, x104) + var x107 uint64 + var x108 fiat_sc255_uint1 + x107, x108 = fiat_sc255_addcarryx_u64(x83, x98, x106) + var x109 uint64 + var x110 fiat_sc255_uint1 + x109, x110 = fiat_sc255_addcarryx_u64(x85, 
x100, x108) + var x111 uint64 + var x112 fiat_sc255_uint1 + x111, x112 = fiat_sc255_addcarryx_u64(x87, x102, x110) + var x113 uint64 + _, x113 = bits.Mul64(x103, 0xd2b51da312547e1b) + var x115 uint64 + var x116 uint64 + x116, x115 = bits.Mul64(x113, 0x1000000000000000) + var x117 uint64 + var x118 uint64 + x118, x117 = bits.Mul64(x113, 0x14def9dea2f79cd6) + var x119 uint64 + var x120 uint64 + x120, x119 = bits.Mul64(x113, 0x5812631a5cf5d3ed) + var x121 uint64 + var x122 fiat_sc255_uint1 + x121, x122 = fiat_sc255_addcarryx_u64(x120, x117, 0x0) + var x123 uint64 = (uint64(x122) + x118) + var x125 fiat_sc255_uint1 + _, x125 = fiat_sc255_addcarryx_u64(x103, x119, 0x0) + var x126 uint64 + var x127 fiat_sc255_uint1 + x126, x127 = fiat_sc255_addcarryx_u64(x105, x121, x125) + var x128 uint64 + var x129 fiat_sc255_uint1 + x128, x129 = fiat_sc255_addcarryx_u64(x107, x123, x127) + var x130 uint64 + var x131 fiat_sc255_uint1 + x130, x131 = fiat_sc255_addcarryx_u64(x109, x115, x129) + var x132 uint64 + var x133 fiat_sc255_uint1 + x132, x133 = fiat_sc255_addcarryx_u64(x111, x116, x131) + var x134 uint64 = (uint64(x133) + uint64(x112)) + var x135 uint64 + var x136 uint64 + x136, x135 = bits.Mul64(x3, (arg1[3])) + var x137 uint64 + var x138 uint64 + x138, x137 = bits.Mul64(x3, (arg1[2])) + var x139 uint64 + var x140 uint64 + x140, x139 = bits.Mul64(x3, (arg1[1])) + var x141 uint64 + var x142 uint64 + x142, x141 = bits.Mul64(x3, (arg1[0])) + var x143 uint64 + var x144 fiat_sc255_uint1 + x143, x144 = fiat_sc255_addcarryx_u64(x142, x139, 0x0) + var x145 uint64 + var x146 fiat_sc255_uint1 + x145, x146 = fiat_sc255_addcarryx_u64(x140, x137, x144) + var x147 uint64 + var x148 fiat_sc255_uint1 + x147, x148 = fiat_sc255_addcarryx_u64(x138, x135, x146) + var x149 uint64 = (uint64(x148) + x136) + var x150 uint64 + var x151 fiat_sc255_uint1 + x150, x151 = fiat_sc255_addcarryx_u64(x126, x141, 0x0) + var x152 uint64 + var x153 fiat_sc255_uint1 + x152, x153 = fiat_sc255_addcarryx_u64(x128, 
x143, x151) + var x154 uint64 + var x155 fiat_sc255_uint1 + x154, x155 = fiat_sc255_addcarryx_u64(x130, x145, x153) + var x156 uint64 + var x157 fiat_sc255_uint1 + x156, x157 = fiat_sc255_addcarryx_u64(x132, x147, x155) + var x158 uint64 + var x159 fiat_sc255_uint1 + x158, x159 = fiat_sc255_addcarryx_u64(x134, x149, x157) + var x160 uint64 + _, x160 = bits.Mul64(x150, 0xd2b51da312547e1b) + var x162 uint64 + var x163 uint64 + x163, x162 = bits.Mul64(x160, 0x1000000000000000) + var x164 uint64 + var x165 uint64 + x165, x164 = bits.Mul64(x160, 0x14def9dea2f79cd6) + var x166 uint64 + var x167 uint64 + x167, x166 = bits.Mul64(x160, 0x5812631a5cf5d3ed) + var x168 uint64 + var x169 fiat_sc255_uint1 + x168, x169 = fiat_sc255_addcarryx_u64(x167, x164, 0x0) + var x170 uint64 = (uint64(x169) + x165) + var x172 fiat_sc255_uint1 + _, x172 = fiat_sc255_addcarryx_u64(x150, x166, 0x0) + var x173 uint64 + var x174 fiat_sc255_uint1 + x173, x174 = fiat_sc255_addcarryx_u64(x152, x168, x172) + var x175 uint64 + var x176 fiat_sc255_uint1 + x175, x176 = fiat_sc255_addcarryx_u64(x154, x170, x174) + var x177 uint64 + var x178 fiat_sc255_uint1 + x177, x178 = fiat_sc255_addcarryx_u64(x156, x162, x176) + var x179 uint64 + var x180 fiat_sc255_uint1 + x179, x180 = fiat_sc255_addcarryx_u64(x158, x163, x178) + var x181 uint64 = (uint64(x180) + uint64(x159)) + var x182 uint64 + var x183 fiat_sc255_uint1 + x182, x183 = fiat_sc255_subborrowx_u64(x173, 0x5812631a5cf5d3ed, 0x0) + var x184 uint64 + var x185 fiat_sc255_uint1 + x184, x185 = fiat_sc255_subborrowx_u64(x175, 0x14def9dea2f79cd6, x183) + var x186 uint64 + var x187 fiat_sc255_uint1 + x186, x187 = fiat_sc255_subborrowx_u64(x177, uint64(0x0), x185) + var x188 uint64 + var x189 fiat_sc255_uint1 + x188, x189 = fiat_sc255_subborrowx_u64(x179, 0x1000000000000000, x187) + var x191 fiat_sc255_uint1 + _, x191 = fiat_sc255_subborrowx_u64(x181, uint64(0x0), x189) + var x192 uint64 + fiat_sc255_cmovznz_u64(&x192, x191, x182, x173) + var x193 uint64 + 
fiat_sc255_cmovznz_u64(&x193, x191, x184, x175) + var x194 uint64 + fiat_sc255_cmovznz_u64(&x194, x191, x186, x177) + var x195 uint64 + fiat_sc255_cmovznz_u64(&x195, x191, x188, x179) + out1[0] = x192 + out1[1] = x193 + out1[2] = x194 + out1[3] = x195 +} + +/* + * The function fiat_sc255_add adds two field elements in the Montgomery domain. + * Preconditions: + * 0 ≤ eval arg1 < m + * 0 ≤ eval arg2 < m + * Postconditions: + * eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) + eval (from_montgomery arg2)) mod m + * 0 ≤ eval out1 < m + * + * Input Bounds: + * arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] + * arg2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] + * Output Bounds: + * out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] + */ +/*inline*/ +func fiat_sc255_add(out1 *[4]uint64, arg1 *[4]uint64, arg2 *[4]uint64) { + var x1 uint64 + var x2 fiat_sc255_uint1 + x1, x2 = fiat_sc255_addcarryx_u64((arg1[0]), (arg2[0]), 0x0) + var x3 uint64 + var x4 fiat_sc255_uint1 + x3, x4 = fiat_sc255_addcarryx_u64((arg1[1]), (arg2[1]), x2) + var x5 uint64 + var x6 fiat_sc255_uint1 + x5, x6 = fiat_sc255_addcarryx_u64((arg1[2]), (arg2[2]), x4) + var x7 uint64 + var x8 fiat_sc255_uint1 + x7, x8 = fiat_sc255_addcarryx_u64((arg1[3]), (arg2[3]), x6) + var x9 uint64 + var x10 fiat_sc255_uint1 + x9, x10 = fiat_sc255_subborrowx_u64(x1, 0x5812631a5cf5d3ed, 0x0) + var x11 uint64 + var x12 fiat_sc255_uint1 + x11, x12 = fiat_sc255_subborrowx_u64(x3, 0x14def9dea2f79cd6, x10) + var x13 uint64 + var x14 fiat_sc255_uint1 + x13, x14 = fiat_sc255_subborrowx_u64(x5, uint64(0x0), x12) + var x15 uint64 + var x16 fiat_sc255_uint1 + x15, x16 = fiat_sc255_subborrowx_u64(x7, 0x1000000000000000, x14) + var x18 fiat_sc255_uint1 + _, x18 = fiat_sc255_subborrowx_u64(uint64(x8), 
uint64(0x0), x16) + var x19 uint64 + fiat_sc255_cmovznz_u64(&x19, x18, x9, x1) + var x20 uint64 + fiat_sc255_cmovznz_u64(&x20, x18, x11, x3) + var x21 uint64 + fiat_sc255_cmovznz_u64(&x21, x18, x13, x5) + var x22 uint64 + fiat_sc255_cmovznz_u64(&x22, x18, x15, x7) + out1[0] = x19 + out1[1] = x20 + out1[2] = x21 + out1[3] = x22 +} + +/* + * The function fiat_sc255_sub subtracts two field elements in the Montgomery domain. + * Preconditions: + * 0 ≤ eval arg1 < m + * 0 ≤ eval arg2 < m + * Postconditions: + * eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) - eval (from_montgomery arg2)) mod m + * 0 ≤ eval out1 < m + * + * Input Bounds: + * arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] + * arg2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] + * Output Bounds: + * out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] + */ +/*inline*/ +func fiat_sc255_sub(out1 *[4]uint64, arg1 *[4]uint64, arg2 *[4]uint64) { + var x1 uint64 + var x2 fiat_sc255_uint1 + x1, x2 = fiat_sc255_subborrowx_u64((arg1[0]), (arg2[0]), 0x0) + var x3 uint64 + var x4 fiat_sc255_uint1 + x3, x4 = fiat_sc255_subborrowx_u64((arg1[1]), (arg2[1]), x2) + var x5 uint64 + var x6 fiat_sc255_uint1 + x5, x6 = fiat_sc255_subborrowx_u64((arg1[2]), (arg2[2]), x4) + var x7 uint64 + var x8 fiat_sc255_uint1 + x7, x8 = fiat_sc255_subborrowx_u64((arg1[3]), (arg2[3]), x6) + var x9 uint64 + fiat_sc255_cmovznz_u64(&x9, x8, uint64(0x0), 0xffffffffffffffff) + var x10 uint64 + var x11 fiat_sc255_uint1 + x10, x11 = fiat_sc255_addcarryx_u64(x1, (x9 & 0x5812631a5cf5d3ed), 0x0) + var x12 uint64 + var x13 fiat_sc255_uint1 + x12, x13 = fiat_sc255_addcarryx_u64(x3, (x9 & 0x14def9dea2f79cd6), x11) + var x14 uint64 + var x15 fiat_sc255_uint1 + x14, x15 = fiat_sc255_addcarryx_u64(x5, uint64(0x0), x13) + var 
x16 uint64 + x16, _ = fiat_sc255_addcarryx_u64(x7, (x9 & 0x1000000000000000), x15) + out1[0] = x10 + out1[1] = x12 + out1[2] = x14 + out1[3] = x16 +} + +/* + * The function fiat_sc255_opp negates a field element in the Montgomery domain. + * Preconditions: + * 0 ≤ eval arg1 < m + * Postconditions: + * eval (from_montgomery out1) mod m = -eval (from_montgomery arg1) mod m + * 0 ≤ eval out1 < m + * + * Input Bounds: + * arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] + * Output Bounds: + * out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] + */ +/*inline*/ +func fiat_sc255_opp(out1 *[4]uint64, arg1 *[4]uint64) { + var x1 uint64 + var x2 fiat_sc255_uint1 + x1, x2 = fiat_sc255_subborrowx_u64(uint64(0x0), (arg1[0]), 0x0) + var x3 uint64 + var x4 fiat_sc255_uint1 + x3, x4 = fiat_sc255_subborrowx_u64(uint64(0x0), (arg1[1]), x2) + var x5 uint64 + var x6 fiat_sc255_uint1 + x5, x6 = fiat_sc255_subborrowx_u64(uint64(0x0), (arg1[2]), x4) + var x7 uint64 + var x8 fiat_sc255_uint1 + x7, x8 = fiat_sc255_subborrowx_u64(uint64(0x0), (arg1[3]), x6) + var x9 uint64 + fiat_sc255_cmovznz_u64(&x9, x8, uint64(0x0), 0xffffffffffffffff) + var x10 uint64 + var x11 fiat_sc255_uint1 + x10, x11 = fiat_sc255_addcarryx_u64(x1, (x9 & 0x5812631a5cf5d3ed), 0x0) + var x12 uint64 + var x13 fiat_sc255_uint1 + x12, x13 = fiat_sc255_addcarryx_u64(x3, (x9 & 0x14def9dea2f79cd6), x11) + var x14 uint64 + var x15 fiat_sc255_uint1 + x14, x15 = fiat_sc255_addcarryx_u64(x5, uint64(0x0), x13) + var x16 uint64 + x16, _ = fiat_sc255_addcarryx_u64(x7, (x9 & 0x1000000000000000), x15) + out1[0] = x10 + out1[1] = x12 + out1[2] = x14 + out1[3] = x16 +} + +/* + * The function fiat_sc255_from_montgomery translates a field element out of the Montgomery domain. 
+ * Preconditions: + * 0 ≤ eval arg1 < m + * Postconditions: + * eval out1 mod m = (eval arg1 * ((2^64)⁻¹ mod m)^4) mod m + * 0 ≤ eval out1 < m + * + * Input Bounds: + * arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] + * Output Bounds: + * out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] + */ +/*inline*/ +func fiat_sc255_from_montgomery(out1 *[4]uint64, arg1 *[4]uint64) { + var x1 uint64 = (arg1[0]) + var x2 uint64 + _, x2 = bits.Mul64(x1, 0xd2b51da312547e1b) + var x4 uint64 + var x5 uint64 + x5, x4 = bits.Mul64(x2, 0x1000000000000000) + var x6 uint64 + var x7 uint64 + x7, x6 = bits.Mul64(x2, 0x14def9dea2f79cd6) + var x8 uint64 + var x9 uint64 + x9, x8 = bits.Mul64(x2, 0x5812631a5cf5d3ed) + var x10 uint64 + var x11 fiat_sc255_uint1 + x10, x11 = fiat_sc255_addcarryx_u64(x9, x6, 0x0) + var x13 fiat_sc255_uint1 + _, x13 = fiat_sc255_addcarryx_u64(x1, x8, 0x0) + var x14 uint64 + var x15 fiat_sc255_uint1 + x14, x15 = fiat_sc255_addcarryx_u64(uint64(0x0), x10, x13) + var x16 uint64 + var x17 fiat_sc255_uint1 + x16, x17 = fiat_sc255_addcarryx_u64(x14, (arg1[1]), 0x0) + var x18 uint64 + _, x18 = bits.Mul64(x16, 0xd2b51da312547e1b) + var x20 uint64 + var x21 uint64 + x21, x20 = bits.Mul64(x18, 0x1000000000000000) + var x22 uint64 + var x23 uint64 + x23, x22 = bits.Mul64(x18, 0x14def9dea2f79cd6) + var x24 uint64 + var x25 uint64 + x25, x24 = bits.Mul64(x18, 0x5812631a5cf5d3ed) + var x26 uint64 + var x27 fiat_sc255_uint1 + x26, x27 = fiat_sc255_addcarryx_u64(x25, x22, 0x0) + var x29 fiat_sc255_uint1 + _, x29 = fiat_sc255_addcarryx_u64(x16, x24, 0x0) + var x30 uint64 + var x31 fiat_sc255_uint1 + x30, x31 = fiat_sc255_addcarryx_u64((uint64(x17) + (uint64(x15) + (uint64(x11) + x7))), x26, x29) + var x32 uint64 + var x33 fiat_sc255_uint1 + x32, x33 = fiat_sc255_addcarryx_u64(x4, (uint64(x27) + x23), x31) + var x34 uint64 + var x35 
fiat_sc255_uint1 + x34, x35 = fiat_sc255_addcarryx_u64(x5, x20, x33) + var x36 uint64 + var x37 fiat_sc255_uint1 + x36, x37 = fiat_sc255_addcarryx_u64(x30, (arg1[2]), 0x0) + var x38 uint64 + var x39 fiat_sc255_uint1 + x38, x39 = fiat_sc255_addcarryx_u64(x32, uint64(0x0), x37) + var x40 uint64 + var x41 fiat_sc255_uint1 + x40, x41 = fiat_sc255_addcarryx_u64(x34, uint64(0x0), x39) + var x42 uint64 + _, x42 = bits.Mul64(x36, 0xd2b51da312547e1b) + var x44 uint64 + var x45 uint64 + x45, x44 = bits.Mul64(x42, 0x1000000000000000) + var x46 uint64 + var x47 uint64 + x47, x46 = bits.Mul64(x42, 0x14def9dea2f79cd6) + var x48 uint64 + var x49 uint64 + x49, x48 = bits.Mul64(x42, 0x5812631a5cf5d3ed) + var x50 uint64 + var x51 fiat_sc255_uint1 + x50, x51 = fiat_sc255_addcarryx_u64(x49, x46, 0x0) + var x53 fiat_sc255_uint1 + _, x53 = fiat_sc255_addcarryx_u64(x36, x48, 0x0) + var x54 uint64 + var x55 fiat_sc255_uint1 + x54, x55 = fiat_sc255_addcarryx_u64(x38, x50, x53) + var x56 uint64 + var x57 fiat_sc255_uint1 + x56, x57 = fiat_sc255_addcarryx_u64(x40, (uint64(x51) + x47), x55) + var x58 uint64 + var x59 fiat_sc255_uint1 + x58, x59 = fiat_sc255_addcarryx_u64((uint64(x41) + (uint64(x35) + x21)), x44, x57) + var x60 uint64 + var x61 fiat_sc255_uint1 + x60, x61 = fiat_sc255_addcarryx_u64(x54, (arg1[3]), 0x0) + var x62 uint64 + var x63 fiat_sc255_uint1 + x62, x63 = fiat_sc255_addcarryx_u64(x56, uint64(0x0), x61) + var x64 uint64 + var x65 fiat_sc255_uint1 + x64, x65 = fiat_sc255_addcarryx_u64(x58, uint64(0x0), x63) + var x66 uint64 + _, x66 = bits.Mul64(x60, 0xd2b51da312547e1b) + var x68 uint64 + var x69 uint64 + x69, x68 = bits.Mul64(x66, 0x1000000000000000) + var x70 uint64 + var x71 uint64 + x71, x70 = bits.Mul64(x66, 0x14def9dea2f79cd6) + var x72 uint64 + var x73 uint64 + x73, x72 = bits.Mul64(x66, 0x5812631a5cf5d3ed) + var x74 uint64 + var x75 fiat_sc255_uint1 + x74, x75 = fiat_sc255_addcarryx_u64(x73, x70, 0x0) + var x77 fiat_sc255_uint1 + _, x77 = fiat_sc255_addcarryx_u64(x60, 
x72, 0x0) + var x78 uint64 + var x79 fiat_sc255_uint1 + x78, x79 = fiat_sc255_addcarryx_u64(x62, x74, x77) + var x80 uint64 + var x81 fiat_sc255_uint1 + x80, x81 = fiat_sc255_addcarryx_u64(x64, (uint64(x75) + x71), x79) + var x82 uint64 + var x83 fiat_sc255_uint1 + x82, x83 = fiat_sc255_addcarryx_u64((uint64(x65) + (uint64(x59) + x45)), x68, x81) + var x84 uint64 = (uint64(x83) + x69) + var x85 uint64 + var x86 fiat_sc255_uint1 + x85, x86 = fiat_sc255_subborrowx_u64(x78, 0x5812631a5cf5d3ed, 0x0) + var x87 uint64 + var x88 fiat_sc255_uint1 + x87, x88 = fiat_sc255_subborrowx_u64(x80, 0x14def9dea2f79cd6, x86) + var x89 uint64 + var x90 fiat_sc255_uint1 + x89, x90 = fiat_sc255_subborrowx_u64(x82, uint64(0x0), x88) + var x91 uint64 + var x92 fiat_sc255_uint1 + x91, x92 = fiat_sc255_subborrowx_u64(x84, 0x1000000000000000, x90) + var x94 fiat_sc255_uint1 + _, x94 = fiat_sc255_subborrowx_u64(uint64(0x0), uint64(0x0), x92) + var x95 uint64 + fiat_sc255_cmovznz_u64(&x95, x94, x85, x78) + var x96 uint64 + fiat_sc255_cmovznz_u64(&x96, x94, x87, x80) + var x97 uint64 + fiat_sc255_cmovznz_u64(&x97, x94, x89, x82) + var x98 uint64 + fiat_sc255_cmovznz_u64(&x98, x94, x91, x84) + out1[0] = x95 + out1[1] = x96 + out1[2] = x97 + out1[3] = x98 +} + +/* + * The function fiat_sc255_to_montgomery translates a field element into the Montgomery domain. 
+ * Preconditions: + * 0 ≤ eval arg1 < m + * Postconditions: + * eval (from_montgomery out1) mod m = eval arg1 mod m + * 0 ≤ eval out1 < m + * + * Input Bounds: + * arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] + * Output Bounds: + * out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] + */ +/*inline*/ +func fiat_sc255_to_montgomery(out1 *[4]uint64, arg1 *[4]uint64) { + var x1 uint64 = (arg1[1]) + var x2 uint64 = (arg1[2]) + var x3 uint64 = (arg1[3]) + var x4 uint64 = (arg1[0]) + var x5 uint64 + var x6 uint64 + x6, x5 = bits.Mul64(x4, 0x399411b7c309a3d) + var x7 uint64 + var x8 uint64 + x8, x7 = bits.Mul64(x4, 0xceec73d217f5be65) + var x9 uint64 + var x10 uint64 + x10, x9 = bits.Mul64(x4, 0xd00e1ba768859347) + var x11 uint64 + var x12 uint64 + x12, x11 = bits.Mul64(x4, 0xa40611e3449c0f01) + var x13 uint64 + var x14 fiat_sc255_uint1 + x13, x14 = fiat_sc255_addcarryx_u64(x12, x9, 0x0) + var x15 uint64 + var x16 fiat_sc255_uint1 + x15, x16 = fiat_sc255_addcarryx_u64(x10, x7, x14) + var x17 uint64 + var x18 fiat_sc255_uint1 + x17, x18 = fiat_sc255_addcarryx_u64(x8, x5, x16) + var x19 uint64 + _, x19 = bits.Mul64(x11, 0xd2b51da312547e1b) + var x21 uint64 + var x22 uint64 + x22, x21 = bits.Mul64(x19, 0x1000000000000000) + var x23 uint64 + var x24 uint64 + x24, x23 = bits.Mul64(x19, 0x14def9dea2f79cd6) + var x25 uint64 + var x26 uint64 + x26, x25 = bits.Mul64(x19, 0x5812631a5cf5d3ed) + var x27 uint64 + var x28 fiat_sc255_uint1 + x27, x28 = fiat_sc255_addcarryx_u64(x26, x23, 0x0) + var x30 fiat_sc255_uint1 + _, x30 = fiat_sc255_addcarryx_u64(x11, x25, 0x0) + var x31 uint64 + var x32 fiat_sc255_uint1 + x31, x32 = fiat_sc255_addcarryx_u64(x13, x27, x30) + var x33 uint64 + var x34 fiat_sc255_uint1 + x33, x34 = fiat_sc255_addcarryx_u64(x15, (uint64(x28) + x24), x32) + var x35 uint64 + var x36 fiat_sc255_uint1 + x35, x36 = 
fiat_sc255_addcarryx_u64(x17, x21, x34) + var x37 uint64 + var x38 uint64 + x38, x37 = bits.Mul64(x1, 0x399411b7c309a3d) + var x39 uint64 + var x40 uint64 + x40, x39 = bits.Mul64(x1, 0xceec73d217f5be65) + var x41 uint64 + var x42 uint64 + x42, x41 = bits.Mul64(x1, 0xd00e1ba768859347) + var x43 uint64 + var x44 uint64 + x44, x43 = bits.Mul64(x1, 0xa40611e3449c0f01) + var x45 uint64 + var x46 fiat_sc255_uint1 + x45, x46 = fiat_sc255_addcarryx_u64(x44, x41, 0x0) + var x47 uint64 + var x48 fiat_sc255_uint1 + x47, x48 = fiat_sc255_addcarryx_u64(x42, x39, x46) + var x49 uint64 + var x50 fiat_sc255_uint1 + x49, x50 = fiat_sc255_addcarryx_u64(x40, x37, x48) + var x51 uint64 + var x52 fiat_sc255_uint1 + x51, x52 = fiat_sc255_addcarryx_u64(x31, x43, 0x0) + var x53 uint64 + var x54 fiat_sc255_uint1 + x53, x54 = fiat_sc255_addcarryx_u64(x33, x45, x52) + var x55 uint64 + var x56 fiat_sc255_uint1 + x55, x56 = fiat_sc255_addcarryx_u64(x35, x47, x54) + var x57 uint64 + var x58 fiat_sc255_uint1 + x57, x58 = fiat_sc255_addcarryx_u64(((uint64(x36) + (uint64(x18) + x6)) + x22), x49, x56) + var x59 uint64 + _, x59 = bits.Mul64(x51, 0xd2b51da312547e1b) + var x61 uint64 + var x62 uint64 + x62, x61 = bits.Mul64(x59, 0x1000000000000000) + var x63 uint64 + var x64 uint64 + x64, x63 = bits.Mul64(x59, 0x14def9dea2f79cd6) + var x65 uint64 + var x66 uint64 + x66, x65 = bits.Mul64(x59, 0x5812631a5cf5d3ed) + var x67 uint64 + var x68 fiat_sc255_uint1 + x67, x68 = fiat_sc255_addcarryx_u64(x66, x63, 0x0) + var x70 fiat_sc255_uint1 + _, x70 = fiat_sc255_addcarryx_u64(x51, x65, 0x0) + var x71 uint64 + var x72 fiat_sc255_uint1 + x71, x72 = fiat_sc255_addcarryx_u64(x53, x67, x70) + var x73 uint64 + var x74 fiat_sc255_uint1 + x73, x74 = fiat_sc255_addcarryx_u64(x55, (uint64(x68) + x64), x72) + var x75 uint64 + var x76 fiat_sc255_uint1 + x75, x76 = fiat_sc255_addcarryx_u64(x57, x61, x74) + var x77 uint64 + var x78 uint64 + x78, x77 = bits.Mul64(x2, 0x399411b7c309a3d) + var x79 uint64 + var x80 uint64 + 
x80, x79 = bits.Mul64(x2, 0xceec73d217f5be65) + var x81 uint64 + var x82 uint64 + x82, x81 = bits.Mul64(x2, 0xd00e1ba768859347) + var x83 uint64 + var x84 uint64 + x84, x83 = bits.Mul64(x2, 0xa40611e3449c0f01) + var x85 uint64 + var x86 fiat_sc255_uint1 + x85, x86 = fiat_sc255_addcarryx_u64(x84, x81, 0x0) + var x87 uint64 + var x88 fiat_sc255_uint1 + x87, x88 = fiat_sc255_addcarryx_u64(x82, x79, x86) + var x89 uint64 + var x90 fiat_sc255_uint1 + x89, x90 = fiat_sc255_addcarryx_u64(x80, x77, x88) + var x91 uint64 + var x92 fiat_sc255_uint1 + x91, x92 = fiat_sc255_addcarryx_u64(x71, x83, 0x0) + var x93 uint64 + var x94 fiat_sc255_uint1 + x93, x94 = fiat_sc255_addcarryx_u64(x73, x85, x92) + var x95 uint64 + var x96 fiat_sc255_uint1 + x95, x96 = fiat_sc255_addcarryx_u64(x75, x87, x94) + var x97 uint64 + var x98 fiat_sc255_uint1 + x97, x98 = fiat_sc255_addcarryx_u64(((uint64(x76) + (uint64(x58) + (uint64(x50) + x38))) + x62), x89, x96) + var x99 uint64 + _, x99 = bits.Mul64(x91, 0xd2b51da312547e1b) + var x101 uint64 + var x102 uint64 + x102, x101 = bits.Mul64(x99, 0x1000000000000000) + var x103 uint64 + var x104 uint64 + x104, x103 = bits.Mul64(x99, 0x14def9dea2f79cd6) + var x105 uint64 + var x106 uint64 + x106, x105 = bits.Mul64(x99, 0x5812631a5cf5d3ed) + var x107 uint64 + var x108 fiat_sc255_uint1 + x107, x108 = fiat_sc255_addcarryx_u64(x106, x103, 0x0) + var x110 fiat_sc255_uint1 + _, x110 = fiat_sc255_addcarryx_u64(x91, x105, 0x0) + var x111 uint64 + var x112 fiat_sc255_uint1 + x111, x112 = fiat_sc255_addcarryx_u64(x93, x107, x110) + var x113 uint64 + var x114 fiat_sc255_uint1 + x113, x114 = fiat_sc255_addcarryx_u64(x95, (uint64(x108) + x104), x112) + var x115 uint64 + var x116 fiat_sc255_uint1 + x115, x116 = fiat_sc255_addcarryx_u64(x97, x101, x114) + var x117 uint64 + var x118 uint64 + x118, x117 = bits.Mul64(x3, 0x399411b7c309a3d) + var x119 uint64 + var x120 uint64 + x120, x119 = bits.Mul64(x3, 0xceec73d217f5be65) + var x121 uint64 + var x122 uint64 + x122, x121 
= bits.Mul64(x3, 0xd00e1ba768859347) + var x123 uint64 + var x124 uint64 + x124, x123 = bits.Mul64(x3, 0xa40611e3449c0f01) + var x125 uint64 + var x126 fiat_sc255_uint1 + x125, x126 = fiat_sc255_addcarryx_u64(x124, x121, 0x0) + var x127 uint64 + var x128 fiat_sc255_uint1 + x127, x128 = fiat_sc255_addcarryx_u64(x122, x119, x126) + var x129 uint64 + var x130 fiat_sc255_uint1 + x129, x130 = fiat_sc255_addcarryx_u64(x120, x117, x128) + var x131 uint64 + var x132 fiat_sc255_uint1 + x131, x132 = fiat_sc255_addcarryx_u64(x111, x123, 0x0) + var x133 uint64 + var x134 fiat_sc255_uint1 + x133, x134 = fiat_sc255_addcarryx_u64(x113, x125, x132) + var x135 uint64 + var x136 fiat_sc255_uint1 + x135, x136 = fiat_sc255_addcarryx_u64(x115, x127, x134) + var x137 uint64 + var x138 fiat_sc255_uint1 + x137, x138 = fiat_sc255_addcarryx_u64(((uint64(x116) + (uint64(x98) + (uint64(x90) + x78))) + x102), x129, x136) + var x139 uint64 + _, x139 = bits.Mul64(x131, 0xd2b51da312547e1b) + var x141 uint64 + var x142 uint64 + x142, x141 = bits.Mul64(x139, 0x1000000000000000) + var x143 uint64 + var x144 uint64 + x144, x143 = bits.Mul64(x139, 0x14def9dea2f79cd6) + var x145 uint64 + var x146 uint64 + x146, x145 = bits.Mul64(x139, 0x5812631a5cf5d3ed) + var x147 uint64 + var x148 fiat_sc255_uint1 + x147, x148 = fiat_sc255_addcarryx_u64(x146, x143, 0x0) + var x150 fiat_sc255_uint1 + _, x150 = fiat_sc255_addcarryx_u64(x131, x145, 0x0) + var x151 uint64 + var x152 fiat_sc255_uint1 + x151, x152 = fiat_sc255_addcarryx_u64(x133, x147, x150) + var x153 uint64 + var x154 fiat_sc255_uint1 + x153, x154 = fiat_sc255_addcarryx_u64(x135, (uint64(x148) + x144), x152) + var x155 uint64 + var x156 fiat_sc255_uint1 + x155, x156 = fiat_sc255_addcarryx_u64(x137, x141, x154) + var x157 uint64 = ((uint64(x156) + (uint64(x138) + (uint64(x130) + x118))) + x142) + var x158 uint64 + var x159 fiat_sc255_uint1 + x158, x159 = fiat_sc255_subborrowx_u64(x151, 0x5812631a5cf5d3ed, 0x0) + var x160 uint64 + var x161 fiat_sc255_uint1 
+ x160, x161 = fiat_sc255_subborrowx_u64(x153, 0x14def9dea2f79cd6, x159) + var x162 uint64 + var x163 fiat_sc255_uint1 + x162, x163 = fiat_sc255_subborrowx_u64(x155, uint64(0x0), x161) + var x164 uint64 + var x165 fiat_sc255_uint1 + x164, x165 = fiat_sc255_subborrowx_u64(x157, 0x1000000000000000, x163) + var x167 fiat_sc255_uint1 + _, x167 = fiat_sc255_subborrowx_u64(uint64(0x0), uint64(0x0), x165) + var x168 uint64 + fiat_sc255_cmovznz_u64(&x168, x167, x158, x151) + var x169 uint64 + fiat_sc255_cmovznz_u64(&x169, x167, x160, x153) + var x170 uint64 + fiat_sc255_cmovznz_u64(&x170, x167, x162, x155) + var x171 uint64 + fiat_sc255_cmovznz_u64(&x171, x167, x164, x157) + out1[0] = x168 + out1[1] = x169 + out1[2] = x170 + out1[3] = x171 +} + +/* + * The function fiat_sc255_nonzero outputs a single non-zero word if the input is non-zero and zero otherwise. + * Preconditions: + * 0 ≤ eval arg1 < m + * Postconditions: + * out1 = 0 ↔ eval (from_montgomery arg1) mod m = 0 + * + * Input Bounds: + * arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] + * Output Bounds: + * out1: [0x0 ~> 0xffffffffffffffff] + */ +/*inline*/ +func fiat_sc255_nonzero(out1 *uint64, arg1 *[4]uint64) { + var x1 uint64 = ((arg1[0]) | ((arg1[1]) | ((arg1[2]) | (arg1[3])))) + *out1 = x1 +} + +/* + * The function fiat_sc255_selectznz is a multi-limb conditional select. 
+ * Postconditions: + * eval out1 = (if arg1 = 0 then eval arg2 else eval arg3) + * + * Input Bounds: + * arg1: [0x0 ~> 0x1] + * arg2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] + * arg3: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] + * Output Bounds: + * out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] + */ +/*inline*/ +func fiat_sc255_selectznz(out1 *[4]uint64, arg1 fiat_sc255_uint1, arg2 *[4]uint64, arg3 *[4]uint64) { + var x1 uint64 + fiat_sc255_cmovznz_u64(&x1, arg1, (arg2[0]), (arg3[0])) + var x2 uint64 + fiat_sc255_cmovznz_u64(&x2, arg1, (arg2[1]), (arg3[1])) + var x3 uint64 + fiat_sc255_cmovznz_u64(&x3, arg1, (arg2[2]), (arg3[2])) + var x4 uint64 + fiat_sc255_cmovznz_u64(&x4, arg1, (arg2[3]), (arg3[3])) + out1[0] = x1 + out1[1] = x2 + out1[2] = x3 + out1[3] = x4 +} + +/* + * The function fiat_sc255_to_bytes serializes a field element NOT in the Montgomery domain to bytes in little-endian order. 
+ * Preconditions: + * 0 ≤ eval arg1 < m + * Postconditions: + * out1 = map (λ x, ⌊((eval arg1 mod m) mod 2^(8 * (x + 1))) / 2^(8 * x)⌋) [0..31] + * + * Input Bounds: + * arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0x1fffffffffffffff]] + * Output Bounds: + * out1: [[0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0x1f]] + */ +/*inline*/ +func fiat_sc255_to_bytes(out1 *[32]byte, arg1 *[4]uint64) { + var x1 uint64 = (arg1[3]) + var x2 uint64 = (arg1[2]) + var x3 uint64 = (arg1[1]) + var x4 uint64 = (arg1[0]) + var x5 uint8 = (uint8(x4) & 0xff) + var x6 uint64 = (x4 >> 8) + var x7 uint8 = (uint8(x6) & 0xff) + var x8 uint64 = (x6 >> 8) + var x9 uint8 = (uint8(x8) & 0xff) + var x10 uint64 = (x8 >> 8) + var x11 uint8 = (uint8(x10) & 0xff) + var x12 uint64 = (x10 >> 8) + var x13 uint8 = (uint8(x12) & 0xff) + var x14 uint64 = (x12 >> 8) + var x15 uint8 = (uint8(x14) & 0xff) + var x16 uint64 = (x14 >> 8) + var x17 uint8 = (uint8(x16) & 0xff) + var x18 uint8 = uint8((x16 >> 8)) + var x19 uint8 = (uint8(x3) & 0xff) + var x20 uint64 = (x3 >> 8) + var x21 uint8 = (uint8(x20) & 0xff) + var x22 uint64 = (x20 >> 8) + var x23 uint8 = (uint8(x22) & 0xff) + var x24 uint64 = (x22 >> 8) + var x25 uint8 = (uint8(x24) & 0xff) + var x26 uint64 = (x24 >> 8) + var x27 uint8 = (uint8(x26) & 0xff) + var x28 uint64 = (x26 >> 8) + var x29 uint8 = (uint8(x28) & 0xff) + var x30 uint64 = (x28 >> 8) + var x31 uint8 = (uint8(x30) & 0xff) + var x32 uint8 = uint8((x30 >> 8)) + var x33 uint8 = (uint8(x2) & 0xff) + var x34 uint64 
= (x2 >> 8) + var x35 uint8 = (uint8(x34) & 0xff) + var x36 uint64 = (x34 >> 8) + var x37 uint8 = (uint8(x36) & 0xff) + var x38 uint64 = (x36 >> 8) + var x39 uint8 = (uint8(x38) & 0xff) + var x40 uint64 = (x38 >> 8) + var x41 uint8 = (uint8(x40) & 0xff) + var x42 uint64 = (x40 >> 8) + var x43 uint8 = (uint8(x42) & 0xff) + var x44 uint64 = (x42 >> 8) + var x45 uint8 = (uint8(x44) & 0xff) + var x46 uint8 = uint8((x44 >> 8)) + var x47 uint8 = (uint8(x1) & 0xff) + var x48 uint64 = (x1 >> 8) + var x49 uint8 = (uint8(x48) & 0xff) + var x50 uint64 = (x48 >> 8) + var x51 uint8 = (uint8(x50) & 0xff) + var x52 uint64 = (x50 >> 8) + var x53 uint8 = (uint8(x52) & 0xff) + var x54 uint64 = (x52 >> 8) + var x55 uint8 = (uint8(x54) & 0xff) + var x56 uint64 = (x54 >> 8) + var x57 uint8 = (uint8(x56) & 0xff) + var x58 uint64 = (x56 >> 8) + var x59 uint8 = (uint8(x58) & 0xff) + var x60 uint8 = uint8((x58 >> 8)) + out1[0] = x5 + out1[1] = x7 + out1[2] = x9 + out1[3] = x11 + out1[4] = x13 + out1[5] = x15 + out1[6] = x17 + out1[7] = x18 + out1[8] = x19 + out1[9] = x21 + out1[10] = x23 + out1[11] = x25 + out1[12] = x27 + out1[13] = x29 + out1[14] = x31 + out1[15] = x32 + out1[16] = x33 + out1[17] = x35 + out1[18] = x37 + out1[19] = x39 + out1[20] = x41 + out1[21] = x43 + out1[22] = x45 + out1[23] = x46 + out1[24] = x47 + out1[25] = x49 + out1[26] = x51 + out1[27] = x53 + out1[28] = x55 + out1[29] = x57 + out1[30] = x59 + out1[31] = x60 +} + +/* + * The function fiat_sc255_from_bytes deserializes a field element NOT in the Montgomery domain from bytes in little-endian order. 
+ * Preconditions: + * 0 ≤ bytes_eval arg1 < m + * Postconditions: + * eval out1 mod m = bytes_eval arg1 mod m + * 0 ≤ eval out1 < m + * + * Input Bounds: + * arg1: [[0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0x1f]] + * Output Bounds: + * out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0x1fffffffffffffff]] + */ +/*inline*/ +func fiat_sc255_from_bytes(out1 *[4]uint64, arg1 *[32]byte) { + var x1 uint64 = (uint64((arg1[31])) << 56) + var x2 uint64 = (uint64((arg1[30])) << 48) + var x3 uint64 = (uint64((arg1[29])) << 40) + var x4 uint64 = (uint64((arg1[28])) << 32) + var x5 uint64 = (uint64((arg1[27])) << 24) + var x6 uint64 = (uint64((arg1[26])) << 16) + var x7 uint64 = (uint64((arg1[25])) << 8) + var x8 uint8 = (arg1[24]) + var x9 uint64 = (uint64((arg1[23])) << 56) + var x10 uint64 = (uint64((arg1[22])) << 48) + var x11 uint64 = (uint64((arg1[21])) << 40) + var x12 uint64 = (uint64((arg1[20])) << 32) + var x13 uint64 = (uint64((arg1[19])) << 24) + var x14 uint64 = (uint64((arg1[18])) << 16) + var x15 uint64 = (uint64((arg1[17])) << 8) + var x16 uint8 = (arg1[16]) + var x17 uint64 = (uint64((arg1[15])) << 56) + var x18 uint64 = (uint64((arg1[14])) << 48) + var x19 uint64 = (uint64((arg1[13])) << 40) + var x20 uint64 = (uint64((arg1[12])) << 32) + var x21 uint64 = (uint64((arg1[11])) << 24) + var x22 uint64 = (uint64((arg1[10])) << 16) + var x23 uint64 = (uint64((arg1[9])) << 8) + var x24 uint8 = (arg1[8]) + var x25 uint64 = (uint64((arg1[7])) << 56) + var x26 uint64 = (uint64((arg1[6])) << 
48) + var x27 uint64 = (uint64((arg1[5])) << 40) + var x28 uint64 = (uint64((arg1[4])) << 32) + var x29 uint64 = (uint64((arg1[3])) << 24) + var x30 uint64 = (uint64((arg1[2])) << 16) + var x31 uint64 = (uint64((arg1[1])) << 8) + var x32 uint8 = (arg1[0]) + var x33 uint64 = (x31 + uint64(x32)) + var x34 uint64 = (x30 + x33) + var x35 uint64 = (x29 + x34) + var x36 uint64 = (x28 + x35) + var x37 uint64 = (x27 + x36) + var x38 uint64 = (x26 + x37) + var x39 uint64 = (x25 + x38) + var x40 uint64 = (x23 + uint64(x24)) + var x41 uint64 = (x22 + x40) + var x42 uint64 = (x21 + x41) + var x43 uint64 = (x20 + x42) + var x44 uint64 = (x19 + x43) + var x45 uint64 = (x18 + x44) + var x46 uint64 = (x17 + x45) + var x47 uint64 = (x15 + uint64(x16)) + var x48 uint64 = (x14 + x47) + var x49 uint64 = (x13 + x48) + var x50 uint64 = (x12 + x49) + var x51 uint64 = (x11 + x50) + var x52 uint64 = (x10 + x51) + var x53 uint64 = (x9 + x52) + var x54 uint64 = (x7 + uint64(x8)) + var x55 uint64 = (x6 + x54) + var x56 uint64 = (x5 + x55) + var x57 uint64 = (x4 + x56) + var x58 uint64 = (x3 + x57) + var x59 uint64 = (x2 + x58) + var x60 uint64 = (x1 + x59) + out1[0] = x39 + out1[1] = x46 + out1[2] = x53 + out1[3] = x60 +} + +/* + * The function fiat_sc255_set_one returns the field element one in the Montgomery domain. + * Postconditions: + * eval (from_montgomery out1) mod m = 1 mod m + * 0 ≤ eval out1 < m + * + * Input Bounds: + * Output Bounds: + * out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] + */ +/*inline*/ +func fiat_sc255_set_one(out1 *[4]uint64) { + out1[0] = 0xd6ec31748d98951d + out1[1] = 0xc6ef5bf4737dcf70 + out1[2] = 0xfffffffffffffffe + out1[3] = 0xfffffffffffffff +} + +/* + * The function fiat_sc255_msat returns the saturated representation of the prime modulus. 
+ * Postconditions: + * twos_complement_eval out1 = m + * 0 ≤ eval out1 < m + * + * Input Bounds: + * Output Bounds: + * out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] + */ +/*inline*/ +func fiat_sc255_msat(out1 *[5]uint64) { + out1[0] = 0x5812631a5cf5d3ed + out1[1] = 0x14def9dea2f79cd6 + out1[2] = uint64(0x0) + out1[3] = 0x1000000000000000 + out1[4] = uint64(0x0) +} + +/* + * The function fiat_sc255_divstep_precomp returns the precomputed value for Bernstein-Yang-inversion (in montgomery form). + * Postconditions: + * eval (from_montgomery out1) = ⌊(m - 1) / 2⌋^(if (log2 m) + 1 < 46 then ⌊(49 * ((log2 m) + 1) + 80) / 17⌋ else ⌊(49 * ((log2 m) + 1) + 57) / 17⌋) + * 0 ≤ eval out1 < m + * + * Input Bounds: + * Output Bounds: + * out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] + */ +/*inline*/ +func fiat_sc255_divstep_precomp(out1 *[4]uint64) { + out1[0] = 0xd70af84436a7cb92 + out1[1] = 0x5f71c978b0b8b159 + out1[2] = 0xe76d816974947f1a + out1[3] = 0x19a2d36f193e4ff +} + +/* + * The function fiat_sc255_divstep computes a divstep. 
+ * Preconditions: + * 0 ≤ eval arg4 < m + * 0 ≤ eval arg5 < m + * Postconditions: + * out1 = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then 1 - arg1 else 1 + arg1) + * twos_complement_eval out2 = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then twos_complement_eval arg3 else twos_complement_eval arg2) + * twos_complement_eval out3 = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then ⌊(twos_complement_eval arg3 - twos_complement_eval arg2) / 2⌋ else ⌊(twos_complement_eval arg3 + (twos_complement_eval arg3 mod 2) * twos_complement_eval arg2) / 2⌋) + * eval (from_montgomery out4) mod m = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then (2 * eval (from_montgomery arg5)) mod m else (2 * eval (from_montgomery arg4)) mod m) + * eval (from_montgomery out5) mod m = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then (eval (from_montgomery arg5) - eval (from_montgomery arg4)) mod m else (eval (from_montgomery arg5) + (twos_complement_eval arg3 mod 2) * eval (from_montgomery arg4)) mod m) + * 0 ≤ eval out4 < m + * 0 ≤ eval out5 < m + * 0 ≤ eval out2 < m + * 0 ≤ eval out3 < m + * + * Input Bounds: + * arg1: [0x0 ~> 0xffffffffffffffff] + * arg2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] + * arg3: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] + * arg4: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] + * arg5: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] + * Output Bounds: + * out1: [0x0 ~> 0xffffffffffffffff] + * out2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] + * out3: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 
0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] + * out4: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] + * out5: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] + */ +/*inline*/ +func fiat_sc255_divstep(out1 *uint64, out2 *[5]uint64, out3 *[5]uint64, out4 *[4]uint64, out5 *[4]uint64, arg1 uint64, arg2 *[5]uint64, arg3 *[5]uint64, arg4 *[4]uint64, arg5 *[4]uint64) { + var x1 uint64 + x1, _ = fiat_sc255_addcarryx_u64((^arg1), uint64(0x1), 0x0) + var x3 fiat_sc255_uint1 = (fiat_sc255_uint1((x1 >> 63)) & (fiat_sc255_uint1((arg3[0])) & 0x1)) + var x4 uint64 + x4, _ = fiat_sc255_addcarryx_u64((^arg1), uint64(0x1), 0x0) + var x6 uint64 + fiat_sc255_cmovznz_u64(&x6, x3, arg1, x4) + var x7 uint64 + fiat_sc255_cmovznz_u64(&x7, x3, (arg2[0]), (arg3[0])) + var x8 uint64 + fiat_sc255_cmovznz_u64(&x8, x3, (arg2[1]), (arg3[1])) + var x9 uint64 + fiat_sc255_cmovznz_u64(&x9, x3, (arg2[2]), (arg3[2])) + var x10 uint64 + fiat_sc255_cmovznz_u64(&x10, x3, (arg2[3]), (arg3[3])) + var x11 uint64 + fiat_sc255_cmovznz_u64(&x11, x3, (arg2[4]), (arg3[4])) + var x12 uint64 + var x13 fiat_sc255_uint1 + x12, x13 = fiat_sc255_addcarryx_u64(uint64(0x1), (^(arg2[0])), 0x0) + var x14 uint64 + var x15 fiat_sc255_uint1 + x14, x15 = fiat_sc255_addcarryx_u64(uint64(0x0), (^(arg2[1])), x13) + var x16 uint64 + var x17 fiat_sc255_uint1 + x16, x17 = fiat_sc255_addcarryx_u64(uint64(0x0), (^(arg2[2])), x15) + var x18 uint64 + var x19 fiat_sc255_uint1 + x18, x19 = fiat_sc255_addcarryx_u64(uint64(0x0), (^(arg2[3])), x17) + var x20 uint64 + x20, _ = fiat_sc255_addcarryx_u64(uint64(0x0), (^(arg2[4])), x19) + var x22 uint64 + fiat_sc255_cmovznz_u64(&x22, x3, (arg3[0]), x12) + var x23 uint64 + fiat_sc255_cmovznz_u64(&x23, x3, (arg3[1]), x14) + var x24 uint64 + fiat_sc255_cmovznz_u64(&x24, x3, (arg3[2]), x16) + var 
x25 uint64 + fiat_sc255_cmovznz_u64(&x25, x3, (arg3[3]), x18) + var x26 uint64 + fiat_sc255_cmovznz_u64(&x26, x3, (arg3[4]), x20) + var x27 uint64 + fiat_sc255_cmovznz_u64(&x27, x3, (arg4[0]), (arg5[0])) + var x28 uint64 + fiat_sc255_cmovznz_u64(&x28, x3, (arg4[1]), (arg5[1])) + var x29 uint64 + fiat_sc255_cmovznz_u64(&x29, x3, (arg4[2]), (arg5[2])) + var x30 uint64 + fiat_sc255_cmovznz_u64(&x30, x3, (arg4[3]), (arg5[3])) + var x31 uint64 + var x32 fiat_sc255_uint1 + x31, x32 = fiat_sc255_addcarryx_u64(x27, x27, 0x0) + var x33 uint64 + var x34 fiat_sc255_uint1 + x33, x34 = fiat_sc255_addcarryx_u64(x28, x28, x32) + var x35 uint64 + var x36 fiat_sc255_uint1 + x35, x36 = fiat_sc255_addcarryx_u64(x29, x29, x34) + var x37 uint64 + var x38 fiat_sc255_uint1 + x37, x38 = fiat_sc255_addcarryx_u64(x30, x30, x36) + var x39 uint64 + var x40 fiat_sc255_uint1 + x39, x40 = fiat_sc255_subborrowx_u64(x31, 0x5812631a5cf5d3ed, 0x0) + var x41 uint64 + var x42 fiat_sc255_uint1 + x41, x42 = fiat_sc255_subborrowx_u64(x33, 0x14def9dea2f79cd6, x40) + var x43 uint64 + var x44 fiat_sc255_uint1 + x43, x44 = fiat_sc255_subborrowx_u64(x35, uint64(0x0), x42) + var x45 uint64 + var x46 fiat_sc255_uint1 + x45, x46 = fiat_sc255_subborrowx_u64(x37, 0x1000000000000000, x44) + var x48 fiat_sc255_uint1 + _, x48 = fiat_sc255_subborrowx_u64(uint64(x38), uint64(0x0), x46) + var x49 uint64 = (arg4[3]) + var x50 uint64 = (arg4[2]) + var x51 uint64 = (arg4[1]) + var x52 uint64 = (arg4[0]) + var x53 uint64 + var x54 fiat_sc255_uint1 + x53, x54 = fiat_sc255_subborrowx_u64(uint64(0x0), x52, 0x0) + var x55 uint64 + var x56 fiat_sc255_uint1 + x55, x56 = fiat_sc255_subborrowx_u64(uint64(0x0), x51, x54) + var x57 uint64 + var x58 fiat_sc255_uint1 + x57, x58 = fiat_sc255_subborrowx_u64(uint64(0x0), x50, x56) + var x59 uint64 + var x60 fiat_sc255_uint1 + x59, x60 = fiat_sc255_subborrowx_u64(uint64(0x0), x49, x58) + var x61 uint64 + fiat_sc255_cmovznz_u64(&x61, x60, uint64(0x0), 0xffffffffffffffff) + var x62 uint64 + 
var x63 fiat_sc255_uint1 + x62, x63 = fiat_sc255_addcarryx_u64(x53, (x61 & 0x5812631a5cf5d3ed), 0x0) + var x64 uint64 + var x65 fiat_sc255_uint1 + x64, x65 = fiat_sc255_addcarryx_u64(x55, (x61 & 0x14def9dea2f79cd6), x63) + var x66 uint64 + var x67 fiat_sc255_uint1 + x66, x67 = fiat_sc255_addcarryx_u64(x57, uint64(0x0), x65) + var x68 uint64 + x68, _ = fiat_sc255_addcarryx_u64(x59, (x61 & 0x1000000000000000), x67) + var x70 uint64 + fiat_sc255_cmovznz_u64(&x70, x3, (arg5[0]), x62) + var x71 uint64 + fiat_sc255_cmovznz_u64(&x71, x3, (arg5[1]), x64) + var x72 uint64 + fiat_sc255_cmovznz_u64(&x72, x3, (arg5[2]), x66) + var x73 uint64 + fiat_sc255_cmovznz_u64(&x73, x3, (arg5[3]), x68) + var x74 fiat_sc255_uint1 = (fiat_sc255_uint1(x22) & 0x1) + var x75 uint64 + fiat_sc255_cmovznz_u64(&x75, x74, uint64(0x0), x7) + var x76 uint64 + fiat_sc255_cmovznz_u64(&x76, x74, uint64(0x0), x8) + var x77 uint64 + fiat_sc255_cmovznz_u64(&x77, x74, uint64(0x0), x9) + var x78 uint64 + fiat_sc255_cmovznz_u64(&x78, x74, uint64(0x0), x10) + var x79 uint64 + fiat_sc255_cmovznz_u64(&x79, x74, uint64(0x0), x11) + var x80 uint64 + var x81 fiat_sc255_uint1 + x80, x81 = fiat_sc255_addcarryx_u64(x22, x75, 0x0) + var x82 uint64 + var x83 fiat_sc255_uint1 + x82, x83 = fiat_sc255_addcarryx_u64(x23, x76, x81) + var x84 uint64 + var x85 fiat_sc255_uint1 + x84, x85 = fiat_sc255_addcarryx_u64(x24, x77, x83) + var x86 uint64 + var x87 fiat_sc255_uint1 + x86, x87 = fiat_sc255_addcarryx_u64(x25, x78, x85) + var x88 uint64 + x88, _ = fiat_sc255_addcarryx_u64(x26, x79, x87) + var x90 uint64 + fiat_sc255_cmovznz_u64(&x90, x74, uint64(0x0), x27) + var x91 uint64 + fiat_sc255_cmovznz_u64(&x91, x74, uint64(0x0), x28) + var x92 uint64 + fiat_sc255_cmovznz_u64(&x92, x74, uint64(0x0), x29) + var x93 uint64 + fiat_sc255_cmovznz_u64(&x93, x74, uint64(0x0), x30) + var x94 uint64 + var x95 fiat_sc255_uint1 + x94, x95 = fiat_sc255_addcarryx_u64(x70, x90, 0x0) + var x96 uint64 + var x97 fiat_sc255_uint1 + x96, x97 = 
fiat_sc255_addcarryx_u64(x71, x91, x95) + var x98 uint64 + var x99 fiat_sc255_uint1 + x98, x99 = fiat_sc255_addcarryx_u64(x72, x92, x97) + var x100 uint64 + var x101 fiat_sc255_uint1 + x100, x101 = fiat_sc255_addcarryx_u64(x73, x93, x99) + var x102 uint64 + var x103 fiat_sc255_uint1 + x102, x103 = fiat_sc255_subborrowx_u64(x94, 0x5812631a5cf5d3ed, 0x0) + var x104 uint64 + var x105 fiat_sc255_uint1 + x104, x105 = fiat_sc255_subborrowx_u64(x96, 0x14def9dea2f79cd6, x103) + var x106 uint64 + var x107 fiat_sc255_uint1 + x106, x107 = fiat_sc255_subborrowx_u64(x98, uint64(0x0), x105) + var x108 uint64 + var x109 fiat_sc255_uint1 + x108, x109 = fiat_sc255_subborrowx_u64(x100, 0x1000000000000000, x107) + var x111 fiat_sc255_uint1 + _, x111 = fiat_sc255_subborrowx_u64(uint64(x101), uint64(0x0), x109) + var x112 uint64 + x112, _ = fiat_sc255_addcarryx_u64(x6, uint64(0x1), 0x0) + var x114 uint64 = ((x80 >> 1) | ((x82 << 63) & 0xffffffffffffffff)) + var x115 uint64 = ((x82 >> 1) | ((x84 << 63) & 0xffffffffffffffff)) + var x116 uint64 = ((x84 >> 1) | ((x86 << 63) & 0xffffffffffffffff)) + var x117 uint64 = ((x86 >> 1) | ((x88 << 63) & 0xffffffffffffffff)) + var x118 uint64 = ((x88 & 0x8000000000000000) | (x88 >> 1)) + var x119 uint64 + fiat_sc255_cmovznz_u64(&x119, x48, x39, x31) + var x120 uint64 + fiat_sc255_cmovznz_u64(&x120, x48, x41, x33) + var x121 uint64 + fiat_sc255_cmovznz_u64(&x121, x48, x43, x35) + var x122 uint64 + fiat_sc255_cmovznz_u64(&x122, x48, x45, x37) + var x123 uint64 + fiat_sc255_cmovznz_u64(&x123, x111, x102, x94) + var x124 uint64 + fiat_sc255_cmovznz_u64(&x124, x111, x104, x96) + var x125 uint64 + fiat_sc255_cmovznz_u64(&x125, x111, x106, x98) + var x126 uint64 + fiat_sc255_cmovznz_u64(&x126, x111, x108, x100) + *out1 = x112 + out2[0] = x7 + out2[1] = x8 + out2[2] = x9 + out2[3] = x10 + out2[4] = x11 + out3[0] = x114 + out3[1] = x115 + out3[2] = x116 + out3[3] = x117 + out3[4] = x118 + out4[0] = x119 + out4[1] = x120 + out4[2] = x121 + out4[3] = x122 + 
out5[0] = x123 + out5[1] = x124 + out5[2] = x125 + out5[3] = x126 +} diff --git a/scalar.go b/scalar.go index 4530bc3..062e874 100644 --- a/scalar.go +++ b/scalar.go @@ -21,17 +21,18 @@ import ( // // The zero value is a valid zero element. type Scalar struct { - // s is the Scalar value in little-endian. The value is always reduced - // modulo l between operations. - s [32]byte + // A Scalar is an integer modulo l = 2^252 + 27742317777372353535851937790883648493. + // Internally, this implementation keeps the scalar in the Montgomery domain. + s [4]uint64 } var ( - scZero = Scalar{[32]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}} + scZeroBytes = [32]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} + scOneBytes = [32]byte{1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} + scMinusOneBytes = [32]byte{236, 211, 245, 92, 26, 99, 18, 88, 214, 156, 247, 162, 222, 249, 222, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16} - scOne = Scalar{[32]byte{1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}} - - scMinusOne = Scalar{[32]byte{236, 211, 245, 92, 26, 99, 18, 88, 214, 156, 247, 162, 222, 249, 222, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16}} + scOne = Scalar{[4]uint64{0xd6ec31748d98951d, 0xc6ef5bf4737dcf70, 0xfffffffffffffffe, 0xfffffffffffffff}} + scMinusOne = Scalar{[4]uint64{0x812631a5cf5d3ed0, 0x4def9dea2f79cd65, 1, 0}} ) // NewScalar returns a new zero Scalar. @@ -39,37 +40,53 @@ func NewScalar() *Scalar { return &Scalar{} } -// MultiplyAdd sets s = x * y + z mod l, and returns s. +// MultiplyAdd sets s = x * y + z mod l, and returns s. DEPRECATED. Use the individual Add and Multiply methods instead. 
func (s *Scalar) MultiplyAdd(x, y, z *Scalar) *Scalar { - scMulAdd(&s.s, &x.s, &y.s, &z.s) + var tempFieldElement [4]uint64 + var xRepr, yRepr, zRepr, tempResult [32]byte + + fiat_sc255_from_montgomery(&tempFieldElement, (*[4]uint64)(&x.s)) + fiat_sc255_to_bytes(&xRepr, &tempFieldElement) + + fiat_sc255_from_montgomery(&tempFieldElement, (*[4]uint64)(&y.s)) + fiat_sc255_to_bytes(&yRepr, &tempFieldElement) + + fiat_sc255_from_montgomery(&tempFieldElement, (*[4]uint64)(&z.s)) + fiat_sc255_to_bytes(&zRepr, &tempFieldElement) + + scMulAdd(&tempResult, &xRepr, &yRepr, &zRepr) + + fiat_sc255_from_bytes(&tempFieldElement, &tempResult) + fiat_sc255_to_montgomery((*[4]uint64)(&s.s), &tempFieldElement) + return s } // Add sets s = x + y mod l, and returns s. func (s *Scalar) Add(x, y *Scalar) *Scalar { // s = 1 * x + y mod l - scMulAdd(&s.s, &scOne.s, &x.s, &y.s) + fiat_sc255_add((*[4]uint64)(&s.s), (*[4]uint64)(&x.s), (*[4]uint64)(&y.s)) return s } // Subtract sets s = x - y mod l, and returns s. func (s *Scalar) Subtract(x, y *Scalar) *Scalar { // s = -1 * y + x mod l - scMulAdd(&s.s, &scMinusOne.s, &y.s, &x.s) + fiat_sc255_sub((*[4]uint64)(&s.s), (*[4]uint64)(&x.s), (*[4]uint64)(&y.s)) return s } // Negate sets s = -x mod l, and returns s. func (s *Scalar) Negate(x *Scalar) *Scalar { // s = -1 * x + 0 mod l - scMulAdd(&s.s, &scMinusOne.s, &x.s, &scZero.s) + fiat_sc255_opp((*[4]uint64)(&s.s), (*[4]uint64)(&x.s)) return s } // Multiply sets s = x * y mod l, and returns s. 
func (s *Scalar) Multiply(x, y *Scalar) *Scalar { // s = x * y + 0 mod l - scMulAdd(&s.s, &x.s, &y.s, &scZero.s) + fiat_sc255_mul((*[4]uint64)(&s.s), (*[4]uint64)(&x.s), (*[4]uint64)(&y.s)) return s } @@ -91,7 +108,14 @@ func (s *Scalar) SetUniformBytes(x []byte) (*Scalar, error) { } var wideBytes [64]byte copy(wideBytes[:], x[:]) - scReduce(&s.s, &wideBytes) + + // TODO: We should deprecate scReduce as well, but we retain it here for consistent behavior + var reduced [32]byte + scReduce(&reduced, &wideBytes) + + fiat_sc255_from_bytes((*[4]uint64)(&s.s), &reduced) + fiat_sc255_to_montgomery((*[4]uint64)(&s.s), (*[4]uint64)(&s.s)) + return s, nil } @@ -102,22 +126,31 @@ func (s *Scalar) SetCanonicalBytes(x []byte) (*Scalar, error) { if len(x) != 32 { return nil, errors.New("invalid scalar length") } - ss := &Scalar{} - copy(ss.s[:], x) - if !isReduced(ss) { + + // Use bytes here because the original logic assumed the old 32-byte LE representation + ss := [32]byte{} + copy(ss[:], x) + if !isReduced(ss[:]) { return nil, errors.New("invalid scalar encoding") } - s.s = ss.s + + fiat_sc255_from_bytes((*[4]uint64)(&s.s), &ss) + fiat_sc255_to_montgomery((*[4]uint64)(&s.s), (*[4]uint64)(&s.s)) + return s, nil } -// isReduced returns whether the given scalar is reduced modulo l. -func isReduced(s *Scalar) bool { - for i := len(s.s) - 1; i >= 0; i-- { +// isReduced returns whether the given scalar in 32-byte little endian encoded form is reduced modulo l. 
+func isReduced(s []byte) bool { + if len(s) != 32 { + return false + } + + for i := len(s) - 1; i >= 0; i-- { switch { - case s.s[i] > scMinusOne.s[i]: + case s[i] > scMinusOneBytes[i]: return false - case s.s[i] < scMinusOne.s[i]: + case s[i] < scMinusOneBytes[i]: return true } } @@ -148,20 +181,34 @@ func (s *Scalar) SetBytesWithClamping(x []byte) (*Scalar, error) { wideBytes[0] &= 248 wideBytes[31] &= 63 wideBytes[31] |= 64 - scReduce(&s.s, &wideBytes) + var reduced [32]byte + scReduce(&reduced, &wideBytes) + fiat_sc255_from_bytes((*[4]uint64)(&s.s), &reduced) + fiat_sc255_to_montgomery((*[4]uint64)(&s.s), (*[4]uint64)(&s.s)) return s, nil } // Bytes returns the canonical 32-byte little-endian encoding of s. func (s *Scalar) Bytes() []byte { - buf := make([]byte, 32) - copy(buf, s.s[:]) - return buf + // This pattern, called "outlining", allows this function to inline so the + // allocations can occur on the caller stack rather than escaping to the heap. + // See https://blog.filippo.io/efficient-go-apis-with-the-inliner for more details. + var encoded [32]byte + return s.bytes(&encoded) +} + +func (s *Scalar) bytes(out *[32]byte) []byte { + var limbs [4]uint64 + fiat_sc255_from_montgomery(&limbs, (*[4]uint64)(&s.s)) + fiat_sc255_to_bytes(out, &limbs) + return out[:] } // Equal returns 1 if s and t are equal, and 0 otherwise. 
func (s *Scalar) Equal(t *Scalar) int { - return subtle.ConstantTimeCompare(s.s[:], t.s[:]) + st := t.Bytes() + ss := s.Bytes() + return subtle.ConstantTimeCompare(ss[:], st[:]) } // scMulAdd and scReduce are ported from the public domain, “ref10” @@ -950,7 +997,8 @@ func (s *Scalar) nonAdjacentForm(w uint) [256]int8 { // This implementation is adapted from the one // in curve25519-dalek and is documented there: // https://github.com/dalek-cryptography/curve25519-dalek/blob/f630041af28e9a405255f98a8a93adca18e4315b/src/scalar.rs#L800-L871 - if s.s[31] > 127 { + b := s.Bytes() + if b[31] > 127 { panic("scalar has high bit set illegally") } if w < 2 { @@ -963,7 +1011,7 @@ func (s *Scalar) nonAdjacentForm(w uint) [256]int8 { var digits [5]uint64 for i := 0; i < 4; i++ { - digits[i] = binary.LittleEndian.Uint64(s.s[i*8:]) + digits[i] = binary.LittleEndian.Uint64(b[i*8:]) } width := uint64(1 << w) @@ -1011,7 +1059,8 @@ func (s *Scalar) nonAdjacentForm(w uint) [256]int8 { } func (s *Scalar) signedRadix16() [64]int8 { - if s.s[31] > 127 { + b := s.Bytes() + if b[31] > 127 { panic("scalar has high bit set illegally") } @@ -1019,8 +1068,8 @@ func (s *Scalar) signedRadix16() [64]int8 { // Compute unsigned radix-16 digits: for i := 0; i < 32; i++ { - digits[2*i] = int8(s.s[i] & 15) - digits[2*i+1] = int8((s.s[i] >> 4) & 15) + digits[2*i] = int8(b[i] & 15) + digits[2*i+1] = int8((b[i] >> 4) & 15) } // Recenter coefficients: diff --git a/scalar_alias_test.go b/scalar_alias_test.go index 18d800d..4766202 100644 --- a/scalar_alias_test.go +++ b/scalar_alias_test.go @@ -14,12 +14,12 @@ func TestScalarAliasing(t *testing.T) { x1, v1 := x, x // Calculate a reference f(x) without aliasing. - if out := f(&v, &x); out != &v || !isReduced(out) { + if out := f(&v, &x); out != &v || !isReduced(out.Bytes()) { return false } // Test aliasing the argument and the receiver. 
- if out := f(&v1, &v1); out != &v1 || v1 != v || !isReduced(out) { + if out := f(&v1, &v1); out != &v1 || v1 != v || !isReduced(out.Bytes()) { return false } @@ -31,39 +31,39 @@ func TestScalarAliasing(t *testing.T) { x1, y1, v1 := x, y, Scalar{} // Calculate a reference f(x, y) without aliasing. - if out := f(&v, &x, &y); out != &v || !isReduced(out) { + if out := f(&v, &x, &y); out != &v || !isReduced(out.Bytes()) { return false } // Test aliasing the first argument and the receiver. v1 = x - if out := f(&v1, &v1, &y); out != &v1 || v1 != v || !isReduced(out) { + if out := f(&v1, &v1, &y); out != &v1 || v1 != v || !isReduced(out.Bytes()) { return false } // Test aliasing the second argument and the receiver. v1 = y - if out := f(&v1, &x, &v1); out != &v1 || v1 != v || !isReduced(out) { + if out := f(&v1, &x, &v1); out != &v1 || v1 != v || !isReduced(out.Bytes()) { return false } // Calculate a reference f(x, x) without aliasing. - if out := f(&v, &x, &x); out != &v || !isReduced(out) { + if out := f(&v, &x, &x); out != &v || !isReduced(out.Bytes()) { return false } // Test aliasing the first argument and the receiver. v1 = x - if out := f(&v1, &v1, &x); out != &v1 || v1 != v || !isReduced(out) { + if out := f(&v1, &v1, &x); out != &v1 || v1 != v || !isReduced(out.Bytes()) { return false } // Test aliasing the second argument and the receiver. v1 = x - if out := f(&v1, &x, &v1); out != &v1 || v1 != v || !isReduced(out) { + if out := f(&v1, &x, &v1); out != &v1 || v1 != v || !isReduced(out.Bytes()) { return false } // Test aliasing both arguments and the receiver. 
v1 = x - if out := f(&v1, &v1, &v1); out != &v1 || v1 != v || !isReduced(out) { + if out := f(&v1, &v1, &v1); out != &v1 || v1 != v || !isReduced(out.Bytes()) { return false } diff --git a/scalar_test.go b/scalar_test.go index 9d51b34..52f523d 100644 --- a/scalar_test.go +++ b/scalar_test.go @@ -17,31 +17,36 @@ import ( // Generate returns a valid (reduced modulo l) Scalar with a distribution // weighted towards high, low, and edge values. func (Scalar) Generate(rand *mathrand.Rand, size int) reflect.Value { - s := scZero + s := scZeroBytes diceRoll := rand.Intn(100) switch { case diceRoll == 0: case diceRoll == 1: - s = scOne + s = scOneBytes case diceRoll == 2: - s = scMinusOne + s = scMinusOneBytes case diceRoll < 5: // Generate a low scalar in [0, 2^125). - rand.Read(s.s[:16]) - s.s[15] &= (1 << 5) - 1 + rand.Read(s[:16]) + s[15] &= (1 << 5) - 1 case diceRoll < 10: // Generate a high scalar in [2^252, 2^252 + 2^124). - s.s[31] = 1 << 4 - rand.Read(s.s[:16]) - s.s[15] &= (1 << 4) - 1 + s[31] = 1 << 4 + rand.Read(s[:16]) + s[15] &= (1 << 4) - 1 default: // Generate a valid scalar in [0, l) by returning [0, 2^252) which has a // negligibly different distribution (the former has a 2^-127.6 chance // of being out of the latter range). 
- rand.Read(s.s[:]) - s.s[31] &= (1 << 4) - 1 + rand.Read(s[:]) + s[31] &= (1 << 4) - 1 } - return reflect.ValueOf(s) + + val := Scalar{} + fiat_sc255_from_bytes(&val.s, &s) + fiat_sc255_to_montgomery(&val.s, &val.s) + + return reflect.ValueOf(val) } // quickCheckConfig1024 will make each quickcheck test run (1024 * -quickchecks) @@ -50,7 +55,7 @@ var quickCheckConfig1024 = &quick.Config{MaxCountScale: 1 << 10} func TestScalarGenerate(t *testing.T) { f := func(sc Scalar) bool { - return isReduced(&sc) + return isReduced(sc.Bytes()) } if err := quick.Check(f, quickCheckConfig1024); err != nil { t.Errorf("generated unreduced scalar: %v", err) @@ -64,7 +69,8 @@ func TestScalarSetCanonicalBytes(t *testing.T) { if _, err := sc.SetCanonicalBytes(in[:]); err != nil { return false } - return bytes.Equal(in[:], sc.Bytes()) && isReduced(&sc) + repr := sc.Bytes() + return bytes.Equal(in[:], repr) && isReduced(repr) } if err := quick.Check(f1, quickCheckConfig1024); err != nil { t.Errorf("failed bytes->scalar->bytes round-trip: %v", err) @@ -80,7 +86,7 @@ func TestScalarSetCanonicalBytes(t *testing.T) { t.Errorf("failed scalar->bytes->scalar round-trip: %v", err) } - b := scMinusOne.s + b := scMinusOneBytes b[31] += 1 s := scOne if out, err := s.SetCanonicalBytes(b[:]); err == nil { @@ -97,10 +103,11 @@ func TestScalarSetUniformBytes(t *testing.T) { mod.Add(mod, new(big.Int).Lsh(big.NewInt(1), 252)) f := func(in [64]byte, sc Scalar) bool { sc.SetUniformBytes(in[:]) - if !isReduced(&sc) { + repr := sc.Bytes() + if !isReduced(repr) { return false } - scBig := bigIntFromLittleEndianBytes(sc.s[:]) + scBig := bigIntFromLittleEndianBytes(repr[:]) inBig := bigIntFromLittleEndianBytes(in[:]) return inBig.Mod(inBig, mod).Cmp(scBig) == 0 } @@ -159,7 +166,9 @@ func TestScalarMultiplyDistributesOverAdd(t *testing.T) { t3.Multiply(&y, &z) t2.Add(&t2, &t3) - return t1 == t2 && isReduced(&t1) && isReduced(&t3) + reprT1, reprT2 := t1.Bytes(), t2.Bytes() + + return t1 == t2 && 
isReduced(reprT1) && isReduced(reprT2) } if err := quick.Check(multiplyDistributesOverAdd, quickCheckConfig1024); err != nil { @@ -178,7 +187,7 @@ func TestScalarAddLikeSubNeg(t *testing.T) { t2.Negate(&y) t2.Add(&t2, &x) - return t1 == t2 && isReduced(&t1) + return t1 == t2 && isReduced(t1.Bytes()) } if err := quick.Check(addLikeSubNeg, quickCheckConfig1024); err != nil { @@ -187,12 +196,13 @@ func TestScalarAddLikeSubNeg(t *testing.T) { } func TestScalarNonAdjacentForm(t *testing.T) { - s := Scalar{[32]byte{ + s, _ := (&Scalar{}).SetCanonicalBytes([]byte{ 0x1a, 0x0e, 0x97, 0x8a, 0x90, 0xf6, 0x62, 0x2d, 0x37, 0x47, 0x02, 0x3f, 0x8a, 0xd8, 0x26, 0x4d, 0xa7, 0x58, 0xaa, 0x1b, 0x88, 0xe0, 0x40, 0xd1, 0x58, 0x9e, 0x7b, 0x7f, 0x23, 0x76, 0xef, 0x09, - }} + }) + expectedNaf := [256]int8{ 0, 13, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, -9, 0, 0, 0, 0, -11, 0, 0, 0, 0, 3, 0, 0, 0, 0, 1, 0, 0, 0, 0, 9, 0, 0, 0, 0, -5, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 11, 0, 0, 0, 0, 11, 0, 0, 0, 0, 0, @@ -217,7 +227,8 @@ type notZeroScalar Scalar func (notZeroScalar) Generate(rand *mathrand.Rand, size int) reflect.Value { var s Scalar - for s == scZero { + var isNonZero uint64 + for fiat_sc255_nonzero(&isNonZero, (*[4]uint64)(&s.s)); isNonZero == 0; { s = Scalar{}.Generate(rand, size).Interface().(Scalar) } return reflect.ValueOf(notZeroScalar(s)) diff --git a/scalarmult_test.go b/scalarmult_test.go index 1760603..6c92ab3 100644 --- a/scalarmult_test.go +++ b/scalarmult_test.go @@ -15,7 +15,7 @@ var ( quickCheckConfig32 = &quick.Config{MaxCountScale: 1 << 5} // a random scalar generated using dalek. 
- dalekScalar = Scalar{[32]byte{219, 106, 114, 9, 174, 249, 155, 89, 69, 203, 201, 93, 92, 116, 234, 187, 78, 115, 103, 172, 182, 98, 62, 103, 187, 136, 13, 100, 248, 110, 12, 4}} + dalekScalar, _ = (&Scalar{}).SetCanonicalBytes([]byte{219, 106, 114, 9, 174, 249, 155, 89, 69, 203, 201, 93, 92, 116, 234, 187, 78, 115, 103, 172, 182, 98, 62, 103, 187, 136, 13, 100, 248, 110, 12, 4}) // the above, times the edwards25519 basepoint. dalekScalarBasepoint, _ = new(Point).SetBytes([]byte{0xf4, 0xef, 0x7c, 0xa, 0x34, 0x55, 0x7b, 0x9f, 0x72, 0x3b, 0xb6, 0x1e, 0xf9, 0x46, 0x9, 0x91, 0x1c, 0xb9, 0xc0, 0x6c, 0x17, 0x28, 0x2d, 0x8b, 0x43, 0x2b, 0x5, 0x18, 0x6a, 0x54, 0x3e, 0x48}) ) @@ -29,8 +29,8 @@ func TestScalarMultSmallScalars(t *testing.T) { } checkOnCurve(t, &p) - z = Scalar{[32]byte{1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}} - p.ScalarMult(&z, B) + scEight, _ := (&Scalar{}).SetCanonicalBytes([]byte{1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}) + p.ScalarMult(scEight, B) if B.Equal(&p) != 1 { t.Error("1*B != 1") } @@ -39,7 +39,7 @@ func TestScalarMultSmallScalars(t *testing.T) { func TestScalarMultVsDalek(t *testing.T) { var p Point - p.ScalarMult(&dalekScalar, B) + p.ScalarMult(dalekScalar, B) if dalekScalarBasepoint.Equal(&p) != 1 { t.Error("Scalar mul does not match dalek") } @@ -48,7 +48,7 @@ func TestScalarMultVsDalek(t *testing.T) { func TestBaseMultVsDalek(t *testing.T) { var p Point - p.ScalarBaseMult(&dalekScalar) + p.ScalarBaseMult(dalekScalar) if dalekScalarBasepoint.Equal(&p) != 1 { t.Error("Scalar mul does not match dalek") } @@ -58,12 +58,12 @@ func TestBaseMultVsDalek(t *testing.T) { func TestVarTimeDoubleBaseMultVsDalek(t *testing.T) { var p Point var z Scalar - p.VarTimeDoubleScalarBaseMult(&dalekScalar, B, &z) + p.VarTimeDoubleScalarBaseMult(dalekScalar, B, &z) if dalekScalarBasepoint.Equal(&p) != 1 { t.Error("VarTimeDoubleScalarBaseMult fails with 
b=0") } checkOnCurve(t, &p) - p.VarTimeDoubleScalarBaseMult(&z, B, &dalekScalar) + p.VarTimeDoubleScalarBaseMult(&z, B, dalekScalar) if dalekScalarBasepoint.Equal(&p) != 1 { t.Error("VarTimeDoubleScalarBaseMult fails with a=0") } @@ -188,7 +188,7 @@ func BenchmarkScalarBaseMult(b *testing.B) { var p Point for i := 0; i < b.N; i++ { - p.ScalarBaseMult(&dalekScalar) + p.ScalarBaseMult(dalekScalar) } } @@ -196,7 +196,7 @@ func BenchmarkScalarMult(b *testing.B) { var p Point for i := 0; i < b.N; i++ { - p.ScalarMult(&dalekScalar, B) + p.ScalarMult(dalekScalar, B) } } @@ -204,6 +204,6 @@ func BenchmarkVarTimeDoubleScalarBaseMult(b *testing.B) { var p Point for i := 0; i < b.N; i++ { - p.VarTimeDoubleScalarBaseMult(&dalekScalar, B, &dalekScalar) + p.VarTimeDoubleScalarBaseMult(dalekScalar, B, dalekScalar) } }