edwards25519/fe_generic.go
Filippo Valsorda d3569cbbb3 all: flatten the package and make FieldElement opaque
For the license changes, see gtank/ristretto255-private#28 and
gtank/ristretto255#32, that contribute all code in those repositories to
the Go project under the Google CLA.
2020-09-28 14:18:44 +02:00

180 lines
5.1 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Copyright (c) 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package edwards25519
func feMulGeneric(v, x, y *FieldElement) {
x0 := x.l0
x1 := x.l1
x2 := x.l2
x3 := x.l3
x4 := x.l4
y0 := y.l0
y1 := y.l1
y2 := y.l2
y3 := y.l3
y4 := y.l4
// Reduction can be carried out simultaneously to multiplication. For
// example, we do not compute a coefficient r_5 . Whenever the result of a
// mul instruction belongs to r_5 , for example in the multiplication of
// x_3*y_2 , we multiply one of the inputs by 19 and add the result to r_0.
x1_19 := x1 * 19
x2_19 := x2 * 19
x3_19 := x3 * 19
x4_19 := x4 * 19
// calculate r0 = x0*y0 + 19*(x1*y4 + x2*y3 + x3*y2 + x4*y1)
r00, r01 := madd64(0, 0, x0, y0)
r00, r01 = madd64(r00, r01, x1_19, y4)
r00, r01 = madd64(r00, r01, x2_19, y3)
r00, r01 = madd64(r00, r01, x3_19, y2)
r00, r01 = madd64(r00, r01, x4_19, y1)
// calculate r1 = x0*y1 + x1*y0 + 19*(x2*y4 + x3*y3 + x4*y2)
r10, r11 := madd64(0, 0, x0, y1)
r10, r11 = madd64(r10, r11, x1, y0)
r10, r11 = madd64(r10, r11, x2_19, y4)
r10, r11 = madd64(r10, r11, x3_19, y3)
r10, r11 = madd64(r10, r11, x4_19, y2)
// calculate r2 = x0*y2 + x1*y1 + x2*y0 + 19*(x3*y4 + x4*y3)
r20, r21 := madd64(0, 0, x0, y2)
r20, r21 = madd64(r20, r21, x1, y1)
r20, r21 = madd64(r20, r21, x2, y0)
r20, r21 = madd64(r20, r21, x3_19, y4)
r20, r21 = madd64(r20, r21, x4_19, y3)
// calculate r3 = x0*y3 + x1*y2 + x2*y1 + x3*y0 + 19*x4*y4
r30, r31 := madd64(0, 0, x0, y3)
r30, r31 = madd64(r30, r31, x1, y2)
r30, r31 = madd64(r30, r31, x2, y1)
r30, r31 = madd64(r30, r31, x3, y0)
r30, r31 = madd64(r30, r31, x4_19, y4)
// calculate r4 = x0*y4 + x1*y3 + x2*y2 + x3*y1 + x4*y0
r40, r41 := madd64(0, 0, x0, y4)
r40, r41 = madd64(r40, r41, x1, y3)
r40, r41 = madd64(r40, r41, x2, y2)
r40, r41 = madd64(r40, r41, x3, y1)
r40, r41 = madd64(r40, r41, x4, y0)
// After the multiplication we need to reduce (carry) the 5 coefficients to
// obtain a result with coefficients that are at most slightly larger than
// 2^51 . Denote the two registers holding coefficient r_0 as r_00 and r_01
// with r_0 = 2^64*r_01 + r_00 . Similarly denote the two registers holding
// coefficient r_1 as r_10 and r_11 . We first shift r_01 left by 13, while
// shifting in the most significant bits of r_00 (shld instruction) and
// then compute the logical and of r_00 with 2^51 1. We do the same with
// r_10 and r_11 and add r_01 into r_10 after the logical and with 2^51
// 1. We proceed this way for coefficients r_2,...,r_4; register r_41 is
// multiplied by 19 before adding it to r_00 .
r01 = (r01 << 13) | (r00 >> 51)
r00 &= maskLow51Bits
r11 = (r11 << 13) | (r10 >> 51)
r10 &= maskLow51Bits
r10 += r01
r21 = (r21 << 13) | (r20 >> 51)
r20 &= maskLow51Bits
r20 += r11
r31 = (r31 << 13) | (r30 >> 51)
r30 &= maskLow51Bits
r30 += r21
r41 = (r41 << 13) | (r40 >> 51)
r40 &= maskLow51Bits
r40 += r31
r41 *= 19
r00 += r41
// Now all 5 coefficients fit into 64-bit registers but are still too large
// to be used as input to another multiplication. We therefore carry from
// r_0 to r_1 , from r_1 to r_2 , from r_2 to r_3 , from r_3 to r_4 , and
// finally from r_4 to r_0 . Each of these carries is done as one copy, one
// right shift by 51, one logical and with 2^51 1, and one addition.
*v = FieldElement{r00, r10, r20, r30, r40}
v.carryPropagate1().carryPropagate2()
}
func feSquareGeneric(v, x *FieldElement) {
// Squaring needs only 15 mul instructions. Some inputs are multiplied by 2;
// this is combined with multiplication by 19 where possible. The coefficient
// reduction after squaring is the same as for multiplication.
x0 := x.l0
x1 := x.l1
x2 := x.l2
x3 := x.l3
x4 := x.l4
x0_2 := x0 << 1
x1_2 := x1 << 1
x1_38 := x1 * 38
x2_38 := x2 * 38
x3_38 := x3 * 38
x3_19 := x3 * 19
x4_19 := x4 * 19
// r0 = x0*x0 + x1*38*x4 + x2*38*x3
r00, r01 := madd64(0, 0, x0, x0)
r00, r01 = madd64(r00, r01, x1_38, x4)
r00, r01 = madd64(r00, r01, x2_38, x3)
// r1 = x0*2*x1 + x2*38*x4 + x3*19*x3
r10, r11 := madd64(0, 0, x0_2, x1)
r10, r11 = madd64(r10, r11, x2_38, x4)
r10, r11 = madd64(r10, r11, x3_19, x3)
// r2 = x0*2*x2 + x1*x1 + x3*38*x4
r20, r21 := madd64(0, 0, x0_2, x2)
r20, r21 = madd64(r20, r21, x1, x1)
r20, r21 = madd64(r20, r21, x3_38, x4)
// r3 = x0*2*x3 + x1*2*x2 + x4*19*x4
r30, r31 := madd64(0, 0, x0_2, x3)
r30, r31 = madd64(r30, r31, x1_2, x2)
r30, r31 = madd64(r30, r31, x4_19, x4)
// r4 = x0*2*x4 + x1*2*x3 + x2*x2
r40, r41 := madd64(0, 0, x0_2, x4)
r40, r41 = madd64(r40, r41, x1_2, x3)
r40, r41 = madd64(r40, r41, x2, x2)
// Same reduction
r01 = (r01 << 13) | (r00 >> 51)
r00 &= maskLow51Bits
r11 = (r11 << 13) | (r10 >> 51)
r10 &= maskLow51Bits
r10 += r01
r21 = (r21 << 13) | (r20 >> 51)
r20 &= maskLow51Bits
r20 += r11
r31 = (r31 << 13) | (r30 >> 51)
r30 &= maskLow51Bits
r30 += r21
r41 = (r41 << 13) | (r40 >> 51)
r40 &= maskLow51Bits
r40 += r31
r41 *= 19
r00 += r41
*v = FieldElement{r00, r10, r20, r30, r40}
v.carryPropagate1().carryPropagate2()
}