From bb0ea50de861cc22aeada7e3a1cb43bc30ef381c Mon Sep 17 00:00:00 2001
From: Gregory Maxwell
Date: Wed, 22 Apr 2015 00:20:54 +0000
Subject: [PATCH] Replace set/add with cmov in secp256k1_gej_add_ge.

Use a conditional move of the same kind we use for the affine points in
the storage type, instead of multiplying with the infinity flag and
adding. This leaves fewer constructions to worry about for side-channel
behavior.

It may also be faster: it does not benchmark as slower for me, at
least. I think the cmov is faster than the mul_int + add it replaces,
but slower than the set + add, making it roughly a wash.
---
 src/field.h            |  3 +++
 src/field_10x26_impl.h | 20 ++++++++++++++++++++
 src/field_5x52_impl.h  | 15 +++++++++++++++
 src/group_impl.h       | 14 ++++++--------
 4 files changed, 44 insertions(+), 8 deletions(-)

diff --git a/src/field.h b/src/field.h
index 9e6d7d3c04..41b280892d 100644
--- a/src/field.h
+++ b/src/field.h
@@ -113,4 +113,7 @@ static void secp256k1_fe_from_storage(secp256k1_fe_t *r, const secp256k1_fe_stor
 /** If flag is true, set *r equal to *a; otherwise leave it. Constant-time. */
 static void secp256k1_fe_storage_cmov(secp256k1_fe_storage_t *r, const secp256k1_fe_storage_t *a, int flag);
 
+/** If flag is true, set *r equal to *a; otherwise leave it. Constant-time. */
+static void secp256k1_fe_cmov(secp256k1_fe_t *r, const secp256k1_fe_t *a, int flag);
+
 #endif
diff --git a/src/field_10x26_impl.h b/src/field_10x26_impl.h
index b32a666f53..871b91f912 100644
--- a/src/field_10x26_impl.h
+++ b/src/field_10x26_impl.h
@@ -1068,6 +1068,26 @@ static void secp256k1_fe_sqr(secp256k1_fe_t *r, const secp256k1_fe_t *a) {
 #endif
 }
 
+static SECP256K1_INLINE void secp256k1_fe_cmov(secp256k1_fe_t *r, const secp256k1_fe_t *a, int flag) {
+    uint32_t mask0, mask1;
+    mask0 = flag + ~((uint32_t)0);
+    mask1 = ~mask0;
+    r->n[0] = (r->n[0] & mask0) | (a->n[0] & mask1);
+    r->n[1] = (r->n[1] & mask0) | (a->n[1] & mask1);
+    r->n[2] = (r->n[2] & mask0) | (a->n[2] & mask1);
+    r->n[3] = (r->n[3] & mask0) | (a->n[3] & mask1);
+    r->n[4] = (r->n[4] & mask0) | (a->n[4] & mask1);
+    r->n[5] = (r->n[5] & mask0) | (a->n[5] & mask1);
+    r->n[6] = (r->n[6] & mask0) | (a->n[6] & mask1);
+    r->n[7] = (r->n[7] & mask0) | (a->n[7] & mask1);
+    r->n[8] = (r->n[8] & mask0) | (a->n[8] & mask1);
+    r->n[9] = (r->n[9] & mask0) | (a->n[9] & mask1);
+#ifdef VERIFY
+    r->magnitude = (r->magnitude & mask0) | (a->magnitude & mask1);
+    r->normalized = (r->normalized & mask0) | (a->normalized & mask1);
+#endif
+}
+
 static SECP256K1_INLINE void secp256k1_fe_storage_cmov(secp256k1_fe_storage_t *r, const secp256k1_fe_storage_t *a, int flag) {
     uint32_t mask0, mask1;
     mask0 = flag + ~((uint32_t)0);
diff --git a/src/field_5x52_impl.h b/src/field_5x52_impl.h
index 874d3caaba..bda4c3dfc2 100644
--- a/src/field_5x52_impl.h
+++ b/src/field_5x52_impl.h
@@ -404,6 +404,21 @@ static void secp256k1_fe_sqr(secp256k1_fe_t *r, const secp256k1_fe_t *a) {
 #endif
 }
 
+static SECP256K1_INLINE void secp256k1_fe_cmov(secp256k1_fe_t *r, const secp256k1_fe_t *a, int flag) {
+    uint64_t mask0, mask1;
+    mask0 = flag + ~((uint64_t)0);
+    mask1 = ~mask0;
+    r->n[0] = (r->n[0] & mask0) | (a->n[0] & mask1);
+    r->n[1] = (r->n[1] & mask0) | (a->n[1] & mask1);
+    r->n[2] = (r->n[2] & mask0) | (a->n[2] & mask1);
+    r->n[3] = (r->n[3] & mask0) | (a->n[3] & mask1);
+    r->n[4] = (r->n[4] & mask0) | (a->n[4] & mask1);
+#ifdef VERIFY
+    r->magnitude = (r->magnitude & mask0) | (a->magnitude & mask1);
+    r->normalized = (r->normalized & mask0) | (a->normalized & mask1);
+#endif
+}
+
 static SECP256K1_INLINE void secp256k1_fe_storage_cmov(secp256k1_fe_storage_t *r, const secp256k1_fe_storage_t *a, int flag) {
     uint64_t mask0, mask1;
     mask0 = flag + ~((uint64_t)0);
diff --git a/src/group_impl.h b/src/group_impl.h
index 0d1c7b02ff..131393ff7b 100644
--- a/src/group_impl.h
+++ b/src/group_impl.h
@@ -325,7 +325,8 @@ static void secp256k1_gej_add_ge_var(secp256k1_gej_t *r, const secp256k1_gej_t *
 }
 
 static void secp256k1_gej_add_ge(secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_ge_t *b) {
-    /* Operations: 7 mul, 5 sqr, 5 normalize, 19 mul_int/add/negate */
+    /* Operations: 7 mul, 5 sqr, 5 normalize, 17 mul_int/add/negate/cmov */
+    static const secp256k1_fe_t fe_1 = SECP256K1_FE_CONST(0, 0, 0, 0, 0, 0, 0, 1);
     secp256k1_fe_t zz, u1, u2, s1, s2, z, t, m, n, q, rr;
     int infinity;
     VERIFY_CHECK(!b->infinity);
@@ -387,14 +388,11 @@ static void secp256k1_gej_add_ge(secp256k1_gej_t *r, const secp256k1_gej_t *a, c
     secp256k1_fe_mul_int(&r->y, 4 * (1 - a->infinity));  /* r->y = Y3 = 4*R*(3*Q-2*R^2)-4*M^4 (4) */
 
     /** In case a->infinity == 1, the above code results in r->x, r->y, and r->z all equal to 0.
-     * Add b->x to x, b->y to y, and 1 to z in that case.
+     * Replace r with b->x, b->y, 1 in that case.
      */
-    t = b->x; secp256k1_fe_mul_int(&t, a->infinity);
-    secp256k1_fe_add(&r->x, &t);
-    t = b->y; secp256k1_fe_mul_int(&t, a->infinity);
-    secp256k1_fe_add(&r->y, &t);
-    secp256k1_fe_set_int(&t, a->infinity);
-    secp256k1_fe_add(&r->z, &t);
+    secp256k1_fe_cmov(&r->x, &b->x, a->infinity);
+    secp256k1_fe_cmov(&r->y, &b->y, a->infinity);
+    secp256k1_fe_cmov(&r->z, &fe_1, a->infinity);
 
     r->infinity = infinity;
 }
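
A minimal standalone sketch of the masking idiom the new cmov functions
use, for readers unfamiliar with it (not part of the patch; cmov_limbs
and the demo values below are illustrative only). mask0 = flag + ~0 is
flag - 1 in two's complement: flag == 0 yields an all-ones mask0 that
keeps r's limbs, flag == 1 yields an all-zero mask0 so mask1 = ~mask0
selects a's limbs. Every limb is combined with the same ANDs and ORs
regardless of flag, so there is no data-dependent branch.

#include <stdint.h>
#include <stdio.h>
#include <stddef.h>

/* Constant-time conditional move over an n-limb array:
 * if flag is nonzero (expected 0 or 1), copy a into r; otherwise leave r. */
static void cmov_limbs(uint32_t *r, const uint32_t *a, size_t n, int flag) {
    uint32_t mask0 = flag + ~((uint32_t)0); /* flag==0 -> 0xFFFFFFFF, flag==1 -> 0 */
    uint32_t mask1 = ~mask0;                /* selects the other operand */
    size_t i;
    for (i = 0; i < n; i++) {
        r[i] = (r[i] & mask0) | (a[i] & mask1);
    }
}

int main(void) {
    uint32_t r[4] = {1, 2, 3, 4};
    uint32_t a[4] = {9, 9, 9, 9};
    cmov_limbs(r, a, 4, 0);   /* flag = 0: r is left unchanged */
    printf("%u %u %u %u\n", (unsigned)r[0], (unsigned)r[1], (unsigned)r[2], (unsigned)r[3]);
    cmov_limbs(r, a, 4, 1);   /* flag = 1: r becomes a copy of a */
    printf("%u %u %u %u\n", (unsigned)r[0], (unsigned)r[1], (unsigned)r[2], (unsigned)r[3]);
    return 0;
}

This is the same construction already used by secp256k1_fe_storage_cmov;
the patch simply extends it to the working field representation so the
infinity case in secp256k1_gej_add_ge can select (b->x, b->y, 1) without
the mul_int/add sequence.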