Remove 5x64 implementation - no actual benefits

2025-01-25 18:53:23 -03:00 · 2013-12-01 21:10:35 +01:00 · 2013-12-01 21:10:35 +01:00 · 60442b835f
commit 60442b835f
parent 399c03f227
9 changed files with 3 additions and 764 deletions
--- a/5
+++ b/5
@ -8,7 +8,7 @@ JAVA_FILES := src/java/org_bitcoin_NativeSecp256k1.h src/java/org_bitcoin_Native
 OBJS :=

 ifeq ($(USE_ASM), 1)
-    OBJS := $(OBJS) obj/field_5x$(HAVE_LIMB)_asm.o
+    OBJS := $(OBJS) obj/field_5x52_asm.o
 endif
 STD="gnu99"

@ -20,9 +20,6 @@ clean:
 obj/field_5x52_asm.o: src/field_5x52_asm.asm
 	$(YASM) -f elf64 -o obj/field_5x52_asm.o src/field_5x52_asm.asm

-obj/field_5x64_asm.o: src/field_5x64_asm.asm
-	$(YASM) -f elf64 -o obj/field_5x64_asm.o src/field_5x64_asm.asm
-
 obj/secp256k1.o: $(FILES) src/secp256k1.c include/secp256k1.h
 	$(CC) -fPIC -std=$(STD) $(CFLAGS) $(CFLAGS_EXTRA) -DNDEBUG -$(OPTLEVEL) src/secp256k1.c -c -o obj/secp256k1.o

--- a/14
+++ b/14
@ -1,14 +0,0 @@
-#!/bin/bash
-echo "Benchmark Results" >output.txt
-for j in yasm; do
-    echo "5x64 $j:" >>output.txt
-    for i in O0 O1 O2 O3; do
-        make clean
-	./configure --use-5x64 --$j
-	echo "OPTLEVEL=$i" >>config.mk
-	make bench
-	echo "OPTLEVEL=$i" >>output.txt
-	(time ./bench) |& grep real >>output.txt
-    done
-done
-    
--- a/11
+++ b/11
@ -97,9 +97,6 @@ if [ "$?" = 0 ]; then
    HAVE_INT128=1
 fi

-#default limb size
-HAVE_LIMB=52
-
 for arg in "$@"; do
    case "$arg" in
        --no-yasm)
@ -110,9 +107,6 @@ for arg in "$@"; do
            ;;
        --no-openssl)
            HAVE_OPENSSL=0
-	    ;;
-	--use-5x64)
-	    HAVE_LIMB=64
            ;;
        --use-endomorphism)
            USE_ENDOMORPHISM=1
@ -126,10 +120,10 @@ USE_ASM=0

 # select field implementation
 if [ "$HAVE_YASM" = "1" ]; then
-    CFLAGS_FIELD="-DUSE_FIELD_5X$HAVE_LIMB -DUSE_FIELD_5X${HAVE_LIMB}_ASM"
+    CFLAGS_FIELD="-DUSE_FIELD_5X52 -DUSE_FIELD_5X52_ASM"
    USE_ASM=1
 elif [ "$HAVE_INT128" = "1" ]; then
-    CFLAGS_FIELD="-DUSE_FIELD_5X$HAVE_LIMB -DUSE_FIELD_5X${HAVE_LIMB}_INT128"
+    CFLAGS_FIELD="-DUSE_FIELD_5X52 -DUSE_FIELD_5X52_INT128"
 elif [ "$HAVE_GMP" = "1" ]; then
    CFLAGS_FIELD="-DUSE_FIELD_GMP"
    LINK_GMP=1
@ -176,5 +170,4 @@ echo "CFLAGS_TEST_EXTRA=$CFLAGS_TEST_EXTRA" >> config.mk
 echo "LDFLAGS_EXTRA=$LDFLAGS_EXTRA" >> config.mk
 echo "LDFLAGS_TEST_EXTRA=$LDFLAGS_TEST_EXTRA" >> config.mk
 echo "USE_ASM=$USE_ASM" >>config.mk
-echo "HAVE_LIMB=$HAVE_LIMB" >>config.mk
 echo "OPTLEVEL=O2" >>config.mk
--- a/src/field.h
+++ b/src/field.h
@ -22,8 +22,6 @@
 #include "field_10x26.h"
 #elif defined(USE_FIELD_5X52)
 #include "field_5x52.h"
-#elif defined(USE_FIELD_5X64)
-#include "field_5x64.h"
 #else
 #error "Please select field implementation"
 #endif
--- a/src/field_5x64.h
+++ b/src/field_5x64.h
@ -1,19 +0,0 @@
-// Copyright (c) 2013 Pieter Wuille
-// Distributed under the MIT/X11 software license, see the accompanying
-// file COPYING or http://www.opensource.org/licenses/mit-license.php.
-
-#ifndef _SECP256K1_FIELD_REPR_
-#define _SECP256K1_FIELD_REPR_
-
-#include <stdint.h>
-
-typedef struct {
-    // X = sum(i=0..4, n[i]*2^(64*i)) mod p, where p is the field prime
-    uint64_t n[5];
-#ifdef VERIFY
-    int reduced; // n[4] == 0
-    int normalized; // reduced and X < 2^256 - 0x100003D1
-#endif
-} secp256k1_fe_t;
-
-#endif
--- a/src/field_5x64_asm.asm
+++ b/src/field_5x64_asm.asm
@ -1,332 +0,0 @@
-	;; Added by Diederik Huys, March 2013
-	;;
-	;; Provided public procedures:
-	;; 	secp256k1_fe_mul_inner
-	;; 	secp256k1_fe_sqr_inner
-	;;
-	;; Needed tools: YASM (http://yasm.tortall.net)
-	;;
-	;; 
-
-	BITS 64
-
-COMP_LIMB EQU 000000001000003D1h
-	
-	;;  Procedure ExSetMult
-	;;  Register Layout:
-	;;  INPUT: 	rdi	= a->n
-	;; 	   	rsi  	= b->n
-	;; 	   	rdx  	= r->n
-	;; 
-	;;  INTERNAL:	rdx:rax  = multiplication accumulator
-	;; 		r8-r10   = c0-c2
-	;; 		r11-r14  = b.n[0]-b.n[3]; r11-r15 later hold r3-r7
-	;; 		rbx	 = r0
-	;; 		rcx	 = r1
-	;; 		rbp	 = r2
-	;; 	  
-	GLOBAL secp256k1_fe_mul_inner
-	ALIGN 32
-secp256k1_fe_mul_inner:
-	push rbp
-	push rbx
-	push r12
-	push r13
-	push r14
-	push r15
-	push rdx
-
-	mov r11,[rsi+8*0]	; preload b.n[0]
-
-	;;  step 1: mul_c2
-   	mov rax,[rdi+0*8]	; load a.n[0]
-	mul r11			; rdx:rax=a.n[0]*b.n[0]
-	mov r12,[rsi+1*8]	; preload b.n[1]
-	mov rbx,rax		; retire LO qword (r[0])
-	mov r8,rdx		; save overflow
-	xor r9,r9		; overflow HO qwords
-	xor r10,r10
-	
-	;; c+=a.n[0] * b.n[1] + a.n[1] * b.n[0]
-	mov rax,[rdi+0*8]
-	mul r12				
-	mov r13,[rsi+2*8]	; preload b.n[2]
-	add r8,rax		; still the same :-)
-	adc r9,rdx		; 
-	adc r10,0		; mmm...
-	
-	mov rax,[rdi+1*8]
-	mul r11			
-	add r8,rax
-	adc r9,rdx
-	adc r10,0
-	mov rcx,r8	; retire r[1]
-	xor r8,r8
-	
-	;; c+=a.n[0 1 2] * b.n[2 1 0]
-	mov rax,[rdi+0*8]
-	mul r13			
-	mov r14,[rsi+3*8]	; preload b.n[3]
-	add r9,rax
-	adc r10,rdx
-	adc r8,0
-	
-	mov rax,[rdi+1*8]
-	mul r12			
-	add r9,rax
-	adc r10,rdx
-	adc r8,0
-	
-	mov rax,[rdi+2*8]
-	mul r11
-	add r9,rax
-	adc r10,rdx
-	adc r8,0
-	mov rbp,r9		; retire r[2]
-	xor r9,r9
-
-	;; c+=a.n[0 1 2 3] * b.n[3 2 1 0]
-	mov rax,[rdi+0*8]
-	mul r14		
-	add r10,rax
-	adc r8,rdx
-	adc r9,0
-
-	mov rax,[rdi+1*8]
-	mul r13
-	add r10,rax
-	adc r8,rdx
-	adc r9,0
-
-	mov rax,[rdi+2*8]
-	mul r12
-	add r10,rax
-	adc r8,rdx
-	adc r9,0
-	
-	mov rax,[rdi+3*8]
-	mul r11			
-	add r10,rax
-	adc r8,rdx
-	adc r9,0
-	mov r11,r10		; retire r[3]
-	xor r10,r10
-
-	;; c+=a.n[1 2 3] * b.n[3 2 1]
-	mov rax,[rdi+1*8]
-	mul r14		
-	add r8,rax
-	adc r9,rdx
-	adc r10,0
-	
-	mov rax,[rdi+2*8]
-	mul r13		
-	add r8,rax
-	adc r9,rdx
-	adc r10,0
-	
-	mov rax,[rdi+3*8]
-	mul r12
-	add r8,rax
-	adc r9,rdx
-	adc r10,0
-	mov r12,r8		; retire r[4]
-	xor r8,r8		
-
-	;; c+=a.n[2 3] * b.n[3 2]
-	mov rax,[rdi+2*8]
-	mul r14
-	add r9,rax		; still the same :-)
-	adc r10,rdx		; 
-	adc r8,0		; mmm...
-	
-	mov rax,[rdi+3*8]
-	mul r13		
-	add r9,rax
-	adc r10,rdx
-	adc r8,0
-	mov r13,r9		; retire r[5]
-	xor r9,r9
-
-	;; c+=a.n[3] * b.n[3]
-	mov rax,[rdi+3*8]
-	mul r14
-	add r10,rax
-	adc r8,rdx
-	
-	mov r14,r10
-	mov r15,r8
-	
-
-	;; *******************************************************
-common_exit_norm:
-	mov rdi,COMP_LIMB
-	mov rax,r12
-	mul rdi
-	add rax,rbx
-	adc rcx,rdx
-	pop rbx
-	mov [rbx],rax
-
-	mov rax,r13		; get r5
-	mul rdi
-	add rax,rcx    		; +r1
-	adc rbp,rdx
-	mov [rbx+1*8],rax
-	
-	mov rax,r14		; get r6
-	mul rdi
-	add rax,rbp    		; +r2
-	adc r11,rdx
-	mov [rbx+2*8],rax
-	
-	mov rax,r15		; get r7
-	mul rdi
-	add rax,r11    		; +r3
-	adc rdx,0
-	mov [rbx+3*8],rax
-	mov [rbx+4*8],rdx
-	
-	pop r15
-	pop r14
-	pop r13
-	pop r12
-	pop rbx
-	pop rbp
-	ret
-
-	
-	;;  PROC ExSetSquare
-	;;  Register Layout:
-	;;  INPUT: 	rdi	 = a.n
-	;; 	   	rsi  	 = r.n (output limbs)
-	;;  INTERNAL:	rdx:rax  = multiplication accumulator
-	;; 		r8-r10   = c
-	;; 		r11-r14  = a.n[0]-a.n[3]; r11-r15 later hold r3-r7
-	;; 		rbx	 = r0
-	;; 		rcx	 = r1
-	;; 		rbp	 = r2
-	GLOBAL secp256k1_fe_sqr_inner
-	
-	ALIGN 32
-secp256k1_fe_sqr_inner:
-	push rbp
-	push rbx
-	push r12
-	push r13
-	push r14
-	push r15
-	push rsi
-
-	mov r11,[rdi+8*0]	; preload a.n[0]
-	
-	;;  step 1: mul_c2
-   	mov rax,r11		; load a.n[0]
-	mul rax			; rdx:rax=a.n[0]²
-	mov r12,[rdi+1*8]	; preload a.n[1]
-	mov rbx,rax		; retire LO qword (r[0])
-	mov r8,rdx		; save overflow
-	xor r9,r9		; overflow HO qwords
-	xor r10,r10
-	
-	;; c+=2*a.n[0] * a.n[1]
-	mov rax,r11		; load a.n[0]
-	mul r12			; rdx:rax=a.n[0] * a.n[1]
-	mov r13,[rdi+2*8]	; preload a.n[2]
-	add rax,rax		; rdx:rax*=2
-	adc rdx,rdx
-	adc r10,0
-	add r8,rax		; still the same :-)
-	adc r9,rdx		
-	adc r10,0		; mmm...
-	
-	mov rcx,r8		; retire r[1]
-	xor r8,r8
-
-	;; c+=2*a.n[0]*a.n[2]+a.n[1]*a.n[1]
-	mov rax,r11		; load a.n[0]
-	mul r13			; * a.n[2]
-	mov r14,[rdi+3*8]	; preload a.n[3]
-	add rax,rax		; rdx:rax*=2
-	adc rdx,rdx
-	adc r8,0
-	add r9,rax
-	adc r10,rdx
-	adc r8,0
-
-	mov rax,r12
-	mul rax
-	add r9,rax
-	adc r10,rdx
-	adc r8,0
-	
-	
-	mov rbp,r9
-	xor r9,r9
-	
-	;; c+=2*a.n[0]*a.n[3]+2*a.n[1]*a.n[2]
-	mov rax,r11		; load a.n[0]
-	mul r14			; * a.n[3]
-	add rax,rax		; rdx:rax*=2
-	adc rdx,rdx
-	adc r9,0
-	add r10,rax
-	adc r8,rdx
-	adc r9,0
-
-	mov rax,r12		; load a.n[1]
-	mul r13			; * a.n[2]
-	add rax,rax
-	adc rdx,rdx
-	adc r9,0
-	add r10,rax
-	adc r8,rdx
-	adc r9,0
-		
-	mov r11,r10
-	xor r10,r10
-
-	;; c+=2*a.n[1]*a.n[3]+a.n[2]*a.n[2]
-	mov rax,r12		; load a.n[1]
-	mul r14			; * a.n[3]
-	add rax,rax		; rdx:rax*=2
-	adc rdx,rdx
-	adc r10,0
-	add r8,rax
-	adc r9,rdx
-	adc r10,0
-
-	mov rax,r13
-	mul rax
-	add r8,rax
-	adc r9,rdx
-	adc r10,0
-
-	mov r12,r8
-	xor r8,r8
-	;; c+=2*a.n[2]*a.n[3]
-	mov rax,r13		; load a.n[2]
-	mul r14			; * a.n[3]
-	add rax,rax		; rdx:rax*=2
-	adc rdx,rdx
-	adc r8,0
-	add r9,rax
-	adc r10,rdx
-	adc r8,0
-
-	mov r13,r9
-	xor r9,r9
-
-	;; c+=a.n[3]²
-	mov rax,r14
-	mul rax
-	add r10,rax
-	adc r8,rdx
-	
-	mov r14,r10
-	mov r15,r8
-	
-	jmp common_exit_norm
-	end
-
-	
--- a/src/impl/field.h
+++ b/src/impl/field.h
@ -11,8 +11,6 @@
 #include "field_10x26.h"
 #elif defined(USE_FIELD_5X52)
 #include "field_5x52.h"
-#elif defined(USE_FIELD_5X64)
-#include "field_5x64.h"
 #else
 #error "Please select field implementation"
 #endif
--- a/src/impl/field_5x64.h
+++ b/src/impl/field_5x64.h
@ -1,371 +0,0 @@
-// Copyright (c) 2013 Pieter Wuille
-// Distributed under the MIT/X11 software license, see the accompanying
-// file COPYING or http://www.opensource.org/licenses/mit-license.php.
-
-#ifndef _SECP256K1_FIELD_REPR_IMPL_H_
-#define _SECP256K1_FIELD_REPR_IMPL_H_
-
-#include <assert.h>
-#include <string.h>
-#include "../num.h"
-#include "../field.h"
-
-#include <stdio.h>
-#include "field_5x64_asm.h"
-
-/** Implements arithmetic modulo FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFE FFFFFC2F,
- *  represented as 4 uint64_t's in base 2^64, and one overflow uint64_t.
- */
-
-#define FULL_LIMB (0xFFFFFFFFFFFFFFFFULL)
-#define LAST_LIMB (0xFFFFFFFEFFFFFC2FULL)
-#define COMP_LIMB (0x00000001000003D1ULL)
-
-void static secp256k1_fe_inner_start(void) {}
-void static secp256k1_fe_inner_stop(void) {}
-
-void static secp256k1_fe_reduce(secp256k1_fe_t *r) {
-    unsigned __int128 c = (unsigned __int128)r->n[4] * COMP_LIMB + r->n[0];
-    uint64_t n0 = c;
-    c = (c >> 64) + r->n[1];
-    uint64_t n1 = c;
-    c = (c >> 64) + r->n[2];
-    r->n[2] = c;
-    c = (c >> 64) + r->n[3];
-    r->n[3] = c;
-    c = (c >> 64) * COMP_LIMB + n0;
-    r->n[0] = c;
-    r->n[1] = n1 + (c >> 64);
-    assert(r->n[1] >= n1);
-    r->n[4] = 0;
-#ifdef VERIFY
-    r->reduced = 1;
-#endif
-}
-
-void static secp256k1_fe_normalize(secp256k1_fe_t *r) {
-    secp256k1_fe_reduce(r);
-
-    // Subtract p if result >= p
-    uint64_t mask = -(int64_t)((r->n[0] < LAST_LIMB) | (r->n[1] != ~0ULL) | (r->n[2] != ~0ULL) | (r->n[3] != ~0ULL));
-    r->n[0] -= (~mask & LAST_LIMB);
-    r->n[1] &= mask;
-    r->n[2] &= mask;
-    r->n[3] &= mask;
-    assert(r->n[4] == 0);
-
-#ifdef VERIFY
-    r->normalized = 1;
-#endif
-}
-
-void static inline secp256k1_fe_set_int(secp256k1_fe_t *r, int a) {
-    r->n[0] = a;
-    r->n[1] = r->n[2] = r->n[3] = r->n[4] = 0;
-
-#ifdef VERIFY
-    r->reduced = 1;
-    r->normalized = 1;
-#endif
-}
-
-// TODO: not constant time!
-int static inline secp256k1_fe_is_zero(const secp256k1_fe_t *a) {
-#ifdef VERIFY
-    assert(a->normalized);
-#endif
-    return (a->n[0] == 0 && a->n[1] == 0 && a->n[2] == 0 && a->n[3] == 0);
-}
-
-int static inline secp256k1_fe_is_odd(const secp256k1_fe_t *a) {
-#ifdef VERIFY
-    assert(a->normalized);
-#endif
-    return a->n[0] & 1;
-}
-
-// TODO: not constant time!
-int static inline secp256k1_fe_equal(const secp256k1_fe_t *a, const secp256k1_fe_t *b) {
-#ifdef VERIFY
-    assert(a->normalized);
-    assert(b->normalized);
-#endif
-    return (a->n[0] == b->n[0] && a->n[1] == b->n[1] && a->n[2] == b->n[2] && a->n[3] == b->n[3]);
-}
-
-void static secp256k1_fe_set_b32(secp256k1_fe_t *r, const unsigned char *a) {
-    r->n[0] = r->n[1] = r->n[2] = r->n[3] = r->n[4] = 0;
-    for (int i=0; i<32; i++) {
-        r->n[i/8] |= (uint64_t)a[31-i] << (i&7)*8;
-    }
-#ifdef VERIFY
-    r->reduced = 1;
-    r->normalized = 0;
-#endif
-}
-
-/** Convert a field element to a 32-byte big endian value. Requires the input to be normalized */
-void static secp256k1_fe_get_b32(unsigned char *r, const secp256k1_fe_t *a) {
-#ifdef VERIFY
-    assert(a->normalized);
-#endif
-    for (int i=0; i<32; i++) {
-        r[31-i] = a->n[i/8] >> ((i&7)*8);
-    }
-}
-
-void static inline secp256k1_fe_negate(secp256k1_fe_t *r, const secp256k1_fe_t *ac, int m) {
-    secp256k1_fe_t a = *ac;
-    secp256k1_fe_reduce(&a);
-    unsigned __int128 c = (unsigned __int128)(~a.n[0]) + LAST_LIMB + 1;
-    r->n[0] = c;
-    c = (c >> 64) + (~a.n[1]) + FULL_LIMB;
-    r->n[1] = c;
-    c = (c >> 64) + (~a.n[2]) + FULL_LIMB;
-    r->n[2] = c;
-    c = (c >> 64) + (~a.n[3]) + FULL_LIMB;
-    r->n[3] = c;
-    r->n[4] = 0;
-#ifdef VERIFY
-    r->reduced = 1;
-    r->normalized = 0;
-#endif
-}
-
-void static inline secp256k1_fe_mul_int(secp256k1_fe_t *r, int a) {
-#ifdef VERIFY
-    r->reduced = 0;
-    r->normalized = 0;
-#endif
-    unsigned __int128 c = (unsigned __int128)r->n[0] * a;
-    r->n[0] = c;
-    c = (c >> 64) + (unsigned __int128)r->n[1] * a;
-    r->n[1] = c;
-    c = (c >> 64) + (unsigned __int128)r->n[2] * a;
-    r->n[2] = c;
-    c = (c >> 64) + (unsigned __int128)r->n[3] * a;
-    r->n[3] = c;
-    c = (c >> 64) + (unsigned __int128)r->n[4] * a;
-    r->n[4] = c;
-}
-
-void static inline secp256k1_fe_add(secp256k1_fe_t *r, const secp256k1_fe_t *a) {
-#ifdef VERIFY
-    r->reduced = 0;
-    r->normalized = 0;
-#endif
-    unsigned __int128 c = (unsigned __int128)r->n[0] + a->n[0];
-    r->n[0] = c;
-    c = (unsigned __int128)r->n[1] + a->n[1] + (c >> 64);
-    r->n[1] = c;
-    c = (unsigned __int128)r->n[2] + a->n[2] + (c >> 64);
-    r->n[2] = c;
-    c = (unsigned __int128)r->n[3] + a->n[3] + (c >> 64);
-    r->n[3] = c;
-    c = (unsigned __int128)r->n[4] + a->n[4] + (c >> 64);
-    r->n[4] = c;
-    assert((c >> 64) == 0);
-}
-
-#if 0
-#define muladd_c3(a,b,c0,c1,c2) { \
-    unsigned __int128 q1 = ((unsigned __int128)(a)) * (b) + (c0); \
-    (c0) = q1; \
-    unsigned __int128 q2 = (q1 >> 64) + (c1) + (((unsigned __int128)(c2)) << 64); \
-    (c1) = q2; \
-    (c2) = q2 >> 64; \
-}
-
-#define sqradd_c3(a,c0,c1,c2) muladd_c3(a,a,c0,c1,c2)
-
-/*#define muladd_c3(a,b,c0,c1,c2) { \
-    unsigned __int128 q = (unsigned __int128)(a) * (b) + (c0); \
-    (c0) = q; \
-    (c1) += (q >> 64); \
-    (c2) += ((c1) < (q >> 64))?1:0; \
-}*/
-
-#define muladd2_c3(a,b,c0,c1,c2) { \
-    unsigned __int128 q = (unsigned __int128)(a) * (b); \
-    uint64_t t1 = (q >> 64); \
-    uint64_t t0 = q; \
-    uint64_t t2 = t1+t1; (c2) += (t2<t1)?1:0; \
-    t1 = t0+t0; t2 += (t1<t0)?1:0; \
-    (c0) += t1; t2 += ((c0)<t1)?1:0; \
-    (c1) += t2; (c2) += ((c1)<t2)?1:0; \
-}
-
-/*#define muladd2_c3(a,b,c0,c1,c2) { \
-    muladd_c3(a,b,c0,c1,c2); \
-    muladd_c3(a,b,c0,c1,c2); \
-}*/
-#else
-
-#define muladd_c3(a,b,c0,c1,c2) {       \
-        register uint64_t t1, t2;       \
-        asm ("mulq %3"                  \
-                : "=a"(t1),"=d"(t2)     \
-                : "a"(a),"m"(b)         \
-                : "cc");                \
-        asm ("addq %2,%0; adcq %3,%1"   \
-                : "+r"(c0),"+d"(t2)     \
-                : "a"(t1),"g"(0)        \
-                : "cc");                \
-        asm ("addq %2,%0; adcq %3,%1"   \
-                : "+r"(c1),"+r"(c2)     \
-                : "d"(t2),"g"(0)        \
-                : "cc");                \
-        }
-
-#define sqradd_c3(a,c0,c1,c2) {         \
-        register uint64_t t1, t2;       \
-        asm ("mulq %2"                  \
-                : "=a"(t1),"=d"(t2)     \
-                : "a"(a)                \
-                : "cc");                \
-        asm ("addq %2,%0; adcq %3,%1"   \
-                : "+r"(c0),"+d"(t2)     \
-                : "a"(t1),"g"(0)        \
-                : "cc");                \
-        asm ("addq %2,%0; adcq %3,%1"   \
-                : "+r"(c1),"+r"(c2)     \
-                : "d"(t2),"g"(0)        \
-                : "cc");                \
-        }
-
-#define muladd2_c3(a,b,c0,c1,c2) {      \
-        register uint64_t t1, t2;       \
-        asm ("mulq %3"                  \
-                : "=a"(t1),"=d"(t2)     \
-                : "a"(a),"m"(b)         \
-                : "cc");                \
-        asm ("addq %0,%0; adcq %2,%1"   \
-                : "+d"(t2),"+r"(c2)     \
-                : "g"(0)                \
-                : "cc");                \
-        asm ("addq %0,%0; adcq %2,%1"   \
-                : "+a"(t1),"+d"(t2)     \
-                : "g"(0)                \
-                : "cc");                \
-        asm ("addq %2,%0; adcq %3,%1"   \
-                : "+r"(c0),"+d"(t2)     \
-                : "a"(t1),"g"(0)        \
-                : "cc");                \
-        asm ("addq %2,%0; adcq %3,%1"   \
-                : "+r"(c1),"+r"(c2)     \
-                : "d"(t2),"g"(0)        \
-                : "cc");                \
-        }
-#endif
-
-#define mul_c2(a,b,c0,c1) { \
-    unsigned __int128 q = (unsigned __int128)(a) * (b); \
-    (c0) = q; \
-    (c1) = (q >> 64); \
-}
-
-void static secp256k1_fe_mul(secp256k1_fe_t *r, const secp256k1_fe_t *ac, const secp256k1_fe_t *bc) {
-
-    secp256k1_fe_t a = *ac, b = *bc;
-    secp256k1_fe_reduce(&a);
-    secp256k1_fe_reduce(&b);
-
-#ifdef USE_FIELD_5X64_ASM
-    secp256k1_fe_mul_inner((&a)->n,(&b)->n,r->n);
-#else
-    uint64_t c1,c2,c3;
-    c3=0;
-    mul_c2(a.n[0], b.n[0], c1, c2);
-    uint64_t r0 = c1; c1 = 0;
-    muladd_c3(a.n[0], b.n[1], c2, c3, c1);
-    muladd_c3(a.n[1], b.n[0], c2, c3, c1);
-    uint64_t r1 = c2; c2 = 0;
-    muladd_c3(a.n[2], b.n[0], c3, c1, c2);
-    muladd_c3(a.n[1], b.n[1], c3, c1, c2);
-    muladd_c3(a.n[0], b.n[2], c3, c1, c2);
-    uint64_t r2 = c3; c3 = 0;
-    muladd_c3(a.n[0], b.n[3], c1, c2, c3);
-    muladd_c3(a.n[1], b.n[2], c1, c2, c3);
-    muladd_c3(a.n[2], b.n[1], c1, c2, c3);
-    muladd_c3(a.n[3], b.n[0], c1, c2, c3);
-    uint64_t r3 = c1; c1 = 0;
-    muladd_c3(a.n[3], b.n[1], c2, c3, c1);
-    muladd_c3(a.n[2], b.n[2], c2, c3, c1);
-    muladd_c3(a.n[1], b.n[3], c2, c3, c1);
-    uint64_t r4 = c2; c2 = 0;
-    muladd_c3(a.n[2], b.n[3], c3, c1, c2);
-    muladd_c3(a.n[3], b.n[2], c3, c1, c2);
-    uint64_t r5 = c3; c3 = 0;
-    muladd_c3(a.n[3], b.n[3], c1, c2, c3);
-    uint64_t r6 = c1;
-    uint64_t r7 = c2;
-    assert(c3 == 0);
-    unsigned __int128 c = (unsigned __int128)r4 * COMP_LIMB + r0;
-    r->n[0] = c;
-    c = (unsigned __int128)r5 * COMP_LIMB + r1 + (c >> 64);
-    r->n[1] = c;
-    c = (unsigned __int128)r6 * COMP_LIMB + r2 + (c >> 64);
-    r->n[2] = c;
-    c = (unsigned __int128)r7 * COMP_LIMB + r3 + (c >> 64);
-    r->n[3] = c;
-    r->n[4] = c >> 64;
-#endif
-
-#ifdef VERIFY
-    r->normalized = 0;
-    r->reduced = 0;
-#endif
-    secp256k1_fe_reduce(r);
-}
-
-/*void static secp256k1_fe_sqr(secp256k1_fe_t *r, const secp256k1_fe_t *a) {
-    secp256k1_fe_mul(r, a, a);
-}*/
-
-void static secp256k1_fe_sqr(secp256k1_fe_t *r, const secp256k1_fe_t *ac) {
-    secp256k1_fe_t a = *ac;
-    secp256k1_fe_reduce(&a);
-
-#ifdef USE_FIELD_5X64_ASM
-    secp256k1_fe_sqr_inner((&a)->n,r->n);
-#else
-    uint64_t c1,c2,c3;
-    c3=0;
-    mul_c2(a.n[0], a.n[0], c1, c2);
-    uint64_t r0 = c1; c1 = 0;
-    muladd2_c3(a.n[0], a.n[1], c2, c3, c1);
-    uint64_t r1 = c2; c2 = 0;
-    muladd2_c3(a.n[2], a.n[0], c3, c1, c2);
-    sqradd_c3(a.n[1], c3, c1, c2);
-    uint64_t r2 = c3; c3 = 0;
-    muladd2_c3(a.n[0], a.n[3], c1, c2, c3);
-    muladd2_c3(a.n[1], a.n[2], c1, c2, c3);
-    uint64_t r3 = c1; c1 = 0;
-    muladd2_c3(a.n[3], a.n[1], c2, c3, c1);
-    sqradd_c3(a.n[2], c2, c3, c1);
-    uint64_t r4 = c2; c2 = 0;
-    muladd2_c3(a.n[2], a.n[3], c3, c1, c2);
-    uint64_t r5 = c3; c3 = 0;
-    sqradd_c3(a.n[3], c1, c2, c3);
-    uint64_t r6 = c1;
-    uint64_t r7 = c2;
-    assert(c3 == 0);
-    unsigned __int128 c = (unsigned __int128)r4 * COMP_LIMB + r0;
-    r->n[0] = c;
-    c = (unsigned __int128)r5 * COMP_LIMB + r1 + (c >> 64);
-    r->n[1] = c;
-    c = (unsigned __int128)r6 * COMP_LIMB + r2 + (c >> 64);
-    r->n[2] = c;
-    c = (unsigned __int128)r7 * COMP_LIMB + r3 + (c >> 64);
-    r->n[3] = c;
-    r->n[4] = c >> 64;
-#endif
-
-#ifdef VERIFY
-    r->normalized = 0;
-    r->reduced = 0;
-#endif
-    secp256k1_fe_reduce(r);
-}
-
-#endif
--- a/src/impl/field_5x64_asm.h
+++ b/src/impl/field_5x64_asm.h
@ -1,11 +0,0 @@
-// Copyright (c) 2013 Pieter Wuille
-// Distributed under the MIT/X11 software license, see the accompanying
-// file COPYING or http://www.opensource.org/licenses/mit-license.php.
-
-#ifndef _SECP256K1_FIELD_INNER5X52_IMPL_H_
-#define _SECP256K1_FIELD_INNER5X52_IMPL_H_
-
-void __attribute__ ((sysv_abi)) secp256k1_fe_mul_inner(const uint64_t *a, const uint64_t *b, uint64_t *r);
-void __attribute__ ((sysv_abi)) secp256k1_fe_sqr_inner(const uint64_t *a, uint64_t *r);
-
-#endif