There's not much point doing speed tests where all your doing is calling library functions. Yes oddly the ASM was faster on my windows machine and I have no explanation for that or even care to look at it but if you give it some native code to optimise you will see a remarkable improvement by orders of magnitude. 
I think I'll go with the c backend thank you very much. 
I can only surmise that the clang optimiser promoted the code to use avx instructions, now that's cool!  
Code: Select all
;mod EC ported by Idle, Danilo and Peter H
;implimentation of Curve25519 elliptic curve, public key function
;for use in EC Dieffie-Hellman key exchange  
;Updated 31/3/16 
;Version PB 5.42 LTS
;for production set the flag 
;#UseValidation = 0   
DeclareModule modEC
; /* Copyright 2008, Google Inc.
;  * All rights reserved.
;  *
;  * Redistribution And use in source And binary forms, With Or without
;  * modification, are permitted provided that the following conditions are
;  * met:
;  *
;  *     * Redistributions of source code must retain the above copyright
;  *       notice, this List of conditions And the following disclaimer.
;  *     * Redistributions in binary form must reproduce the above
;  *       copyright notice, this List of conditions And the following disclaimer
;  *       in the documentation And/Or other materials provided With the
;  *       distribution.
;  *     * Neither the name of Google Inc. nor the names of its
;  *       contributors may be used To endorse Or promote products derived from
;  *       this software without specific prior written permission.
;  *
;  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS And CONTRIBUTORS
;  * "AS IS" And ANY EXPRESS Or IMPLIED WARRANTIES, INCLUDING, BUT Not
;  * LIMITED To, THE IMPLIED WARRANTIES OF MERCHANTABILITY And FITNESS For
;  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
;  * OWNER Or CONTRIBUTORS BE LIABLE For ANY DIRECT, INDIRECT, INCIDENTAL,
;  * SPECIAL, EXEMPLARY, Or CONSEQUENTIAL DAMAGES (INCLUDING, BUT Not
;  * LIMITED To, PROCUREMENT OF SUBSTITUTE GOODS Or SERVICES; LOSS OF USE,
;  * Data, Or PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
;  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, Or TORT
;  * (INCLUDING NEGLIGENCE Or OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
;  * OF THIS SOFTWARE, EVEN If ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;  *
;  * curve25519-donna: Curve25519 elliptic curve, public key function
;  *
;  * http://code.google.com/p/curve25519-donna/
;  *
;  * Adam Langley <agl@imperialviolet.org>
;  *
;  * Derived from public domain C code by Daniel J. Bernstein <djb@cr.yp.To>
;  *
;  * More information about curve25519 can be found here
;  *   http://cr.yp.To/ecdh.html
;  *
;  * djb's sample implementation of curve25519 is written in a special assembly
;  * language called qhasm And uses the floating point registers.
;  *
;  * This is, almost, a clean room reimplementation from the curve25519 paper. It
;  * uses many of the tricks described therein. Only the crecip function is taken
;  * from the sample implementation.
;  */
;-Interface
;-myEc.modEC::iEC
Interface IEC 
   Genkeys.s()
   GetKey.s(public.s) 
   SaveKeys(files.s)
   LoadKeys.s(file.s) ;returns public key 
   Free() 
EndInterface 
;-Construcor
;-myEc = modEc::NewEc("salt")
Declare newEC(salt.s)
;-Use Validation 
#UseValidation = 1    ;turn this off for production 
CompilerIf #UseValidation
   Declare validate() 
CompilerEndIf
EndDeclareModule 
Module modEC 
EnableExplicit
UseSHA3Fingerprint()
Structure felem
   q.q[0]
EndStructure
Structure u8
   a.a[0]
EndStructure
Macro Comment(a) : EndMacro
; /* Sum two numbers: output += in */
Procedure fsum(*output.felem, *in.felem)
   Protected i
   While i < 10
      *output\q[  i] = (*output\q[  i] + *in\q[  i])
      *output\q[1+i] = (*output\q[1+i] + *in\q[1+i])
      i + 2
   Wend
EndProcedure
; /* Find the difference of two numbers: output = in - output
;  * (note the order of the arguments!)
;  */
Procedure fdifference(*output.felem, *in.felem)
   Protected i
   While i < 10
      *output\q[i] = (*in\q[i] - *output\q[i])
      i + 1
   Wend
EndProcedure
; /* Multiply a number my a scalar: output = in * scalar */
Procedure fscalar_product(*output.felem, *in.felem, scalar.q)
   Protected i
   While i < 10
      *output\q[i] = *in\q[i] * scalar
      i + 1
   Wend
EndProcedure
;/* Multiply two numbers: output = in2 * in
; *
; * output must be distinct to both inputs. The inputs are reduced coefficient
; * form, the output is not.
; */
Procedure fproduct(*output.felem, *in2.felem, *in.felem)
   *output\q[0] =       *in2\q[0] * *in\q[0]
   *output\q[1] =       *in2\q[0] * *in\q[1] +
                        *in2\q[1] * *in\q[0]
   *output\q[2] =  2 *  *in2\q[1] * *in\q[1] +
                   *in2\q[0] * *in\q[2] +
                   *in2\q[2] * *in\q[0]
   *output\q[3] =       *in2\q[1] * *in\q[2] +
                        *in2\q[2] * *in\q[1] +
                        *in2\q[0] * *in\q[3] +
                        *in2\q[3] * *in\q[0]
   *output\q[4] =       *in2\q[2] * *in\q[2] +
                        2 * (*in2\q[1] * *in\q[3] +
                             *in2\q[3] * *in\q[1]) +
                        *in2\q[0] * *in\q[4] +
                        *in2\q[4] * *in\q[0]
   *output\q[5] =       *in2\q[2] * *in\q[3] +
                        *in2\q[3] * *in\q[2] +
                        *in2\q[1] * *in\q[4] +
                        *in2\q[4] * *in\q[1] +
                        *in2\q[0] * *in\q[5] +
                        *in2\q[5] * *in\q[0]
   *output\q[6] =  2 * (*in2\q[3] * *in\q[3] +
                        *in2\q[1] * *in\q[5] +
                        *in2\q[5] * *in\q[1]) +
                   *in2\q[2] * *in\q[4] +
                   *in2\q[4] * *in\q[2] +
                   *in2\q[0] * *in\q[6] +
                   *in2\q[6] * *in\q[0]
   *output\q[7] =       *in2\q[3] * *in\q[4] +
                        *in2\q[4] * *in\q[3] +
                        *in2\q[2] * *in\q[5] +
                        *in2\q[5] * *in\q[2] +
                        *in2\q[1] * *in\q[6] +
                        *in2\q[6] * *in\q[1] +
                        *in2\q[0] * *in\q[7] +
                        *in2\q[7] * *in\q[0]
   *output\q[8] =       *in2\q[4] * *in\q[4] +
                        2 * (*in2\q[3] * *in\q[5] +
                             *in2\q[5] * *in\q[3] +
                             *in2\q[1] * *in\q[7] +
                             *in2\q[7] * *in\q[1]) +
                        *in2\q[2] * *in\q[6] +
                        *in2\q[6] * *in\q[2] +
                        *in2\q[0] * *in\q[8] +
                        *in2\q[8] * *in\q[0]
   *output\q[9] =       *in2\q[4] * *in\q[5] +
                        *in2\q[5] * *in\q[4] +
                        *in2\q[3] * *in\q[6] +
                        *in2\q[6] * *in\q[3] +
                        *in2\q[2] * *in\q[7] +
                        *in2\q[7] * *in\q[2] +
                        *in2\q[1] * *in\q[8] +
                        *in2\q[8] * *in\q[1] +
                        *in2\q[0] * *in\q[9] +
                        *in2\q[9] * *in\q[0]
   *output\q[10] = 2 * (*in2\q[5] * *in\q[5] +
                        *in2\q[3] * *in\q[7] +
                        *in2\q[7] * *in\q[3] +
                        *in2\q[1] * *in\q[9] +
                        *in2\q[9] * *in\q[1]) +
                   *in2\q[4] * *in\q[6] +
                   *in2\q[6] * *in\q[4] +
                   *in2\q[2] * *in\q[8] +
                   *in2\q[8] * *in\q[2]
   *output\q[11] =      *in2\q[5] * *in\q[6] +
                        *in2\q[6] * *in\q[5] +
                        *in2\q[4] * *in\q[7] +
                        *in2\q[7] * *in\q[4] +
                        *in2\q[3] * *in\q[8] +
                        *in2\q[8] * *in\q[3] +
                        *in2\q[2] * *in\q[9] +
                        *in2\q[9] * *in\q[2]
   *output\q[12] =      *in2\q[6] * *in\q[6] +
                        2 * (*in2\q[5] * *in\q[7] +
                             *in2\q[7] * *in\q[5] +
                             *in2\q[3] * *in\q[9] +
                             *in2\q[9] * *in\q[3]) +
                        *in2\q[4] * *in\q[8] +
                        *in2\q[8] * *in\q[4]
   *output\q[13] =      *in2\q[6] * *in\q[7] +
                        *in2\q[7] * *in\q[6] +
                        *in2\q[5] * *in\q[8] +
                        *in2\q[8] * *in\q[5] +
                        *in2\q[4] * *in\q[9] +
                        *in2\q[9] * *in\q[4]
   *output\q[14] = 2 * (*in2\q[7] * *in\q[7] +
                        *in2\q[5] * *in\q[9] +
                        *in2\q[9] * *in\q[5]) +
                   *in2\q[6] * *in\q[8] +
                   *in2\q[8] * *in\q[6]
   *output\q[15] =      *in2\q[7] * *in\q[8] +
                        *in2\q[8] * *in\q[7] +
                        *in2\q[6] * *in\q[9] +
                        *in2\q[9] * *in\q[6]
   *output\q[16] =      *in2\q[8] * *in\q[8] +
                        2 * (*in2\q[7] * *in\q[9] +
                             *in2\q[9] * *in\q[7])
   *output\q[17] =      *in2\q[8] * *in\q[9] +
                        *in2\q[9] * *in\q[8]
   *output\q[18] = 2 *  *in2\q[9] * *in\q[9]
EndProcedure
;/* Reduce a long form to a short form by taking the input mod 2^255 - 19. */
Procedure freduce_degree(*output.felem)
   *output\q[8] + (19 * *output\q[18])
   *output\q[7] + (19 * *output\q[17])
   *output\q[6] + (19 * *output\q[16])
   *output\q[5] + (19 * *output\q[15])
   *output\q[4] + (19 * *output\q[14])
   *output\q[3] + (19 * *output\q[13])
   *output\q[2] + (19 * *output\q[12])
   *output\q[1] + (19 * *output\q[11])
   *output\q[0] + (19 * *output\q[10])
EndProcedure
;/* Reduce all coefficients of the short form input to be -2**25 <= x <= 2**25
Procedure freduce_coefficients(*output.felem)
   Protected i, over.q, over2.q ; over and over2 = felem
   Repeat
      *output\q[10] = 0
      
      i=0
      While i < 10
         over = *output\q[i] / $2000000
         over2 = (over + ((over >> 63) * 2) + 1) / 2
         *output\q[i+1] + over2
         *output\q[i] - (over2 * $4000000)
         
         over = *output\q[i+1] / $2000000
         *output\q[i+2] + over
         *output\q[i+1] - (over * $2000000)
         i + 2
      Wend
      *output\q[0] + (19 * *output\q[10])
   Until Not *output\q[10]
EndProcedure
;/* A helpful wrapper around fproduct: output = in * in2.
; *
; * output must be distinct to both inputs. The output is reduced degree and
; * reduced coefficient.
; */
Procedure fmul(*output.felem, *in.felem, *in2.felem)
   Dim t.q(19) ; felem
   fproduct(@t(), *in, *in2);
   freduce_degree(@t())
   freduce_coefficients(@t())
   CopyMemory(@t(), *output, SizeOf(Quad) * 10) ; SizeOf(felem)
EndProcedure
Procedure fsquare_inner(*output.felem, *in.felem)
   *output\q[0] =       *in\q[0] * *in\q[0]
   *output\q[1] =  2 *  *in\q[0] * *in\q[1]
   *output\q[2] =  2 * (*in\q[1] * *in\q[1] +
                        *in\q[0] * *in\q[2])
   *output\q[3] =  2 * (*in\q[1] * *in\q[2] +
                        *in\q[0] * *in\q[3])
   *output\q[4] =       *in\q[2] * *in\q[2] +
                        4 *  *in\q[1] * *in\q[3] +
                        2 *  *in\q[0] * *in\q[4]
   *output\q[5] =  2 * (*in\q[2] * *in\q[3] +
                        *in\q[1] * *in\q[4] +
                        *in\q[0] * *in\q[5])
   *output\q[6] =  2 * (*in\q[3] * *in\q[3] +
                        *in\q[2] * *in\q[4] +
                        *in\q[0] * *in\q[6] +
                        2 *  *in\q[1] * *in\q[5])
   *output\q[7] =  2 * (*in\q[3] * *in\q[4] +
                        *in\q[2] * *in\q[5] +
                        *in\q[1] * *in\q[6] +
                        *in\q[0] * *in\q[7])
   *output\q[8] =       *in\q[4] * *in\q[4] +
                        2 * (*in\q[2] * *in\q[6] +
                             *in\q[0] * *in\q[8] +
                             2 * (*in\q[1] * *in\q[7] +
                                  *in\q[3] * *in\q[5]))
   *output\q[9] =  2 * (*in\q[4] * *in\q[5] +
                        *in\q[3] * *in\q[6] +
                        *in\q[2] * *in\q[7] +
                        *in\q[1] * *in\q[8] +
                        *in\q[0] * *in\q[9])
   *output\q[10] = 2 * (*in\q[5] * *in\q[5] +
                        *in\q[4] * *in\q[6] +
                        *in\q[2] * *in\q[8] +
                        2 * (*in\q[3] * *in\q[7] +
                             *in\q[1] * *in\q[9]))
   *output\q[11] = 2 * (*in\q[5] * *in\q[6] +
                        *in\q[4] * *in\q[7] +
                        *in\q[3] * *in\q[8] +
                        *in\q[2] * *in\q[9])
   *output\q[12] =      *in\q[6] * *in\q[6] +
                        2 * (*in\q[4] * *in\q[8] +
                             2 * (*in\q[5] * *in\q[7] +
                                  *in\q[3] * *in\q[9]))
   *output\q[13] = 2 * (*in\q[6] * *in\q[7] +
                        *in\q[5] * *in\q[8] +
                        *in\q[4] * *in\q[9])
   *output\q[14] = 2 * (*in\q[7] * *in\q[7] +
                        *in\q[6] * *in\q[8] +
                        2 *  *in\q[5] * *in\q[9])
   *output\q[15] = 2 * (*in\q[7] * *in\q[8] +
                        *in\q[6] * *in\q[9])
   *output\q[16] =      *in\q[8] * *in\q[8] +
                        4 *  *in\q[7] * *in\q[9]
   *output\q[17] = 2 *  *in\q[8] * *in\q[9]
   *output\q[18] = 2 *  *in\q[9] * *in\q[9]
EndProcedure
Procedure fsquare(*output.felem, *in.felem)
   Dim t.q(19) ; felem
   fsquare_inner(@t(), *in)
   freduce_degree(@t())
   freduce_coefficients(@t())
   CopyMemory(@t(),*output, SizeOf(Quad) * 10) ; SizeOf(felem)
EndProcedure
;/* Take a little-endian, 32-byte number and expand it into polynomial form */
Procedure fexpand(*output.felem, *input.u8)
   Protected q1.q, q2.q, q3.q, q4.q
   Macro F(n,start,shift,mask)
      q1 = (*input\a[start + 0] & $FF)
      q2 = (*input\a[start + 1] & $FF) : q2 << 8
      q3 = (*input\a[start + 2] & $FF) : q3 << 16
      q4 = (*input\a[start + 3] & $FF) : q4 << 24
      *output\q[n] = ((q1 | q2 | q3 | q4 ) >> shift) & mask
   EndMacro
   F(0, 0, 0, $3ffffff)
   F(1, 3, 2, $1ffffff)
   F(2, 6, 3, $3ffffff)
   F(3, 9, 5, $1ffffff)
   F(4, 12, 6, $3ffffff)
   F(5, 16, 0, $1ffffff)
   F(6, 19, 1, $3ffffff)
   F(7, 22, 3, $1ffffff)
   F(8, 25, 4, $3ffffff)
   F(9, 28, 6, $1ffffff)
   UndefineMacro F
EndProcedure
;/* Take a fully reduced polynomial form number and contract it into a
; * little-endian, 32-byte array
; */
Procedure fcontract(*output.u8, *input.felem)
   Protected i
   
   Repeat
      i = 0
      While i < 9
         If ((i & 1) = 1)
            While (*input\q[i] < 0)
               *input\q[i] + $2000000
               *input\q[i + 1] - 1
            Wend
         Else
            While (*input\q[i] < 0)
               *input\q[i] + $4000000
               *input\q[i + 1] - 1
            Wend
         EndIf
         i + 1
      Wend
      While (*input\q[9] < 0)
         *input\q[9] + $2000000
         *input\q[0] - 19
      Wend
   Until *input\q[0] >= 0
   
   *input\q[1] << 2
   *input\q[2] << 3
   *input\q[3] << 5
   *input\q[4] << 6
   *input\q[6] << 1
   *input\q[7] << 3
   *input\q[8] << 4
   *input\q[9] << 6
   
   Macro F(i, s)
      *output\a[s+0] | (*input\q[i] & $ff)
      *output\a[s+1] = ((*input\q[i] >>  8) & $ff)
      *output\a[s+2] = ((*input\q[i] >> 16) & $ff)
      *output\a[s+3] = ((*input\q[i] >> 24) & $ff)
   EndMacro
   
   *output\a[0]  = 0
   *output\a[16] = 0
   F(0,0)
   F(1,3)
   F(2,6)
   F(3,9)
   F(4,12)
   F(5,16)
   F(6,19)
   F(7,22)
   F(8,25)
   F(9,28)
   
   UndefineMacro F
   
EndProcedure
;/* Input: Q, Q', Q-Q'
; * Output: 2Q, Q+Q'
; *
; *   x2 z3: long form
; *   x3 z3: long form
; *   x z: short form, destroyed
; *   xprime zprime: short form, destroyed
; *   qmqp: short form, preserved
; */
Procedure fmonty(*x2.felem, *z2.felem          Comment("/* output 2Q     */") ,
                 *x3.felem, *z3.felem          Comment("/* output Q + Q' */") ,
                 *x.felem , *z.felem           Comment("/* input Q       */") ,
                 *xprime.felem, *zprime.felem  Comment("/* input Q'      */") ,
                 *qmqp.felem                   Comment("/* input Q - Q'  */") )
   ; following Dim's are all of type felem
   Dim origx.q(10)   : Dim origxprime.q(10)
   Dim zzz.q(19)     : Dim xx.q(19)         : Dim zz.q(19)
   Dim xxprime.q(19) : Dim zzprime.q(19)    : Dim zzzprime.q(19) : Dim xxxprime.q(19)
   
   CopyMemory(*x, @origx(), 10 * SizeOf(Quad)) ; SizeOf(felem)
   fsum(*x, *z)
   fdifference(*z, @origx())  ; does x - z
   
   CopyMemory(*xprime, @origxprime(), SizeOf(Quad) * 10) ; SizeOf(felem)
   fsum(*xprime, *zprime)
   fdifference(*zprime, @origxprime())
   fproduct(@xxprime(), *xprime, *z)
   fproduct(@zzprime(), *x, *zprime)
   freduce_degree(@xxprime())
   freduce_coefficients(@xxprime())
   freduce_degree(@zzprime())
   freduce_coefficients(@zzprime())
   CopyMemory(@xxprime(), @origxprime(), SizeOf(Quad) * 10) ; SizeOf(felem)
   fsum(@xxprime(), @zzprime())
   fdifference(@zzprime(), @origxprime())
   fsquare(@xxxprime(), @xxprime())
   fsquare(@zzzprime(), @zzprime())
   fproduct(@zzprime(), @zzzprime(), *qmqp)
   freduce_degree(@zzprime())
   freduce_coefficients(@zzprime())
   CopyMemory(@xxxprime(), *x3, SizeOf(Quad) * 10) ; SizeOf(felem)
   CopyMemory(@zzprime() , *z3, SizeOf(Quad) * 10) ; SizeOf(felem)
   
   fsquare(@xx(), *x)
   fsquare(@zz(), *z)
   fproduct(*x2, @xx(), @zz())
   freduce_degree(*x2)
   freduce_coefficients(*x2)
   fdifference(@zz(), @xx())  ; does zz = xx - zz
   FillMemory(@zzz()+10*SizeOf(Quad), SizeOf(Quad) * 9, 0, #PB_Byte) ; SizeOf(felem)
   fscalar_product(@zzz(), @zz(), 121665)
   freduce_degree(@zzz())
   freduce_coefficients(@zzz())
   fsum(@zzz(), @xx())
   fproduct(*z2, @zz(), @zzz())
   freduce_degree(*z2)
   freduce_coefficients(*z2)
EndProcedure
;/* Calculates nQ where Q is the x-coordinate of a point on the curve
; *
; *   resultx/resultz: the x coordinate of the resulting curve Point (short form)
; *   n: a little endian, 32-byte number
; *   q: a point of the curve (short form)
; */
Procedure cmult(*resultx.felem, *resultz.felem, *n.u8, *q.felem)
   Protected i, j, byte.a
   
   ; all of type felem
   Dim a.q(19)
   Dim b.q(19) : b(0) = 1
   Dim c.q(19) : c(0) = 1
   Dim d.q(19)
   
   Protected *nqpqx.felem = @a(), *nqpqz.felem = @b(), *nqx.felem = @c(), *nqz.felem = @d()  ;, *t.felem
   
   Dim e.q(19)
   Dim f.q(19) : f(0) = 1
   Dim g.q(19)
   Dim h.q(19) : h(0) = 1
   
   Protected *nqpqx2.felem = @e(), *nqpqz2.felem = @f(), *nqx2.felem = @g(), *nqz2.felem = @h()
   
   CopyMemory(*q, *nqpqx, SizeOf(Quad) * 10) ; Sizeof(felem)
   
   i = 0
   While i < 32
      byte = *n\a[31 - i] & $FF ; byte = type u8
      j = 0
      While j < 8
         If (byte & $80)
            fmonty(*nqpqx2, *nqpqz2,
                   *nqx2, *nqz2,
                   *nqpqx, *nqpqz,
                   *nqx, *nqz,
                   *q)
         Else
            fmonty(*nqx2, *nqz2,
                   *nqpqx2, *nqpqz2,
                   *nqx, *nqz,
                   *nqpqx, *nqpqz,
                   *q)
         EndIf
         
         ;*t = *nqx           ; can be replaced with Swap in PB
         ;*nqx = *nqx2
         ;*nqx2 = *t
         Swap *nqx, *nqx2
         
         ;*t = *nqz
         ;*nqz = *nqz2
         ;*nqz2 = *t
         Swap *nqz, *nqz2
         
         ;*t = *nqpqx
         ;*nqpqx = *nqpqx2
         ;*nqpqx2 = *t
         Swap *nqpqx, *nqpqx2
         
         ;*t = *nqpqz
         ;*nqpqz = *nqpqz2
         ;*nqpqz2 = *t
         Swap *nqpqz, *nqpqz2
         
         byte = (byte << 1)
         j + 1
      Wend
      i + 1
   Wend
   
   CopyMemory(*nqx, *resultx, SizeOf(Quad) * 10) ; SizeOf(felem)
   CopyMemory(*nqz, *resultz, SizeOf(Quad) * 10) ; SizeOf(felem)
EndProcedure
;// -----------------------------------------------------------------------------
;// Shamelessly copied from djb's code
;// -----------------------------------------------------------------------------
Procedure crecip(*out.felem, *z.felem)
   ; all Dim of type felem:
   Dim z2.q(10)
   Dim z9.q(10)
   Dim z11.q(10)
   Dim z2_5_0.q(10)
   Dim z2_10_0.q(10)
   Dim z2_20_0.q(10)
   Dim z2_50_0.q(10)
   Dim z2_100_0.q(10)
   Dim t0.q(10)
   Dim t1.q(10)
   
   Protected i
   
   Comment("/* 2              */") fsquare(@z2(),*z)
   Comment("/* 4              */") fsquare(@t1(),@z2())
   Comment("/* 8              */") fsquare(@t0(),@t1())
   Comment("/* 9              */") fmul(@z9(),@t0(),*z)
   Comment("/* 11             */") fmul(@z11(),@z9(),@z2())
   Comment("/* 22             */") fsquare(@t0(),@z11())
   Comment("/* 2^5 - 2^0 = 31 */") fmul(@z2_5_0(),@t0(),@z9())
   
   Comment("/* 2^6 - 2^1      */") fsquare(@t0(),@z2_5_0())
   Comment("/* 2^7 - 2^2      */") fsquare(@t1(),@t0())
   Comment("/* 2^8 - 2^3      */") fsquare(@t0(),@t1())
   Comment("/* 2^9 - 2^4      */") fsquare(@t1(),@t0())
   Comment("/* 2^10 - 2^5     */") fsquare(@t0(),@t1())
   Comment("/* 2^10 - 2^0     */") fmul(@z2_10_0(),@t0(),@z2_5_0())
   
   Comment("/* 2^11 - 2^1     */") fsquare(@t0(),@z2_10_0())
   Comment("/* 2^12 - 2^2     */") fsquare(@t1(),@t0())
   Comment("/* 2^20 - 2^10    */") i=2 : While i < 10 : fsquare(@t0(),@t1()) : fsquare(@t1(),@t0()) : i + 2 : Wend
   Comment("/* 2^20 - 2^0     */") fmul(@z2_20_0(),@t1(),@z2_10_0())
   
   Comment("/* 2^21 - 2^1     */") fsquare(@t0(),@z2_20_0())
   Comment("/* 2^22 - 2^2     */") fsquare(@t1(),@t0())
   Comment("/* 2^40 - 2^20    */") i=2 : While i < 20 : fsquare(@t0(),@t1()) : fsquare(@t1(),@t0()) : i + 2 : Wend
   Comment("/* 2^40 - 2^0     */") fmul(@t0(),@t1(),@z2_20_0())
   
   Comment("/* 2^41 - 2^1     */") fsquare(@t1(),@t0())
   Comment("/* 2^42 - 2^2     */") fsquare(@t0(),@t1())
   Comment("/* 2^50 - 2^10    */") i=2 : While i < 10 : fsquare(@t1(),@t0()) : fsquare(@t0(),@t1()) : i + 2 : Wend
   Comment("/* 2^50 - 2^0     */") fmul(@z2_50_0(),@t0(),@z2_10_0())
   
   Comment("/* 2^51 - 2^1     */") fsquare(@t0(),@z2_50_0())
   Comment("/* 2^52 - 2^2     */") fsquare(@t1(),@t0())
   Comment("/* 2^100 - 2^50   */") i=2 : While i < 50 : fsquare(@t0(),@t1()) : fsquare(@t1(),@t0()) : i + 2 : Wend
   Comment("/* 2^100 - 2^0    */") fmul(@z2_100_0(),@t1(),@z2_50_0())
   
   Comment("/* 2^101 - 2^1    */") fsquare(@t1(),@z2_100_0())
   Comment("/* 2^102 - 2^2    */") fsquare(@t0(),@t1())
   Comment("/* 2^200 - 2^100  */") i=2 : While i < 100 : fsquare(@t1(),@t0()) : fsquare(@t0(),@t1()): i + 2 : Wend
   Comment("/* 2^200 - 2^0    */") fmul(@t1(),@t0(),@z2_100_0())
   
   Comment("/* 2^201 - 2^1    */") fsquare(@t0(),@t1())
   Comment("/* 2^202 - 2^2    */") fsquare(@t1(),@t0())
   Comment("/* 2^250 - 2^50   */") i=2 : While i < 50 : fsquare(@t0(),@t1()) : fsquare(@t1(),@t0()) : i + 2 : Wend
   Comment("/* 2^250 - 2^0    */") fmul(@t0(),@t1(),@z2_50_0())
   
   Comment("/* 2^251 - 2^1    */") fsquare(@t1(),@t0())
   Comment("/* 2^252 - 2^2    */") fsquare(@t0(),@t1())
   Comment("/* 2^253 - 2^3    */") fsquare(@t1(),@t0())
   Comment("/* 2^254 - 2^4    */") fsquare(@t0(),@t1())
   Comment("/* 2^255 - 2^5    */") fsquare(@t1(),@t0())
   Comment("/* 2^255 - 21     */") fmul(*out,@t1(),@z11())
EndProcedure
Procedure.i curve25519_donna(*mypublic.u8, *secret.u8, *basepoint.u8)
   ; Dim of type felem:
   Dim bp.q(10)
   Dim x.q(10)
   Dim z.q(10)
   Dim zmone.q(10)
   
   ; Dim of type uint8_t
   Dim e.a(32)
   
   Protected i
   
   i = 0
   While i < 32
      e(i) = *secret\a[i]
      i + 1
   Wend
   
   
   e(0)  & 248
   e(31) & 127
   e(31) | 64
   
   
   fexpand(@bp(), *basepoint)
   cmult(@x(), @z(), @e(), @bp())
   crecip(@zmone(), @z())
   fmul(@z(), @x(), @zmone())
   fcontract(*mypublic, @z())
   ProcedureReturn 0
EndProcedure
Enumeration 1 
   #DHStateGenKeys
   #DHStateSendPublic
   #DHStateGenShared
   #DHStateComplete
EndEnumeration 
Structure DHKeys 
   Ksecret.a[32]
   Kpublic.a[32]
   Kshared.a[32]
   kBase.a[32] 
EndStructure 
Structure EC 
   *vt 
   salt.a[32]
   keys.DHkeys 
   status.i 
EndStructure 
Procedure NewEC(salt.s) 
   Protected *this.EC,SHA3.s,b,a  
   *this = AllocateMemory(SizeOf(EC)) 
   If *this 
      *this\vt = ?EC_vt 
      If salt <> "" 
         SHA3 = Fingerprint(@salt,Len(salt),#PB_Cipher_SHA3,256) 
         For a = 1 To 64 Step 2
            *this\salt[b] = Val("$"+Mid(SHA3,a,2))
            b+1
         Next 
      EndIf    
      ProcedureReturn *this 
   EndIf 
EndProcedure   
Procedure.s EC_GenKeys(*this.EC) 
   Protected *ptr.Ascii ,a,sout.s
   If OpenCryptRandom()  
      CryptRandomData(@*this\keys\Ksecret,32)
      CloseCryptRandom() 
      *ptr = @*this\keys\Ksecret 
      *ptr\a  & 248 
      *ptr+31
      *ptr\a & 127 
      *ptr\a | 64 
      FillMemory(@*this\keys\kBase,32) 
      FillMemory(@*this\keys\Kpublic,32) 
      *this\keys\kBase[0]=9 
      curve25519_donna(@*this\keys\Kpublic,@*this\keys\Ksecret,@*this\keys\kBase)
      
      For a=0 To 31
         *this\keys\kpublic[a] ! *this\salt[a] 
         sout+ RSet(Hex(*this\keys\Kpublic[a],#PB_Byte),2,"0")
      Next  
      
      ProcedureReturn sout  
   EndIf      
EndProcedure  
Procedure EC_SaveKeys(*this.EC,file.s) 
  Protected *ptr.Ascii,fn 
  If Not *this\keys\Ksecret 
    If OpenCryptRandom()  
      CryptRandomData(@*this\keys\Ksecret,32)
      CloseCryptRandom() 
      *ptr = @*this\keys\Ksecret 
      *ptr\a  & 248 
      *ptr+31
      *ptr\a & 127 
      *ptr\a | 64 
      FillMemory(@*this\keys\kBase,32) 
      FillMemory(@*this\keys\Kpublic,32) 
      *this\keys\kBase[0]=9 
      curve25519_donna(@*this\keys\Kpublic,@*this\keys\Ksecret,@*this\keys\kBase)
    EndIf 
  EndIf
  
  fn = OpenFile(#PB_Any,file)
  If fn 
    WriteData(fn,@*this\keys\Ksecret,32)
    WriteData(fn,@*this\keys\kBase,32) 
    WriteData(fn,@*this\keys\Kpublic,32) 
    CloseFile(fn) 
  EndIf   
        
EndProcedure 
Procedure.s EC_LoadKeys(*this.EC,file.s)
  Protected fn,a,sout.s
  fn = ReadFile(#PB_Any,file)
  If fn 
    ReadData(fn,@*this\keys\Ksecret,32)
    ReadData(fn,@*this\keys\kBase,32)
    ReadData(fn,@*this\keys\Kpublic,32) 
    CloseFile(fn) 
    For a=0 To 31
      sout+ RSet(Hex(*this\keys\Kpublic[a],#PB_Byte),2,"0")
    Next  
    ProcedureReturn sout 
  EndIf 
EndProcedure   
Procedure.s EC_GetKey(*this.EC,public.s) 
   Protected sout.s ,a.i,*mem,*key.Ascii,b  
   *mem = AllocateMemory(32) 
   *key= *mem 
   For a = 1 To 64 Step 2
      *key\a = Val("$"+Mid(public,a,2))
      *key\a ! *this\salt[b] 
      *key+1
      b+1
   Next 
   curve25519_donna(@*this\keys\Kshared,@*this\keys\Ksecret,*mem)
   For a=0 To 31
      sout+ RSet(Hex(*this\keys\Kshared[a],#PB_Byte),2,"0")
   Next  
   FreeMemory(*mem) 
   ProcedureReturn sout 
EndProcedure   
Procedure EC_Free(*this.EC) 
   FreeMemory(*this)
EndProcedure   
;-Curve validation code for testing purposes 
CompilerIf #UseValidation
   ;Test procedure for validation of the curve 
   Procedure.i Validation(*mypublic.u8, *secret.u8, *basepoint.u8)
      Dim bp.q(10)
      Dim x.q(10)
      Dim z.q(10)
      Dim zmone.q(10)
      Dim e.a(32)
      
      Protected i
      
      i = 0
      While i < 32
         e(i) = *secret\a[i]
         i + 1
      Wend
      
      fexpand(@bp(), *basepoint)
      cmult(@x(), @z(), @e(), @bp())
      crecip(@zmone(), @z())
      fmul(@z(), @x(), @zmone())
      fcontract(*mypublic, @z())
      ProcedureReturn 0
   EndProcedure
   ;
   Procedure validate()
      ;validation test of curve25519 
      
      Protected i,loop
      Protected et,st 
      
      st=ElapsedMilliseconds() 
      Protected Dim e1.a(31)
      Protected Dim e2.a(31)
      Protected Dim k.a(31)
      Protected Dim e1k.a(31)
      Protected Dim e2k.a(31)
      Protected Dim e1e2k.a(31)
      Protected Dim e2e1k.a(31)
      Protected Dim expp.a(31)
      
      e1(0) = 3
      e2(0) = 5
      k(0)  = 9
      
      expp( 0) = $be:expp( 1) = $4c:expp( 2) = $62:expp( 3) = $08:expp( 4) = $29:expp( 5) = $3f:expp( 6) = $81:expp( 7) = $1a
      expp( 8) = $15:expp( 9) = $4b:expp(10) = $9c:expp(11) = $42:expp(12) = $f7:expp(13) = $87:expp(14) = $dd:expp(15) = $90
      expp(16) = $9f:expp(17) = $07:expp(18) = $5c:expp(19) = $61:expp(20) = $1b:expp(21) = $82:expp(22) = $c3:expp(23) = $03
      expp(24) = $50:expp(25) = $ed:expp(26) = $c9:expp(27) = $fe:expp(28) = $6e:expp(29) = $83:expp(30) = $ad:expp(31) = $4a
      
      For loop=0 To 9
         Validation(@e1k(),  @e1(),  @k())
         Validation(@e2e1k(),@e2(),  @e1k())
         Validation(@e2k(),  @e2(),  @k())
         Validation(@e1e2k(),@e1(),  @e2k())
         
         For i=0 To 31
            If e1e2k(i) <> e2e1k(i)
               
               ProcedureReturn #False
            EndIf
         Next
         
         For i=0 To 31 : e1(i) = e1(i) ! e2k(i)   : Next i
         For i=0 To 31 : e2(i) = e2(i) ! e1k(i)   : Next i
         For i=0 To 31 : k (i) = k (i) ! e1e2k(i) : Next i
         
      Next loop
      
      For i=0 To 31
         If e1e2k(i) <> expp(i)
            
            ProcedureReturn #False
         EndIf
      Next
      
      et=ElapsedMilliseconds() 
      ProcedureReturn et-st ;#True
      
   EndProcedure   
CompilerEndIf 
;-end of validation code 
DataSection  : EC_vt: 
   Data.i @EC_Genkeys()
   Data.i @EC_GetKey()
   Data.i @EC_SaveKeys() 
   Data.i @EC_LoadKeys()
   Data.i @EC_Free() 
EndDataSection 
EndModule 
CompilerIf #PB_Compiler_IsMainFile
  
  CompilerIf #PB_Compiler_Backend = #PB_Backend_C 
    !//gccflags -O3 -mavx ;
    !//UseClang ;  
  CompilerEndIf 
  
   Define.modEC::iEC  client,server,man_client,man_server 
   Global clients_public_key.s, clients_shared_secret.s, servers_public_key.s,servers_shared_secret.s
   Global man_client_public_key.s,man_client_shared_secret.s,verify.s
   Global man_server_public_key.s,man_Server_shared_secret.s   
   
   OpenConsole()
   ConsoleTitle("Curve25519 Test")
   
   client = modEC::NewEC("salt n pepper")  ;Create new EC context with out of channel passphrase 
   server  = modEC::NewEC("salt n pepper") 
   
   clients_public_key = client\GenKeys()   ;Client generates keys  -> sends the public key  to server 
   servers_public_key = server\GenKeys()  ; Server generates keys -> returns the public key to client  
   
   client\SaveKeys("EC_Keys")   ;test save: saves the whole keyset    
   clients_public_key = client\LoadKeys("EC_Keys") ;loads a whole key set and returns the public key  
      
   PrintN("servers public key:" + servers_public_key)
   PrintN("clients public  key:" + clients_public_key)    
   
   Clients_shared_secret = client\getkey(servers_public_key)  ;Client plugs in the servers public key to get the secret encyption key   
   Servers_shared_secret = server\getkey(clients_public_key)  ;Server plugs in the Clients public key to get the secret encryption key 
   
   ;from this point the client and server can now use the encyption key to transfer encypted data to perform a log in...  
   ;the pass phrase mitigates the risk of a man in the middle attack
   
   PrintN("clients encryption key : " + clients_shared_secret)    
   PrintN("servers encyrption key : " + servers_shared_secret) 
      
   If clients_shared_secret = servers_shared_secret 
      PrintN("keys equal")
   Else
      PrintN("keys failed")
   EndIf
   
   client\free()
   server\Free() 
   
   ;Simulate a man in the middle attack without salt the process will succeed 
   PrintN("")
   PrintN("simulate man in the middle attack without a salt the process will succeed")
   
   client = modEC::NewEC("")   ;create a new EC context without a pass phrase 
   server  = modEC::NewEC("")
   man_client = modEC::newEC("")    ;man in the middle creates EC contexts between client and server 
   man_server = modEC::newEC("")
   
   clients_public_key = client\GenKeys()   ;Client generates keys  -> sends the public key  to server  but will get intercepted by man in middle 
   servers_public_key = server\GenKeys()  ; Server generates keys -> returns the public key but will get intercepted by man in middle   
   man_client_public_key = man_client\Genkeys()   ;man in middle creates key for the client  
   man_server_public_key = man_server\Genkeys()  ;man in middle creates key for server 
   
   Clients_shared_secret = client\getkey(man_client_public_key)  ;Client recieves man in middle key    
   man_client_shared_secret = man_client\GetKey(clients_public_key)   ; man gets shared secret with client   
   
   man_Server_shared_secret  = man_server\GetKey(servers_public_key)   ;man get's servers shared secret  
   Servers_shared_secret = server\Getkey(man_server_public_key)  ;Server gets mans shared secret  
      
   PrintN("clients shared secret: " + clients_shared_secret) 
   PrintN("man in middle secret: " + man_client_shared_secret)  
   
   If clients_shared_secret = man_client_shared_secret 
      PrintN("keys equal")
   Else
      PrintN("keys failed")
   EndIf
   
   
   PrintN("server shared secret: " + servers_shared_secret)
   PrintN("man in middle secret: " + man_Server_shared_secret)
   
   If servers_shared_secret = man_Server_shared_secret 
      PrintN("keys equal")
   Else
      PrintN("keys failed")
   EndIf
   
   client\Free()
   server\Free()
   man_client\Free()
   man_server\Free()
   
   ;simulate man in the middle attack with salt the process will fail 
   PrintN("")
   PrintN("simulate man in the middle attack wit salt the process will fail")
   
   client = modEC::NewEC("salt n pepper")   ;create a new EC context with an out of channel pass phrase which is only known to client and server know 
   server  = modEC::NewEC("salt n pepper")
   man_client = modEC::newEC("")    ;man in the middle creates EC contexts between client and server 
   man_server = modEC::newEC("")
   
   clients_public_key = client\GenKeys()   ;Client generates keys  -> sends the public key  to server  but will get intercepted by man in middle 
   servers_public_key = server\GenKeys()  ; Server generates keys -> returns the public key but will get intercepted by man in middle   
   man_client_public_key = man_client\Genkeys()   ;man in middle creates key for the client  
   man_server_public_key = man_server\Genkeys()  ;man in middle creates key for server 
   
   Clients_shared_secret = client\getkey(man_client_public_key)  ;Client recieves man in middle key    
   man_client_shared_secret = man_client\GetKey(clients_public_key)   ; man gets shared secret with client   
   
   man_Server_shared_secret  = man_server\Getkey(servers_public_key)   ;man get's servers shared secret  
   Servers_shared_secret = server\Getkey(man_server_public_key)  ;Server gets mans shared secret  
   
   
   PrintN("clients shared secret: " + clients_shared_secret) 
   PrintN("man in middle secret: " + man_client_shared_secret)  
   
   If clients_shared_secret = man_client_shared_secret 
      PrintN("keys equal")
   Else
      PrintN("keys failed")
   EndIf
   
   
   PrintN("server shared secret: " + servers_shared_secret)
   PrintN("man in middle secret: " + man_Server_shared_secret)
   
   If servers_shared_secret = man_Server_shared_secret 
      PrintN("keys equal")
   Else
      PrintN("keys failed")
   EndIf
   
   client\Free()
   server\Free()
        
   man_client\Free()
   man_server\Free()
   
   PrintN("")
   PrintN("Do validation test")
   Global validate 
   validate = modEC::validate() 
   If validate 
      PrintN("validated in " + validate)
   Else 
      PrintN("failed")
   EndIf     
   
   
   PrintN("")
   PrintN("PRESS 'RETURN' TO QUIT.")
   Input()
   CloseConsole()
   
CompilerEndIf