SHA-2/HMAC for 64-Bit-Windows

Share your advanced PureBasic knowledge/code with the community.
Helle
Enthusiast
Enthusiast
Posts: 178
Joined: Wed Apr 12, 2006 7:59 pm
Location: Germany
Contact:

SHA-2/HMAC for 64-Bit-Windows

Post by Helle »

As an extension of http://www.purebasic.fr/english/viewtop ... 13&t=47067, this is the 64-bit version for SHA-224/256/384/512 and HMAC:

Code: Select all

;- SHA-2/HMAC, based on engl. Wikipedia (Pseudo-Codes)
;- "Helle" Klaus Helbing, 25.08.2011, PB 4.51 (x64)
;- 64-Bit-Windows-Version with SSE2
;- I have refrained from optimizations for the "crutches" SHA-224 and SHA-384
;- Parts of Buffer: 0-7=Length.q, 8-15=Chunks.q, 16-23=MemAv.q, 24-31=pk.q, 32-103=p_a_h_A, 112-175=pHashA, 176-815=pWa, 816-823=ppAllBlocksA.q, 832-?=pAllBlocksA

; Padding_224_256(BufferA)
; Writes the SHA-224/256 message padding into the work buffer at BufferA:
;   - a single 80h byte directly behind the MemAv data bytes,
;   - the message length (BufferA+0, in bytes) converted to bits and stored as a
;     64-bit big-endian value in the last 8 bytes of the final 64-byte block,
;   - the number of 64-byte blocks to process into the Chunks field (BufferA+8).
; Fields read: BufferA+0 (Length), BufferA+16 (MemAv), BufferA+816 (offset of the data area).
; The bytes between the 80h marker and the length field are not written here;
; the caller is expected to provide them zeroed.
Procedure Padding_224_256(BufferA)
  ;The size is brought up to a multiple of 512 bits (64 bytes); the block length is 512 bits
!mov rcx,[rsp+48]          ;BufferA, if debugging on, can delete for an EXE    
  !mov r8,[rcx+816]          ;ppAllBlocksA       
  !add r8,rcx                ;points to pAllBlocks
  
  !mov rax,[rcx+16]          ;Value MemAv
  !mov r9,rax

  !mov r10,64                ;512 bits
  !xor rdx,rdx               ;clear RDX, the high half of the dividend (value is unsigned)
  !div r10                   ;the remainder (modulo) is in RDX

  !inc rax                   ;at least one round is always needed
  !mov [rcx+8],rax           ;Value Chunks
  !mov rax,[rcx+16]          ;Value MemAv
  !mov byte[r8+rax],80h      ;append one set bit
  !or rdx,rdx                ;RDX=0?
  ;Case 1: RDX=0. The data is an exact multiple of 512 bits (64 bytes). A complete 512-bit block is appended
  !jz @f 
  !sub r9,rdx                ;R9 = offset of the start of the last (partial) block
  ;Case 2: RDX<56. The last 512-bit block holds less than 448 bits (56 bytes), but more than zero
  !cmp rdx,56                ;448 bits
  !jb @f
  ;Case 3: RDX>=56. The last 512-bit block holds 448 bits (56 bytes) or more. This block is filled up (one set bit appended,
  ; rest zero) and a further block is appended, holding zeros and at its end the original length as a 64-bit big-endian value
  !add r8,64
  !add qword[rcx+8],1        ;one block was appended, so one more chunk
!@@:  
  !mov rax,[rcx]             ;Value Size
  !shl rax,3                 ;bytes -> bits; sufficient for file lengths up to approx. 2.3 EB (exabytes, 10^18)
  !bswap rax
  !mov [r8+r9+56],rax        ;append the original length as a 64-bit big-endian value

EndProcedure 

; Padding_384_512(BufferA)
; Writes the SHA-384/512 message padding into the work buffer at BufferA:
;   - a single 80h byte directly behind the MemAv data bytes,
;   - the message length (BufferA+0, in bytes) converted to bits and stored big-endian
;     in the last 8 bytes of the final 128-byte block (only the low quad of the
;     128-bit length field is written),
;   - the number of 128-byte blocks to process into the Chunks field (BufferA+8).
; Fields read: BufferA+0 (Length), BufferA+16 (MemAv), BufferA+816 (offset of the data area).
; The bytes between the 80h marker and the stored quad are not written here;
; the caller is expected to provide them zeroed.
; NOTE(review): when MemAv % 128 >= 112 the length quad ends up to MemAv+144 bytes
; into the data area - the caller's buffer must be large enough for that.
Procedure Padding_384_512(BufferA)
  ;The size is brought up to a multiple of 1024 bits (128 bytes); the block length is 1024 bits
!mov rcx,[rsp+48]          ;BufferA, if debugging on
  !mov r8,[rcx+816]          ;ppAllBlocksA
  !add r8,rcx                ;points to pAllBlocks

  !mov rax,[rcx+16]          ;Value MemAv
  !mov r9,rax

  !mov r10,128               ;1024 bits
  !xor rdx,rdx               ;clear RDX, the high half of the dividend (value is unsigned)
  !div r10                   ;the remainder (modulo) is in RDX

  !inc rax                   ;at least one round is always needed
  !mov [rcx+8],rax           ;Value Chunks
  !mov rax,[rcx+16]          ;Value MemAv
  !mov byte[r8+rax],80h      ;append one set bit
  !or rdx,rdx                ;RDX=0?
  ;Case 1: RDX=0. The data is an exact multiple of 1024 bits (128 bytes). A complete 1024-bit block is appended
  !jz @f 
  !sub r9,rdx                ;R9 = offset of the start of the last (partial) block
  ;Case 2: RDX<112. The last 1024-bit block holds less than 896 bits (112 bytes), but more than zero
  !cmp rdx,112               ;896 bits
  !jb @f
  ;Case 3: RDX>=112. The last 1024-bit block holds 896 bits (112 bytes) or more. This block is filled up (one set bit appended,
  ; rest zero) and a further block is appended, holding zeros and at its end the original length as a 128-bit big-endian value
  !add r8,128
  !add qword[rcx+8],1        ;one block was appended, so one more chunk
!@@:  
  !mov rax,[rcx]             ;Value Size
  !shl rax,3                 ;bytes -> bits; sufficient for file lengths up to approx. 2.3 EB (exabytes, 10^18)
  !bswap rax
  !mov [r8+r9+120],rax       ;append the original length as a 128-bit big-endian value, here only the low quad

EndProcedure 

; Main_224_256(BufferA)
; Processes one 64-byte block with the SHA-224/256 compression function.
;   - The block is read from BufferA + [BufferA+816] and byte-swapped into W[0..15] at BufferA+176.
;   - W[16..63] are derived two words per iteration with SSE2.
;   - The working variables a..h live at BufferA+32 and are initialised from the hash state at BufferA+112.
;   - 64 rounds are run using the constant table whose address is stored at BufferA+24.
;   - Finally the block offset at BufferA+816 is advanced by 64 and a..h are added to the hash state.
; movdqa is used on BufferA+32, +48, +112 and +128, so BufferA has to be 16-byte aligned.
Procedure Main_224_256(BufferA)
!mov rcx,[rsp+48]          ;BufferA, if debugging on
  ;W[0] to W[15]
  !mov r11,rcx
  !add r11,[rcx+816]         ;ppAllBlocksA
  !mov r10,rcx
  !add r10,176               ;pWa  
  !mov rdx,16
!@@:
  !mov eax,[r11]
  !bswap eax                 ;message words are big-endian
  !mov [r10],eax
  !add r10,4
  !add r11,4
  !sub rdx,1
  !jnz @b
  ;W[16] to W[63]
  !mov r9,24                 ;24 iterations x 2 words = the 64-16 remaining words
  !mov r11,rcx
  !add r11,176+64            ;pWA+64  
!@@:
  ;s0
  !movq xmm0,[r11-60]        ;W[1] 2
  !movdqa xmm1,xmm0
  !movdqa xmm2,xmm0
  !movdqa xmm3,xmm0
  !psrld xmm0,7              ;shift right 7 / shift left 25 / por = rotate right by 7
  !pslld xmm1,25
  !por xmm0,xmm1
  !movdqa xmm1,xmm2
  !psrld xmm1,18             ;rotate right by 18
  !pslld xmm2,14
  !por xmm1,xmm2
  !psrld xmm3,3 
  !pxor xmm0,xmm1
  !pxor xmm0,xmm3            ;s0
  ;s1
  !movq xmm1,[r11-8]         ;W[14] 15
  !movdqa xmm2,xmm1
  !movdqa xmm3,xmm1
  !movdqa xmm4,xmm1
  !psrld xmm1,17             ;rotate right by 17
  !pslld xmm2,15
  !por xmm1,xmm2
  !movdqa xmm2,xmm3
  !psrld xmm2,19             ;rotate right by 19
  !pslld xmm3,13
  !por xmm2,xmm3
  !psrld xmm4,10
  !pxor xmm1,xmm2
  !pxor xmm1,xmm4            ;s1
  ;W[i]
  !movq xmm2,[r11-64]        ;W[0] 1
  !paddd xmm2,xmm0
  !paddd xmm2,xmm1
  !movq xmm3,[r11-28]        ;W[9] 10
  !paddd xmm2,xmm3
  !movq [r11],xmm2           ;store two new W words

  !add r11,8
  !sub r9,1
  !jnz @b
  ;Initialization
  !mov r9,rcx
  !add r9,32                 ;p_a_h_A
  !mov r11,rcx
  !add r11,112               ;pHashA
  !movdqa xmm0,[r11]
  !movdqa xmm1,[r11+16]
  !movdqa [r9],xmm0
  !movdqa [r9+16],xmm1
  ;Main Loop
  !mov r11,[rcx+24]          ;k_224_256, see Data
  !xor rcx,rcx               ;RCX = 0: byte offset into k[] and W[] for the main loop
!@@:
  ;s0
  !mov eax,[r9]              ;a
  !mov edx,eax
  !mov r10d,eax
  !ror eax,2
  !ror edx,13
  !xor eax,edx
  !ror r10d,22
  !xor r10d,eax              ;R10d=s0
  ;maj (major)               maj = (a and b) + (c and (a xor b)) NEW!
  !mov eax,[r9]              ;a
  !mov edx,eax
  !mov r8d,[r9+4]            ;b
  !xor eax,r8d               ;(a xor b)
  !and r8d,edx               ;(a and b)
  !and eax,[r9+8]            ;c
  !add r8d,eax               ;R8d=maj
  ;t2
  !add r8d,r10d              ;R8d=t2, R10d=s0
  ;s1
  !mov eax,[r9+16]           ;e
  !mov edx,eax
  !mov r10d,eax
  !ror eax,6
  !ror edx,11
  !xor eax,edx
  !ror r10d,25
  !xor r10d,eax              ;R10d=s1
  ;ch                         ch = g xor (e and (f xor g)) NEW!
  !mov eax,[r9+24]           ;g
  !mov edx,eax
  !xor edx,[r9+20]           ;f
  !and edx,[r9+16]           ;e
  !xor edx,eax
  ;t1
  !mov eax,[r9+28]           ;h
  !add eax,r10d              ;s1
  !add eax,edx               ;ch
  !add eax,[r11+rcx]         ;k[i]
  !mov r10,r9                ;R9=BufferA+32
  !add r10,144               ;=176=pWA

  !add eax,[r10+rcx]         ;EAX=t1, R10+RCX=W[i]
  ;Rotate the working variables
  !mov edx,[r9+12]           ;"old" d
  !add edx,eax               ;EAX=t1
  !movdqa xmm0,[r9]
  !movdqa xmm1,[r9+16]
  !movdqu [r9+4],xmm0        ;copy back with an offset of 4 bytes (b..e = old a..d)
  !movdqu [r9+20],xmm1       ;f..h = old e..g; the old h spills into the 4 bytes behind h

  !add eax,r8d               ;EAX=t1, R8d=t2
  !mov [r9],eax              ;"new" a
  !mov [r9+16],edx           ;"new" e 

  !add rcx,4
  !cmp rcx,256               ;64 rounds x 4 bytes
  !jb @b

  !add qword[r10+816-176],64 ;ppAllBlocksA, advance 512 bits (64 bytes) to the next chunk. R10 is BufferA+176 (=pWA)
  ;at the end of each chunk add the working values to the existing hash values (carries are ignored!)
  !sub r10,64                ;176-64=112=pHashA
  !movdqa xmm0,[r10]  
  !movdqa xmm1,[r10+16]
  !paddd xmm0,[r9]
  !paddd xmm1,[r9+16]
  !movdqa [r10],xmm0
  !movdqa [r10+16],xmm1

EndProcedure   

; Main_384_512(BufferA)
; Processes one 128-byte block with the SHA-384/512 compression function.
;   - The block is read from BufferA + [BufferA+816] and byte-swapped into W[0..15] at BufferA+176.
;   - W[16..79] are derived two quads per iteration with SSE2.
;   - The working variables a..h live at BufferA+32 and are initialised from the hash state at BufferA+112.
;   - 80 rounds are run using the constant table whose address is stored at BufferA+24.
;   - Finally the block offset at BufferA+816 is advanced by 128 and a..h are added to the hash state.
; movdqa is used on offsets that are multiples of 16 from BufferA, so BufferA has to be 16-byte aligned.
Procedure Main_384_512(BufferA)
!mov rcx,[rsp+48]          ;BufferA, if debugging on
  ;W[0] to W[15]
  !mov r11,rcx
  !add r11,[rcx+816]         ;ppAllBlocksA
  !mov r10,rcx
  !add r10,176               ;pWA
  !mov rdx,16
!@@:
  !mov rax,[r11]
  !bswap rax                 ;message words are big-endian
  !mov [r10],rax
  !add r10,8
  !add r11,8
  !sub rdx,1
  !jnz @b
  ;W[16] to W[79]
  !mov r9,32                 ;32 iterations x 2 quads = the 80-16 remaining words
  !mov r11,rcx
  !add r11,176+128           ;pWA+128
!@@:
  ;s0
  !movdqu xmm0,[r11-120]     ;W[1] 2
  !movdqa xmm1,xmm0
  !movdqa xmm2,xmm0
  !movdqa xmm3,xmm0
  !psrlq xmm0,1              ;shift right 1 / shift left 63 / por = rotate right by 1
  !psllq xmm1,63
  !por xmm0,xmm1
  !movdqa xmm1,xmm2
  !psrlq xmm1,8              ;rotate right by 8
  !psllq xmm2,56
  !por xmm1,xmm2
  !psrlq xmm3,7 
  !pxor xmm0,xmm1
  !pxor xmm0,xmm3            ;s0
  ;s1
  !movdqa xmm1,[r11-16]      ;W[14] 15  
  !movdqa xmm2,xmm1
  !movdqa xmm3,xmm1
  !movdqa xmm4,xmm1
  !psrlq xmm1,19             ;rotate right by 19
  !psllq xmm2,45
  !por xmm1,xmm2
  !movdqa xmm2,xmm3
  !psrlq xmm2,61             ;rotate right by 61
  !psllq xmm3,3
  !por xmm2,xmm3
  !psrlq xmm4,6
  !pxor xmm1,xmm2
  !pxor xmm1,xmm4            ;s1
  ;W[i]
  !movdqa xmm2,[r11-128]     ;W[0] 1
  !paddq xmm2,xmm0
  !paddq xmm2,xmm1
  !movdqu xmm3,[r11-56]      ;W[9] 10
  !paddq xmm2,xmm3
  !movdqa [r11],xmm2         ;W[16] 17

  !add r11,16
  !sub r9,1
  !jnz @b
  ;Initialization
  !mov r9,rcx
  !add r9,32                 ;p_a_h_A
  !mov r11,rcx
  !add r11,112               ;pHashA
  !movdqa xmm0,[r11]
  !movdqa xmm1,[r11+16]
  !movdqa xmm2,[r11+32]
  !movdqa xmm3,[r11+48]
  !movdqa [r9],xmm0
  !movdqa [r9+16],xmm1
  !movdqa [r9+32],xmm2
  !movdqa [r9+48],xmm3
  ;Main Loop
  !mov r11,[rcx+24]          ;k_384_512, see Data
  !xor rcx,rcx               ;RCX = 0: byte offset into k[] and W[] for the main loop
!@@:
  ;s0
  !mov rax,[r9]              ;a
  !mov rdx,rax
  !mov r10,rax
  !ror rax,28
  !ror rdx,34
  !xor rax,rdx
  !ror r10,39
  !xor r10,rax               ;R10=s0
  ;maj (major)               maj = (a and b) + (c and (a xor b)) NEW!
  !mov rax,[r9]              ;a
  !mov rdx,rax
  !mov r8,[r9+8]             ;b
  !xor rax,r8                ;(a xor b)
  !and r8,rdx                ;(a and b)
  !and rax,[r9+16]           ;c
  !add r8,rax                ;R8=maj
  ;t2
  !add r8,r10                ;R8=t2, R10=s0
  ;s1
  !mov rax,[r9+32]           ;e
  !mov rdx,rax
  !mov r10,rax
  !ror rax,14
  !ror rdx,18
  !xor rax,rdx
  !ror r10,41
  !xor r10,rax               ;R10=s1
  ;ch                         ch = g xor (e and (f xor g)) NEW!
  !mov rax,[r9+48]           ;g
  !mov rdx,rax
  !xor rdx,[r9+40]           ;f
  !and rdx,[r9+32]           ;e
  !xor rdx,rax
  ;t1
  !mov rax,[r9+56]           ;h
  !add rax,r10               ;s1
  !add rax,rdx               ;ch
  !add rax,[r11+rcx]         ;k[i]
  !mov r10,r9                ;R9=BufferA+32
  !add r10,144               ;=176=pWA

  !add rax,[r10+rcx]         ;RAX=t1, R10+RCX=W[i]

  ;Rotate the working variables
  !mov rdx,[r9+24]           ;"old" d
  !add rdx,rax               ;RAX=t1
  !movdqa xmm0,[r9]
  !movdqa xmm1,[r9+16]
  !movdqa xmm2,[r9+32]
  !movdqa xmm3,[r9+48]
  !movdqu [r9+8],xmm0        ;copy back with an offset of 8 bytes (b,c = old a,b)
  !movdqu [r9+24],xmm1
  !movdqu [r9+40],xmm2
  !movdqu [r9+56],xmm3       ;h = old g; the old h spills into the 8 bytes behind h

  !add rax,r8                ;RAX=t1, R8=t2
  !mov [r9],rax              ;"new" a
  !mov [r9+32],rdx           ;"new" e 

  !add rcx,8
  !cmp rcx,640               ;80 rounds x 8 bytes
  !jb @b

  !add qword[r10+816-176],128     ;advance 1024 bits (128 bytes) to the next chunk
  ;at the end of each chunk add the working values to the existing hash values (carries are ignored!)
  !sub r10,64                ;176-64=112=pHashA

  !movdqa xmm0,[r10]  
  !paddq xmm0,[r9]
  !movdqa [r10],xmm0
  !movdqa xmm0,[r10+16]
  !paddq xmm0,[r9+16]
  !movdqa [r10+16],xmm0
  !movdqa xmm0,[r10+32]  
  !paddq xmm0,[r9+32]
  !movdqa [r10+32],xmm0
  !movdqa xmm0,[r10+48]
  !paddq xmm0,[r9+48]
  !movdqa [r10+48],xmm0

EndProcedure   

; SHA_Calculate_64(*Source, Length, Mode)
; Computes a SHA-2 digest and returns it as an upper-case hex string.
;   *Source : memory address of the data, or a PureBasic file number when the file flag is set
;   Length  : number of bytes to hash (in HMAC mode: data length + one hash block for the key block)
;   Mode    : bits 0-5 = algorithm (0=SHA-224, 1=SHA-256, 2=SHA-384, 3=SHA-512)
;             $40      = HMAC mode: the bits above the low byte of Mode hold the 256-byte
;                        aligned address of the (already XOR-ed) key block that is hashed first
;             $80      = *Source is a file number, data is read with ReadData()
; Returns "" if the algorithm number is invalid, the work buffer cannot be allocated
; or a file read delivers no data.
; Input is processed in slices of at most 256 KB. While hashing, the gadgets 12, 14 and 15
; (throughput / progress display) are updated.
; Work buffer layout (offsets from the 16-byte aligned BufferA):
;   0=Length.q, 8=Chunks.q, 16=MemAv.q, 24=pk.q, 32=a..h, 112=hash state, 176=W[],
;   816=offset of the current block, 832..=data area
Procedure.s SHA_Calculate_64(*Source, Length, Mode)
  SizeAv = Length

  HMAC = Mode & $40
  File = Mode & $80

  pMem = Mode 
  pMem & $FFFFFFFFFFFFFF00             ;alignment is 256, byte 0 = 0
  Mode & $3F

  If Mode < 0 Or Mode > 3              ;unknown algorithm: HashBase would stay 0
    ProcedureReturn ""
  EndIf

  If Length > $40000 
    MemAv = $40000                     ;slice size 256 KB
    LCopy = $40000
    If Mode < 2
      Chunks = $1000                   ;256 KB / 64 bytes
    Else 
      Chunks = $800                    ;256 KB / 128 bytes
    EndIf
    ChunksOld = Chunks
  Else
    MemAv = Length
    LCopy = Length
  EndIf

  ;Data area = MemAv bytes + room for the padding + slack for the 16-byte alignment.
  ;The padding can reach up to 2 blocks behind the start of the last partial block
  ;(for SHA-384/512 with MemAv % 128 >= 112 the length field ends at MemAv + 144),
  ;so 256 bytes are reserved instead of 128.
  Buffer = AllocateMemory(832 + MemAv + 256 + 16)
  If Buffer = 0
    ProcedureReturn ""
  EndIf
  i = Buffer % 16
  If i 
    BufferA = Buffer - i + 16          ;alignment 16
  Else
    BufferA = Buffer
  EndIf

  Select Mode                          ;set HashBase
    Case 0
      HashBase = ?HashBase_224
      LHB = 32                         ;length of HashBase
      PokeQ(BufferA + 24, ?k_224_256)
      SZ = 24                          ;loop counter (offset of the last output word)
      ProgFac.d = 0.064                ;for the calculation progress
    Case 1
      HashBase = ?HashBase_256
      LHB = 32                         ;length of HashBase
      PokeQ(BufferA + 24, ?k_224_256)
      SZ = 28                          ;loop counter
      ProgFac.d = 0.064                ;for the calculation progress
    Case 2
      HashBase = ?HashBase_384
      LHB = 64                         ;length of HashBase
      PokeQ(BufferA + 24, ?k_384_512)
      SZ = 40                          ;loop counter
      ProgFac.d = 0.128                ;for the calculation progress
    Case 3
      HashBase = ?HashBase_512
      LHB = 64                         ;length of HashBase
      PokeQ(BufferA + 24, ?k_384_512)
      SZ = 56                          ;loop counter
      ProgFac.d = 0.128                ;for the calculation progress
  EndSelect

  CopyMemory(HashBase, BufferA + 112, LHB)  ;to pHashA

  PokeQ(BufferA, Length)

  pAllBlocksStart = 832
  PokeQ(BufferA + 816, pAllBlocksStart)     ;offset of the current block in pAllBlocksA

  ChunksSum.d = 0
  ChunksSum1.d = 0      
  ChunksAll.d = (Length / (ProgFac * 1000)) + 1   ;for the calculation progress
  TF1 = 0
  TF2 = 0

  If HMAC
    HMAC = 0
    HMACVal = LHB << 1                 ;= block length (64 or 128): size of the key block
    If File
      SizeAv - HMACVal                 ;the key block does not come from the file
    EndIf
    CopyMemory(pMem, BufferA + pAllBlocksStart, HMACVal)   ;key block to the start of pAllBlocksA
  EndIf

  If Length = 0                        ;so that a length of 0 is handled as well
    PokeQ(BufferA + 16, MemAv)         ;MemAv=0
    If Mode < 2
      Padding_224_256(BufferA)
      Main_224_256(BufferA)
    Else
      Padding_384_512(BufferA)
      Main_384_512(BufferA)
    EndIf
  EndIf  

  Seek = 0

  While SizeAv > 0

    If File
      BytesRead = ReadData(*Source, BufferA + pAllBlocksStart + HMACVal, MemAv - HMACVal)     ;read file into memory, *Source = file number
      If BytesRead <= 0                ;read error / file shorter than announced: do not loop forever
        FreeMemory(Buffer)
        ProcedureReturn ""
      EndIf
      Seek + BytesRead
      FileSeek(*Source, Seek)        
      SizeAv - BytesRead
    Else 
      ;Only LCopy - HMACVal bytes come from *Source (the first slice starts with the key block),
      ;so the source position must advance by exactly that amount while SizeAv, which counts
      ;the key block as well, shrinks by the whole slice.
      BytesCopy = LCopy - HMACVal
      CopyMemory(*Source + Seek, BufferA + pAllBlocksStart + HMACVal, BytesCopy)   ;to pAllBlocksA
      Seek + BytesCopy
      SizeAv - LCopy
      If SizeAv < LCopy    
        LCopy = SizeAv  
      EndIf 
    EndIf

    HMACVal = 0                        ;the key block is only part of the first slice

    If SizeAv = 0                      ;last slice: append the padding
      PokeQ(BufferA + 16, MemAv)
      If Mode < 2
        Padding_224_256(BufferA)
      Else
        Padding_384_512(BufferA)
      EndIf
      Chunks = PeekQ(BufferA + 8)
    EndIf

    While Chunks                       ;one hash block (64 or 128 bytes) per pass
      If Mode < 2
        Main_224_256(BufferA)
      Else
        Main_384_512(BufferA)
      EndIf

      ChunksSum + 1                    ;calculation progress
      TF = ElapsedMilliseconds()
      If TF - TF1 > 250
        If TF - TF2 > 500
          SetGadgetText(12, StrD((ChunksSum - ChunksSum1) * ProgFac / (TF - TF2), 3))
          SetGadgetText(15, StrD((ChunksSum / ChunksAll) * 100, 1) + "%")
          TF2 = TF
          ChunksSum1 = ChunksSum
        EndIf 
        TF1 = TF
        SetGadgetState(14, (ChunksSum / ChunksAll) * 100)
      EndIf

      Chunks - 1
    Wend

    PokeQ(BufferA + 816, pAllBlocksStart)   ;back to the start of the data area

    If SizeAv <= MemAv 
      MemAv = SizeAv
      ;Clear the data of the next (last) slice plus the whole area the padding can touch,
      ;otherwise stale bytes of the previous slice end up inside the padding.
      ;Highest byte written is MemAv + 255, which is inside the allocation.
      For i = 0 To MemAv + 252 Step 4
        PokeL(BufferA + 832 + i, 0)    ;clear buffer
      Next
    Else
      Chunks = ChunksOld
    EndIf

  Wend 

  Hash$ = ""
  If Mode < 2                          ;224/256
    For i = 0 To SZ Step 4
      Hash$ + RSet(Hex(PeekL(BufferA + 112 + i) & $FFFFFFFF), 8, "0")
    Next 
  Else                                 ;384/512
    For i = 0 To SZ Step 8
      Hash$ + RSet(Hex(PeekQ(BufferA + 112 + i)), 16, "0")
    Next
  EndIf

  FreeMemory(Buffer)

 ProcedureReturn Hash$

DataSection   ;read only; possibly not needed
HashBase_224: ;The second 32 bits of the fractional parts of the square roots of the 9th through 16th primes 23...53, Big-Endian!
  Data.l $c1059ed8, $367cd507, $3070dd17, $f70e5939, $ffc00b31, $68581511, $64f98fa7, $befa4fa4  
HashBase_256: ;The first 32 bits of the fractional parts of the square roots of the first 8 primes 2...19, Big-Endian!
  Data.l $6a09e667, $bb67ae85, $3c6ef372, $a54ff53a, $510e527f, $9b05688c, $1f83d9ab, $5be0cd19
HashBase_384: ;The first 64 bits of the fractional parts of the square roots of the 9th through 16th primes 23...53, Big-Endian! 
  Data.q $cbbb9d5dc1059ed8, $629a292a367cd507, $9159015a3070dd17, $152fecd8f70e5939
  Data.q $67332667ffc00b31, $8eb44a8768581511, $db0c2e0d64f98fa7, $47b5481dbefa4fa4  
HashBase_512: ;The first 64 bits of the fractional parts of the square roots of the first 8 primes 2...19, Big-Endian!
  Data.q $6a09e667f3bcc908, $bb67ae8584caa73b, $3c6ef372fe94f82b, $a54ff53a5f1d36f1
  Data.q $510e527fade682d1, $9b05688c2b3e6c1f, $1f83d9abfb41bd6b, $5be0cd19137e2179
k_224_256:    ;The first 32 bits of the fractional parts of the cube roots of the first 64 primes 2...311, Big-Endian!
  Data.l $428a2f98, $71374491, $b5c0fbcf, $e9b5dba5, $3956c25b, $59f111f1, $923f82a4, $ab1c5ed5
  Data.l $d807aa98, $12835b01, $243185be, $550c7dc3, $72be5d74, $80deb1fe, $9bdc06a7, $c19bf174
  Data.l $e49b69c1, $efbe4786, $0fc19dc6, $240ca1cc, $2de92c6f, $4a7484aa, $5cb0a9dc, $76f988da
  Data.l $983e5152, $a831c66d, $b00327c8, $bf597fc7, $c6e00bf3, $d5a79147, $06ca6351, $14292967
  Data.l $27b70a85, $2e1b2138, $4d2c6dfc, $53380d13, $650a7354, $766a0abb, $81c2c92e, $92722c85
  Data.l $a2bfe8a1, $a81a664b, $c24b8b70, $c76c51a3, $d192e819, $d6990624, $f40e3585, $106aa070
  Data.l $19a4c116, $1e376c08, $2748774c, $34b0bcb5, $391c0cb3, $4ed8aa4a, $5b9cca4f, $682e6ff3
  Data.l $748f82ee, $78a5636f, $84c87814, $8cc70208, $90befffa, $a4506ceb, $bef9a3f7, $c67178f2
k_384_512:    ;The first 64 bits of the fractional parts of the cube roots of the first 80 primes 2...409, Big-Endian! 
  Data.q $428a2f98d728ae22, $7137449123ef65cd, $b5c0fbcfec4d3b2f, $e9b5dba58189dbbc
  Data.q $3956c25bf348b538, $59f111f1b605d019, $923f82a4af194f9b, $ab1c5ed5da6d8118
  Data.q $d807aa98a3030242, $12835b0145706fbe, $243185be4ee4b28c, $550c7dc3d5ffb4e2
  Data.q $72be5d74f27b896f, $80deb1fe3b1696b1, $9bdc06a725c71235, $c19bf174cf692694
  Data.q $e49b69c19ef14ad2, $efbe4786384f25e3, $0fc19dc68b8cd5b5, $240ca1cc77ac9c65
  Data.q $2de92c6f592b0275, $4a7484aa6ea6e483, $5cb0a9dcbd41fbd4, $76f988da831153b5
  Data.q $983e5152ee66dfab, $a831c66d2db43210, $b00327c898fb213f, $bf597fc7beef0ee4
  Data.q $c6e00bf33da88fc2, $d5a79147930aa725, $06ca6351e003826f, $142929670a0e6e70
  Data.q $27b70a8546d22ffc, $2e1b21385c26c926, $4d2c6dfc5ac42aed, $53380d139d95b3df
  Data.q $650a73548baf63de, $766a0abb3c77b2a8, $81c2c92e47edaee6, $92722c851482353b
  Data.q $a2bfe8a14cf10364, $a81a664bbc423001, $c24b8b70d0f89791, $c76c51a30654be30
  Data.q $d192e819d6ef5218, $d69906245565a910, $f40e35855771202a, $106aa07032bbd1b8
  Data.q $19a4c116b8d2d0c8, $1e376c085141ab53, $2748774cdf8eeb99, $34b0bcb5e19b48a8
  Data.q $391c0cb3c5c95a63, $4ed8aa4ae3418acb, $5b9cca4f7763e373, $682e6ff3d6b2b8a3
  Data.q $748f82ee5defb2fc, $78a5636f43172f60, $84c87814a1f0ab72, $8cc702081a6439ec
  Data.q $90befffa23631e28, $a4506cebde82bde9, $bef9a3f7b2c67915, $c67178f2e372532b
  Data.q $ca273eceea26619c, $d186b8c721c0c207, $eada7dd6cde0eb1e, $f57d4f7fee6ed178
  Data.q $06f067aa72176fba, $0a637dc5a2c898a6, $113f9804bef90dae, $1b710b35131c471b
  Data.q $28db77f523047d84, $32caab7b40c72493, $3c9ebe0a15c9bebc, $431d67c49c100d4c
  Data.q $4cc5d4becb3e42b6, $597f299cfc657e2a, $5fcb6fab3ad6faec, $6c44198c4a475817 
EndDataSection
EndProcedure

; Key_XOR_224_256(Buffer, Source)
; XORs the 64 bytes at Buffer in place with the 16-byte pattern at Source,
; applied four times in a row. Buffer is accessed with movdqa and therefore has to be
; 16-byte aligned; Source is read with movdqu and may be unaligned.
Procedure Key_XOR_224_256(Buffer, Source)
!mov rcx,[rsp+48]          ;Buffer, if debugging on
!mov rdx,[rsp+56]          ;Source, if debugging on
  !movdqu xmm0,[rdx]         ;XMM0 = 16-byte XOR pattern
  !add rcx,64                ;RCX = end of the 64-byte key block
  !mov rax,-64               ;RAX = negative byte offset, counts up to 0
!@@:  
  !movdqa xmm1,[rcx+rax]
  !pxor xmm1,xmm0
  !movdqa [rcx+rax],xmm1
  !add rax,16                ;next 16 bytes; ZF is set when the end is reached
  !jnz @b
EndProcedure

; Concatenation_224_256(Buffer, Hash)
; Copies eight 32-bit words (32 bytes) from Hash to Buffer and reverses the byte
; order of every word on the way (bswap).
Procedure Concatenation_224_256(Buffer, Hash)
!mov rcx,[rsp+48]          ;Buffer, if debugging on 
!mov rdx,[rsp+56]          ;Hash, if debugging on
  !add rcx,32                ;RCX = end of the destination
  !add rdx,32                ;RDX = end of the source
  !mov rax,-32               ;RAX = negative byte offset, counts up to 0
!@@:  
  !mov r8d,[rdx+rax]
  !bswap r8d                 ;swap the byte order of the word
  !mov [rcx+rax],r8d
  !add rax,4                 ;next word; ZF is set when all 8 words are done
  !jnz @b
EndProcedure

; Key_XOR_384_512(Buffer, Source)
; XORs the 128 bytes at Buffer in place with the 16-byte pattern at Source,
; applied eight times in a row. Buffer is accessed with movdqa and therefore has to be
; 16-byte aligned; Source is read with movdqu and may be unaligned.
Procedure Key_XOR_384_512(Buffer, Source)
!mov rcx,[rsp+48]          ;Buffer, if debugging on
!mov rdx,[rsp+56]          ;Source, if debugging on
  !movdqu xmm0,[rdx]         ;XMM0 = 16-byte XOR pattern
  !add rcx,128               ;RCX = end of the 128-byte key block
  !mov rax,-128              ;RAX = negative byte offset, counts up to 0
!@@:  
  !movdqa xmm1,[rcx+rax]
  !pxor xmm1,xmm0
  !movdqa [rcx+rax],xmm1
  !add rax,16                ;next 16 bytes; ZF is set when the end is reached
  !jnz @b
EndProcedure

; Concatenation_384_512(Buffer, Hash)
; Copies eight 64-bit words (64 bytes) from Hash to Buffer and reverses the byte
; order of every word on the way (bswap).
Procedure Concatenation_384_512(Buffer, Hash)
!mov rcx,[rsp+48]          ;Buffer, if debugging on
!mov rdx,[rsp+56]          ;Hash, if debugging on
  !add rcx,64                ;RCX = end of the destination
  !add rdx,64                ;RDX = end of the source
  !mov rax,-64               ;RAX = negative byte offset, counts up to 0
!@@:  
  !mov r8,[rdx+rax]
  !bswap r8                  ;swap the byte order of the quad
  !mov [rcx+rax],r8
  !add rax,8                 ;next quad; ZF is set when all 8 quads are done
  !jnz @b
EndProcedure

; HMAC_Calculate_64(*Source, LengthSource, pKey, LengthKey, Mode)
; Computes HMAC = H((K xor opad) || H((K xor ipad) || message)) with one of the SHA-2
; functions of SHA_Calculate_64() and returns it as an upper-case hex string.
;   *Source      : memory address of the message, or a file number when Mode contains $80
;   LengthSource : message length in bytes
;   pKey         : address of the key bytes
;   LengthKey    : key length in bytes; keys longer than the hash block are hashed first
;   Mode         : bits 0-5 = algorithm (0=SHA-224, 1=SHA-256, 2=SHA-384, 3=SHA-512),
;                  $80 = *Source is a file number
; All decisions about the algorithm family use HashMode (Mode without flag bits); testing
; the raw Mode would select the 384/512 code path for every file because of the $80 flag.
Procedure.s HMAC_Calculate_64(*Source, LengthSource, pKey, LengthKey, Mode)
  HashMode = Mode & $3F                ;algorithm number without the file flag
  Select HashMode
    Case 0
      LB = 64                          ;length of block
      LH = 28                          ;length of hash 
    Case 1
      LB = 64                          ;length of block
      LH = 32                          ;length of hash
    Case 2 
      LB = 128                         ;length of block
      LH = 48                          ;length of hash
    Case 3
      LB = 128                         ;length of block
      LH = 64                          ;length of hash
  EndSelect

  ;Key block for the inner hash. Its address is passed to SHA_Calculate_64() inside the
  ;Mode value, therefore the low byte of the address must be 0 (alignment 256).
  ;AllocateMemory() delivers zeroed memory, so a short key is zero-padded automatically.
  pMem = AllocateMemory(LB + 256)

  i = pMem % 256
  If i 
    pMemA = pMem - i + 256             ;alignment 256
  Else
    pMemA = pMem
  EndIf

  If LengthKey <= LB
    If LengthKey > 0                   ;an empty key may come with a null pointer
      CopyMemory(pKey, pMemA, LengthKey)
    EndIf
  Else
    HashKey$ = SHA_Calculate_64(pKey, LengthKey, HashMode)  
    ;convert HashKey$ back to bytes
    ;The loops below always write 8 words (32 or 64 bytes, missing hex digits give 0),
    ;so the buffer is sized for 64 bytes plus the alignment slack.
    pHashKey = AllocateMemory(64 + 16)
    i = pHashKey % 16
    If i 
      pHashKeyA = pHashKey - i + 16    ;alignment 16
    Else
      pHashKeyA = pHashKey
    EndIf
    If HashMode < 2                    ;224/256
      For i = 0 To 28 Step 4
        PokeL(pHashKeyA + i, Val("$" + Mid(HashKey$, (i * 2 ) + 1, 8)) & $FFFFFFFF)
      Next 
      Concatenation_224_256(pMemA, pHashKeyA)    ;bswap!
    Else                               ;384/512
      For i = 0 To 56 Step 8
        PokeQ(pHashKeyA + i, Val("$" + Mid(HashKey$, (i * 2 ) + 1, 16)))
      Next
      Concatenation_384_512(pMemA, pHashKeyA)    ;bswap!
    EndIf
  EndIf

  ;Key XOR ipad
  If HashMode < 2                      ;224/256
    Key_XOR_224_256(pMemA, ?ipad)      ;$36
  Else
    Key_XOR_384_512(pMemA, ?ipad)      ;$36 
  EndIf

  ;Inner hash: key block followed by the message
  If LengthSource
    Hash1$ = SHA_Calculate_64(*Source, LengthSource + LB, Mode | pMemA | $40)
  Else
    Hash1$ = SHA_Calculate_64(pMemA, LB, HashMode)
  EndIf

  FreeMemory(pMem)

  ;Buffer for the outer hash input: key block (LB bytes) + inner hash. The inner hash is
  ;written as 8 full words (up to 64 bytes), plus slack for the 16-byte alignment.
  pMem = AllocateMemory(LB + 64 + 16)
  i = pMem % 16
  If i 
    pMemA = pMem - i + 16              ;alignment 16
  Else
    pMemA = pMem
  EndIf

  If LengthKey <= LB
    If LengthKey > 0
      CopyMemory(pKey, pMemA, LengthKey)
    EndIf
  Else
    If HashMode < 2                    ;224/256
      Concatenation_224_256(pMemA, pHashKeyA)    ;bswap!
    Else
      Concatenation_384_512(pMemA, pHashKeyA)    ;bswap! 
    EndIf
    FreeMemory(pHashKey)
  EndIf

  pHash1 = AllocateMemory(64 + 16)     ;8 words of up to 8 bytes + alignment slack
  i = pHash1 % 16
  If i 
    pHash1A = pHash1 - i + 16          ;alignment 16
  Else
    pHash1A = pHash1
  EndIf

  ;convert Hash1$ back to bytes
  If HashMode < 2                      ;224/256
    For i = 0 To 28 Step 4
      PokeL(pHash1A + i, Val("$" + Mid(Hash1$, (i * 2 ) + 1, 8)) & $FFFFFFFF)
    Next 
    Key_XOR_224_256(pMemA, ?opad)      ;$5C
    Concatenation_224_256(pMemA + LB, pHash1A)   ;bswap!
  Else                                 ;384/512
    For i = 0 To 56 Step 8
      PokeQ(pHash1A + i, Val("$" + Mid(Hash1$, (i * 2 ) + 1, 16)))
    Next
    Key_XOR_384_512(pMemA, ?opad)      ;$5C
    Concatenation_384_512(pMemA + LB, pHash1A)   ;bswap!
  EndIf

  FreeMemory(pHash1)

  ;Outer hash over (key xor opad) || inner hash; always from memory, never from the file
  HMAC$ = SHA_Calculate_64(pMemA, LB + LH, HashMode)

  FreeMemory(pMem)

 ProcedureReturn HMAC$

DataSection
ipad:
  Data.q $3636363636363636, $3636363636363636
opad:
  Data.q $5c5c5c5c5c5c5c5c, $5c5c5c5c5c5c5c5c  
EndDataSection
EndProcedure

; Main program: a small GUI that hashes a typed text, the clipboard text or a file
; with SHA-224/256/384/512 (optionally as HMAC) and shows the result, elapsed time and throughput.
; The outer Repeat loop builds all gadgets from scratch for every new query.
If OpenWindow(0, 0, 0, 1000, 480, "Helles SHA-2/HMAC, 64-Bit-Windows", #PB_Window_MinimizeGadget | #PB_Window_ScreenCentered)
  Dim Mode.s(3)
  Mode(0) = "SHA-224"
  Mode(1) = "SHA-256"
  Mode(2) = "SHA-384"
  Mode(3) = "SHA-512"

Repeat                                           ;rebuild everything for a new query

  ;Gadgets 0-3: selection of the hash function
  OptionGadget(0, 10, 40, 80, 20, Mode(0))
  OptionGadget(1, 10, 65, 80, 20, Mode(1))
  OptionGadget(2, 10, 90, 80, 20, Mode(2))
  OptionGadget(3, 10, 115, 80, 20, Mode(3))
  SetGadgetState(1, 1)
  
  ;Gadgets 19-21: HMAC switch and key input
  CheckBoxGadget(19, 10, 290, 50, 20, "HMAC")    
  TextGadget(20, 30, 325, 80, 15, "HMAC-Key : ")  
  EditorGadget(21, 110, 320, 880, 20)
  DisableGadget(20, 1)
  DisableGadget(21, 1) 
   
  ;Gadgets 4-8: input source (text, clipboard, file)
  PanelGadget(4, 110, 10, 880, 300)
    AddGadgetItem (4, -1, "Text ")
      EditorGadget(5, 10, 10, 855, 250)
    AddGadgetItem (4, -1, "Insert Clipboard ")
      EditorGadget(6, 10, 10, 855, 250)
    AddGadgetItem (4, -1,"Select a File ")
     TextGadget(7, 10, 10, 230, 15, "Select a File :")
     ExplorerTreeGadget(8, 10, 30, 855, 230, "", #PB_Explorer_NoDriveRequester)
     
  CloseGadgetList()

  SetActiveGadget(5)
  
  ButtonGadget(9, 450, 380, 100, 20, "Start", #PB_Button_Toggle) 
  
  ;Selection loop: runs until "Start" is pressed or a file is double-clicked
  Repeat
    Event = WaitWindowEvent()

    ;On the file tab the Start button is only enabled while a file is selected
    If GetGadgetState(4) = 2 And GetGadgetState(8) <> #PB_Explorer_File
      DisableGadget(9, 1)
      If Toggle
        Toggle ! 1       
      EndIf 
    Else
      DisableGadget(9, 0)
    EndIf
    
    ;Fill the clipboard tab once when it is opened
    If GetGadgetState(4) = 1 And Toggle = 0
      SetGadgetText(6, GetClipboardText())            
      Toggle ! 1  
    ElseIf GetGadgetState(4) = 0  
      If Toggle
        Toggle ! 1       
      EndIf      
    EndIf  
    
    ;Enable / disable the HMAC key input together with the checkbox
    If GetGadgetState(19) = 1 And Toggle2 = 0   
      DisableGadget(20, 0)     
      DisableGadget(21, 0)
      SetActiveGadget(21)
      HMAC = 1
      Toggle2 ! 1       
    ElseIf GetGadgetState(19) = 0  
      DisableGadget(20, 1)       
      DisableGadget(21, 1)       
      HMAC = 0
      If Toggle2
        Toggle2 ! 1       
      EndIf              
    EndIf 
    
    If Event = #PB_Event_CloseWindow
      End
    EndIf

    If GetGadgetState(8) = #PB_Explorer_File     ;preliminary test: can the file be opened?
      File = ReadFile(#PB_Any, GetGadgetText(8))
      If File = 0  
        MessageRequester("Error !", "File Access to ´" + GetGadgetText(8) + "´ denied (System-File?)")
        SetGadgetText(8, "")
      Else
        CloseFile(File)
        File = 0
      EndIf    
    EndIf
  Until GetGadgetState(9) Or (EventType() = #PB_EventType_LeftDoubleClick And GetGadgetState(8) = #PB_Explorer_File)
  
  FreeGadget(9)

  Select GetGadgetState(4)
    Case 0
      Input$ = GetGadgetText(5)
    Case 1
      Input$ = GetGadgetText(6)
    Case 2
      File = 1
  EndSelect

  For Mode = 0 To 3                    ;0=224 etc.
    If GetGadgetState(Mode)
      SHA$ = Mode(Mode)
      Break
    EndIf
  Next

  If HMAC
    Key$ = GetGadgetText(21)
    pKey = @Key$
    SizeKey = Len(Key$)
    HS$ = "HMAC-" + SHA$
  Else 
    HS$ = SHA$
  EndIf
  HS$ + " : " 

  If File
    File$ = GetGadgetText(8)
    *Source = ReadFile(#PB_Any, File$) ;*Source = file number
    Size = Lof(*Source)
    Mode + $80                         ;flag for file
  Else
    Size = Len(Input$)
    File$ = "String"
    *Source = @Input$
  EndIf

  SizeP$ = Str(Size)                   ;for displaying file lengths < 1000 bytes

  TextGadget(9, 10, 350, 960, 20, "File : " + File$)
  Size$ = Str(Size)                    ;insert thousands separators 
  LS = Len(Size$)
  LSMod = LS % 3
  If LSMod = 0
    LSMod = 3
  EndIf
  i = 3
  j = 1
  While LS - i > 0
    SizeP$ =  InsertString(Size$, ".", LSMod + j)
    i + 3
    Size$ = SizeP$
    j + 4
  Wend
  TextGadget(10, 10, 370, 560, 20, "Length : " + SizeP$ + " Bytes")
  TextGadget(11, 10, 390, 100, 20, "Throughput (MB/s) : ")
  TextGadget(12, 110,390, 100, 20, "")
  TextGadget(13, 425,410, 200, 20, "Calculation-Progress :")
  ProgressBarGadget(14, 25,435, 950, 25, 0, 100, #PB_ProgressBar_Smooth)
  TextGadget(15, 550, 410, 50, 20, "")  

  ;==================================================================
  TA = ElapsedMilliseconds()           ;or more precision (short files/strings!)

  If GetGadgetState(19)                ;HMAC
    Hash$ = HMAC_Calculate_64(*Source, Size, pKey, SizeKey, Mode)
  Else                                 ;SHA
    Hash$ = SHA_Calculate_64(*Source, Size, Mode)
  EndIf

  TE = ElapsedMilliseconds() - TA
  ;==================================================================

  If File
    CloseFile(*Source)
    File$ = ""
    File = 0
  EndIf 

  FreeGadget(13) : FreeGadget(14) : FreeGadget(15)    ;calculation progress display

  TextGadget(13, 10, 430, 960, 20, HS$ + Hash$)
  TE$ = Str(TE)                        ;insert thousands separators 
  LT = Len(TE$)
  LTMod = LT % 3
  If LTMod = 0
    LTMod = 3
  EndIf
  i = 3
  j = 1
  While LT - i > 0
    TEP$ =  InsertString(TE$, ".", LTMod + j)
    i + 3
    TE$ = TEP$
    j + 4
  Wend
  SetGadgetText(12, StrF(Size / ((TE + 1) * 1000), 3))
  TextGadget(14, 10, 410, 560, 20, "Elapsed Time : " +  TE$ + " ms")

  For i = 0 To 8
    DisableGadget(i, 1)
  Next

  DisableGadget(19, 1)                 ;HMAC
  DisableGadget(20, 1)
  DisableGadget(21, 1) 

  ButtonGadget(16, 125, 450, 250, 20, "Copy Result to Clipboard", #PB_Button_Toggle)
  ButtonGadget(17, 425, 445, 150, 30, "E N D", #PB_Button_Toggle)  
  ButtonGadget(18, 625, 450, 250, 20, "New Selection", #PB_Button_Toggle)

  ;Result loop: copy the result, quit, or start a new selection
  Repeat
    Event = WaitWindowEvent()
    If GetGadgetState(16)
      SetClipboardText(Hash$)
      SetGadgetState(16, 0)
    EndIf
    If GetGadgetState(17)
      Quit = 1
      Break
    EndIf
    If GetGadgetState(18)
      ;Gadget 15 was freed after the calculation and not created again, so only
      ;gadgets that still exist may be freed here.
      For i = 0 To 21
        If IsGadget(i)
          FreeGadget(i)
        EndIf
      Next
      Break 
    EndIf
    If Event = #PB_Event_CloseWindow
      Quit = 1
      Break
    EndIf
  ForEver

Until Quit

EndIf
End
Sorry for any German comments.
Have fun!
Helle

Edit 08.25.2011: Minor optimizations
Last edited by Helle on Thu Aug 25, 2011 8:12 pm, edited 1 time in total.
User avatar
netmaestro
PureBasic Bullfrog
PureBasic Bullfrog
Posts: 8433
Joined: Wed Jul 06, 2005 5:42 am
Location: Fort Nelson, BC, Canada

Re: SHA-2/HMAC for 64-Bit-Windows

Post by netmaestro »

Looks like very nice work from a master asm coder! Unfortunately I can't test it here because I'm not on 64bit but I'll bet it's fast and accurate. Between your work, wilbert's and mine, that's all the sha2's for 32 and 64bit. Now Fred just has to take them native :mrgreen:
BERESHEIT
Post Reply