SHA-2/HMAC for 32-Bit-Windows

Share your advanced PureBasic knowledge/code with the community.
Helle
Enthusiast
Enthusiast
Posts: 178
Joined: Wed Apr 12, 2006 7:59 pm
Location: Germany
Contact:

SHA-2/HMAC for 32-Bit-Windows

Post by Helle »

For tests (and fun!) I wrote this 32-bit-version. Runs on my computer in a VM with Windows XP/32-bit-PB.

Code: Select all

;- SHA-2/HMAC, based on the English Wikipedia (pseudo-code)
;- "Helle" Klaus Helbing, 27.08.2011, PB 4.51 (x86)
;- 32-Bit-Windows-Version with SSE2
;- Optimierungen für die "Krücken" SHA-224 und SHA-384 habe ich mir verkniffen
;- Parts of Buffer: 0-7=Length.q, 8-11=Chunks.l, 16-19=MemAv.l, 24-27=pk.l, 32-103=p_a_h_A, 112-175=pHashA, 176-815=pWa, 816-819=ppAllBlocksA.l, 832-?=pAllBlocksA
Global X                     ; NOTE(review): not referenced by any code visible in this listing - confirm before removing
Procedure Padding_224_256(BufferA)
  ; SHA-224/256 message padding for the data currently held in the work buffer.
  ;   BufferA : base address of the work buffer
  ;     [BufferA+0]   quad  total message length in bytes
  ;     [BufferA+8]   long  OUT: number of 64-byte chunks to process
  ;     [BufferA+16]  long  MemAv = number of data bytes currently in the block area
  ;     [BufferA+816] long  offset of the block area relative to BufferA
  ; Writes the 80h marker directly behind the data and the message length in bits
  ; (64-bit big-endian) into the last 8 bytes of the final 64-byte block.
  ; The bytes in between are not written here; they must already be zero.
  !mov ecx,[p.v_BufferA]     ;fetch the argument before ESP is changed by the pushes

  !push ebx
  !push edi

  !mov ebx,ecx
  !add ebx,[ecx+816]         ;EBX -> first byte of the block area
  !mov edi,[ecx+16]          ;EDI = MemAv
  !mov byte[ebx+edi],80h     ;append the single 1-bit right behind the data

  !mov eax,edi
  !mov edx,edi
  !shr eax,6                 ;EAX = MemAv / 64
  !and edx,63                ;EDX = MemAv mod 64
  !add eax,1                 ;there is always at least one chunk
  !mov [ecx+8],eax           ;Chunks

  ;case 1: remainder = 0  -> a complete padding block starts at offset MemAv
  !test edx,edx
  !jz @f
  ;case 2: 0 < remainder < 56 -> the length still fits into the last, partly filled block
  !sub edi,edx               ;EDI = offset of the last (partial) block
  !cmp edx,56                ;448 bit
  !jb @f
  ;case 3: remainder >= 56 -> the length goes into one additional block
  !add ebx,64
  !add dword[ecx+8],1        ;one more chunk
!@@:
  !add ebx,edi               ;EBX -> block that receives the length field

  ;bit length = byte length * 8, as a 64-bit shift over EAX:EDI
  !mov edi,[ecx]             ;low dword of Length
  !mov eax,[ecx+4]           ;high dword of Length
  !shld eax,edi,3
  !shl edi,3
  !bswap eax
  !bswap edi
  !mov [ebx+56],eax          ;high dword first  -> big-endian
  !mov [ebx+60],edi

  !pop edi
  !pop ebx
EndProcedure 

Procedure Padding_384_512(BufferA)
  ; SHA-384/512 message padding for the data currently held in the work buffer.
  ;   BufferA : base address of the work buffer
  ;     [BufferA+0]   quad  total message length in bytes
  ;     [BufferA+8]   long  OUT: number of 128-byte chunks to process
  ;     [BufferA+16]  long  MemAv = number of data bytes currently in the block area
  ;     [BufferA+816] long  offset of the block area relative to BufferA
  ; Writes the 80h marker directly behind the data and the message length in bits as a
  ; 128-bit big-endian value into the last 16 bytes of the final 128-byte block.
  ; The upper 64 bits of that field are now cleared explicitly instead of relying on the
  ; caller having zeroed them (a reused buffer could leave stale data there).
  ; The bytes between the marker and the length field must already be zero.
  !mov ecx,[p.v_BufferA]     ;fetch the argument before ESP is changed by the pushes

  !push ebx
  !push edi
  !push esi

  !mov ebx,[ecx+816]         ;ppAllBlocksA
  !add ebx,ecx               ;EBX -> first byte of the block area
  !mov eax,[ecx+16]          ;MemAv
  !mov edi,eax
  !mov esi,128               ;1024 bit
  !xor edx,edx               ;unsigned division
  !div esi                   ;EAX = MemAv / 128, EDX = MemAv mod 128

  !inc eax                   ;there is always at least one chunk
  !mov [ecx+8],eax           ;Chunks
  !mov eax,[ecx+16]          ;MemAv
  !mov byte[ebx+eax],80h     ;append the single 1-bit right behind the data
  !test edx,edx
  ;case 1: remainder = 0 -> a complete padding block starts at offset MemAv
  !jz @f 
  !sub edi,edx               ;EDI = offset of the last (partial) block
  ;case 2: 0 < remainder < 112 -> the length still fits into the last, partly filled block
  !cmp edx,112               ;896 bit
  !jb @f
  ;case 3: remainder >= 112 -> the length goes into one additional block
  !add ebx,128
  !add dword[ecx+8],1        ;one more chunk
!@@:  
  !add ebx,edi               ;EBX -> block that receives the length field
  !xor eax,eax
  !mov [ebx+112],eax         ;upper 64 bits of the 128-bit length are always zero here
  !mov [ebx+116],eax
  !mov edx,8
  !mov eax,[ecx]             ;low dword of Length (quad)
  !mul edx                   ;*8 with carry in EDX (a plain SHL would lose the top bits)
  !mov edi,eax
  !mov esi,edx
  !mov edx,8                 ;cascade into the high dword
  !mov eax,[ecx+4]           ;high dword of Length
  !mul edx         
  !add eax,esi               ;add the carry from the low dword
  !bswap edi
  !bswap eax
  !mov [ebx+120],eax         ;lower 64 bits of the bit length, big-endian
  !mov [ebx+124],edi 

  !pop esi
  !pop edi
  !pop ebx
EndProcedure 

Procedure Main_224_256(BufferA)
  ; Processes ONE 64-byte (512-bit) message block for SHA-224/256.
  ;   BufferA : base address of the work buffer. MOVDQA is used on BufferA+32 and
  ;             BufferA+112, so BufferA has to be 16-byte aligned.
  ;     [BufferA+24]   long  address of the round-constant table k[0..63]
  ;     BufferA+32     working variables a..h (8 dwords)
  ;     BufferA+112    hash state H0..H7 (8 dwords), updated in place
  ;     BufferA+176    message schedule W[0..63]
  ;     [BufferA+816]  long  offset of the current block relative to BufferA;
  ;                          advanced by 64 before returning
  ; EBP is used as a scratch register and is therefore saved/restored as well.
  !mov ecx,[p.v_BufferA]     ;fetch the argument before ESP is changed by the pushes

  !push ebp                  ;EBP is used as scratch in the round loop
  !push ebx
  !push edi
  !push esi

  ;W[0] to W[15]: copy the block as 16 dwords, byte-swapped
  !mov esi,ecx
  !add esi,[ecx+816]         ;ESI -> current block (ppAllBlocksA)
  !mov edi,ecx
  !add edi,176               ;EDI -> W[0] (pWa)
  !mov edx,16
!@@:
  !mov eax,[esi]
  !bswap eax
  !mov [edi],eax
  !add edi,4
  !add esi,4
  !sub edx,1
  !jnz @b
  ;W[16] to W[63]: two words per iteration (low two dword lanes of the XMM registers)
  !mov ebx,24                ;24 iterations * 2 words = 48 words
  !mov esi,ecx
  !add esi,176+64            ;ESI -> W[16]
!@@:
  ;s0 = (w ror 7) xor (w ror 18) xor (w shr 3), for W[i-15] and W[i-14]
  !movq xmm0,[esi-60]        ;W[1], W[2] in the first iteration
  !movdqa xmm1,xmm0
  !movdqa xmm2,xmm0
  !movdqa xmm3,xmm0
  !psrld xmm0,7
  !pslld xmm1,25
  !por xmm0,xmm1
  !movdqa xmm1,xmm2
  !psrld xmm1,18
  !pslld xmm2,14
  !por xmm1,xmm2
  !psrld xmm3,3 
  !pxor xmm0,xmm1
  !pxor xmm0,xmm3            ;xmm0 = s0
  ;s1 = (w ror 17) xor (w ror 19) xor (w shr 10), for W[i-2] and W[i-1]
  !movq xmm1,[esi-8]         ;W[14], W[15] in the first iteration
  !movdqa xmm2,xmm1
  !movdqa xmm3,xmm1
  !movdqa xmm4,xmm1
  !psrld xmm1,17
  !pslld xmm2,15
  !por xmm1,xmm2
  !movdqa xmm2,xmm3
  !psrld xmm2,19
  !pslld xmm3,13
  !por xmm2,xmm3
  !psrld xmm4,10
  !pxor xmm1,xmm2
  !pxor xmm1,xmm4            ;xmm1 = s1
  ;W[i] = W[i-16] + s0 + W[i-7] + s1
  !movq xmm2,[esi-64]        ;W[0], W[1] in the first iteration
  !paddd xmm2,xmm0
  !paddd xmm2,xmm1
  !movq xmm3,[esi-28]        ;W[9], W[10] in the first iteration
  !paddd xmm2,xmm3
  !movq [esi],xmm2

  !add esi,8
  !sub ebx,1
  !jnz @b
  ;Initialisation: copy the hash state into the working variables a..h
  !mov ebx,ecx
  !add ebx,32                ;EBX -> a..h (p_a_h_A)
  !mov esi,ecx
  !add esi,112               ;ESI -> hash state (pHashA)
  !movdqa xmm0,[esi]
  !movdqa xmm1,[esi+16]
  !movdqa [ebx],xmm0
  !movdqa [ebx+16],xmm1
  ;Main loop: ECX is the byte offset into k[] and W[] (0..252 step 4)
  !mov esi,[ecx+24]          ;ESI -> round constants (k_224_256, see the data section)
  !xor ecx,ecx               ;BufferA is no longer needed in ECX; EBX/EDI carry the base
!@@:
  ;s0 = (a ror 2) xor (a ror 13) xor (a ror 22)
  !mov eax,[ebx]             ;a
  !mov edx,eax
  !mov edi,eax
  !ror eax,2
  !ror edx,13
  !xor eax,edx
  !ror edi,22
  !xor edi,eax               ;EDI=s0
  ;maj, computed as (a and b) + (c and (a xor b))
  !mov eax,[ebx]             ;a
  !mov edx,eax
  !mov ebp,[ebx+4]           ;b
  !xor eax,ebp               ;(a xor b)
  !and ebp,edx               ;(a and b)
  !and eax,[ebx+8]           ;c
  !add ebp,eax               ;EBP=maj
  ;t2 = s0 + maj
  !add ebp,edi               ;EBP=t2, EDI=s0
  ;s1 = (e ror 6) xor (e ror 11) xor (e ror 25)
  !mov eax,[ebx+16]          ;e
  !mov edx,eax
  !mov edi,eax
  !ror eax,6
  !ror edx,11
  !xor eax,edx
  !ror edi,25
  !xor edi,eax               ;EDI=s1
  ;ch, computed as g xor (e and (f xor g))
  !mov eax,[ebx+24]          ;g
  !mov edx,eax
  !xor edx,[ebx+20]          ;f
  !and edx,[ebx+16]          ;e
  !xor edx,eax
  ;t1 = h + s1 + ch + k[i] + W[i]
  !mov eax,[ebx+28]          ;h
  !add eax,edi               ;s1
  !add eax,edx               ;ch
  !add eax,[esi+ecx]         ;k[i]
  !mov edi,ebx               ;EBX=BufferA+32
  !add edi,144               ;EDI=BufferA+176=pWA

  !add eax,[edi+ecx]         ;EAX=t1, EDI+ECX=W[i]
  ;Rotate the working variables: h=g, g=f, f=e, e=d+t1, d=c, c=b, b=a, a=t1+t2
  !mov edx,[ebx+12]          ;"old" d
  !add edx,eax               ;EAX=t1
  !movdqa xmm0,[ebx]
  !movdqa xmm1,[ebx+16]
  !movdqu [ebx+4],xmm0       ;write a..h back shifted by 4 bytes (the old h lands at ebx+32, outside a..h)
  !movdqu [ebx+20],xmm1

  !add eax,ebp               ;EAX=t1, EBP=t2
  !mov [ebx],eax             ;"new" a
  !mov [ebx+16],edx          ;"new" e 

  !add ecx,4
  !cmp ecx,256
  !jb @b

  !add dword[edi+816-176],64 ;ppAllBlocksA: advance 64 bytes (512 bit) to the next chunk; EDI=BufferA+176 (pWA)
  ;at the end of each chunk add the working variables to the hash state (carries are ignored)
  !sub edi,64                ;176-64=112=pHashA
  !movdqa xmm0,[edi]  
  !movdqa xmm1,[edi+16]
  !paddd xmm0,[ebx]
  !paddd xmm1,[ebx+16]
  !movdqa [edi],xmm0
  !movdqa [edi+16],xmm1

  !pop esi
  !pop edi
  !pop ebx
  !pop ebp
EndProcedure   

Procedure Main_384_512(BufferA)
  ; Processes ONE 128-byte (1024-bit) message block for SHA-384/512 using 64-bit SSE2 lanes.
  ;   BufferA : base address of the work buffer. MOVDQA is used on BufferA+32, BufferA+112
  ;             and inside the W array, so BufferA has to be 16-byte aligned.
  ;     [BufferA+24]   long  address of the round-constant table k[0..79]
  ;     BufferA+32     working variables a..h (8 quads; the shifted write-back touches 8 more bytes)
  ;     BufferA+112    hash state H0..H7 (8 quads), updated in place
  ;     BufferA+176    message schedule W[0..79]
  ;     [BufferA+816]  long  offset of the current block relative to BufferA;
  ;                          advanced by 128 before returning
  ; Uses xmm0..xmm6 without saving them.
  !mov ecx,[p.v_BufferA]     ;fetch the argument before ESP is changed by the pushes

  !push ebx
  !push edi
  !push esi

  ;W[0] to W[15]: copy the block as 16 quads, each one byte-reversed
  !mov esi,ecx
  !add esi,[ecx+816]         ;ESI -> current block (ppAllBlocksA)
  !mov edi,ecx
  !add edi,176               ;EDI -> W[0] (pWa)
  !mov edx,16
  !pxor xmm0,xmm0            ;set xmm0=0
!@@:
  !movq xmm1,[esi]           ;64-bit byte swap with SSE2 only (PSHUFB would need SSSE3)
  !punpcklbw xmm1,xmm0       ;8 Bytes from xmm1 to 8 words in xmm1 with high-byte=0
  !pshufhw xmm2,xmm1,00011011b ;reverse the 4 high words
  !pshuflw xmm2,xmm2,00011011b ;reverse the 4 low words
  !packuswb xmm2,xmm0        ;back to 8 bytes; each dword is now byte-reversed
  !pshufd xmm2,xmm2,00000001b ;swap the two low dwords -> full 8-byte reversal
  !movq [edi],xmm2

  !add edi,8
  !add esi,8
  !sub edx,1
  !jnz @b
  ;W[16] to W[79]: two quads per iteration
  !mov ebx,32                ;(80-16)/2
  !mov esi,ecx
  !add esi,176+128           ;ESI -> W[16] (pWA+128)
!@@:
  ;s0 = (w ror 1) xor (w ror 8) xor (w shr 7), for W[i-15] and W[i-14]
  !movdqu xmm0,[esi-120]     ;W[1], W[2] in the first iteration (not 16-byte aligned)
  !movdqa xmm1,xmm0
  !movdqa xmm2,xmm0
  !movdqa xmm3,xmm0
  !psrlq xmm0,1
  !psllq xmm1,63
  !por xmm0,xmm1
  !movdqa xmm1,xmm2
  !psrlq xmm1,8
  !psllq xmm2,56
  !por xmm1,xmm2
  !psrlq xmm3,7 
  !pxor xmm0,xmm1
  !pxor xmm0,xmm3            ;xmm0 = s0
  ;s1 = (w ror 19) xor (w ror 61) xor (w shr 6), for W[i-2] and W[i-1]
  !movdqa xmm1,[esi-16]      ;W[14], W[15] in the first iteration
  !movdqa xmm2,xmm1
  !movdqa xmm3,xmm1
  !movdqa xmm4,xmm1
  !psrlq xmm1,19
  !psllq xmm2,45
  !por xmm1,xmm2;
  !movdqa xmm2,xmm3
  !psrlq xmm2,61
  !psllq xmm3,3
  !por xmm2,xmm3
  !psrlq xmm4,6
  !pxor xmm1,xmm2
  !pxor xmm1,xmm4            ;xmm1 = s1
  ;W[i] = W[i-16] + s0 + W[i-7] + s1
  !movdqa xmm2,[esi-128]     ;W[0], W[1] in the first iteration
  !paddq xmm2,xmm0
  !paddq xmm2,xmm1
  !movdqu xmm3,[esi-56]      ;W[9], W[10] in the first iteration (not 16-byte aligned)
  !paddq xmm2,xmm3
  !movdqa [esi],xmm2         ;W[16], W[17] in the first iteration

  !add esi,16
  !sub ebx,1
  !jnz @b
  ;Initialisation: copy the hash state into the working variables a..h
  !mov ebx,ecx
  !add ebx,32                ;EBX -> a..h (p_a_h_A)
  !mov esi,ecx
  !add esi,112               ;ESI -> hash state (pHashA)
  !movdqa xmm0,[esi]
  !movdqa xmm1,[esi+16]
  !movdqa xmm2,[esi+32]
  !movdqa xmm3,[esi+48]
  !movdqa [ebx],xmm0
  !movdqa [ebx+16],xmm1
  !movdqa [ebx+32],xmm2
  !movdqa [ebx+48],xmm3
  ;Main loop: ECX is the byte offset into k[] and W[] (0..632 step 8)
  !mov esi,[ecx+24]          ;ESI -> round constants (k_384_512, see the data section)
  !xor ecx,ecx               ;BufferA is no longer needed in ECX; EBX/EDI carry the base
!@@:
  ;s0 = (a ror 28) xor (a ror 34) xor (a ror 39)
  !movq xmm0,[ebx]           ;a
  !movdqa xmm1,xmm0
  !movdqa xmm2,xmm0
  !movdqa xmm3,xmm0
  !movdqa xmm4,xmm0
  !movdqa xmm5,xmm0

  !psrlq xmm0,28
  !psllq xmm3,36
  !por xmm0,xmm3
  !psrlq xmm1,34
  !psllq xmm4,30
  !por xmm1,xmm4
  !pxor xmm0,xmm1
  !psrlq xmm2,39
  !psllq xmm5,25
  !por xmm2,xmm5
  !pxor xmm0,xmm2            ;xmm0=s0
  ;maj, computed as (a and b) + (c and (a xor b))
  !movq xmm1,[ebx]           ;a
  !movdqa xmm2,xmm1
  !movq xmm3,[ebx+8]         ;b
  !pxor xmm1,xmm3            ;(a xor b)
  !pand xmm3,xmm2            ;(a and b)
  !pand xmm1,[ebx+16]        ;c (16-byte operand; the upper lane of xmm1 is zero after MOVQ)
  !paddq xmm1,xmm3           ;xmm1=maj
  ;t2=s0+maj
  !paddq xmm0,xmm1           ;xmm0=t2
  ;s1 = (e ror 14) xor (e ror 18) xor (e ror 41)
  !movq xmm1,[ebx+32]        ;e
  !movdqa xmm2,xmm1
  !movdqa xmm3,xmm1
  !movdqa xmm4,xmm1
  !movdqa xmm5,xmm1
  !movdqa xmm6,xmm1
  !psrlq xmm1,14
  !psllq xmm4,50
  !por xmm1,xmm4
  !psrlq xmm2,18
  !psllq xmm5,46
  !por xmm2,xmm5
  !pxor xmm1,xmm2
  !psrlq xmm3,41
  !psllq xmm6,23
  !por xmm3,xmm6
  !pxor xmm1,xmm3            ;xmm1=s1
  ;ch, computed as g xor (e and (f xor g))
  !movq xmm2,[ebx+48]        ;g
  !movdqa xmm3,xmm2
  !movq xmm4,[ebx+40]        ;f
  !pxor xmm3,xmm4            ;via XMM4 because ebx+40 is not 16-byte aligned
  !pand xmm3,[ebx+32]        ;e (16-byte operand; the upper lane of xmm3 is zero after MOVQ)
  !pxor xmm2,xmm3            ;xmm2=ch
  ;t1 = h + s1 + ch + k[i] + W[i]
  !movq xmm3,[ebx+56]        ;h
  !paddq xmm3,xmm1           ;xmm1=s1
  !paddq xmm3,xmm2           ;xmm2=ch
  !movq xmm4,[esi+ecx]       ;k[i]
  !paddq xmm3,xmm4           ;via XMM4 because the address is only 8-byte aligned
  !mov edi,ebx               ;EBX=BufferA+32
  !add edi,144               ;EDI=BufferA+176=pWA
  !movq xmm4,[edi+ecx]       ;EDI+ECX=W[i]
  !paddq xmm3,xmm4           ;XMM3=t1, via XMM4 because the address is only 8-byte aligned
  ;Rotate the working variables: h=g, g=f, f=e, e=d+t1, d=c, c=b, b=a, a=t1+t2
  !movq xmm1,[ebx+24]        ;"old" d
  !paddq xmm1,xmm3           ;XMM3=t1
  !movdqa xmm2,[ebx]
  !movdqa xmm4,[ebx+16]
  !movdqa xmm5,[ebx+32]
  !movdqa xmm6,[ebx+48]
  !movdqu [ebx+8],xmm2       ;write a..h back shifted by 8 bytes (the old h lands at ebx+64, outside a..h)
  !movdqu [ebx+24],xmm4
  !movdqu [ebx+40],xmm5
  !movdqu [ebx+56],xmm6

  !paddq xmm3,xmm0           ;XMM3=t1, XMM0=t2
  !movq [ebx],xmm3           ;"new" a
  !movq [ebx+32],xmm1        ;"new" e 

  !add ecx,8
  !cmp ecx,640
  !jb @b

  !add dword[edi+816-176],128     ;ppAllBlocksA: advance 128 bytes (1024 bit) to the next chunk
  ;at the end of each chunk add the working variables to the hash state (carries are ignored)
  !sub edi,64                ;176-64=112=pHashA

  !movdqa xmm0,[edi]  
  !paddq xmm0,[ebx]
  !movdqa [edi],xmm0
  !movdqa xmm0,[edi+16]
  !paddq xmm0,[ebx+16]
  !movdqa [edi+16],xmm0
  !movdqa xmm0,[edi+32]  
  !paddq xmm0,[ebx+32]
  !movdqa [edi+32],xmm0
  !movdqa xmm0,[edi+48]
  !paddq xmm0,[ebx+48]
  !movdqa [edi+48],xmm0

  !pop esi
  !pop edi
  !pop ebx
EndProcedure   

Procedure.s SHA_Calculate_32(*Source, Length.q, Mode)
  ; Computes a SHA-2 digest and returns it as a hex string ("" on allocation or read failure).
  ;   *Source : memory address of the data, or the file number when Mode has bit $80 set
  ;   Length  : number of bytes to hash; with the HMAC flag this INCLUDES the prepended key block
  ;   Mode    : bits 0..5  algorithm: 0=SHA-224, 1=SHA-256, 2=SHA-384, 3=SHA-512
  ;             bit  6 ($40) HMAC: Mode & $FFFFFF00 is the (256-byte aligned) address of the
  ;                          key block, which is hashed in front of the data
  ;             bit  7 ($80) *Source is a file
  ; Data is processed in pieces of at most 256 KB. While hashing, gadgets 12, 14 and 15
  ; (throughput / progress bar / percentage) are updated.
  ; Layout of the work buffer (offsets from BufferA):
  ;   0-7=Length.q, 8-11=Chunks.l, 16-19=MemAv.l, 24-27=pk.l, 32-103=a..h, 112-175=hash state,
  ;   176-815=W, 816-819=offset of the current block, 832-?=block area
  SizeAv.q = Length

  HMAC = Mode & $40
  File = Mode & $80

  pMem = Mode 
  pMem & $FFFFFF00                     ;key block address (aligned to 256, so byte 0 = 0)
  Mode & $3F

  If Length > $40000 
    MemAv = $40000                     ;split size 256KB
    LCopy = $40000
    If Mode < 2
      Chunks = $1000                   ;256KB / 64
     Else 
      Chunks = $800                    ;256KB / 128
    EndIf
    ChunksOld = Chunks
   Else
    MemAv = Length
    LCopy = Length
  EndIf

  ;Padding_384_512 can write up to offset MemAv+143 of the block area (remainder >= 112:
  ;the rest of the current block plus one complete extra block), so 128 spare bytes are
  ;not enough. 16 more bytes are reserved for the alignment shift below.
  PadSlack = 160
  Buffer = AllocateMemory(832 + MemAv + PadSlack + 16)
  If Buffer = 0
    ProcedureReturn ""
  EndIf
  i = Buffer % 16
  If i 
    BufferA = Buffer - i + 16          ;alignment 16 (required by MOVDQA in Main_*)
   Else
    BufferA = Buffer
  EndIf

  Select Mode                          ;select initial hash values and round constants
    Case 0
      HashBase = ?HashBase_224
      LHB = 32                         ;length of the initial hash values
      PokeL(BufferA + 24, ?k_224_256)
      SZ = 24                          ;last byte offset emitted (7 dwords)
      ProgFac.d = 0.064                ;for calculation progress
    Case 1
      HashBase = ?HashBase_256
      LHB = 32                         ;length of the initial hash values
      PokeL(BufferA + 24, ?k_224_256)
      SZ = 28                          ;last byte offset emitted (8 dwords)
      ProgFac.d = 0.064                ;for calculation progress
    Case 2
      HashBase = ?HashBase_384
      LHB = 64                         ;length of the initial hash values
      PokeL(BufferA + 24, ?k_384_512)
      SZ = 40                          ;last byte offset emitted (6 quads)
      ProgFac.d = 0.128                ;for calculation progress
    Case 3
      HashBase = ?HashBase_512
      LHB = 64                         ;length of the initial hash values
      PokeL(BufferA + 24, ?k_384_512)
      SZ = 56                          ;last byte offset emitted (8 quads)
      ProgFac.d = 0.128                ;for calculation progress
  EndSelect

  CopyMemory(HashBase, BufferA + 112, LHB)  ;to the hash state

  PokeQ(BufferA, Length)

  pAllBlocksStart = 832
  PokeL(BufferA + 816, pAllBlocksStart)     ;offset of the current block

  ChunksSum.d = 0
  ChunksSum1.d = 0      
  ChunksAll.d = (Length / (ProgFac * 1000)) + 1   ;for calculation progress
  TF1 = 0
  TF2 = 0

  If HMAC
    HMACVal = LHB << 1                 ;size of the key block = block length (64 or 128)
    If File
      SizeAv - HMACVal                 ;for files SizeAv counts file bytes only
    EndIf
    CopyMemory(pMem, BufferA + pAllBlocksStart, HMACVal)   ;key block in front of the data
  EndIf

  If Length = 0                        ;the empty message still needs one padded block
    PokeL(BufferA + 16, MemAv)         ;MemAv=0
    If Mode < 2
      Padding_224_256(BufferA)
      Main_224_256(BufferA)
     Else
      Padding_384_512(BufferA)
      Main_384_512(BufferA)
    EndIf
  EndIf  

  Seek.q = 0

  While SizeAv > 0

    If File
      BytesRead = ReadData(*Source, BufferA + pAllBlocksStart + HMACVal, MemAv - HMACVal)     ;read the next piece of the file
      If BytesRead <= 0                ;read error: give up instead of looping forever
        FreeMemory(Buffer)
        ProcedureReturn ""
      EndIf
      Seek + BytesRead
      FileSeek(*Source, Seek)        
      SizeAv - BytesRead
     Else 
      ;Only LCopy-HMACVal bytes come from *Source in the first round (the key block already
      ;occupies HMACVal bytes), so the source position must advance by that amount only.
      BytesCopy = LCopy - HMACVal
      CopyMemory(*Source + Seek, BufferA + pAllBlocksStart + HMACVal, BytesCopy)   ;to the block area
      Seek + BytesCopy
      SizeAv - LCopy
      If SizeAv < LCopy    
        LCopy = SizeAv  
      EndIf 
    EndIf

    HMACVal = 0                        ;the key block is only present in the first round

    If SizeAv = 0                      ;last piece: append the padding
      PokeL(BufferA + 16, MemAv)
      If Mode < 2
        Padding_224_256(BufferA)
       Else
        Padding_384_512(BufferA)
      EndIf
      Chunks = PeekL(BufferA + 8)
    EndIf

    While Chunks                       ;one block (64 or 128 bytes) per call
      If Mode < 2
        Main_224_256(BufferA)
       Else
        Main_384_512(BufferA)
      EndIf

      ChunksSum + 1                    ;calculation progress
      TF = ElapsedMilliseconds()
      If TF - TF1 > 250
        If TF - TF2 > 500
          SetGadgetText(12, StrD((ChunksSum - ChunksSum1) * ProgFac / (TF - TF2), 3))
          SetGadgetText(15, StrD((ChunksSum / ChunksAll) * 100, 1) + "%")
          TF2 = TF
          ChunksSum1 = ChunksSum
        EndIf 
        TF1 = TF
        SetGadgetState(14, (ChunksSum / ChunksAll) * 100)
      EndIf

      Chunks - 1
    Wend

    PokeL(BufferA + 816, pAllBlocksStart)   ;rewind to the start of the block area

    If SizeAv <= MemAv 
      MemAv = SizeAv
      ;Clear the part of the block area that the last piece and its padding will use.
      ;The last PokeL ends at or before MemAv+PadSlack-1, which is inside the allocation.
      For i = 0 To MemAv + PadSlack - 4 Step 4
        PokeL(BufferA + pAllBlocksStart + i, 0)
      Next
     Else
      Chunks = ChunksOld
    EndIf

  Wend 

  Hash$ = ""
  If Mode < 2                          ;224/256
    For i = 0 To SZ Step 4
      Hash$ + RSet(Hex(PeekL(BufferA + 112 + i) & $FFFFFFFF), 8, "0")
    Next 
   Else                                ;384/512
    For i = 0 To SZ Step 8
      Hash$ + RSet(Hex(PeekQ(BufferA + 112 + i)), 16, "0")
    Next
  EndIf

  FreeMemory(Buffer)

 ProcedureReturn Hash$

DataSection   ;read only
HashBase_224: ;The second 32 bits of the fractional parts of the square roots of the 9th through 16th primes 23...53, Big-Endian!
  Data.l $c1059ed8, $367cd507, $3070dd17, $f70e5939, $ffc00b31, $68581511, $64f98fa7, $befa4fa4  
HashBase_256: ;The first 32 bits of the fractional parts of the square roots of the first 8 primes 2...19, Big-Endian!
  Data.l $6a09e667, $bb67ae85, $3c6ef372, $a54ff53a, $510e527f, $9b05688c, $1f83d9ab, $5be0cd19
HashBase_384: ;The first 64 bits of the fractional parts of the square roots of the 9th through 16th primes 23...53, Big-Endian! 
  Data.q $cbbb9d5dc1059ed8, $629a292a367cd507, $9159015a3070dd17, $152fecd8f70e5939
  Data.q $67332667ffc00b31, $8eb44a8768581511, $db0c2e0d64f98fa7, $47b5481dbefa4fa4  
HashBase_512: ;The first 64 bits of the fractional parts of the square roots of the first 8 primes 2...19, Big-Endian!
  Data.q $6a09e667f3bcc908, $bb67ae8584caa73b, $3c6ef372fe94f82b, $a54ff53a5f1d36f1
  Data.q $510e527fade682d1, $9b05688c2b3e6c1f, $1f83d9abfb41bd6b, $5be0cd19137e2179
k_224_256:    ;The first 32 bits of the fractional parts of the cube roots of the first 64 primes 2...311, Big-Endian!
  Data.l $428a2f98, $71374491, $b5c0fbcf, $e9b5dba5, $3956c25b, $59f111f1, $923f82a4, $ab1c5ed5
  Data.l $d807aa98, $12835b01, $243185be, $550c7dc3, $72be5d74, $80deb1fe, $9bdc06a7, $c19bf174
  Data.l $e49b69c1, $efbe4786, $0fc19dc6, $240ca1cc, $2de92c6f, $4a7484aa, $5cb0a9dc, $76f988da
  Data.l $983e5152, $a831c66d, $b00327c8, $bf597fc7, $c6e00bf3, $d5a79147, $06ca6351, $14292967
  Data.l $27b70a85, $2e1b2138, $4d2c6dfc, $53380d13, $650a7354, $766a0abb, $81c2c92e, $92722c85
  Data.l $a2bfe8a1, $a81a664b, $c24b8b70, $c76c51a3, $d192e819, $d6990624, $f40e3585, $106aa070
  Data.l $19a4c116, $1e376c08, $2748774c, $34b0bcb5, $391c0cb3, $4ed8aa4a, $5b9cca4f, $682e6ff3
  Data.l $748f82ee, $78a5636f, $84c87814, $8cc70208, $90befffa, $a4506ceb, $bef9a3f7, $c67178f2
k_384_512:    ;The first 64 bits of the fractional parts of the cube roots of the first 80 primes 2...409, Big-Endian! 
  Data.q $428a2f98d728ae22, $7137449123ef65cd, $b5c0fbcfec4d3b2f, $e9b5dba58189dbbc
  Data.q $3956c25bf348b538, $59f111f1b605d019, $923f82a4af194f9b, $ab1c5ed5da6d8118
  Data.q $d807aa98a3030242, $12835b0145706fbe, $243185be4ee4b28c, $550c7dc3d5ffb4e2
  Data.q $72be5d74f27b896f, $80deb1fe3b1696b1, $9bdc06a725c71235, $c19bf174cf692694
  Data.q $e49b69c19ef14ad2, $efbe4786384f25e3, $0fc19dc68b8cd5b5, $240ca1cc77ac9c65
  Data.q $2de92c6f592b0275, $4a7484aa6ea6e483, $5cb0a9dcbd41fbd4, $76f988da831153b5
  Data.q $983e5152ee66dfab, $a831c66d2db43210, $b00327c898fb213f, $bf597fc7beef0ee4
  Data.q $c6e00bf33da88fc2, $d5a79147930aa725, $06ca6351e003826f, $142929670a0e6e70
  Data.q $27b70a8546d22ffc, $2e1b21385c26c926, $4d2c6dfc5ac42aed, $53380d139d95b3df
  Data.q $650a73548baf63de, $766a0abb3c77b2a8, $81c2c92e47edaee6, $92722c851482353b
  Data.q $a2bfe8a14cf10364, $a81a664bbc423001, $c24b8b70d0f89791, $c76c51a30654be30
  Data.q $d192e819d6ef5218, $d69906245565a910, $f40e35855771202a, $106aa07032bbd1b8
  Data.q $19a4c116b8d2d0c8, $1e376c085141ab53, $2748774cdf8eeb99, $34b0bcb5e19b48a8
  Data.q $391c0cb3c5c95a63, $4ed8aa4ae3418acb, $5b9cca4f7763e373, $682e6ff3d6b2b8a3
  Data.q $748f82ee5defb2fc, $78a5636f43172f60, $84c87814a1f0ab72, $8cc702081a6439ec
  Data.q $90befffa23631e28, $a4506cebde82bde9, $bef9a3f7b2c67915, $c67178f2e372532b
  Data.q $ca273eceea26619c, $d186b8c721c0c207, $eada7dd6cde0eb1e, $f57d4f7fee6ed178
  Data.q $06f067aa72176fba, $0a637dc5a2c898a6, $113f9804bef90dae, $1b710b35131c471b
  Data.q $28db77f523047d84, $32caab7b40c72493, $3c9ebe0a15c9bebc, $431d67c49c100d4c
  Data.q $4cc5d4becb3e42b6, $597f299cfc657e2a, $5fcb6fab3ad6faec, $6c44198c4a475817 
EndDataSection
EndProcedure

Procedure Key_XOR_224_256(Buffer, Source)
  ; XORs the 64 bytes at Buffer in place with the 16-byte pattern at Source (repeated 4 times).
  ;   Buffer : 64-byte block, has to be 16-byte aligned (MOVDQA)
  ;   Source : 16-byte pattern, any alignment
  !mov edx,[p.v_Source]
  !mov ecx,[p.v_Buffer]
  !movdqu xmm0,[edx]         ;xmm0 = pattern
  !xor eax,eax               ;EAX = byte offset into the block
!@@:  
  !movdqa xmm1,[ecx+eax]
  !pxor xmm1,xmm0
  !movdqa [ecx+eax],xmm1
  !add eax,16
  !cmp eax,64
  !jb @b
EndProcedure

Procedure Concatenation_224_256(Buffer, Hash)
  ; Copies 8 dwords (32 bytes) from Hash to Buffer, reversing the byte order of each dword.
  ;   Buffer : destination, at least 32 bytes
  ;   Hash   : source, at least 32 bytes
  !mov edx,[p.v_Hash]        ;fetch both arguments before ESP is changed by the push
  !mov ecx,[p.v_Buffer]
  !push ebx
  !xor ebx,ebx               ;EBX = byte offset
!@@:  
  !mov eax,[edx+ebx]
  !bswap eax
  !mov [ecx+ebx],eax
  !add ebx,4
  !cmp ebx,32
  !jb @b
  !pop ebx
EndProcedure

Procedure Key_XOR_384_512(Buffer, Source)
  ; XORs the 128 bytes at Buffer in place with the 16-byte pattern at Source (repeated 8 times).
  ;   Buffer : 128-byte block, has to be 16-byte aligned (MOVDQA)
  ;   Source : 16-byte pattern, any alignment
  !mov edx,[p.v_Source]
  !mov ecx,[p.v_Buffer]
  !movdqu xmm0,[edx]         ;xmm0 = pattern
  !xor eax,eax               ;EAX = byte offset into the block
!@@:  
  !movdqa xmm1,[ecx+eax]
  !pxor xmm1,xmm0
  !movdqa [ecx+eax],xmm1
  !add eax,16
  !cmp eax,128
  !jb @b
EndProcedure

Procedure Concatenation_384_512(Buffer, Hash)
  ; Copies 8 quads (64 bytes) from Hash to Buffer, reversing the byte order of each quad.
  ;   Buffer : destination, at least 64 bytes
  ;   Hash   : source, at least 64 bytes
  ; The 8-byte reversal is done with SSE2 only (PSHUFB would need SSSE3).
  !mov edx,[p.v_Hash]
  !mov ecx,[p.v_Buffer]
  !pxor xmm0,xmm0            ;xmm0 = 0, used for unpacking and packing
  !xor eax,eax               ;EAX = byte offset
!@@:  
  !movq xmm1,[edx+eax]
  !punpcklbw xmm1,xmm0       ;8 bytes -> 8 words with high byte 0
  !pshufhw xmm2,xmm1,00011011b ;reverse the 4 high words
  !pshuflw xmm2,xmm2,00011011b ;reverse the 4 low words
  !packuswb xmm2,xmm0        ;back to 8 bytes; each dword is now byte-reversed
  !pshufd xmm2,xmm2,00000001b ;swap the two low dwords -> full 8-byte reversal
  !movq [ecx+eax],xmm2
  !add eax,8
  !cmp eax,64
  !jb @b
EndProcedure

Procedure.s HMAC_Calculate_32(*Source, LengthSource.q, pKey, LengthKey, Mode)
  ; Computes HMAC = H((K xor opad) || H((K xor ipad) || message)) and returns it as a hex string.
  ;   *Source      : memory address of the message, or the file number when Mode has bit $80 set
  ;   LengthSource : message length in bytes
  ;   pKey         : address of the key bytes
  ;   LengthKey    : key length in bytes; keys longer than the block length are hashed first
  ;   Mode         : bits 0..5 algorithm (0=SHA-224, 1=SHA-256, 2=SHA-384, 3=SHA-512),
  ;                  bit 7 ($80) = *Source is a file
  ; Every algorithm decision uses HashMode (Mode without the flag bits). The original compared
  ; the unmasked Mode with 2, so file-based SHA-224/256 took the 384/512 branches and wrote
  ; beyond the 64-byte key block and the hash scratch buffers.
  HashMode = Mode & $3F                ;algorithm only, file flag removed
  Select HashMode
    Case 0
      LB = 64                          ;block length
      LH = 28                          ;hash length
    Case 1
      LB = 64                          ;block length
      LH = 32                          ;hash length
    Case 2 
      LB = 128                         ;block length
      LH = 48                          ;hash length
    Case 3
      LB = 128                         ;block length
      LH = 64                          ;hash length
  EndSelect

  ;The hex->bytes loops and Concatenation_* always move the untruncated state size
  ;(32 bytes for 224/256, 64 bytes for 384/512), so scratch buffers are sized with this
  ;value instead of LH. The additional 16 bytes cover the alignment shift.
  LHFull = LB >> 1

  pMem = AllocateMemory(LB + 256)

  i = pMem % 256
  If i 
    pMemA = pMem - i + 256             ;alignment 256: the address is passed in the upper bits of Mode
   Else
    pMemA = pMem
  EndIf

  If LengthKey <= LB
    If LengthKey > 0                   ;nothing to copy for an empty key; the block stays zero
      CopyMemory(pKey, pMemA, LengthKey)
    EndIf
   Else
    HashKey$ = SHA_Calculate_32(pKey, LengthKey, HashMode)  
    ;convert HashKey$ back to bytes
    pHashKey = AllocateMemory(LHFull + 16)
    i = pHashKey % 16
    If i 
      pHashKeyA = pHashKey - i + 16    ;alignment 16
     Else
      pHashKeyA = pHashKey
    EndIf
    If HashMode < 2                    ;224/256
      For i = 0 To 28 Step 4
        PokeL(pHashKeyA + i, Val("$" + Mid(HashKey$, (i * 2 ) + 1, 8)) & $FFFFFFFF)
      Next 
      Concatenation_224_256(pMemA, pHashKeyA)    ;bswap!
     Else                              ;384/512
      For i = 0 To 56 Step 8
        PokeQ(pHashKeyA + i, Val("$" + Mid(HashKey$, (i * 2 ) + 1, 16)))
      Next
      Concatenation_384_512(pMemA, pHashKeyA)    ;bswap!
    EndIf
  EndIf

  ;Key XOR ipad
  If HashMode < 2                      ;224/256
    Key_XOR_224_256(pMemA, ?ipad)      ;$36
   Else
    Key_XOR_384_512(pMemA, ?ipad)      ;$36 
  EndIf

  ;inner hash: H((K xor ipad) || message)
  If LengthSource
    Hash1$ = SHA_Calculate_32(*Source, LengthSource + LB, Mode | pMemA | $40)
   Else
    Hash1$ = SHA_Calculate_32(pMemA, LB, HashMode)
  EndIf

  FreeMemory(pMem)

  pMem = AllocateMemory(LB + LHFull + 16)
  i = pMem % 16
  If i 
    pMemA = pMem - i + 16              ;alignment 16 (required by Key_XOR_*)
   Else
    pMemA = pMem
  EndIf

  If LengthKey <= LB
    If LengthKey > 0
      CopyMemory(pKey, pMemA, LengthKey)
    EndIf
   Else
    If HashMode < 2                    ;224/256
      Concatenation_224_256(pMemA, pHashKeyA)    ;bswap!
     Else
      Concatenation_384_512(pMemA, pHashKeyA)    ;bswap! 
    EndIf
    FreeMemory(pHashKey)
  EndIf

  pHash1 = AllocateMemory(LHFull + 16)
  i = pHash1 % 16
  If i 
    pHash1A = pHash1 - i + 16          ;alignment 16
   Else
    pHash1A = pHash1
  EndIf

  ;convert Hash1$ back to bytes and build (K xor opad) || inner hash
  If HashMode < 2                      ;224/256
    For i = 0 To 28 Step 4
      PokeL(pHash1A + i, Val("$" + Mid(Hash1$, (i * 2 ) + 1, 8)) & $FFFFFFFF)
    Next 
    Key_XOR_224_256(pMemA, ?opad)      ;$5C
    Concatenation_224_256(pMemA + LB, pHash1A)   ;bswap!
   Else                                ;384/512
    For i = 0 To 56 Step 8
      PokeQ(pHash1A + i, Val("$" + Mid(Hash1$, (i * 2 ) + 1, 16)))
    Next
    Key_XOR_384_512(pMemA, ?opad)      ;$5C
    Concatenation_384_512(pMemA + LB, pHash1A)   ;bswap!
  EndIf

  FreeMemory(pHash1)

  ;outer hash over block + truncated inner hash (LH bytes)
  HMAC$ = SHA_Calculate_32(pMemA, LB + LH, HashMode)

  FreeMemory(pMem)

 ProcedureReturn HMAC$

DataSection
ipad:
  Data.q $3636363636363636, $3636363636363636
opad:
  Data.q $5c5c5c5c5c5c5c5c, $5c5c5c5c5c5c5c5c  
EndDataSection
EndProcedure

; Main program: a small GUI around SHA_Calculate_32 / HMAC_Calculate_32.
; Gadget numbers used below:
;   0-3   option gadgets SHA-224/256/384/512      4     panel (text / clipboard / file)
;   5, 6  editors for text and clipboard input    7, 8  file label and explorer tree
;   9     "Start" button, later the "File :" line 10-15 result / progress output
;   16-18 buttons copy / end / new selection      19-21 HMAC checkbox, key label, key editor
If OpenWindow(0, 0, 0, 1000, 480, "Helles SHA-2/HMAC, 32-Bit-Windows", #PB_Window_MinimizeGadget | #PB_Window_ScreenCentered)
  Dim Mode.s(3)
  Mode(0) = "SHA-224"
  Mode(1) = "SHA-256"
  Mode(2) = "SHA-384"
  Mode(3) = "SHA-512"

Repeat                                           ;every pass rebuilds the whole input UI for a new query

  OptionGadget(0, 10, 40, 80, 20, Mode(0))
  OptionGadget(1, 10, 65, 80, 20, Mode(1))
  OptionGadget(2, 10, 90, 80, 20, Mode(2))
  OptionGadget(3, 10, 115, 80, 20, Mode(3))
  SetGadgetState(1, 1)                           ;SHA-256 is preselected
  
  CheckBoxGadget(19, 10, 290, 50, 20, "HMAC")    
  TextGadget(20, 30, 325, 80, 15, "HMAC-Key : ")  
  EditorGadget(21, 110, 320, 880, 20)
  DisableGadget(20, 1)
  DisableGadget(21, 1) 
   
  PanelGadget(4, 110, 10, 880, 300)
    AddGadgetItem (4, -1, "Text ")
      EditorGadget(5, 10, 10, 855, 250)
    AddGadgetItem (4, -1, "Insert Clipboard ")
      EditorGadget(6, 10, 10, 855, 250)
    AddGadgetItem (4, -1,"Select a File ")
     TextGadget(7, 10, 10, 230, 15, "Select a File :")
     ExplorerTreeGadget(8, 10, 30, 855, 230, "", #PB_Explorer_NoDriveRequester)
     
  CloseGadgetList()

  SetActiveGadget(5)
  
  ButtonGadget(9, 450, 380, 100, 20, "Start", #PB_Button_Toggle) 
  
  ;Input loop: runs until "Start" is pressed or a file is double-clicked
  Repeat
    Event = WaitWindowEvent()

    ;"Start" is only enabled on the file tab while a file is selected
    If GetGadgetState(4) = 2 And GetGadgetState(8) <> #PB_Explorer_File
      DisableGadget(9, 1)
      If Toggle
        Toggle ! 1       
      EndIf 
     Else
      DisableGadget(9, 0)
    EndIf
    
    ;Toggle makes sure the clipboard is pasted only once per visit of the clipboard tab
    If GetGadgetState(4) = 1 And Toggle = 0
      SetGadgetText(6, GetClipboardText())            
      Toggle ! 1  
     ElseIf GetGadgetState(4) = 0  
      If Toggle
        Toggle ! 1       
      EndIf      
    EndIf  
    
    ;Toggle2 makes sure the key field is enabled/focused only once after the checkbox is ticked
    If GetGadgetState(19) = 1 And Toggle2 = 0   
      DisableGadget(20, 0)     
      DisableGadget(21, 0)
      SetActiveGadget(21)
      HMAC = 1
      Toggle2 ! 1       
     ElseIf GetGadgetState(19) = 0  
      DisableGadget(20, 1)       
      DisableGadget(21, 1)       
      HMAC = 0
      If Toggle2
        Toggle2 ! 1       
      EndIf              
    EndIf 
    
    If Event = #PB_Event_CloseWindow
      End
    EndIf

    If GetGadgetState(8) = #PB_Explorer_File     ;preliminary test: can the selected file be opened?
      File = ReadFile(#PB_Any, GetGadgetText(8))
      If File = 0  
        MessageRequester("Error !", "File Access to ´" + GetGadgetText(8) + "´ denied (System-File?)")
        SetGadgetText(8, "")
       Else
        CloseFile(File)
        File = 0
      EndIf    
    EndIf
  Until GetGadgetState(9) Or (EventType() = #PB_EventType_LeftDoubleClick And GetGadgetState(8) = #PB_Explorer_File)
  
  FreeGadget(9)                        ;gadget number 9 is reused for the "File :" line below

  Select GetGadgetState(4)             ;which tab supplies the input?
    Case 0
      Input$ = GetGadgetText(5)
    Case 1
      Input$ = GetGadgetText(6)
    Case 2
      File = 1
  EndSelect

  For Mode = 0 To 3                    ;0=224 and so on; Mode ends up as the selected algorithm
    If GetGadgetState(Mode)
      SHA$ = Mode(Mode)
      Break
    EndIf
  Next

  If HMAC
    Key$ = GetGadgetText(21)
    pKey = @Key$
    SizeKey = Len(Key$)                ;NOTE(review): character count used as byte count - presumably an ASCII build; confirm
    HS$ = "HMAC-" + SHA$
   Else 
    HS$ = SHA$
  EndIf
  HS$ + " : " 

  If File
    File$ = GetGadgetText(8)
    *Source = ReadFile(#PB_Any, File$) ;*Source = file number (result is not checked here)
    Size.q = Lof(*Source)
    Mode + $80                         ;flag: the source is a file
   Else
    Size.q = Len(Input$)
    File$ = "String"
    *Source = @Input$
  EndIf

  SizeP$ = Str(Size)                   ;used as is when the length has fewer than 4 digits

  TextGadget(9, 10, 350, 960, 20, "File : " + File$)
  Size$ = Str(Size)                    ;insert "." as thousands separator
  LS = Len(Size$)
  LSMod = LS % 3
  If LSMod = 0
    LSMod = 3
  EndIf
  i = 3
  j = 1
  While LS - i > 0
    SizeP$ =  InsertString(Size$, ".", LSMod + j)
    i + 3
    Size$ = SizeP$
    j + 4                              ;3 digits + the separator just inserted
  Wend
  TextGadget(10, 10, 370, 560, 20, "Length : " + SizeP$ + " Bytes")
  TextGadget(11, 10, 390, 100, 20, "Throughput (MB/s) : ")
  TextGadget(12, 110,390, 100, 20, "")
  TextGadget(13, 425,410, 200, 20, "Calculation-Progress :")
  ProgressBarGadget(14, 25,435, 950, 25, 0, 100, #PB_ProgressBar_Smooth)
  TextGadget(15, 550, 410, 50, 20, "")  

  ;==================================================================
  TA = ElapsedMilliseconds()           ;millisecond resolution only (coarse for short files/strings)

  If GetGadgetState(19)                ;HMAC
    Hash$ = HMAC_Calculate_32(*Source, Size, pKey, SizeKey, Mode)
   Else                                ;SHA
    Hash$ = SHA_Calculate_32(*Source, Size, Mode)
  EndIf

  TE = ElapsedMilliseconds() - TA
  ;==================================================================

  If File
    CloseFile(*Source)
    File$ = ""
    File = 0
  EndIf 

  FreeGadget(13) : FreeGadget(14) : FreeGadget(15)    ;remove the progress display; 13 and 14 are reused below

  TextGadget(13, 10, 430, 960, 20, HS$ + Hash$)
  TE$ = Str(TE)                        ;insert "." as thousands separator
  LT = Len(TE$)
  LTMod = LT % 3
  If LTMod = 0
    LTMod = 3
  EndIf
  i = 3
  j = 1
  While LT - i > 0
    TEP$ =  InsertString(TE$, ".", LTMod + j)
    i + 3
    TE$ = TEP$
    j + 4
  Wend
  SetGadgetText(12, StrF(Size / ((TE + 1) * 1000), 3))     ;bytes per (ms*1000); TE+1 avoids a division by zero
  TextGadget(14, 10, 410, 560, 20, "Elapsed Time : " +  TE$ + " ms")

  For i = 0 To 8                       ;lock the input gadgets while the result is shown
    DisableGadget(i, 1)
  Next

  DisableGadget(19, 1)                 ;HMAC
  DisableGadget(20, 1)
  DisableGadget(21, 1) 

  ButtonGadget(16, 125, 450, 250, 20, "Copy Result to Clipboard", #PB_Button_Toggle)
  ButtonGadget(17, 425, 445, 150, 30, "E N D", #PB_Button_Toggle)  
  ButtonGadget(18, 625, 450, 250, 20, "New Selection", #PB_Button_Toggle)

  ;Result loop: copy the result, quit, or start over
  Repeat
    Event = WaitWindowEvent()
    If GetGadgetState(16)
      SetClipboardText(Hash$)
      SetGadgetState(16, 0)
    EndIf
    If GetGadgetState(17)
      Quit = 1
      Break
    EndIf
    If GetGadgetState(18)
      For i = 0 To 21                  ;remove all gadgets; the outer loop creates them again
        FreeGadget(i)
      Next
      Break 
    EndIf
    If Event = #PB_Event_CloseWindow
      Quit = 1
      Break
    EndIf
  ForEver

Until Quit

EndIf
End
Have fun :D !
Helle
Melow
New User
New User
Posts: 6
Joined: Thu Aug 29, 2013 11:51 am

Re: SHA-2/HMAC for 32-Bit-Windows

Post by Melow »

Hi
Thank you, works great on Win 7, 32 bit.

I tried to compile it under OS X (32-bit), but I got a compile error saying that "@@ is already defined".

Any Ideas?

Greetings
Melow

:)
Helle
Enthusiast
Enthusiast
Posts: 178
Joined: Wed Apr 12, 2006 7:59 pm
Location: Germany
Contact:

Re: SHA-2/HMAC for 32-Bit-Windows

Post by Helle »

Thank you!
"@@" is FASM syntax (an anonymous label); FASM is used by PB for Linux and Windows. PB for OS X uses YASM, and in YASM (and NASM) anonymous labels are a special case.
This is a version without anonymous labels:

Code: Select all

;- SHA-2/HMAC, based on the English Wikipedia pseudo-code
;- "Helle" Klaus Helbing, 27.08.2011, PB 4.51 (x86)
;- New version without anonymous labels: 21.03.2014, PB 5.22 LTS (x86)
;- 32-bit Windows version with SSE2
;- I did not bother with optimizations for the "crutches" SHA-224 and SHA-384
;- Parts of Buffer: 0-7=Length.q, 8-11=Chunks.l, 16-19=MemAv.l, 24-27=pk.l, 32-103=p_a_h_A, 112-175=pHashA, 176-815=pWa, 816-819=ppAllBlocksA.l, 832-?=pAllBlocksA
Global X
Procedure Padding_224_256(BufferA)
  ; SHA-224/256 message padding (block length 64 bytes = 512 bits).
  ; Reads from BufferA:  +0   message length in bytes (quad)
  ;                      +16  MemAv, number of data bytes currently in the block area
  ;                      +816 offset of the block area relative to BufferA
  ; Writes:              the 80h terminator byte directly behind the data,
  ;                      the message length in bits as a 64-bit big-endian value into the
  ;                      last 8 bytes of the final block,
  ;                      +8   number of 64-byte chunks that have to be processed.
  ; The bytes between terminator and length field are not written here; they are
  ; expected to be zero already.
  !mov ecx,[p.v_BufferA]     ;fetch the parameter before the pushes change ESP

  !push ebx
  !push edi
  !push esi

  !mov esi,[ecx+16]          ;ESI = MemAv
  !mov ebx,[ecx+816]
  !add ebx,ecx               ;EBX -> first byte of the block area
  !mov byte[ebx+esi],80h     ;append the single set bit

  !mov eax,esi
  !shr eax,6
  !inc eax                   ;EAX = MemAv / 64 + 1 chunks (at least one)
  !mov edx,esi
  !and edx,63                ;EDX = MemAv mod 64
  !mov edi,esi
  !sub edi,edx               ;EDI = offset of the block that holds the 80h byte

  ;Remainder < 56 (this includes 0): the length field fits into the block with the 80h byte.
  !cmp edx,56                ;448 bits
  !jb SHA2_Padding_224_256_Length
  ;Remainder >= 56: no room for the length field, it goes into one additional block.
  !add edi,64
  !inc eax
!SHA2_Padding_224_256_Length:
  !mov [ecx+8],eax           ;store chunk count
  !add ebx,edi               ;EBX -> final block

  !mov eax,[ecx]             ;EDX:EAX = length in bytes (quad)
  !mov edx,[ecx+4]
  !shld edx,eax,3            ;EDX:EAX * 8 = length in bits (modulo 2^64)
  !shl eax,3
  !bswap edx
  !bswap eax
  !mov [ebx+56],edx          ;high dword first: 64-bit big-endian
  !mov [ebx+60],eax

  !pop esi
  !pop edi
  !pop ebx
EndProcedure

Procedure Padding_384_512(BufferA)
  ; SHA-384/512 message padding (block length 128 bytes = 1024 bits).
  ; Reads from BufferA:  +0   message length in bytes (quad)
  ;                      +16  MemAv, number of data bytes currently in the block area
  ;                      +816 offset of the block area relative to BufferA
  ; Writes:              the 80h terminator byte directly behind the data,
  ;                      the message length in bits as a 64-bit big-endian value into the
  ;                      last 8 bytes of the final block,
  ;                      +8   number of 128-byte chunks that have to be processed.
  ; Only the low 64 bits of the 128-bit length field are written; the upper 8 bytes and
  ; the bytes between terminator and length field are expected to be zero already.
  !mov ecx,[p.v_BufferA]     ;fetch the parameter before the pushes change ESP

  !push ebx
  !push edi
  !push esi

  !mov esi,[ecx+16]          ;ESI = MemAv
  !mov ebx,[ecx+816]
  !add ebx,ecx               ;EBX -> first byte of the block area
  !mov byte[ebx+esi],80h     ;append the single set bit

  !mov eax,esi
  !shr eax,7
  !inc eax                   ;EAX = MemAv / 128 + 1 chunks (at least one)
  !mov edx,esi
  !and edx,127               ;EDX = MemAv mod 128
  !mov edi,esi
  !sub edi,edx               ;EDI = offset of the block that holds the 80h byte

  ;Remainder < 112 (this includes 0): the length field fits into the block with the 80h byte.
  !cmp edx,112               ;896 bits
  !jb SHA2_Padding_384_512_Length
  ;Remainder >= 112: no room for the length field, it goes into one additional block.
  !add edi,128
  !inc eax
!SHA2_Padding_384_512_Length:
  !mov [ecx+8],eax           ;store chunk count
  !add ebx,edi               ;EBX -> final block

  !mov eax,[ecx]             ;EDX:EAX = length in bytes (quad)
  !mov edx,[ecx+4]
  !shld edx,eax,3            ;EDX:EAX * 8 = length in bits (modulo 2^64)
  !shl eax,3
  !bswap edx
  !bswap eax
  !mov [ebx+120],edx         ;high dword first: big-endian, low 64 bits of the length field
  !mov [ebx+124],eax

  !pop esi
  !pop edi
  !pop ebx
EndProcedure

Procedure Main_224_256(BufferA)
  ; Processes one 64-byte chunk with the SHA-224/SHA-256 compression function.
  ; BufferA must be 16-byte aligned (movdqa is used on BufferA+32 and BufferA+112).
  ; Reads:   [BufferA+816] offset of the current chunk relative to BufferA
  ;          [BufferA+24]  address of the round constant table k
  ;          BufferA+112   current hash state (8 dwords)
  ; Writes:  BufferA+176   message schedule W[0..63]
  ;          BufferA+32    working variables a..h (plus 4 spill bytes behind h)
  ;          BufferA+112   hash state with the result of this chunk added
  ;          [BufferA+816] advanced by 64, i.e. to the next chunk
  !mov ecx,[p.v_BufferA]

  !push ebp                  ;EBP is used as scratch (maj/t2) in the round loop; original note: pushad
  !push ebx
  !push edi
  !push esi

  ;W[0] to W[15]: copy the chunk as 16 big-endian dwords
  !mov esi,ecx
  !add esi,[ecx+816]         ;ppAllBlocksA, ESI -> current chunk
  !mov edi,ecx
  !add edi,176               ;pWa, EDI -> W[0]
  !mov edx,16
!No_Anonymus_Label_3:
  !mov eax,[esi]
  !bswap eax
  !mov [edi],eax
  !add edi,4
  !add esi,4
  !sub edx,1
 !jnz No_Anonymus_Label_3
  ;W[16] to W[63], two words per pass
  !mov ebx,24                ;24 passes * 2 words = 48 = 64-16
  !mov esi,ecx
  !add esi,176+64            ;pWA+64, ESI -> W[16]
!No_Anonymus_Label_4:
  ;s0 = (w ror 7) xor (w ror 18) xor (w shr 3)
  !movq xmm0,[esi-60]        ;W[i-15], W[i-14] (first pass: W[1], W[2])
  !movdqa xmm1,xmm0
  !movdqa xmm2,xmm0
  !movdqa xmm3,xmm0
  !psrld xmm0,7
  !pslld xmm1,25
  !por xmm0,xmm1
  !movdqa xmm1,xmm2
  !psrld xmm1,18
  !pslld xmm2,14
  !por xmm1,xmm2
  !psrld xmm3,3
  !pxor xmm0,xmm1
  !pxor xmm0,xmm3            ;s0
  ;s1 = (w ror 17) xor (w ror 19) xor (w shr 10)
  !movq xmm1,[esi-8]         ;W[i-2], W[i-1] (first pass: W[14], W[15])
  !movdqa xmm2,xmm1
  !movdqa xmm3,xmm1
  !movdqa xmm4,xmm1
  !psrld xmm1,17
  !pslld xmm2,15
  !por xmm1,xmm2
  !movdqa xmm2,xmm3
  !psrld xmm2,19
  !pslld xmm3,13
  !por xmm2,xmm3
  !psrld xmm4,10
  !pxor xmm1,xmm2
  !pxor xmm1,xmm4            ;s1
  ;W[i] = W[i-16] + s0 + W[i-7] + s1
  !movq xmm2,[esi-64]        ;W[i-16], W[i-15] (first pass: W[0], W[1])
  !paddd xmm2,xmm0
  !paddd xmm2,xmm1
  !movq xmm3,[esi-28]        ;W[i-7], W[i-6] (first pass: W[9], W[10])
  !paddd xmm2,xmm3
  !movq [esi],xmm2

  !add esi,8
  !sub ebx,1
 !jnz No_Anonymus_Label_4
  ;Initialization: copy the hash state into the working variables a..h
  !mov ebx,ecx
  !add ebx,32                ;p_a_h_A
  !mov esi,ecx
  !add esi,112               ;pHashA
  !movdqa xmm0,[esi]
  !movdqa xmm1,[esi+16]
  !movdqa [ebx],xmm0
  !movdqa [ebx+16],xmm1
  ;Main loop: 64 rounds, ECX = byte offset into k and W
  !mov esi,[ecx+24]          ;k_224_256, see Data
  !xor ecx,ecx               ;round offset starts at 0
!No_Anonymus_Label_5:
  ;s0 = (a ror 2) xor (a ror 13) xor (a ror 22)
  !mov eax,[ebx]             ;a
  !mov edx,eax
  !mov edi,eax
  !ror eax,2
  !ror edx,13
  !xor eax,edx
  !ror edi,22
  !xor edi,eax               ;EDI=s0
  ;maj (majority), computed as maj = (a and b) + (c and (a xor b))
  !mov eax,[ebx]             ;a
  !mov edx,eax
  !mov ebp,[ebx+4]           ;b
  !xor eax,ebp               ;(a xor b)
  !and ebp,edx               ;(a and b)
  !and eax,[ebx+8]           ;c
  !add ebp,eax               ;EBP=maj
  ;t2 = s0 + maj
  !add ebp,edi               ;EBP=t2, EDI=s0
  ;s1 = (e ror 6) xor (e ror 11) xor (e ror 25)
  !mov eax,[ebx+16]          ;e
  !mov edx,eax
  !mov edi,eax
  !ror eax,6
  !ror edx,11
  !xor eax,edx
  !ror edi,25
  !xor edi,eax               ;EDI=s1
  ;ch (choose), computed as ch = g xor (e and (f xor g))
  !mov eax,[ebx+24]          ;g
  !mov edx,eax
  !xor edx,[ebx+20]          ;f
  !and edx,[ebx+16]          ;e
  !xor edx,eax
  ;t1 = h + s1 + ch + k[i] + W[i]
  !mov eax,[ebx+28]          ;h
  !add eax,edi               ;s1
  !add eax,edx               ;ch
  !add eax,[esi+ecx]         ;k[i]
  !mov edi,ebx               ;EBX=BufferA+32
  !add edi,144               ;=176=pWA

  !add eax,[edi+ecx]         ;EAX=t1, EDI+ECX=W[i]
  ;Rotate the working variables: h=g, g=f, f=e, e=d+t1, d=c, c=b, b=a, a=t1+t2
  !mov edx,[ebx+12]          ;"old" d
  !add edx,eax               ;EAX=t1
  !movdqa xmm0,[ebx]
  !movdqa xmm1,[ebx+16]
  !movdqu [ebx+4],xmm0       ;copy back shifted by 4 bytes (one variable)
  !movdqu [ebx+20],xmm1      ;the old h lands behind the 8 variables and is not used again

  !add eax,ebp               ;EAX=t1, EBP=t2
  !mov [ebx],eax             ;"new" a
  !mov [ebx+16],edx          ;"new" e

  !add ecx,4
  !cmp ecx,256               ;64 rounds * 4 bytes
 !jb No_Anonymus_Label_5

  !add dword[edi+816-176],64 ;ppAllBlocksA, advance 512 bits (64 bytes) to the next chunk. EDI is BufferA+176 (=pWA)
  ;at the end of each chunk add the working variables to the existing hash values (carries are ignored!)
  !sub edi,64                ;176-64=112=pHashA
  !movdqa xmm0,[edi]
  !movdqa xmm1,[edi+16]
  !paddd xmm0,[ebx]
  !paddd xmm1,[ebx+16]
  !movdqa [edi],xmm0
  !movdqa [edi+16],xmm1

  !pop esi
  !pop edi
  !pop ebx
  !pop ebp
EndProcedure

Procedure Main_384_512(BufferA)
  ; Processes one 128-byte chunk with the SHA-384/SHA-512 compression function,
  ; using SSE2 for the 64-bit arithmetic.
  ; BufferA must be 16-byte aligned (movdqa/pand/paddq use 16-byte memory operands).
  ; Reads:   [BufferA+816] offset of the current chunk relative to BufferA
  ;          [BufferA+24]  address of the round constant table k
  ;          BufferA+112   current hash state (8 quads)
  ; Writes:  BufferA+176   message schedule W[0..79]
  ;          BufferA+32    working variables a..h (plus 8 spill bytes behind h)
  ;          BufferA+112   hash state with the result of this chunk added
  ;          [BufferA+816] advanced by 128, i.e. to the next chunk
  !mov ecx,[p.v_BufferA]

  !push ebx
  !push edi
  !push esi

  ;W[0] to W[15]: copy the chunk as 16 big-endian quads
  !mov esi,ecx
  !add esi,[ecx+816]         ;ppAllBlocksA, ESI -> current chunk
  !mov edi,ecx
  !add edi,176               ;pWa, EDI -> W[0]
  !mov edx,16
  !pxor xmm0,xmm0            ;set xmm0=0
!No_Anonymus_Label_6:
  !movq xmm1,[esi]           ;BSWAP for fun with SSE2, or PSHUFB for 16 Bytes, but is SSSE3
  !punpcklbw xmm1,xmm0       ;8 Bytes from xmm1 to 8 words in xmm1 with high-byte=0
  !pshufhw xmm2,xmm1,00011011b ;reverse the order of the 4 high words
  !pshuflw xmm2,xmm2,00011011b ;reverse the order of the 4 low words
  !packuswb xmm2,xmm0        ;words back to bytes (low 8 bytes of xmm2)
  !pshufd xmm2,xmm2,00000001b ;swap the two dwords of the low quad -> 8 bytes fully reversed
  !movq [edi],xmm2

  !add edi,8
  !add esi,8
  !sub edx,1
 !jnz No_Anonymus_Label_6
  ;W[16] to W[79], two words per pass
  !mov ebx,32                ;(80-16)/2
  !mov esi,ecx
  !add esi,176+128           ;pWA+128, ESI -> W[16]
!No_Anonymus_Label_7:
  ;s0 = (w ror 1) xor (w ror 8) xor (w shr 7)
  !movdqu xmm0,[esi-120]     ;W[i-15], W[i-14] (first pass: W[1], W[2]); not 16-byte aligned
  !movdqa xmm1,xmm0
  !movdqa xmm2,xmm0
  !movdqa xmm3,xmm0
  !psrlq xmm0,1
  !psllq xmm1,63
  !por xmm0,xmm1
  !movdqa xmm1,xmm2
  !psrlq xmm1,8
  !psllq xmm2,56
  !por xmm1,xmm2
  !psrlq xmm3,7
  !pxor xmm0,xmm1
  !pxor xmm0,xmm3            ;s0
  ;s1 = (w ror 19) xor (w ror 61) xor (w shr 6)
  !movdqa xmm1,[esi-16]      ;W[i-2], W[i-1] (first pass: W[14], W[15])
  !movdqa xmm2,xmm1
  !movdqa xmm3,xmm1
  !movdqa xmm4,xmm1
  !psrlq xmm1,19
  !psllq xmm2,45
  !por xmm1,xmm2;
  !movdqa xmm2,xmm3
  !psrlq xmm2,61
  !psllq xmm3,3
  !por xmm2,xmm3
  !psrlq xmm4,6
  !pxor xmm1,xmm2
  !pxor xmm1,xmm4            ;s1
  ;W[i] = W[i-16] + s0 + W[i-7] + s1
  !movdqa xmm2,[esi-128]     ;W[i-16], W[i-15] (first pass: W[0], W[1])
  !paddq xmm2,xmm0
  !paddq xmm2,xmm1
  !movdqu xmm3,[esi-56]      ;W[i-7], W[i-6] (first pass: W[9], W[10]); not 16-byte aligned
  !paddq xmm2,xmm3
  !movdqa [esi],xmm2         ;W[i], W[i+1] (first pass: W[16], W[17])

  !add esi,16
  !sub ebx,1
 !jnz No_Anonymus_Label_7
  ;Initialization: copy the hash state into the working variables a..h
  !mov ebx,ecx
  !add ebx,32                ;p_a_h_A
  !mov esi,ecx
  !add esi,112               ;pHashA
  !movdqa xmm0,[esi]
  !movdqa xmm1,[esi+16]
  !movdqa xmm2,[esi+32]
  !movdqa xmm3,[esi+48]
  !movdqa [ebx],xmm0
  !movdqa [ebx+16],xmm1
  !movdqa [ebx+32],xmm2
  !movdqa [ebx+48],xmm3
  ;Main loop: 80 rounds, ECX = byte offset into k and W
  !mov esi,[ecx+24]          ;k_384_512, see Data
  !xor ecx,ecx               ;round offset starts at 0
!No_Anonymus_Label_8:
  ;s0 = (a ror 28) xor (a ror 34) xor (a ror 39)
  !movq xmm0,[ebx]           ;a
  !movdqa xmm1,xmm0
  !movdqa xmm2,xmm0
  !movdqa xmm3,xmm0
  !movdqa xmm4,xmm0
  !movdqa xmm5,xmm0

  !psrlq xmm0,28
  !psllq xmm3,36
  !por xmm0,xmm3
  !psrlq xmm1,34
  !psllq xmm4,30
  !por xmm1,xmm4
  !pxor xmm0,xmm1
  !psrlq xmm2,39
  !psllq xmm5,25
  !por xmm2,xmm5
  !pxor xmm0,xmm2            ;xmm0=s0
  ;maj (majority), computed as maj = (a and b) + (c and (a xor b))
  !movq xmm1,[ebx]           ;a
  !movdqa xmm2,xmm1
  !movq xmm3,[ebx+8]         ;b (movq, because ebx+8 is not 16-byte aligned)
  !pxor xmm1,xmm3            ;(a xor b)
  !pand xmm3,xmm2
  !pand xmm1,[ebx+16]        ;c (16-byte operand; the high quad of xmm1 is zero, so d has no effect)
  !paddq xmm1,xmm3           ;xmm1=maj
  ;t2=s0+maj
  !paddq xmm0,xmm1           ;xmm0=t2
  ;s1 = (e ror 14) xor (e ror 18) xor (e ror 41)
  !movq xmm1,[ebx+32]        ;e
  !movdqa xmm2,xmm1
  !movdqa xmm3,xmm1
  !movdqa xmm4,xmm1
  !movdqa xmm5,xmm1
  !movdqa xmm6,xmm1
  !psrlq xmm1,14
  !psllq xmm4,50
  !por xmm1,xmm4
  !psrlq xmm2,18
  !psllq xmm5,46
  !por xmm2,xmm5
  !pxor xmm1,xmm2
  !psrlq xmm3,41
  !psllq xmm6,23
  !por xmm3,xmm6
  !pxor xmm1,xmm3            ;xmm1=s1
  ;ch (choose), computed as ch = g xor (e and (f xor g))
  !movq xmm2,[ebx+48]        ;g
  !movdqa xmm3,xmm2
  !movq xmm4,[ebx+40]        ;f
  !pxor xmm3,xmm4            ;XMM4 because Alignment 16
  !pand xmm3,[ebx+32]        ;e (16-byte operand; the high quad of xmm3 is zero, so f has no effect)
  !pxor xmm2,xmm3            ;xmm2=ch
  ;t1 = h + s1 + ch + k[i] + W[i]
  !movq xmm3,[ebx+56]        ;h
  !paddq xmm3,xmm1           ;xmm1=s1
  !paddq xmm3,xmm2           ;xmm2=ch
  !movq xmm4,[esi+ecx]       ;k[i]
  !paddq xmm3,xmm4           ;XMM4 because Alignment 16
  !mov edi,ebx               ;EBX=BufferA+32
  !add edi,144               ;=176=pWA
  !movq xmm4,[edi+ecx]       ;EDI+ECX=W[i]
  !paddq xmm3,xmm4           ;XMM3=t1, XMM4 because Alignment 16
  ;Rotate the working variables: h=g, g=f, f=e, e=d+t1, d=c, c=b, b=a, a=t1+t2
  !movq xmm1,[ebx+24]        ;"old" d
  !paddq xmm1,xmm3           ;XMM3=t1
  !movdqa xmm2,[ebx]
  !movdqa xmm4,[ebx+16]
  !movdqa xmm5,[ebx+32]
  !movdqa xmm6,[ebx+48]
  !movdqu [ebx+8],xmm2       ;copy back shifted by 8 bytes (one variable)
  !movdqu [ebx+24],xmm4
  !movdqu [ebx+40],xmm5
  !movdqu [ebx+56],xmm6      ;the old h lands behind the 8 variables and is not used again

  !paddq xmm3,xmm0           ;XMM3=t1, XMM0=t2
  !movq [ebx],xmm3           ;"new" a
  !movq [ebx+32],xmm1        ;"new" e

  !add ecx,8
  !cmp ecx,640               ;80 rounds * 8 bytes
 !jb No_Anonymus_Label_8

  !add dword[edi+816-176],128     ;advance 1024 bits (128 bytes) to the next chunk, EDI is BufferA+176
  ;at the end of each chunk add the working variables to the existing hash values (carries are ignored!)
  !sub edi,64                ;176-64=112=pHashA

  !movdqa xmm0,[edi]
  !paddq xmm0,[ebx]
  !movdqa [edi],xmm0
  !movdqa xmm0,[edi+16]
  !paddq xmm0,[ebx+16]
  !movdqa [edi+16],xmm0
  !movdqa xmm0,[edi+32]
  !paddq xmm0,[ebx+32]
  !movdqa [edi+32],xmm0
  !movdqa xmm0,[edi+48]
  !paddq xmm0,[ebx+48]
  !movdqa [edi+48],xmm0

  !pop esi
  !pop edi
  !pop ebx
EndProcedure

Procedure.s SHA_Calculate_32(*Source, Length.q, Mode)
  ; Calculates a SHA-2 digest and returns it as an upper-case hex string ("" on error).
  ;
  ; *Source : address of the data, or a PureBasic file number if the $80 tag is set in Mode
  ; Length  : number of bytes to hash; if the $40 tag is set this includes the prefix block
  ; Mode    : bits 0-5  algorithm: 0=SHA-224, 1=SHA-256, 2=SHA-384, 3=SHA-512
  ;           bit 6 ($40) a prefix block (the HMAC key block) is hashed in front of the data
  ;           bit 7 ($80) *Source is a file number, the data is read with ReadData()
  ;           bits 8-31   address of the prefix block (must be 256-byte aligned); its length
  ;                       is the block length of the algorithm (64 or 128 bytes)
  ;
  ; The data is processed in segments of 256 KB inside one work buffer, see "Parts of Buffer"
  ; at the top of the file for the layout. If the progress gadgets 12, 14 and 15 exist they
  ; are updated during the calculation.
  SizeAv.q = Length                    ;bytes that still have to be put into the work buffer

  HMAC = Mode & $40
  File = Mode & $80

  pMem = Mode
  pMem & $FFFFFF00                     ;address of the prefix block (alignment 256, byte 0 = 0)
  Mode & $3F

  If Mode > 3                          ;unknown algorithm: no tables, nothing to calculate
    ProcedureReturn ""
  EndIf

  If Length > $40000
    MemAv = $40000                     ;segment size 256 KB
    LCopy = $40000
    If Mode < 2
      Chunks = $1000                   ;256 KB / 64
     Else
      Chunks = $800                    ;256 KB / 128
    EndIf
    ChunksOld = Chunks
   Else
    MemAv = Length
    LCopy = Length
  EndIf

  ;Padding can need up to 144 bytes behind the data (SHA-384/512: remainder 112 -> 16 bytes
  ;in the current block plus one complete block); 256 bytes are reserved and kept zero.
  ;The extra 16 bytes cover the shift caused by the 16-byte alignment of BufferA.
  Buffer = AllocateMemory(832 + MemAv + 256 + 16)
  If Buffer = 0
    ProcedureReturn ""
  EndIf
  i = Buffer % 16
  If i
    BufferA = Buffer - i + 16          ;alignment 16 (needed for movdqa)
   Else
    BufferA = Buffer
  EndIf

  Select Mode                          ;select initial hash values and round constants
    Case 0
      HashBase = ?HashBase_224
      LHB = 32                         ;length of the hash state
      PokeL(BufferA + 24, ?k_224_256)
      SZ = 24                          ;offset of the last state word that is part of the digest
      ProgFac.d = 0.064                ;for calculation progress (KB per chunk)
    Case 1
      HashBase = ?HashBase_256
      LHB = 32
      PokeL(BufferA + 24, ?k_224_256)
      SZ = 28
      ProgFac.d = 0.064
    Case 2
      HashBase = ?HashBase_384
      LHB = 64
      PokeL(BufferA + 24, ?k_384_512)
      SZ = 40
      ProgFac.d = 0.128
    Case 3
      HashBase = ?HashBase_512
      LHB = 64
      PokeL(BufferA + 24, ?k_384_512)
      SZ = 56
      ProgFac.d = 0.128
  EndSelect

  CopyMemory(HashBase, BufferA + 112, LHB)  ;to pHashA

  PokeQ(BufferA, Length)

  pAllBlocksStart = 832
  PokeL(BufferA + 816, pAllBlocksStart)     ;offset of the current chunk

  ChunksSum.d = 0
  ChunksSum1.d = 0
  ChunksAll.d = (Length / (ProgFac * 1000)) + 1   ;for calculation progress
  TF1 = 0
  TF2 = 0

  If HMAC
    HMAC = 0
    HMACVal = LHB << 1                 ;block length = length of the prefix block
    If File
      SizeAv - HMACVal                 ;only the file bytes are counted when reading
    EndIf
    CopyMemory(pMem, BufferA + pAllBlocksStart, HMACVal)   ;prefix block to the start of the data area
  EndIf

  If Length = 0                        ;an empty message still needs one padded block
    PokeL(BufferA + 16, MemAv)         ;MemAv=0
    If Mode < 2
      Padding_224_256(BufferA)
      Main_224_256(BufferA)
     Else
      Padding_384_512(BufferA)
      Main_384_512(BufferA)
    EndIf
  EndIf

  Seek.q = 0                           ;number of source bytes consumed so far

  While SizeAv > 0

    If File
      BytesRead = ReadData(*Source, BufferA + pAllBlocksStart + HMACVal, MemAv - HMACVal)     ;read file into memory, *Source = file number
      If BytesRead <= 0                ;read error or file shorter than announced: do not loop forever
        FreeMemory(Buffer)
        ProcedureReturn ""
      EndIf
      Seek + BytesRead
      FileSeek(*Source, Seek)
      SizeAv - BytesRead
     Else
      ;The first segment of an HMAC calculation starts with the prefix block, so it takes
      ;HMACVal source bytes less. Seek must only advance by the bytes really taken from
      ;*Source, otherwise one block of the input is skipped and the last segment reads
      ;behind the end of the source.
      BytesCopy = LCopy - HMACVal
      CopyMemory(*Source + Seek, BufferA + pAllBlocksStart + HMACVal, BytesCopy)   ;to pAllBlocksA
      Seek + BytesCopy
      SizeAv - LCopy                   ;SizeAv counts buffer bytes (prefix block included)
      If SizeAv < LCopy
        LCopy = SizeAv
      EndIf
    EndIf

    HMACVal = 0                        ;the prefix block is only part of the first segment

    If SizeAv = 0                      ;last segment: append the padding
      PokeL(BufferA + 16, MemAv)
      If Mode < 2
        Padding_224_256(BufferA)
       Else
        Padding_384_512(BufferA)
      EndIf
      Chunks = PeekL(BufferA + 8)
    EndIf

    While Chunks                       ;one block (64 or 128 bytes) per pass
      If Mode < 2
        Main_224_256(BufferA)
       Else
        Main_384_512(BufferA)
      EndIf

      ChunksSum + 1                    ;calculation progress
      TF = ElapsedMilliseconds()
      If TF - TF1 > 250
        If TF - TF2 > 500
          If IsGadget(12)              ;the gadgets only exist when called from the GUI
            SetGadgetText(12, StrD((ChunksSum - ChunksSum1) * ProgFac / (TF - TF2), 3))
          EndIf
          If IsGadget(15)
            SetGadgetText(15, StrD((ChunksSum / ChunksAll) * 100, 1) + "%")
          EndIf
          TF2 = TF
          ChunksSum1 = ChunksSum
        EndIf
        TF1 = TF
        If IsGadget(14)
          SetGadgetState(14, (ChunksSum / ChunksAll) * 100)
        EndIf
      EndIf

      Chunks - 1
    Wend

    PokeL(BufferA + 816, pAllBlocksStart)   ;back to the first chunk of the buffer

    If SizeAv <= MemAv
      ;The next segment is the last one. Clear its data area and the complete padding
      ;area behind it, so that the padding procedures find zeros and no stale data.
      MemAv = SizeAv
      FillMemory(BufferA + pAllBlocksStart, MemAv + 256)
     Else
      Chunks = ChunksOld
    EndIf

  Wend

  Hash$ = ""
  If Mode < 2                          ;224/256
    For i = 0 To SZ Step 4
      Hash$ + RSet(Hex(PeekL(BufferA + 112 + i) & $FFFFFFFF), 8, "0")
    Next
   Else                                ;384/512
    For i = 0 To SZ Step 8
      Hash$ + RSet(Hex(PeekQ(BufferA + 112 + i)), 16, "0")
    Next
  EndIf

  FreeMemory(Buffer)

 ProcedureReturn Hash$

DataSection   ;read only
HashBase_224: ;The second 32 bits of the fractional parts of the square roots of the 9th through 16th primes 23...53, Big-Endian!
  Data.l $c1059ed8, $367cd507, $3070dd17, $f70e5939, $ffc00b31, $68581511, $64f98fa7, $befa4fa4
HashBase_256: ;The first 32 bits of the fractional parts of the square roots of the first 8 primes 2...19, Big-Endian!
  Data.l $6a09e667, $bb67ae85, $3c6ef372, $a54ff53a, $510e527f, $9b05688c, $1f83d9ab, $5be0cd19
HashBase_384: ;The first 64 bits of the fractional parts of the square roots of the 9th through 16th primes 23...53, Big-Endian!
  Data.q $cbbb9d5dc1059ed8, $629a292a367cd507, $9159015a3070dd17, $152fecd8f70e5939
  Data.q $67332667ffc00b31, $8eb44a8768581511, $db0c2e0d64f98fa7, $47b5481dbefa4fa4
HashBase_512: ;The first 64 bits of the fractional parts of the square roots of the first 8 primes 2...19, Big-Endian!
  Data.q $6a09e667f3bcc908, $bb67ae8584caa73b, $3c6ef372fe94f82b, $a54ff53a5f1d36f1
  Data.q $510e527fade682d1, $9b05688c2b3e6c1f, $1f83d9abfb41bd6b, $5be0cd19137e2179
k_224_256:    ;The first 32 bits of the fractional parts of the cube roots of the first 64 primes 2...311, Big-Endian!
  Data.l $428a2f98, $71374491, $b5c0fbcf, $e9b5dba5, $3956c25b, $59f111f1, $923f82a4, $ab1c5ed5
  Data.l $d807aa98, $12835b01, $243185be, $550c7dc3, $72be5d74, $80deb1fe, $9bdc06a7, $c19bf174
  Data.l $e49b69c1, $efbe4786, $0fc19dc6, $240ca1cc, $2de92c6f, $4a7484aa, $5cb0a9dc, $76f988da
  Data.l $983e5152, $a831c66d, $b00327c8, $bf597fc7, $c6e00bf3, $d5a79147, $06ca6351, $14292967
  Data.l $27b70a85, $2e1b2138, $4d2c6dfc, $53380d13, $650a7354, $766a0abb, $81c2c92e, $92722c85
  Data.l $a2bfe8a1, $a81a664b, $c24b8b70, $c76c51a3, $d192e819, $d6990624, $f40e3585, $106aa070
  Data.l $19a4c116, $1e376c08, $2748774c, $34b0bcb5, $391c0cb3, $4ed8aa4a, $5b9cca4f, $682e6ff3
  Data.l $748f82ee, $78a5636f, $84c87814, $8cc70208, $90befffa, $a4506ceb, $bef9a3f7, $c67178f2
k_384_512:    ;The first 64 bits of the fractional parts of the cube roots of the first 80 primes 2...409, Big-Endian!
  Data.q $428a2f98d728ae22, $7137449123ef65cd, $b5c0fbcfec4d3b2f, $e9b5dba58189dbbc
  Data.q $3956c25bf348b538, $59f111f1b605d019, $923f82a4af194f9b, $ab1c5ed5da6d8118
  Data.q $d807aa98a3030242, $12835b0145706fbe, $243185be4ee4b28c, $550c7dc3d5ffb4e2
  Data.q $72be5d74f27b896f, $80deb1fe3b1696b1, $9bdc06a725c71235, $c19bf174cf692694
  Data.q $e49b69c19ef14ad2, $efbe4786384f25e3, $0fc19dc68b8cd5b5, $240ca1cc77ac9c65
  Data.q $2de92c6f592b0275, $4a7484aa6ea6e483, $5cb0a9dcbd41fbd4, $76f988da831153b5
  Data.q $983e5152ee66dfab, $a831c66d2db43210, $b00327c898fb213f, $bf597fc7beef0ee4
  Data.q $c6e00bf33da88fc2, $d5a79147930aa725, $06ca6351e003826f, $142929670a0e6e70
  Data.q $27b70a8546d22ffc, $2e1b21385c26c926, $4d2c6dfc5ac42aed, $53380d139d95b3df
  Data.q $650a73548baf63de, $766a0abb3c77b2a8, $81c2c92e47edaee6, $92722c851482353b
  Data.q $a2bfe8a14cf10364, $a81a664bbc423001, $c24b8b70d0f89791, $c76c51a30654be30
  Data.q $d192e819d6ef5218, $d69906245565a910, $f40e35855771202a, $106aa07032bbd1b8
  Data.q $19a4c116b8d2d0c8, $1e376c085141ab53, $2748774cdf8eeb99, $34b0bcb5e19b48a8
  Data.q $391c0cb3c5c95a63, $4ed8aa4ae3418acb, $5b9cca4f7763e373, $682e6ff3d6b2b8a3
  Data.q $748f82ee5defb2fc, $78a5636f43172f60, $84c87814a1f0ab72, $8cc702081a6439ec
  Data.q $90befffa23631e28, $a4506cebde82bde9, $bef9a3f7b2c67915, $c67178f2e372532b
  Data.q $ca273eceea26619c, $d186b8c721c0c207, $eada7dd6cde0eb1e, $f57d4f7fee6ed178
  Data.q $06f067aa72176fba, $0a637dc5a2c898a6, $113f9804bef90dae, $1b710b35131c471b
  Data.q $28db77f523047d84, $32caab7b40c72493, $3c9ebe0a15c9bebc, $431d67c49c100d4c
  Data.q $4cc5d4becb3e42b6, $597f299cfc657e2a, $5fcb6fab3ad6faec, $6c44198c4a475817
EndDataSection
EndProcedure

Procedure Key_XOR_224_256(Buffer, Source)
  ; XORs the 64 bytes at Buffer (one SHA-224/256 block) in place with the 16-byte
  ; pattern at Source, the pattern being repeated four times.
  ; Buffer must be 16-byte aligned (movdqa); Source may be unaligned (movdqu).
  !mov edx,[p.v_Source]
  !mov ecx,[p.v_Buffer]
  !movdqu xmm0,[edx]         ;XMM0 = pattern
  !movdqa xmm1,[ecx]
  !movdqa xmm2,[ecx+16]
  !movdqa xmm3,[ecx+32]
  !movdqa xmm4,[ecx+48]
  !pxor xmm1,xmm0
  !pxor xmm2,xmm0
  !pxor xmm3,xmm0
  !pxor xmm4,xmm0
  !movdqa [ecx],xmm1
  !movdqa [ecx+16],xmm2
  !movdqa [ecx+32],xmm3
  !movdqa [ecx+48],xmm4
  !xor eax,eax               ;EAX ends as 0, like the loop counter of the looped form
EndProcedure

Procedure Concatenation_224_256(Buffer, Hash)
  ; Copies 8 dwords (32 bytes) from Hash to Buffer and reverses the byte order of
  ; every dword (bswap). The dwords are handled one after the other in ascending order.
  !mov edx,[p.v_Hash]
  !mov ecx,[p.v_Buffer]
  !mov eax,[edx]
  !bswap eax
  !mov [ecx],eax
  !mov eax,[edx+4]
  !bswap eax
  !mov [ecx+4],eax
  !mov eax,[edx+8]
  !bswap eax
  !mov [ecx+8],eax
  !mov eax,[edx+12]
  !bswap eax
  !mov [ecx+12],eax
  !mov eax,[edx+16]
  !bswap eax
  !mov [ecx+16],eax
  !mov eax,[edx+20]
  !bswap eax
  !mov [ecx+20],eax
  !mov eax,[edx+24]
  !bswap eax
  !mov [ecx+24],eax
  !mov eax,[edx+28]
  !bswap eax
  !mov [ecx+28],eax          ;EAX keeps the last swapped dword, as in the looped form
EndProcedure

Procedure Key_XOR_384_512(Buffer, Source)
  ; XORs the 128 bytes at Buffer (one SHA-384/512 block) in place with the 16-byte
  ; pattern at Source, the pattern being repeated eight times.
  ; Buffer must be 16-byte aligned (movdqa); Source may be unaligned (movdqu).
  !mov edx,[p.v_Source]
  !mov ecx,[p.v_Buffer]
  !movdqu xmm0,[edx]         ;XMM0 = pattern
  !lea eax,[ecx+128]         ;EAX = end of the block
!SHA2_Key_XOR_384_512_Next:
  !movdqa xmm1,[ecx]
  !pxor xmm1,xmm0
  !movdqa [ecx],xmm1
  !add ecx,16
  !cmp ecx,eax
 !jne SHA2_Key_XOR_384_512_Next
  !xor eax,eax               ;EAX ends as 0, like the counter of the counted form
EndProcedure

Procedure Concatenation_384_512(Buffer, Hash)
  ; Copies 8 quads (64 bytes) from Hash to Buffer and reverses the byte order of
  ; every quad. Each quad is read completely before it is written. SSE2 only.
  !mov edx,[p.v_Hash]
  !mov ecx,[p.v_Buffer]
  !lea eax,[edx+64]          ;EAX = end of the source
!SHA2_Concatenation_384_512_Next:
  !movq xmm1,[edx]
  !movdqa xmm2,xmm1
  !psrlw xmm1,8
  !psllw xmm2,8
  !por xmm1,xmm2             ;the two bytes of every word are swapped
  !pshuflw xmm1,xmm1,00011011b ;reverse the order of the 4 words -> 8 bytes reversed
  !movq [ecx],xmm1
  !add edx,8
  !add ecx,8
  !cmp edx,eax
 !jne SHA2_Concatenation_384_512_Next
  !xor eax,eax               ;EAX ends as 0, like the counter of the counted form
EndProcedure

Procedure.s HMAC_Calculate_32(*Source, LengthSource.q, pKey, LengthKey, Mode)
  ; Calculates HMAC = H((K xor opad) + H((K xor ipad) + message)) with a SHA-2 hash and
  ; returns it as an upper-case hex string ("" for an unknown algorithm).
  ;
  ; *Source      : address of the message, or a file number if the $80 tag is set in Mode
  ; LengthSource : length of the message in bytes
  ; pKey         : address of the key
  ; LengthKey    : length of the key in bytes; keys longer than one block are hashed first
  ; Mode         : bits 0-5 algorithm (0=SHA-224, 1=SHA-256, 2=SHA-384, 3=SHA-512),
  ;                $80 = *Source is a file number. No other bits may be set, because the
  ;                address of the key block is OR-ed into Mode for the inner hash.
  Algo = Mode & $3F                    ;algorithm without the file tag; all "224/256 or
                                       ;384/512" decisions must use this value, not Mode
  Select Algo
    Case 0
      LB = 64                          ;length of one block
      LH = 28                          ;length of the digest
      LState = 32                      ;length of the hash state (bytes handled by Concatenation_...)
    Case 1
      LB = 64
      LH = 32
      LState = 32
    Case 2
      LB = 128
      LH = 48
      LState = 64
    Case 3
      LB = 128
      LH = 64
      LState = 64
    Default
      ProcedureReturn ""
  EndSelect

  ;--- inner hash: H((K xor ipad) + message) ---
  pMem = AllocateMemory(LB + 256)      ;zero-filled, so a short key is padded with zeros

  i = pMem % 256
  If i
    pMemA = pMem - i + 256             ;alignment 256: the address is passed in the upper bits of Mode
   Else
    pMemA = pMem
  EndIf

  If LengthKey <= LB
    CopyMemory(pKey, pMemA, LengthKey)
   Else
    HashKey$ = SHA_Calculate_32(pKey, LengthKey, Algo)
    ;convert HashKey$ back to bytes; always the full state length is written (missing hex
    ;digits of a truncated digest give 0), so the buffer is sized with LState, not LH
    pHashKey = AllocateMemory(LState + 16)
    i = pHashKey % 16
    If i
      pHashKeyA = pHashKey - i + 16    ;alignment 16
     Else
      pHashKeyA = pHashKey
    EndIf
    If Algo < 2                        ;224/256
      For i = 0 To 28 Step 4
        PokeL(pHashKeyA + i, Val("$" + Mid(HashKey$, (i * 2 ) + 1, 8)) & $FFFFFFFF)
      Next
      Concatenation_224_256(pMemA, pHashKeyA)    ;bswap!
     Else                              ;384/512
      For i = 0 To 56 Step 8
        PokeQ(pHashKeyA + i, Val("$" + Mid(HashKey$, (i * 2 ) + 1, 16)))
      Next
      Concatenation_384_512(pMemA, pHashKeyA)    ;bswap!
    EndIf
  EndIf

  ;Key XOR ipad
  If Algo < 2                          ;224/256
    Key_XOR_224_256(pMemA, ?ipad)      ;$36
   Else
    Key_XOR_384_512(pMemA, ?ipad)      ;$36
  EndIf

  If LengthSource
    ;the key block is handed over as prefix block ($40 tag + address), the file tag is kept
    Hash1$ = SHA_Calculate_32(*Source, LengthSource + LB, Mode | pMemA | $40)
   Else
    Hash1$ = SHA_Calculate_32(pMemA, LB, Algo)
  EndIf

  FreeMemory(pMem)

  ;--- outer hash: H((K xor opad) + inner hash) ---
  pMem = AllocateMemory(LB + LState + 16)   ;Concatenation_... writes LState bytes behind the key block
  i = pMem % 16
  If i
    pMemA = pMem - i + 16              ;alignment 16
   Else
    pMemA = pMem
  EndIf

  If LengthKey <= LB
    CopyMemory(pKey, pMemA, LengthKey)
   Else
    If Algo < 2                        ;224/256
      Concatenation_224_256(pMemA, pHashKeyA)    ;bswap!
     Else
      Concatenation_384_512(pMemA, pHashKeyA)    ;bswap!
    EndIf
    FreeMemory(pHashKey)
  EndIf

  pHash1 = AllocateMemory(LState + 16)
  i = pHash1 % 16
  If i
    pHash1A = pHash1 - i + 16          ;alignment 16
   Else
    pHash1A = pHash1
  EndIf

  ;convert Hash1$ back to bytes
  If Algo < 2                          ;224/256
    For i = 0 To 28 Step 4
      PokeL(pHash1A + i, Val("$" + Mid(Hash1$, (i * 2 ) + 1, 8)) & $FFFFFFFF)
    Next
    Key_XOR_224_256(pMemA, ?opad)      ;$5C
    Concatenation_224_256(pMemA + LB, pHash1A)   ;bswap!
   Else                                ;384/512
    For i = 0 To 56 Step 8
      PokeQ(pHash1A + i, Val("$" + Mid(Hash1$, (i * 2 ) + 1, 16)))
    Next
    Key_XOR_384_512(pMemA, ?opad)      ;$5C
    Concatenation_384_512(pMemA + LB, pHash1A)   ;bswap!
  EndIf

  FreeMemory(pHash1)

  HMAC$ = SHA_Calculate_32(pMemA, LB + LH, Algo)   ;only LH bytes of the inner hash are hashed

  FreeMemory(pMem)

 ProcedureReturn HMAC$

DataSection
ipad:
  Data.q $3636363636363636, $3636363636363636
opad:
  Data.q $5c5c5c5c5c5c5c5c, $5c5c5c5c5c5c5c5c
EndDataSection
EndProcedure

; Main program: small GUI that hashes a text, the clipboard content or a file with
; SHA-224/256/384/512, optionally as HMAC, and shows digest, elapsed time and throughput.
; Gadget numbers: 0-3 algorithm, 4 input panel (5 text, 6 clipboard, 7/8 file selection),
; 9 start button (later file name), 10-15 result/progress display, 16-18 buttons after the
; calculation, 19-21 HMAC check box and key.
If OpenWindow(0, 0, 0, 1000, 480, "Helles SHA-2/HMAC, 32-Bit-Windows", #PB_Window_MinimizeGadget | #PB_Window_ScreenCentered)
  Dim Mode.s(3)
  Mode(0) = "SHA-224"
  Mode(1) = "SHA-256"
  Mode(2) = "SHA-384"
  Mode(3) = "SHA-512"

Repeat                                           ;everything is rebuilt for a new selection

  OptionGadget(0, 10, 40, 80, 20, Mode(0))
  OptionGadget(1, 10, 65, 80, 20, Mode(1))
  OptionGadget(2, 10, 90, 80, 20, Mode(2))
  OptionGadget(3, 10, 115, 80, 20, Mode(3))
  SetGadgetState(1, 1)

  CheckBoxGadget(19, 10, 290, 50, 20, "HMAC")
  TextGadget(20, 30, 325, 80, 15, "HMAC-Key : ")
  EditorGadget(21, 110, 320, 880, 20)
  DisableGadget(20, 1)
  DisableGadget(21, 1)

  PanelGadget(4, 110, 10, 880, 300)
    AddGadgetItem (4, -1, "Text ")
      EditorGadget(5, 10, 10, 855, 250)
    AddGadgetItem (4, -1, "Insert Clipboard ")
      EditorGadget(6, 10, 10, 855, 250)
    AddGadgetItem (4, -1,"Select a File ")
     TextGadget(7, 10, 10, 230, 15, "Select a File :")
     ExplorerTreeGadget(8, 10, 30, 855, 230, "", #PB_Explorer_NoDriveRequester)

  CloseGadgetList()

  SetActiveGadget(5)

  ButtonGadget(9, 450, 380, 100, 20, "Start", #PB_Button_Toggle)

  Repeat
    Event = WaitWindowEvent()

    ;file tab: the start button is only available while a file is selected
    If GetGadgetState(4) = 2 And GetGadgetState(8) <> #PB_Explorer_File
      DisableGadget(9, 1)
      If Toggle
        Toggle ! 1
      EndIf
     Else
      DisableGadget(9, 0)
    EndIf

    ;clipboard tab: insert the clipboard content once when the tab is entered
    If GetGadgetState(4) = 1 And Toggle = 0
      SetGadgetText(6, GetClipboardText())
      Toggle ! 1
     ElseIf GetGadgetState(4) = 0
      If Toggle
        Toggle ! 1
      EndIf
    EndIf

    ;HMAC check box: enable the key input once when it is checked
    If GetGadgetState(19) = 1 And Toggle2 = 0
      DisableGadget(20, 0)
      DisableGadget(21, 0)
      SetActiveGadget(21)
      HMAC = 1
      Toggle2 ! 1
     ElseIf GetGadgetState(19) = 0
      DisableGadget(20, 1)
      DisableGadget(21, 1)
      HMAC = 0
      If Toggle2
        Toggle2 ! 1
      EndIf
    EndIf

    If Event = #PB_Event_CloseWindow
      End
    EndIf

    If GetGadgetState(8) = #PB_Explorer_File     ;preliminary test: can the file be opened?
      File = ReadFile(#PB_Any, GetGadgetText(8))
      If File = 0
        MessageRequester("Error !", "File Access to ´" + GetGadgetText(8) + "´ denied (System-File?)")
        SetGadgetText(8, "")
       Else
        CloseFile(File)
        File = 0
      EndIf
    EndIf
  Until GetGadgetState(9) Or (EventType() = #PB_EventType_LeftDoubleClick And GetGadgetState(8) = #PB_Explorer_File)

  FreeGadget(9)

  Select GetGadgetState(4)
    Case 0
      Input$ = GetGadgetText(5)
    Case 1
      Input$ = GetGadgetText(6)
    Case 2
      File = 1
  EndSelect

  For Mode = 0 To 3                    ;0=224 and so on
    If GetGadgetState(Mode)
      SHA$ = Mode(Mode)
      Break
    EndIf
  Next

  If HMAC
    Key$ = GetGadgetText(21)
    pKey = @Key$
    SizeKey = Len(Key$)
    HS$ = "HMAC-" + SHA$
   Else
    HS$ = SHA$
  EndIf
  HS$ + " : "

  If File
    File$ = GetGadgetText(8)
    *Source = ReadFile(#PB_Any, File$) ;*Source = file number
    If *Source
      Size.q = Lof(*Source)
      Mode + $80                       ;tag for file
     Else                              ;the file can have become inaccessible since the preliminary test
      MessageRequester("Error !", "File Access to ´" + File$ + "´ denied (System-File?)")
      File$ = ""
      File = 0
      Size.q = 0
    EndIf
   Else
    Size.q = Len(Input$)
    File$ = "String"
    *Source = @Input$
  EndIf

  If Size
    SizeP$ = Str(Size)                 ;for display of file lengths < 1000 bytes

    TextGadget(9, 10, 350, 960, 20, "File : " + File$)
    Size$ = Str(Size)                    ;insert thousands separators
    LS = Len(Size$)
    LSMod = LS % 3
    If LSMod = 0
      LSMod = 3
    EndIf
    i = 3
    j = 1
    While LS - i > 0
      SizeP$ =  InsertString(Size$, ".", LSMod + j)
      i + 3
      Size$ = SizeP$
      j + 4
    Wend
    TextGadget(10, 10, 370, 560, 20, "Length : " + SizeP$ + " Bytes")
    TextGadget(11, 10, 390, 100, 20, "Throughput (MB/s) : ")
    TextGadget(12, 110,390, 100, 20, "")
    TextGadget(13, 425,410, 200, 20, "Calculation-Progress :")
    ProgressBarGadget(14, 25,435, 950, 25, 0, 100, #PB_ProgressBar_Smooth)
    TextGadget(15, 550, 410, 50, 20, "")

    ;==================================================================
    TA = ElapsedMilliseconds()         ;or more precision (short files/strings!)

    If GetGadgetState(19)              ;HMAC
      Hash$ = HMAC_Calculate_32(*Source, Size, pKey, SizeKey, Mode)
     Else                              ;SHA
      Hash$ = SHA_Calculate_32(*Source, Size, Mode)
    EndIf

    TE = ElapsedMilliseconds() - TA
    ;==================================================================

    If File
      CloseFile(*Source)
      File$ = ""
      File = 0
    EndIf

    FreeGadget(13) : FreeGadget(14) : FreeGadget(15)  ;calculation progress

    TextGadget(13, 10, 430, 960, 20, HS$ + Hash$)
    TE$ = Str(TE)                      ;insert thousands separators
    LT = Len(TE$)
    LTMod = LT % 3
    If LTMod = 0
      LTMod = 3
    EndIf
    i = 3
    j = 1
    While LT - i > 0
      TEP$ =  InsertString(TE$, ".", LTMod + j)
      i + 3
      TE$ = TEP$
      j + 4
    Wend
    SetGadgetText(12, StrF(Size / ((TE + 1) * 1000), 3))
    TextGadget(14, 10, 410, 560, 20, "Elapsed Time : " +  TE$ + " ms")

    For i = 0 To 8
      DisableGadget(i, 1)
    Next

    DisableGadget(19, 1)               ;HMAC
    DisableGadget(20, 1)
    DisableGadget(21, 1)

    ButtonGadget(16, 125, 450, 250, 20, "Copy Result to Clipboard", #PB_Button_Toggle)
    ButtonGadget(17, 425, 445, 150, 30, "E N D", #PB_Button_Toggle)
    ButtonGadget(18, 625, 450, 250, 20, "New Selection", #PB_Button_Toggle)

    Repeat
      Event = WaitWindowEvent()
      If GetGadgetState(16)
        SetClipboardText(Hash$)
        SetGadgetState(16, 0)
      EndIf
      If GetGadgetState(17)
        Quit = 1
        Break
      EndIf
      If GetGadgetState(18)
        For i = 0 To 21
          If IsGadget(i)
            FreeGadget(i)
          EndIf
        Next
        Break
      EndIf
      If Event = #PB_Event_CloseWindow
        Quit = 1
        Break
      EndIf
    ForEver
   Else
    ;Nothing to hash (empty text or empty file). Close an opened file and reset the file
    ;flag, otherwise the next round would treat a text input as a file.
    If File
      CloseFile(*Source)
      File$ = ""
      File = 0
    EndIf
  EndIf
Until Quit

EndIf
End


Have fun!
Helle
Melow
New User
New User
Posts: 6
Joined: Thu Aug 29, 2013 11:51 am

Re: SHA-2/HMAC for 32-Bit-Windows

Post by Melow »

Helle... works great now also on OSX 32bit.
Thank you very much.

Just one question (because i'm not able to "decrypt" the ASM code :) )

Is SSE2 required, or does the code also run on non-SSE2 CPUs?

Thank you again for fixing the "@@" issue...

Greetings
Melow
:)
Melow
New User
New User
Posts: 6
Joined: Thu Aug 29, 2013 11:51 am

Re: SHA-2/HMAC for 32-Bit-Windows

Post by Melow »

Hi Helle, it's me again :)

On large *Sources it seems some HMAC calculations go wrong.

For example, the SHA256 HMAC hex digest of 1 MB of NULL-byte-filled data, with a 32-byte NULL-byte-filled *key, should be:
F7B04DA1D2F0C3F59F20FE10115CB839C7DB041333B78ADCE37E7FBC83222B2B

But HMAC_Calculate_32() calculates:
563041F58DC6ADFFC8A59708A662F1245795013D12C2544A475789B92D307DFE

PB Testsource:

Code: Select all

; Regression test for HMAC_Calculate_32: 1 MiB message and 32-byte key.
; AllocateMemory() zero-fills the buffers by default (see PureBasic docs), so both
; message and key consist of NULL bytes, matching the PHP reference digest below.
Define len.q = 1024*1024                ; message length in bytes
Define *Source = AllocateMemory(len)    ; message buffer
Define *key = AllocateMemory(32)        ; key buffer

; Only run the calculation if both allocations succeeded; AllocateMemory()
; returns 0 on failure and HMAC_Calculate_32 would then read from address 0.
If *Source And *key
  ; Last argument 1 is the mode used for this "HMAC 256" check.
  Debug "HMAC 256:" + #CRLF$ + HMAC_Calculate_32(*Source, len, *key, 32, 1) + #CRLF$  + "F7B04DA1D2F0C3F59F20FE10115CB839C7DB041333B78ADCE37E7FBC83222B2B"
Else
  Debug "HMAC 256: AllocateMemory failed"
EndIf

; Release the test buffers again.
If *Source : FreeMemory(*Source) : EndIf
If *key : FreeMemory(*key) : EndIf
I was using this PHP code to verify the HMAC digest:

Code: Select all

// Reference digest: HMAC-SHA-256 over 1 MiB of NUL bytes, keyed with 32 NUL bytes.
$message = str_repeat("\0", 1024*1024);
$key = str_repeat("\0", 32);
$digest = hash_hmac('sha256', $message, $key);
echo 'HMAC 256: ' . strtoupper($digest);
Maybe, if you have time, you can have a look at this?


Greetings Melow :)
Helle
Enthusiast
Enthusiast
Posts: 178
Joined: Wed Apr 12, 2006 7:59 pm
Location: Germany
Contact:

Re: SHA-2/HMAC for 32-Bit-Windows

Post by Helle »

Hello Melow,
I get "F7B04DA1D2F0C3F59F20FE10115CB839C7DB041333B78ADCE37E7FBC83222B2B" with this test (replace the code from line 997 to 1101):

Code: Select all

 ; If Size  LINE 997
    ; Test variant of the main program's "If Size" body: the regular hash call is
    ; commented out and replaced by a fixed HMAC check whose result goes to Debug.
    SizeP$ = Str(Size)                 ;for displaying file lengths < 1000 bytes

    TextGadget(9, 10, 350, 960, 20, "File : " + File$)
    Size$ = Str(Size)                    ;for decimal points
    LS = Len(Size$)
    LSMod = LS % 3
    If LSMod = 0
      LSMod = 3
    EndIf
    i = 3
    j = 1
    ; Insert a "." after the leading LSMod digits and then after each further
    ; group of three digits; j advances by 4 per pass (3 digits + the inserted ".").
    While LS - i > 0
      SizeP$ =  InsertString(Size$, ".", LSMod + j)
      i + 3
      Size$ = SizeP$
      j + 4
    Wend
    TextGadget(10, 10, 370, 560, 20, "Length : " + SizeP$ + " Bytes")
    TextGadget(11, 10, 390, 100, 20, "Throughput (MB/s) : ")
    TextGadget(12, 110,390, 100, 20, "")
    ; Gadgets 13-15 are the temporary progress display; they are freed after the calculation.
    TextGadget(13, 425,410, 200, 20, "Calculation-Progress :")
    ProgressBarGadget(14, 25,435, 950, 25, 0, 100, #PB_ProgressBar_Smooth)
    TextGadget(15, 550, 410, 50, 20, "") 

    ;==================================================================
    TA = ElapsedMilliseconds()         ;or more precision (short files/strings!)

; Regular hash call, disabled for this test:
;    If GetGadgetState(19)              ;HMAC
;      Hash$ = HMAC_Calculate_32(*Source, Size, pKey, SizeKey, Mode)
;     Else                              ;SHA
;      Hash$ = SHA_Calculate_32(*Source, Size, Mode)
;    EndIf

; Fixed test input: 1 MiB message buffer and 32-byte key buffer.
; NOTE(review): relies on AllocateMemory returning zero-filled memory - confirm for the PB version in use.
Define len.q = 1024*1024
Define *Source = AllocateMemory(len)
Define *key = AllocateMemory(32)

; Prints the computed HMAC followed by the expected digest for manual comparison.
Debug "HMAC 256:" + #CRLF$ + HMAC_Calculate_32(*Source, len, *key, 32, 1) + #CRLF$  + "F7B04DA1D2F0C3F59F20FE10115CB839C7DB041333B78ADCE37E7FBC83222B2B"

    TE = ElapsedMilliseconds() - TA
    ;==================================================================

    ; NOTE(review): *Source was re-assigned by AllocateMemory above, so this call
    ; receives the test buffer's value whenever File is set - confirm this is intended.
    If File
      CloseFile(*Source)
      File$ = ""
      File = 0
    EndIf

    FreeGadget(13) : FreeGadget(14) : FreeGadget(15)  ;Calculation-Progress

    ; NOTE(review): Hash$ is not assigned in this test variant (the assignments
    ; above are commented out), so it holds whatever value it had before.
    TextGadget(13, 10, 430, 960, 20, HS$ + Hash$)
    TE$ = Str(TE)                      ;for decimal points
    LT = Len(TE$)
    LTMod = LT % 3
    If LTMod = 0
      LTMod = 3
    EndIf
    i = 3
    j = 1
    ; Same "." grouping as for the length above, applied to the elapsed time.
    While LT - i > 0
      TEP$ =  InsertString(TE$, ".", LTMod + j)
      i + 3
      TE$ = TEP$
      j + 4
    Wend
    ; Throughput from Size and TE (milliseconds); TE + 1 keeps the divisor non-zero when TE = 0.
    ; NOTE(review): uses Size, not the test length len - confirm which is intended here.
    SetGadgetText(12, StrF(Size / ((TE + 1) * 1000), 3))
    TextGadget(14, 10, 410, 560, 20, "Elapsed Time : " +  TE$ + " ms")

    ; Disable the selection gadgets while the result is shown.
    For i = 0 To 8
      DisableGadget(i, 1)
    Next

    DisableGadget(19, 1)               ;HMAC
    DisableGadget(20, 1)
    DisableGadget(21, 1)

    ButtonGadget(16, 125, 450, 250, 20, "Copy Result to Clipboard", #PB_Button_Toggle)
    ButtonGadget(17, 425, 445, 150, 30, "E N D", #PB_Button_Toggle) 
    ButtonGadget(18, 625, 450, 250, 20, "New Selection", #PB_Button_Toggle)

    ; Result loop: the three toggle buttons are polled via GetGadgetState after each window event.
    Repeat
      Event = WaitWindowEvent()
      ; Copy: put Hash$ on the clipboard and reset the toggle state.
      If GetGadgetState(16)
        SetClipboardText(Hash$)
        SetGadgetState(16, 0)
      EndIf
      ; End: set Quit and leave the result loop.
      If GetGadgetState(17)
        Quit = 1
        Break
      EndIf
      ; New Selection: free every existing gadget 0..21 and leave the result loop without setting Quit.
      If GetGadgetState(18)
        For i = 0 To 21
          If IsGadget(i)
            FreeGadget(i)
          EndIf
        Next
        Break
      EndIf
      If Event = #PB_Event_CloseWindow
        Quit = 1
        Break
      EndIf
    ForEver
  ;EndIf   LINE 1101
Tested with Windows XP/SP3 in a VM (Oracle Virtual Box).
BTW, I use SSE2 (e.g. the shuffle instructions) for better performance. It has been supported for over 10 years in CPUs from Intel and AMD.
Greetings Helle
Melow
New User
New User
Posts: 6
Joined: Thu Aug 29, 2013 11:51 am

Re: SHA-2/HMAC for 32-Bit-Windows

Post by Melow »

Hi Helle,

confirmed.

I was testing it on Win 7 64-bit, but with the PB 32-bit compiler.
As I just figured out, the wrong calculations occur only if the PB-Debugger is on.

When running the code without the PB-Debugger, everything is OK.

So it seems that this (PB bug?) happens only on a 64-bit machine with the PB 32-bit compiler and the debugger enabled.
Mhhh... but good to know...

Thank you again Helle
Greetings
Melow :)
Post Reply