I made a few small changes.
The callback procedure, if specified, was called every 128 bytes, which is very often when you process a large file.
I changed it so that it is called less often, and I also added a final call to the callback with 100% at the end, since users of a callback might expect that. Previously it did not always reach 100.
The other change I made is that I replaced the LCase / RSet / Hex combination with one custom function that takes three parameters:
the first is the address to read from, the second is the number of bytes to process, and the third is the buffer to place the result in.
Code: Select all
;====================================================================================
;
; Library Commands: sha512FingerPrint()
; sha512FileFingerPrint()
; sha384FingerPrint()
; sha384FileFingerPrint()
;
; Authors: Lloyd Gallant (netmaestro) and Wilbert
;
; Contributors: Thanks to Danilo, thorium, infratec and idle
; for their help with the asm routines
; and to Christopher Devine for the
; c code this program is based on.
;
; Date: August 15, 2011
; Target Compiler: Purebasic 4 and up
; Target OS: Windows, Linux, MacOS
;
; License: GNU General Public License
;
; This program is free software; you can redistribute it and/or modify
; it under the terms of the GNU General Public License As published by
; the Free Software Foundation; either version 2 of the License, or
; (at your option) any later version.
;
; This program is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY Or FITNESS For A PARTICULAR PURPOSE. See the
; GNU General Public License For more details.
;
; The logic for this program is based on sha256.c found here:
;
; http://www.spale.com/download/scrypt/scrypt1.0/
;
; You can test the accuracy of this program by comparing results with
; test data at:
;
; http://csrc.nist.gov/groups/ST/toolkit/documents/Examples/SHA2_Additional.pdf
;
;===================================================================================
;
;
; Usage:
;
; result$ = sha512Fingerprint(*address, length [, *progress])
; result$ = sha512FileFingerprint(file$ [, *progress])
;
; result$ = sha384Fingerprint(*address, length [, *progress])
; result$ = sha384FileFingerprint(file$ [, *progress])
;
; Progress callback function:
;
; Procedure MyCallBack(value.i)
; ; value is 0 to 100 representing percentage completed
; Endprocedure
;
;
;================================================================
; STRUCTURES
;================================================================
; Running hash context shared by the SHA-384 and SHA-512 code paths.
; The field order matters: sha512_process reads and writes the eight state
; quads through raw offsets 0..56 from the context pointer.
Structure sha512_context
; eight 64-bit chaining values; seeded by sha384_starts / sha512_starts
state.q [8]
; number of bytes fed to shaQuad_update so far (its low 7 bits give the buffer fill)
total.q
; holds a partial 128-byte block between shaQuad_update calls
buffer.a [128]
EndStructure
; A 128-byte scratch block; shaQuad_finish uses one as the padding source
; (first byte $80, remaining bytes left at zero).
Structure UINT8_BUFFER
b.a[128]
EndStructure
; Array view of up to 80 quads. Used as the type of the digest output pointer in
; shaQuad_finish (which writes w[0]..w[7]) and of the *bytes parameter in the
; sha512_process prototype.
Structure UINT64_BUFFER
w.q[80]
EndStructure
; 16-byte message-length trailer appended by shaQuad_finish.
; NOTE: the names refer to memory position, not numeric significance. shaQuad_finish
; leaves lowpart at zero and stores the byte-swapped bit count in highpart, so the
; 16 bytes in memory are eight zero bytes followed by the big-endian bit length.
Structure msglen
lowpart.q
highpart.q
EndStructure
;================================================================
; HELPER MACROS
;================================================================
; SIGMA0M(reg): transforms, in place, the 64-bit value held in the low quadword of
; XMM register reg. Per the step comments below the result is
;   (x rotated right 1) XOR (x rotated right 8) XOR (x shifted right 7).
; A rotation is built by duplicating the value across both quadwords with pshufd,
; shifting with psrlq and re-combining the halves with a second pshufd.
; Clobbers xmm6 and xmm7, so reg must be a different register.
; Used by sha512_process for the message-schedule term taken from w[t-15].
Macro SIGMA0M(reg) ; By wilbert
; keep an untouched copy of x for the plain shift at the end
!movq xmm7, reg
!pshufd reg, reg, 0x14; 0 - 1 - 1 - 0
; rotate right 1
!psrlq reg, 1
!pshufd reg, reg, 0x28; 0 - 2 - 2 - 0
!movdqa xmm6, reg
; delta rotate 8 - 1 = 7 right
!psrlq xmm6, 7
!pshufd xmm6, xmm6, 0x28; 0 - 2 - 2 - 0
!pxor reg, xmm6
; shift right 7
!psrlq xmm7, 7
!pxor reg, xmm7
EndMacro
; SIGMA1M(reg): transforms, in place, the 64-bit value held in the low quadword of
; XMM register reg. Per the step comments below the result is
;   (x rotated right 19) XOR (x rotated right 61) XOR (x shifted right 6).
; The second rotation is derived from the first one: a further 42 bits right,
; performed as a 22-bit left shift on the duplicated value.
; Clobbers xmm6 and xmm7, so reg must be a different register.
; Used by sha512_process for the message-schedule term taken from w[t-2].
Macro SIGMA1M(reg) ; By wilbert
; keep an untouched copy of x for the plain shift at the end
!movq xmm7, reg
!pshufd reg, reg, 0x14; 0 - 1 - 1 - 0
; rotate right 19
!psrlq reg, 19
!pshufd reg, reg, 0x28; 0 - 2 - 2 - 0
!movdqa xmm6, reg
; delta rotate 61 - 19 = 42 right = 22 left
!pshufd xmm6, xmm6, 0x41; 1 - 0 - 0 - 1
!psllq xmm6, 22
!pshufd xmm6, xmm6, 0x7d; 1 - 3 - 3 - 1
!pxor reg, xmm6
; shift right 6
!psrlq xmm7, 6
!pxor reg, xmm7
EndMacro
; SIGMA2M(reg): transforms, in place, the 64-bit value held in the low quadword of
; XMM register reg. Per the step comments below the result is
;   (x rotated right 28) XOR (x rotated right 34) XOR (x rotated right 39).
; Each later rotation is obtained by rotating the previous intermediate further
; (by 6, then by 5), which is why the statement order must not change.
; Clobbers xmm7, so reg must be a different register.
; Used by the P round macro on working variable a.
Macro SIGMA2M(reg) ; By wilbert
!pshufd reg, reg, 0x14; 0 - 1 - 1 - 0
; rotate right 28
!psrlq reg, 28
!pshufd reg, reg, 0x28; 0 - 2 - 2 - 0
!movdqa xmm7, reg
; delta rotate 34 - 28 = 6 right
!psrlq xmm7, 6
!pshufd xmm7, xmm7, 0x28; 0 - 2 - 2 - 0
!pxor reg, xmm7
; delta rotate 39 - 34 = 5 right
!psrlq xmm7, 5
!pshufd xmm7, xmm7, 0x28; 0 - 2 - 2 - 0
!pxor reg, xmm7
EndMacro
; SIGMA3M(reg): transforms, in place, the 64-bit value held in the low quadword of
; XMM register reg. Per the step comments below the result is
;   (x rotated right 14) XOR (x rotated right 18) XOR (x rotated right 41).
; Each later rotation is obtained by rotating the previous intermediate further
; (by 4, then by 23), which is why the statement order must not change.
; Clobbers xmm7, so reg must be a different register.
; Used by the P round macro on working variable e.
Macro SIGMA3M(reg) ; By wilbert
!pshufd reg, reg, 0x14; 0 - 1 - 1 - 0
; rotate right 14
!psrlq reg, 14
!pshufd reg, reg, 0x28; 0 - 2 - 2 - 0
!movdqa xmm7, reg
; delta rotate 18 - 14 = 4 right
!psrlq xmm7, 4
!pshufd xmm7, xmm7, 0x28; 0 - 2 - 2 - 0
!pxor reg, xmm7
; delta rotate 41 - 18 = 23 right
!psrlq xmm7, 23
!pshufd xmm7, xmm7, 0x28; 0 - 2 - 2 - 0
!pxor reg, xmm7
EndMacro
; P(a..h, offset): one compression round.
;   a..h    MMX registers holding the eight working variables for this round
;   offset  byte offset, added to ecx, of the round constant and schedule word
; Expects esi = address of the constant table, edi = address of the message
; schedule and ecx = byte offset of the current group of rounds.
; Only d and h are written: d = d + temp2 and h = temp1 + temp2, where
;   temp1 = Sigma2(a) + F0(a,b,c)
;   temp2 = Sigma3(e) + h + constant + schedule word + F1(e,f,g)
; The caller rotates the register arguments between invocations instead of moving
; values around. Uses xmm0..xmm6 directly and xmm7 through the SIGMA macros.
Macro P(a,b,c,d,e,f,g,h, offset) ; By wilbert
!movq2dq xmm0, a
!movq2dq xmm1, e
; xmm6 / xmm5 keep unmodified copies of a / e for F0 / F1 below
!movq xmm6, xmm0
!movq xmm5, xmm1
SIGMA2M(xmm0); xmm0 = temp1 = Sigma2(a)
SIGMA3M(xmm1); xmm1 = temp2 = Sigma3(e)
!movq2dq xmm2, h
!paddq xmm1, xmm2; temp2 + h
!movq xmm2, [esi + ecx + offset]
!paddq xmm1, xmm2; temp2 + *sha512constants [ ]
!movq xmm2, [edi + ecx + offset]
!paddq xmm1, xmm2; temp2 + *w [ ]
; F0 = ((a & b) | (c & (a | b)))
!movq2dq xmm3, b
!movq xmm4, xmm6
!por xmm4, xmm3; xmm4 = a | b
!pand xmm6, xmm3; xmm6 = a & b
!movq2dq xmm3, c
!pand xmm3, xmm4; xmm3 = c & (a | b)
!por xmm6, xmm3; xmm6 = F0
!paddq xmm0, xmm6; temp1 + F0
; F1 = (g ! (e & (f ! g)))
; ("!" in the formulas is PureBasic's XOR operator)
!movq2dq xmm2, f
!movq2dq xmm3, g
!pxor xmm2, xmm3; xmm2 = f ! g
!pand xmm5, xmm2; xmm5 = e & (f ! g)
!pxor xmm3, xmm5; xmm3 = F1
!paddq xmm1, xmm3; temp2 + F1
!movq2dq xmm2, d
!paddq xmm2, xmm1
!movdq2q d, xmm2; d + temp2
!paddq xmm0, xmm1
!movdq2q h, xmm0; h = temp 1 + temp2
EndMacro
; ADD_RESULT(reg, offset): adds the quad stored at [edx + offset] to MMX register
; reg and writes the sum back to the same location. sha512_process uses it with
; edx = context pointer to fold the round results into the eight state words.
Macro ADD_RESULT(reg, offset) ; By wilbert
!paddq reg, [edx + offset]
!movq [edx + offset], reg
EndMacro
; DEF_Q(h1, l1, ... h4, l4): emits four 64-bit constants into the assembly output.
; Each constant is given as (high dword, low dword) and written low dword first,
; so it can be loaded from memory as a single little-endian quad.
Macro DEF_Q(h1, l1, h2, l2, h3, l3, h4, l4) ; By wilbert
!dd l1,h1,l2,h2,l3,h3,l4,h4
EndMacro
; Table of the 80 64-bit SHA-384/SHA-512 round constants (20 rows of 4), stored as
; little-endian quads by DEF_Q. sha512_process addresses it through the raw
; assembler label sha512constants (loaded into esi) and reads one entry per round.
DataSection
!sha512constants:
DEF_Q(0x428a2f98,0xd728ae22 , 0x71374491,0x23ef65cd , 0xb5c0fbcf,0xec4d3b2f , 0xe9b5dba5,0x8189dbbc)
DEF_Q(0x3956c25b,0xf348b538 , 0x59f111f1,0xb605d019 , 0x923f82a4,0xaf194f9b , 0xab1c5ed5,0xda6d8118)
DEF_Q(0xd807aa98,0xa3030242 , 0x12835b01,0x45706fbe , 0x243185be,0x4ee4b28c , 0x550c7dc3,0xd5ffb4e2)
DEF_Q(0x72be5d74,0xf27b896f , 0x80deb1fe,0x3b1696b1 , 0x9bdc06a7,0x25c71235 , 0xc19bf174,0xcf692694)
DEF_Q(0xe49b69c1,0x9ef14ad2 , 0xefbe4786,0x384f25e3 , 0x0fc19dc6,0x8b8cd5b5 , 0x240ca1cc,0x77ac9c65)
DEF_Q(0x2de92c6f,0x592b0275 , 0x4a7484aa,0x6ea6e483 , 0x5cb0a9dc,0xbd41fbd4 , 0x76f988da,0x831153b5)
DEF_Q(0x983e5152,0xee66dfab , 0xa831c66d,0x2db43210 , 0xb00327c8,0x98fb213f , 0xbf597fc7,0xbeef0ee4)
DEF_Q(0xc6e00bf3,0x3da88fc2 , 0xd5a79147,0x930aa725 , 0x06ca6351,0xe003826f , 0x14292967,0x0a0e6e70)
DEF_Q(0x27b70a85,0x46d22ffc , 0x2e1b2138,0x5c26c926 , 0x4d2c6dfc,0x5ac42aed , 0x53380d13,0x9d95b3df)
DEF_Q(0x650a7354,0x8baf63de , 0x766a0abb,0x3c77b2a8 , 0x81c2c92e,0x47edaee6 , 0x92722c85,0x1482353b)
DEF_Q(0xa2bfe8a1,0x4cf10364 , 0xa81a664b,0xbc423001 , 0xc24b8b70,0xd0f89791 , 0xc76c51a3,0x0654be30)
DEF_Q(0xd192e819,0xd6ef5218 , 0xd6990624,0x5565a910 , 0xf40e3585,0x5771202a , 0x106aa070,0x32bbd1b8)
DEF_Q(0x19a4c116,0xb8d2d0c8 , 0x1e376c08,0x5141ab53 , 0x2748774c,0xdf8eeb99 , 0x34b0bcb5,0xe19b48a8)
DEF_Q(0x391c0cb3,0xc5c95a63 , 0x4ed8aa4a,0xe3418acb , 0x5b9cca4f,0x7763e373 , 0x682e6ff3,0xd6b2b8a3)
DEF_Q(0x748f82ee,0x5defb2fc , 0x78a5636f,0x43172f60 , 0x84c87814,0xa1f0ab72 , 0x8cc70208,0x1a6439ec)
DEF_Q(0x90befffa,0x23631e28 , 0xa4506ceb,0xde82bde9 , 0xbef9a3f7,0xb2c67915 , 0xc67178f2,0xe372532b)
DEF_Q(0xca273ece,0xea26619c , 0xd186b8c7,0x21c0c207 , 0xeada7dd6,0xcde0eb1e , 0xf57d4f7f,0xee6ed178)
DEF_Q(0x06f067aa,0x72176fba , 0x0a637dc5,0xa2c898a6 , 0x113f9804,0xbef90dae , 0x1b710b35,0x131c471b)
DEF_Q(0x28db77f5,0x23047d84 , 0x32caab7b,0x40c72493 , 0x3c9ebe0a,0x15c9bebc , 0x431d67c4,0x9c100d4c)
DEF_Q(0x4cc5d4be,0xcb3e42b6 , 0x597f299c,0xfc657e2a , 0x5fcb6fab,0x3ad6faec , 0x6c44198c,0x4a475817)
EndDataSection
;-----------------------------------------------------------------------
; HexBytesAddr__() returns the address of the raw routine that follows the
; ProcedureReturn. The routine is never entered by falling through; it is
; reached only through the HexBytes prototype pointer defined below.
;
; HexBytes(*addr, numBytes, *buffer)            (32-bit x86, cdecl)
;   *addr     first byte to convert
;   numBytes  number of source bytes; 0 is allowed and writes nothing
;   *buffer   receives 2 * numBytes lowercase ASCII hex digits, high nibble
;             first, WITHOUT a terminating zero
;   returns   *buffer (in eax)
; Preserves esi / edi; clobbers eax, ecx and flags.
; Relies on the direction flag being clear (lodsb / stosw move upward).
;-----------------------------------------------------------------------
Procedure HexBytesAddr__() ; By Wilbert
  !mov eax,hexBytes_start
  ProcedureReturn
  !hexBytes_start:
  !push esi
  !push edi
  ; two registers pushed, so the arguments start at esp + 12
  !mov esi, [esp + 12]
  !mov ecx, [esp + 16]
  !mov edi, [esp + 20]
  ; remember the buffer start; it is popped into eax as the return value
  !push edi
  ; a zero count must skip the loop entirely: counting down from 0 would wrap
  ; and run 2^32 iterations past the end of both buffers
  !test ecx, ecx
  !jz hexBytes_done
  !hexBytes_loop:
  !lodsb
  ; al = high nibble, ah = low nibble, so stosw stores them in reading order
  !mov ah, al
  !shr al, 4
  !and ax, 0x0f0f
  !or ax, 0x3030
  ; values above '9' are moved up to 'a'..'f' (0x3a + 39 = 0x61)
  !cmp ah, 0x3a
  !jb hexBytes_1
  !add ah, 39
  !hexBytes_1:
  !cmp al, 0x3a
  !jb hexBytes_2
  !add al, 39
  !hexBytes_2:
  !stosw
  !dec ecx
  !jnz hexBytes_loop
  !hexBytes_done:
  !pop eax
  !pop edi
  !pop esi
  !ret
EndProcedure
PrototypeC.l ProtoHexBytes(*addr, numBytes, *buffer)
Global HexBytes.ProtoHexBytes = HexBytesAddr__()
; ChgEnd64Addr__() returns the address of the raw routine that follows the
; ProcedureReturn; the routine is reached only through the ChangeEndian64
; prototype pointer defined below.
;
; ChangeEndian64(value.q) (32-bit x86, cdecl): returns value with its eight
; bytes in reverse order. The quad argument occupies [esp + 4] (low dword) and
; [esp + 8] (high dword); the result is returned in edx:eax.
Procedure ChgEnd64Addr__() ; By idle
; small adaptation by Wilbert to
; work around a Purebasic OS X bug
!mov eax,ChgEnd64_start
ProcedureReturn
!ChgEnd64_start:
; the two halves trade places: the byte-swapped high dword becomes the low
; dword of the result and vice versa
!mov eax, [esp + 8]
!mov edx, [esp + 4]
!bswap eax
!bswap edx
!ret
EndProcedure
PrototypeC.q ProtoChgEnd64(value.q)
Global ChangeEndian64.ProtoChgEnd64 = ChgEnd64Addr__()
;================================================================
; LOCAL FUNCTIONS
;================================================================
; Prepares *ctx for a new SHA-384 computation.
;   *ctx  context to initialise; its previous contents are discarded
; Loads the eight SHA-384 initial hash values and resets the byte counter, so the
; context is valid whether it is freshly allocated or being reused.
Procedure sha384_starts(*ctx.sha512_context )
  ; shaQuad_update derives the buffer fill level from total, so it must start at 0
  *ctx\total = 0
  *ctx\state[0] = $cbbb9d5dc1059ed8
  *ctx\state[1] = $629a292a367cd507
  *ctx\state[2] = $9159015a3070dd17
  *ctx\state[3] = $152fecd8f70e5939
  *ctx\state[4] = $67332667ffc00b31
  *ctx\state[5] = $8eb44a8768581511
  *ctx\state[6] = $db0c2e0d64f98fa7
  *ctx\state[7] = $47b5481dbefa4fa4
EndProcedure
; Prepares *ctx for a new SHA-512 computation.
;   *ctx  context to initialise; its previous contents are discarded
; Loads the eight SHA-512 initial hash values and resets the byte counter, so the
; context is valid whether it is freshly allocated or being reused.
Procedure sha512_starts( *ctx.sha512_context )
  ; shaQuad_update derives the buffer fill level from total, so it must start at 0
  *ctx\total = 0
  *ctx\state[0] = $6a09e667f3bcc908
  *ctx\state[1] = $bb67ae8584caa73b
  *ctx\state[2] = $3c6ef372fe94f82b
  *ctx\state[3] = $a54ff53a5f1d36f1
  *ctx\state[4] = $510e527fade682d1
  *ctx\state[5] = $9b05688c2b3e6c1f
  *ctx\state[6] = $1f83d9abfb41bd6b
  *ctx\state[7] = $5be0cd19137e2179
EndProcedure
; sha512_process_addr__() returns the address of the raw routine that follows the
; ProcedureReturn; the routine is reached only through the sha512_process
; prototype pointer defined below.
;
; sha512_process(*ctx, *bytes) (32-bit x86, cdecl): folds one 128-byte block into
; the eight state quads at the start of *ctx.
;   *ctx    context whose first 64 bytes are the state words
;   *bytes  128 input bytes; only read, no alignment required by the loads used
; Steps: (1) byte-swap the 16 input quads into a stack-resident schedule,
; (2) extend the schedule to 80 quads, (3) run 80 rounds with the working
; variables in mm0..mm7, (4) add the result back into the state.
; Preserves esi / edi; uses eax, ecx, edx, mm0..mm7 and xmm0..xmm7; executes emms
; before returning.
Procedure sha512_process_addr__() ; By wilbert
!mov eax, sha512_process_start
ProcedureReturn
!sha512_process_start:
!push esi
!push edi
!mov edx, [esp + 12]; edx = *ctx.sha512_context
!mov esi, [esp + 16]; esi = *bytes.UINT64_BUFFER
; reserve 640 bytes for the 80-quad schedule plus slack for rounding edi up to a
; 16-byte boundary
!sub esp, 656
!mov edi, esp
!add edi, 0xf
!and edi, 0xfffffff0; edi = *w.UINT64_BUFFER
; copy from *bytes to *w and change endian
; ecx walks the byte offsets 120, 112 ... 0; each quad's two dwords are byte-swapped
; and exchanged. The loop ends when the subtraction borrows below zero.
!mov ecx, 120
!changeEndianLoop:
!mov eax, [esi + ecx]
!bswap eax
!mov [edi + ecx + 4], eax
!mov eax, [esi + ecx + 4]
!bswap eax
!mov [edi + ecx], eax
!sub ecx, 8
!jnc changeEndianLoop
; sigma 0 & sigma1 loop
; ecx is the byte offset of w[t], running from 128 (t = 16) up to 632 (t = 79):
; w[t] = SIGMA0M(w[t-15]) + SIGMA1M(w[t-2]) + w[t-16] + w[t-7]
!mov ecx, 128
!sigma01Loop:
!movq xmm2, [edi + ecx - 128]; w\w[t-16]
!movq xmm0, [edi + ecx - 120]; w\w[t-15]
!movq xmm3, [edi + ecx - 56] ; w\w[t-7]
!movq xmm1, [edi + ecx - 16] ; w\w[t-2]
SIGMA0M(xmm0)
SIGMA1M(xmm1)
!paddq xmm0, xmm1
!paddq xmm0, xmm2
!paddq xmm0, xmm3
!movq [edi + ecx], xmm0
!add ecx, 8
!cmp ecx, 640
!jne sigma01Loop
; load the eight state words into mm0..mm7 as the working variables
!movq mm0, [edx]
!movq mm1, [edx + 8]
!movq mm2, [edx + 16]
!movq mm3, [edx + 24]
!movq mm4, [edx + 32]
!movq mm5, [edx + 40]
!movq mm6, [edx + 48]
!movq mm7, [edx + 56]
; esi is no longer needed for the input block and is reused as the table pointer
!mov esi, sha512constants; esi = *sha512constants
!xor ecx, ecx
; loop 10 times
; each pass performs 8 rounds; the register arguments are rotated by one position
; per round so that after 8 rounds every value is back in its original register
!sigma23Loop:
P( mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7, 0)
P( mm7, mm0, mm1, mm2, mm3, mm4, mm5, mm6, 8)
P( mm6, mm7, mm0, mm1, mm2, mm3, mm4, mm5, 16)
P( mm5, mm6, mm7, mm0, mm1, mm2, mm3, mm4, 24)
P( mm4, mm5, mm6, mm7, mm0, mm1, mm2, mm3, 32)
P( mm3, mm4, mm5, mm6, mm7, mm0, mm1, mm2, 40)
P( mm2, mm3, mm4, mm5, mm6, mm7, mm0, mm1, 48)
P( mm1, mm2, mm3, mm4, mm5, mm6, mm7, mm0, 56)
!add ecx, 64
!cmp ecx, 640
!jne sigma23Loop
; state[i] = state[i] + working variable i
ADD_RESULT(mm0, 0)
ADD_RESULT(mm1, 8)
ADD_RESULT(mm2, 16)
ADD_RESULT(mm3, 24)
ADD_RESULT(mm4, 32)
ADD_RESULT(mm5, 40)
ADD_RESULT(mm6, 48)
ADD_RESULT(mm7, 56)
; leave MMX state so that later floating-point code works
!emms
!add esp, 656
!pop edi
!pop esi
!ret
EndProcedure
PrototypeC sha512_process_proto(*ctx.sha512_context, *bytes.UINT64_BUFFER)
Global sha512_process.sha512_process_proto = sha512_process_addr__()
; Feeds `length` bytes starting at *input into the running hash in *ctx.
;   *ctx       hash context; total, buffer and state are updated
;   *input     data to absorb
;   length     number of bytes at *input; 0 is a no-op
;   jobsize    total size of the whole job in bytes, used only to scale progress
;   *callback  optional procedure taking one integer (percent done), or 0
; Whole 128-byte blocks are hashed immediately; a trailing partial block is kept
; in *ctx\buffer until later calls complete it.
; The callback fires whenever the number of bytes hashed so far reaches a
; multiple of 64 KiB. Progress is derived from *ctx\total, so every hash
; starts again at 0 % instead of continuing from the previous job's count.
Procedure shaQuad_update( *ctx.sha512_context, *input, length, jobsize.q, *callback )
  Protected.l left, fill
  Protected processed.q
  Protected percent.i

  If length = 0 : ProcedureReturn : EndIf

  left = *ctx\total & 127       ; bytes already waiting in the buffer
  fill = 128 - left             ; bytes needed to complete that block
  *ctx\total + length

  ; complete and hash a previously buffered partial block first
  If left And (length >= fill)
    CopyMemory( *input, @*ctx\buffer[0] + left, fill )
    sha512_process( *ctx, @*ctx\buffer[0] )
    length - fill
    *input + fill
    left = 0
  EndIf

  ; hash all remaining whole blocks straight from the caller's memory
  While length >= 128
    sha512_process( *ctx, *input )
    length - 128
    *input + 128
    If *callback And jobsize > 0
      ; total already includes this call's bytes, so subtracting what is still
      ; pending gives the number of bytes hashed so far
      processed = *ctx\total - length
      If (processed & $ffff) = 0
        percent = 100 * processed / jobsize
        CallFunctionFast(*callback, percent)
      EndIf
    EndIf
  Wend

  ; keep the tail for the next call
  If length
    CopyMemory( *input, @*ctx\buffer[0] + left, length )
  EndIf
EndProcedure
; Completes the hash: appends the padding and the message length, then writes the
; eight state words to *digest in big-endian byte order.
;   *ctx       context that has received all message bytes via shaQuad_update
;   *digest    receives 64 bytes (w[0]..w[7]); SHA-384 callers use the first 48
;   jobsize    kept for interface compatibility; the encoded length is taken from
;              *ctx\total, i.e. from the bytes that were actually hashed, so a
;              source that delivered more or fewer bytes than announced still
;              yields the correct digest of what was read
;   full       not used here
;   *callback  optional progress procedure; called once with 100 at the end
Procedure shaQuad_finish( *ctx.sha512_context, *digest.UINT64_BUFFER, jobsize.q, full, *callback )
  Protected.l last, padn
  Protected i
  Protected msglen.msglen
  Protected sha512_padding.UINT8_BUFFER

  ; padding is a single $80 byte followed by zeros (locals start out zeroed)
  sha512_padding\b[0] = $80

  ; capture the bit length before the padding itself is counted into total.
  ; Only the second quad is set: in memory the trailer is eight zero bytes
  ; followed by the big-endian 64-bit bit count.
  msglen\highpart = ChangeEndian64(*ctx\total << 3)

  ; pad so that exactly 16 bytes remain free in the final block
  last = *ctx\total & 127
  If last < 112
    padn = 112 - last
  Else
    padn = 240 - last
  EndIf

  shaQuad_update( *ctx, @sha512_padding, padn, padn, *callback )
  shaQuad_update( *ctx, @msglen, 16, 16, *callback )

  For i = 0 To 7
    *digest\w[i] = ChangeEndian64(*ctx\state[i])
  Next

  If *callback
    CallFunctionFast(*callback, 100)
  EndIf
EndProcedure
;================================================================
; EXPORTED FUNCTIONS
;================================================================
; Hashes a memory block and returns the digest as a lowercase hex string.
;   *datapointer  start of the data
;   Length        number of bytes to hash
;   full          non-zero = SHA-512 (128 hex digits), zero = SHA-384 (96 hex digits)
;   *callback     optional progress procedure taking one integer (0..100), or 0
; Returns an empty string if the working buffers cannot be allocated.
Procedure.s shaQuadFingerprint(*datapointer, Length, full, *callback=0) ; Data address, data size, [ ,<procaddress> ]
  Protected digest.s
  Protected iterations
  Protected *ctx.sha512_context, *sha512sum, *output

  *ctx       = AllocateMemory(SizeOf(sha512_context))
  *sha512sum = AllocateMemory(64)    ; raw digest, always 8 quads
  *output    = AllocateMemory(128)   ; hex text, two characters per digest byte

  If *ctx And *sha512sum And *output
    If full
      sha512_starts( *ctx )
      iterations = 64
    Else
      sha384_starts( *ctx )
      iterations = 48
    EndIf

    shaQuad_update( *ctx, *datapointer, Length, Length, *callback )
    shaQuad_finish( *ctx, *sha512sum, Length, full, *callback )

    ; HexBytes writes no terminating zero, so the length is passed to PeekS
    digest = PeekS(HexBytes(*sha512sum, iterations, *output), iterations << 1, #PB_Ascii)
  EndIf

  ; release whatever was obtained, even after a partial allocation failure
  If *output    : FreeMemory(*output)    : EndIf
  If *sha512sum : FreeMemory(*sha512sum) : EndIf
  If *ctx       : FreeMemory(*ctx)       : EndIf

  ProcedureReturn digest
EndProcedure
ProcedureDLL.s sha512Fingerprint(*datapointer, Length, *callback=0) ; Data address, data size, [ ,<procaddress> ]
  ; SHA-512 of a memory block: forwards to shaQuadFingerprint with the
  ; "full" selector switched on and hands its hex string back unchanged.
  Protected digest.s
  digest = shaQuadFingerprint(*datapointer, Length, #True, *callback)
  ProcedureReturn digest
EndProcedure
ProcedureDLL.s sha384Fingerprint(*datapointer, Length, *callback=0) ; Data address, data size, [ ,<procaddress> ]
  ; SHA-384 of a memory block: forwards to shaQuadFingerprint with the
  ; "full" selector switched off and hands its hex string back unchanged.
  Protected digest.s
  digest = shaQuadFingerprint(*datapointer, Length, #False, *callback)
  ProcedureReturn digest
EndProcedure
; Hashes the contents of a file and returns the digest as a lowercase hex string.
;   filename   path of the file to hash
;   full       non-zero = SHA-512 (128 hex digits), zero = SHA-384 (96 hex digits)
;   *callback  optional progress procedure taking one integer (0..100), or 0
; Returns an empty string if the file cannot be opened for reading, if a read
; fails part-way through, or if the working buffers cannot be allocated.
; The file is opened read-only: a missing file is not created as a side effect
; and write-protected files can be hashed.
Procedure.s shaQuadFileFingerprint(filename.s, full, *callback=0) ; filename$, [ ,<procaddress> ]
  Protected digest.s
  Protected *datapointer, *output, *sha512sum, *ctx.sha512_context
  Protected bytesread, fresult, iterations
  Protected readOk = #True
  Protected jobsize.q

  *ctx         = AllocateMemory(SizeOf(sha512_context))
  *sha512sum   = AllocateMemory(64)     ; raw digest, always 8 quads
  *datapointer = AllocateMemory(4096)   ; read buffer
  *output      = AllocateMemory(128)    ; hex text, two characters per digest byte

  If *ctx And *sha512sum And *datapointer And *output
    fresult = ReadFile(#PB_Any, filename)
    If fresult
      jobsize = Lof(fresult)

      If full
        sha512_starts( *ctx )
        iterations = 64
      Else
        sha384_starts( *ctx )
        iterations = 48
      EndIf

      While Not Eof(fresult)
        bytesread = ReadData(fresult, *datapointer, 4096)
        If bytesread <= 0
          ; a failed read never advances towards Eof; stop instead of spinning
          readOk = #False
          Break
        EndIf
        shaQuad_update( *ctx, *datapointer, bytesread, jobsize, *callback )
      Wend

      ; a digest of partially read data would be misleading, so only finish
      ; when every read succeeded
      If readOk
        shaQuad_finish( *ctx, *sha512sum, jobsize, full, *callback )
        ; HexBytes writes no terminating zero, so the length is passed to PeekS
        digest = PeekS(HexBytes(*sha512sum, iterations, *output), iterations << 1, #PB_Ascii)
      EndIf

      CloseFile(fresult)
    EndIf
  EndIf

  ; release whatever was obtained, even after a partial allocation failure
  If *output      : FreeMemory(*output)      : EndIf
  If *datapointer : FreeMemory(*datapointer) : EndIf
  If *sha512sum   : FreeMemory(*sha512sum)   : EndIf
  If *ctx         : FreeMemory(*ctx)         : EndIf

  ProcedureReturn digest
EndProcedure
ProcedureDLL.s sha512FileFingerprint(filename.s, *callback=0) ; filename$ [, <procaddress> ]
  ; SHA-512 of a file's contents: forwards to shaQuadFileFingerprint with the
  ; "full" selector switched on and hands its hex string back unchanged.
  Protected digest.s
  digest = shaQuadFileFingerprint(filename, #True, *callback)
  ProcedureReturn digest
EndProcedure
ProcedureDLL.s sha384FileFingerprint(filename.s, *callback=0) ; filename$ [, <procaddress> ]
  ; SHA-384 of a file's contents: forwards to shaQuadFileFingerprint with the
  ; "full" selector switched off and hands its hex string back unchanged.
  Protected digest.s
  digest = shaQuadFileFingerprint(filename, #False, *callback)
  ProcedureReturn digest
EndProcedure