https://groups.google.com/d/topic/fa.linux.kernel/jUelRfl5hx4
This is a translation and SHA256-Test (SHA1 is out) with PureBasic:
Code: Select all
;SHA256-CPU-Instructions for Windows 64-Bit and Unicode-Test-String
;Tested with PB 5.61 (x64) Unicode and PB 5.45 LTS (x64) Unicode, CPU AMD Ryzen 7 1800X
;"Helle" Klaus Helbing, 04.12.2017
;Based on https://groups.google.com/d/topic/fa.linux.kernel/jUelRfl5hx4
;From this:
;This file is provided under a dual BSD/GPLv2 license. When using or
;redistributing this file, you may do so under either license.
;
;GPL LICENSE SUMMARY
;
;Copyright(c) 2015 Intel Corporation.
;
;This program is free software; you can redistribute it and/or modify
;it under the terms of version 2 of the GNU General Public License as
;published by the Free Software Foundation.
;
;This program is distributed in the hope that it will be useful, but
;WITHOUT ANY WARRANTY; without even the implied warranty of
;MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;General Public License for more details.
;
;BSD LICENSE
;
;Copyright(c) 2015 Intel Corporation.
;
;Redistribution and use in source and binary forms, with or without
;modification, are permitted provided that the following conditions
;are met:
;
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in
; the documentation and/or other materials provided with the
; distribution.
; * Neither the name of Intel Corporation nor the names of its
; contributors may be used to endorse or promote products derived
; from this software without specific prior written permission.
;
;THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
;A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
;OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
;SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
;LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
;DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
;THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
;(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
;OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;------------------------------------------------------------------------------
;Check for CPU-SHA-Instructions:
;The inline assembly of this program is 64-bit only, so stop early on other targets
CompilerIf #PB_Compiler_Processor <> #PB_Processor_x64
CompilerError "This code needs the 64-bit (x64) compiler"
CompilerEndIf
!push rbx ;CPUID overwrites rbx, which has to be preserved
!xor eax,eax
!cpuid ;eax = highest supported basic CPUID leaf
!xor r8d,r8d ;r8d = feature flags of leaf 7, stays 0 if leaf 7 does not exist
!cmp eax,7
!jb SHA_Checked ;Leaf 7 is not available -> no SHA
!mov eax,7
!xor ecx,ecx ;Sub-leaf 0
!cpuid
!mov r8d,ebx ;Keep the feature flags, rbx is restored below
!SHA_Checked:
!pop rbx ;pop does not change the flags and the stack is balanced again
!test r8d,20000000h ;Bit29 SHA
!jnz IsSHA
MessageRequester("Ooops!", "No CPU-Support for SHA-Instructions!" + #CRLF$ + "End")
End
!IsSHA:
;Check for Unicode: CPU_SHA256() expects a string of 16-bit characters
CompilerIf #PB_Compiler_Unicode = 0
CompilerError "Please compile this program in Unicode mode"
CompilerEndIf
Declare.s CPU_SHA256(pSource.q)
;Generate a long Test-String, Unicode for this test
;The string is doubled 23 times, i.e. the base text is repeated 2^23 times
Source$ = "The quick brown fox jumps over the lazy dog" ;Or your own
For i = 1 To 23
Source$ + Source$
Next
;Test with CPU-Instructions
TA_CPU = ElapsedMilliseconds()
Res_CPU$ = CPU_SHA256(@Source$) ;Pointer to string
TE_CPU = ElapsedMilliseconds() - TA_CPU
;Test with PB
UseSHA2Fingerprint()
TA_PB = ElapsedMilliseconds()
Res_PB$ = UCase(StringFingerprint(Source$, #PB_Cipher_SHA2, 256)) ;UCase() to match the upper-case hex of CPU_SHA256()
TE_PB = ElapsedMilliseconds() - TA_PB
;Both results and timings in one text, same line break as in the message above
Display$ = "CPU: " + Res_CPU$ + " Time: " + Str(TE_CPU) + "ms" + #CRLF$ + "PB: " + Res_PB$ + " Time: " + Str(TE_PB) + "ms"
;SetClipboardText(Display$)
;CPU: 46FFC4A3DA4F60B940A0058764689FE79863F1C77C1A11230F1CF740666C5748 Time: 238ms
;PB: 46FFC4A3DA4F60B940A0058764689FE79863F1C77C1A11230F1CF740666C5748 Time: 2281ms
MessageRequester("SHA256-CPU-Test Unicode", Display$)
End
Procedure.s CPU_SHA256(pSource.q)
;Returns the SHA-256 digest (64 upper-case hex digits) of the null-terminated
;string of 16-bit characters at pSource, computed with the CPU SHA instructions.
;Every 16-bit character is narrowed to one byte (packuswb) before it is hashed,
;so the digest is only meaningful for characters in the range 0..255.
;The static buffer Varis is used as scratch space and for the result,
;so this procedure must not run in several threads at the same time.
;
;Scratch layout (r8 = Varis rounded up to the next 16-byte boundary):
; r8 +0   Last_Chunk, 256 bytes = the final one or two padded blocks (16 bit per message byte)
; r10 = r8+256 -> Chunks     (number of 64-byte blocks including padding)
; r11 = r8+264 -> Chunk_Rest (Size % 64)
; r12 = r8+272 -> Size       (string length in characters)
;
;Fetch the parameter first: it is addressed relative to rsp, which moves below
!mov rax,[p.v_pSource] ;Pointer to string
;Save all registers that are used here and must be preserved for the program
!push rsi
!push rdi
!push r10
!push r11
!push r12
!push r15
!sub rsp,128 ;Room for xmm4..xmm11
!movdqu [rsp],xmm4
!movdqu [rsp+16],xmm5
!movdqu [rsp+32],xmm6
!movdqu [rsp+48],xmm7
!movdqu [rsp+64],xmm8
!movdqu [rsp+80],xmm9
!movdqu [rsp+96],xmm10
!movdqu [rsp+112],xmm11
!mov r15,rax ;Pointer to string
!lea rax,[Varis] ;For variables etc.
;Set Align 16 for variables
!mov rdx,rax
!and rdx,0fh
!add rax,16
!sub rax,rdx
!mov r8,rax ;Last_Chunk
!mov r10,r8
!add r10,256 ;Chunks
!mov r11,r10
!add r11,8 ;Chunk_Rest
!mov r12,r11
!add r12,8 ;Size
;Clear Last_Chunk: it still holds the padded tail and a part of the digest of the
;previous call, and the padding between the 80h marker and the length must be zero
!cld
!mov rdi,r8
!xor eax,eax
!mov ecx,32 ;32 * 8 = 256 bytes
!rep stosq
;Len(String)
!mov rdx,r15
!mov rax, -16
!pxor xmm1,xmm1
!@@:
!add rax,16
!pcmpistri xmm1,dqword[rdx+rax],00001001b ;Unicode Bit0=1 and Bit1=0 -> String-Chars are unsigned Words, Bit2=0 and Bit3=1 -> Test for equal each
!jnz @b ;ZF=0 -> no null word in these 8 characters, go on
!shr rax,1 ;Unicode: byte offset -> characters
!add rax,rcx ;rcx = index of the null word inside the last 8 characters
!mov [r12],rax
;Chunks = ((Size + 8) / 64) + 1
!mov rdx,rax
!add rdx,8
!shr rdx,6
!add rdx,1
!mov [r10],rdx ;Chunks
;Chunk_Rest = Size %64
!mov rcx,64
!xor rdx,rdx
!div rcx
!mov [r11],rdx
!mov rax,[r12] ;Size
!shl rax,3 ;Bits Size
!bswap rax ;To Big Endian
!movq xmm0,rax ;Unicode
!pxor xmm1,xmm1
!punpcklbw xmm0,xmm1 ;"Blow-Up" to Unicode: 8 length bytes -> 8 words
!cmp qword[r10],1
!je .Only_1Chunk
;Copy String-Part: the tail of the string that belongs to the last two blocks
!mov rdi,r8
!mov rax,r15
!cmp qword[r12],64
!jbe @f ;Size <= 64 -> the tail starts at the beginning of the string
!mov rdx,[r10]
!sub rdx,2
!shl rdx,7 ;(Chunks - 2) * 128 bytes = start of the last two blocks
!add rax,rdx
!@@:
!cld
!mov rsi,rax
!mov rcx,[r11]
!cmp rcx,56
!jae @f
!add rcx,64 ;The tail also contains one complete block
!@@:
!rep movsw
!mov rax,r8
!movdqu [rax+120*2],xmm0 ;Unicode: bit length = last 8 bytes of the second block
!jmp @f
!.Only_1Chunk:
;Copy String
!mov rdi,r8
!mov rsi,r15
!mov rcx,[r11]
!rep movsw
!mov rax,r8
!movdqu [rax+56*2],xmm0 ;Unicode: bit length = last 8 bytes of the only block
!@@:
!mov word[rdi],80h ;Set Bit Unicode: padding marker directly behind the copied text
!cmp qword[r10],3 ;Chunks
!jae @f
!mov r15,r8 ;pSource=Last_Chunk
!@@:
;Start_Values
!movdqu xmm1,dqword[STATE0]
!movdqu xmm2,dqword[STATE1]
!pshufd xmm1,xmm1,0b1h ;CDAB
!pshufd xmm2,xmm2,1bh ;EFGH
!movdqa xmm7,xmm1
!palignr xmm1,xmm2,8 ;ABEF
!pblendw xmm2,xmm7,0f0h ;CDGH
!movdqa xmm8,dqword[PSHUFFLE_BYTE_FLIP_MASK]
!lea rax,[K256]
!mov rsi,r15
!mov rcx,[r10]
!.Lloop0:
;Save hash values for addition after rounds
!movdqa xmm9,xmm1 ;Save ABEF
!movdqa xmm10,xmm2 ;Save CDGH
;Rounds 0-3
!movdqu xmm0,[rsi] ;String is Unicode, we need ASCII
!movdqu xmm11,[rsi+16]
!packuswb xmm0,xmm11 ;ASCII
!pshufb xmm0,xmm8
!movdqa xmm3,xmm0
!paddd xmm0,[rax]
!sha256rnds2 xmm2,xmm1
!pshufd xmm0,xmm0,0eh
!sha256rnds2 xmm1,xmm2
;Rounds 4-7
!movdqu xmm0,[rsi+32] ;Unicode
!movdqu xmm11,[rsi+48]
!packuswb xmm0,xmm11
!pshufb xmm0,xmm8
!movdqa xmm4,xmm0
!paddd xmm0,[rax+16]
!sha256rnds2 xmm2,xmm1
!pshufd xmm0,xmm0,0eh
!sha256rnds2 xmm1,xmm2
!sha256msg1 xmm3,xmm4
;Rounds 8-11
!movdqu xmm0,[rsi+64] ;Unicode
!movdqu xmm11,[rsi+80]
!packuswb xmm0,xmm11
!pshufb xmm0,xmm8
!movdqa xmm5,xmm0
!paddd xmm0,[rax+32]
!sha256rnds2 xmm2,xmm1
!pshufd xmm0,xmm0,0eh
!sha256rnds2 xmm1,xmm2
!sha256msg1 xmm4,xmm5
;Rounds 12-15
!movdqu xmm0,[rsi+96] ;Unicode
!movdqu xmm11,[rsi+112]
!packuswb xmm0,xmm11
!pshufb xmm0,xmm8
!movdqa xmm6,xmm0
!paddd xmm0,[rax+48]
!sha256rnds2 xmm2,xmm1
!movdqa xmm7,xmm6
!palignr xmm7,xmm5,4
!paddd xmm3,xmm7
!sha256msg2 xmm3,xmm6
!pshufd xmm0,xmm0,0eh
!sha256rnds2 xmm1,xmm2
!sha256msg1 xmm5,xmm6
;Rounds 16-19
!movdqa xmm0,xmm3
!paddd xmm0,[rax+64]
!sha256rnds2 xmm2,xmm1
!movdqa xmm7,xmm3
!palignr xmm7,xmm6,4
!paddd xmm4,xmm7
!sha256msg2 xmm4,xmm3
!pshufd xmm0,xmm0,0eh
!sha256rnds2 xmm1,xmm2
!sha256msg1 xmm6,xmm3
;Rounds 20-23
!movdqa xmm0,xmm4
!paddd xmm0,[rax+80]
!sha256rnds2 xmm2,xmm1
!movdqa xmm7,xmm4
!palignr xmm7,xmm3,4
!paddd xmm5,xmm7
!sha256msg2 xmm5,xmm4
!pshufd xmm0,xmm0,0eh
!sha256rnds2 xmm1,xmm2
!sha256msg1 xmm3,xmm4
;Rounds 24-27
!movdqa xmm0,xmm5
!paddd xmm0,[rax+96]
!sha256rnds2 xmm2,xmm1
!movdqa xmm7,xmm5
!palignr xmm7,xmm4,4
!paddd xmm6,xmm7
!sha256msg2 xmm6,xmm5
!pshufd xmm0,xmm0,0eh
!sha256rnds2 xmm1,xmm2
!sha256msg1 xmm4,xmm5
;Rounds 28-31
!movdqa xmm0,xmm6
!paddd xmm0,[rax+112]
!sha256rnds2 xmm2,xmm1
!movdqa xmm7,xmm6
!palignr xmm7,xmm5,4
!paddd xmm3,xmm7
!sha256msg2 xmm3,xmm6
!pshufd xmm0,xmm0,0eh
!sha256rnds2 xmm1,xmm2
!sha256msg1 xmm5,xmm6
;Rounds 32-35
!movdqa xmm0,xmm3
!paddd xmm0,[rax+128]
!sha256rnds2 xmm2,xmm1
!movdqa xmm7,xmm3
!palignr xmm7,xmm6,4
!paddd xmm4,xmm7
!sha256msg2 xmm4,xmm3
!pshufd xmm0,xmm0,0eh
!sha256rnds2 xmm1,xmm2
!sha256msg1 xmm6,xmm3
;Rounds 36-39
!movdqa xmm0,xmm4
!paddd xmm0,[rax+144]
!sha256rnds2 xmm2,xmm1
!movdqa xmm7,xmm4
!palignr xmm7,xmm3,4
!paddd xmm5,xmm7
!sha256msg2 xmm5,xmm4
!pshufd xmm0,xmm0,0eh
!sha256rnds2 xmm1,xmm2
!sha256msg1 xmm3,xmm4
;Rounds 40-43
!movdqa xmm0,xmm5
!paddd xmm0,[rax+160]
!sha256rnds2 xmm2,xmm1
!movdqa xmm7,xmm5
!palignr xmm7,xmm4,4
!paddd xmm6,xmm7
!sha256msg2 xmm6,xmm5
!pshufd xmm0,xmm0,0eh
!sha256rnds2 xmm1,xmm2
!sha256msg1 xmm4,xmm5
;Rounds 44-47
!movdqa xmm0,xmm6
!paddd xmm0,[rax+176]
!sha256rnds2 xmm2,xmm1
!movdqa xmm7,xmm6
!palignr xmm7,xmm5,4
!paddd xmm3,xmm7
!sha256msg2 xmm3,xmm6
!pshufd xmm0,xmm0,0eh
!sha256rnds2 xmm1,xmm2
!sha256msg1 xmm5,xmm6
;Rounds 48-51
!movdqa xmm0,xmm3
!paddd xmm0,[rax+192]
!sha256rnds2 xmm2,xmm1
!movdqa xmm7,xmm3
!palignr xmm7,xmm6,4
!paddd xmm4,xmm7
!sha256msg2 xmm4,xmm3
!pshufd xmm0,xmm0,0eh
!sha256rnds2 xmm1,xmm2
!sha256msg1 xmm6,xmm3
;Rounds 52-55
!movdqa xmm0,xmm4
!paddd xmm0,[rax+208]
!sha256rnds2 xmm2,xmm1
!movdqa xmm7,xmm4
!palignr xmm7,xmm3,4
!paddd xmm5,xmm7
!sha256msg2 xmm5,xmm4
!pshufd xmm0,xmm0,0eh
!sha256rnds2 xmm1,xmm2
;Rounds 56-59
!movdqa xmm0,xmm5
!paddd xmm0,[rax+224]
!sha256rnds2 xmm2,xmm1
!movdqa xmm7,xmm5
!palignr xmm7,xmm4,4
!paddd xmm6,xmm7
!sha256msg2 xmm6,xmm5
!pshufd xmm0,xmm0,0eh
!sha256rnds2 xmm1,xmm2
;Rounds 60-63
!movdqa xmm0,xmm6
!paddd xmm0,[rax+240]
!sha256rnds2 xmm2,xmm1
!pshufd xmm0,xmm0,0eh
!sha256rnds2 xmm1,xmm2
;Add current hash values with previously saved
!paddd xmm1,xmm9
!paddd xmm2,xmm10
;Increment Data pointer and loop if more to process
!dec rcx ;Chunks
!jz @f
!add rsi,128 ;Unicode
!cmp rcx,2
!jne .Lloop0
!cmp qword[r10],3
!jb .Lloop0 ;rsi is already inside Last_Chunk
!mov rsi,r8 ;Change Source: the last two blocks come from Last_Chunk
!jmp .Lloop0
!@@:
;Write hash values back in the correct order
!pshufd xmm1,xmm1,1bh
!pshufd xmm2,xmm2,0b1h
!movdqa xmm7,xmm1
!pblendw xmm1,xmm2,0f0h
!palignr xmm2,xmm7,8
!lea rdi,[Varis]
!movdqu [rdi],xmm1
!movdqu [rdi+16],xmm2
;Restore registers in programs (reverse order of the saving above)
!movdqu xmm4,[rsp]
!movdqu xmm5,[rsp+16]
!movdqu xmm6,[rsp+32]
!movdqu xmm7,[rsp+48]
!movdqu xmm8,[rsp+64]
!movdqu xmm9,[rsp+80]
!movdqu xmm10,[rsp+96]
!movdqu xmm11,[rsp+112]
!add rsp,128
!pop r15
!pop r12
!pop r11
!pop r10
!pop rdi
!pop rsi
;Convert the 8 digest words to hex, 8 digits each
For i = ?Varis To ?Varis + 28 Step 4
Res$ + RSet(Hex(PeekL(i) & $FFFFFFFF), 8, "0")
Next
ProcedureReturn Res$
!Align 16
;Constants, old known values
;The first 32 bits of the fractional parts of the square roots of the first 8 primes 2..19, Big-Endian!
;$6a09e667, $bb67ae85, $3c6ef372, $a54ff53a, $510e527f, $9b05688c, $1f83d9ab, $5be0cd19
!STATE0 dq 0bb67ae856a09e667h,0a54ff53a3c6ef372h
!STATE1 dq 9b05688c510e527fh,5be0cd191f83d9abh
!PSHUFFLE_BYTE_FLIP_MASK dq 0405060700010203h,0c0d0e0f08090a0bh
;Round constants, 4 per 16-byte row; the rows are read with aligned loads in the round code
!K256:
!dd 428a2f98h,71374491h,0b5c0fbcfh,0e9b5dba5h
!dd 3956c25bh,59f111f1h,923f82a4h,0ab1c5ed5h
!dd 0d807aa98h,12835b01h,243185beh,550c7dc3h
!dd 72be5d74h,80deb1feh,9bdc06a7h,0c19bf174h
!dd 0e49b69c1h,0efbe4786h,0fc19dc6h,240ca1cch
!dd 2de92c6fh,4a7484aah,5cb0a9dch,76f988dah
!dd 983e5152h,0a831c66dh,0b00327c8h,0bf597fc7h
!dd 0c6e00bf3h,0d5a79147h,6ca6351h,14292967h
!dd 27b70a85h,2e1b2138h,4d2c6dfch,53380d13h
!dd 650a7354h,766a0abbh,81c2c92eh,92722c85h
!dd 0a2bfe8a1h,0a81a664bh,0c24b8b70h,0c76c51a3h
!dd 0d192e819h,0d6990624h,0f40e3585h,106aa070h
!dd 19a4c116h,1e376c08h,2748774ch,34b0bcb5h
!dd 391c0cb3h,4ed8aa4ah,5b9cca4fh,682e6ff3h
!dd 748f82eeh,78a5636fh,84c87814h,8cc70208h
!dd 90befffah,0a4506cebh,0bef9a3f7h,0c67178f2h
DataSection
;Variables
Varis: ;For Res_CPU$
!Varis:
!times 512 db 0
EndDataSection
EndProcedure
CPU: 46FFC4A3DA4F60B940A0058764689FE79863F1C77C1A11230F1CF740666C5748 Time: 238ms
PB: 46FFC4A3DA4F60B940A0058764689FE79863F1C77C1A11230F1CF740666C5748 Time: 2281ms
Have fun!
Helle