It is currently Sat Dec 16, 2017 7:29 pm

All times are UTC + 1 hour




Post new topic Reply to topic  [ 6 posts ] 
Author Message
 Post subject: SHA256 with CPU-SHA-Instruction-Set
PostPosted: Mon Dec 04, 2017 10:27 pm 
Offline
Enthusiast
Enthusiast
User avatar

Joined: Wed Apr 12, 2006 7:59 pm
Posts: 164
Location: Germany
I found the following implementation that uses the CPU's SHA instructions:
https://groups.google.com/d/topic/fa.linux.kernel/jUelRfl5hx4
Below is a translation to PureBasic with a SHA-256 test (the SHA-1 part is not included):
Code:
;SHA256-CPU-Instructions for Windows 64-Bit and Unicode-Test-String
;Tested with PB 5.61 (x64) Unicode and PB 5.45 LTS (x64) Unicode, CPU AMD Ryzen 7 1800X
;"Helle" Klaus Helbing, 04.12.2017
;Based on https://groups.google.com/d/topic/fa.linux.kernel/jUelRfl5hx4
;From this:
;This file is provided under a dual BSD/GPLv2 license.  When using or
;redistributing this file, you may do so under either license.
;
;GPL LICENSE SUMMARY
;
;Copyright(c) 2015 Intel Corporation.
;
;This program is free software; you can redistribute it and/or modify
;it under the terms of version 2 of the GNU General Public License as
;published by the Free Software Foundation.
;
;This program is distributed in the hope that it will be useful, but
;WITHOUT ANY WARRANTY; without even the implied warranty of
;MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;General Public License for more details.
;
;BSD LICENSE
;
;Copyright(c) 2015 Intel Corporation.
;
;Redistribution and use in source and binary forms, with or without
;modification, are permitted provided that the following conditions
;are met:
;
;         * Redistributions of source code must retain the above copyright
;           notice, this list of conditions and the following disclaimer.
;         * Redistributions in binary form must reproduce the above copyright
;           notice, this list of conditions and the following disclaimer in
;           the documentation and/or other materials provided with the
;           distribution.
;         * Neither the name of Intel Corporation nor the names of its
;           contributors may be used to endorse or promote products derived
;           from this software without specific prior written permission.
;
;THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
;A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
;OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
;SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
;LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
;DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
;THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
;(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
;OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;------------------------------------------------------------------------------

;Build requirements: the inline assembly below uses 64-bit registers and reads
;the test string as 16-bit characters, so refuse to build anything else.
CompilerIf #PB_Compiler_Processor <> #PB_Processor_x64
  CompilerError "This code needs the PureBasic x64 compiler"
CompilerEndIf
CompilerIf #PB_Compiler_Unicode = 0
  CompilerError "This code needs a Unicode executable"
CompilerEndIf

;Check for CPU-SHA-Instructions:
;CPUID overwrites eax, ebx, ecx and edx. rbx is not a scratch register for
;inline ASM, so it is parked in r8 and restored afterwards.
!mov r8,rbx                  ;Preserve rbx across CPUID
!xor r9d,r9d                 ;r9d = EBX of leaf 7, stays 0 if leaf 7 does not exist
!xor eax,eax
!cpuid                       ;Leaf 0: eax = highest supported standard leaf
!cmp eax,7
!jb SHA_LeafDone             ;Leaf 7 not reported -> no SHA extensions
!mov eax,7
!xor ecx,ecx                 ;Sub-leaf 0
!cpuid
!mov r9d,ebx
!SHA_LeafDone:
!mov rbx,r8                  ;Restore rbx
!test r9d,20000000h          ;Bit29 SHA
!jnz IsSHA
MessageRequester("Ooops!", "No CPU-Support for SHA-Instructions!" + #CRLF$ + "End")
End

!IsSHA:

Declare.s CPU_SHA256(pSource.q)

;Generate a long Test-String, Unicode for this test
;The string is doubled 23 times, i.e. it ends up 2^23 times as long as the seed text
Source$ = "The quick brown fox jumps over the lazy dog"    ;Or your own
For i = 1 To 23
  Source$ + Source$
Next

;Test with CPU-Instructions
TA_CPU = ElapsedMilliseconds()
  Res_CPU$ = CPU_SHA256(@Source$)                ;Pointer to string
TE_CPU = ElapsedMilliseconds() - TA_CPU

;Test with PB (reference result, upper-cased to match the format of CPU_SHA256)
UseSHA2Fingerprint()
TA_PB = ElapsedMilliseconds()
  Res_PB$ = UCase(StringFingerprint(Source$, #PB_Cipher_SHA2, 256))
TE_PB = ElapsedMilliseconds() - TA_PB

;Both digests and timings, one per line (same line break as in the error message above)
Display$ = "CPU: " + Res_CPU$ + "  Time: " + Str(TE_CPU) + "ms" + #CRLF$ + "PB:    " + Res_PB$ + "  Time: " + Str(TE_PB) + "ms"
;SetClipboardText(Display$)
;CPU: 46FFC4A3DA4F60B940A0058764689FE79863F1C77C1A11230F1CF740666C5748  Time: 238ms
;PB:  46FFC4A3DA4F60B940A0058764689FE79863F1C77C1A11230F1CF740666C5748  Time: 2281ms
MessageRequester("SHA256-CPU-Test Unicode", Display$)
End

Procedure.s CPU_SHA256(pSource.q)
;SHA-256 with the CPU SHA instructions (sha256rnds2 / sha256msg1 / sha256msg2).
;
;In:   pSource = pointer to a zero-terminated string of 16-bit characters.
;                A null pointer is hashed like an empty string.
;Out:  the digest as 64 upper-case hex digits.
;
;Notes:
; - Every 16-bit character is narrowed to ONE byte with packuswb before it is hashed,
;   so the digest is that of the one-byte-per-character form of the text. packuswb
;   saturates, so characters above 255 do not survive the narrowing.
; - All working storage lives in the static block "Varis", so the procedure is not
;   reentrant and must not be called from several threads at the same time.
; - The length scan reads 16 bytes at a time and may therefore read up to 14 bytes
;   behind the terminating zero (NOTE(review): assumes that memory is readable).
; - The caller must have verified that the CPU supports the SHA extensions.
;
;Layout of the working storage, relative to the 16-byte aligned base kept in r8:
;   +0   .. +255  Last_Chunk  (the final one or two padded blocks, 16-bit per message byte)
;   +256          Chunks      (number of 64-byte SHA blocks)
;   +264          Chunk_Rest  (Size % 64)
;   +272          Size        (message length in characters = hashed bytes)
;   +288 .. +335  saved r10, r11, r12, r15, rsi, rdi
;   +336 .. +431  saved xmm6 .. xmm11
;With at most 16 bytes of alignment slack this needs 448 of the 512 bytes of Varis.

  !mov rcx,[p.v_pSource]     ;Pointer to string (parked in rcx until r15 has been saved)
  !lea rax,[Varis]           ;For variables etc.

  ;Set Align 16 for variables (always moves forward by 1..16 bytes)
  !mov rdx,rax
  !and rdx,0fh
  !add rax,16
  !sub rax,rdx

  ;Save every register used below that inline ASM is not allowed to clobber.
  ;They go into the static block instead of onto the stack, so that rsp (and with it
  ;the rsp-relative p.v_ names) stays untouched.
  !mov [rax+288],r10
  !mov [rax+296],r11
  !mov [rax+304],r12
  !mov [rax+312],r15
  !mov [rax+320],rsi
  !mov [rax+328],rdi
  !movdqa [rax+336],xmm6
  !movdqa [rax+352],xmm7
  !movdqa [rax+368],xmm8
  !movdqa [rax+384],xmm9
  !movdqa [rax+400],xmm10
  !movdqa [rax+416],xmm11

  !mov r8,rax                ;Last_Chunk (r8 is never modified from here on)
  !mov r15,rcx               ;Pointer to string

  ;Clear Last_Chunk. The padding below relies on it being zero, but it still holds the
  ;tail of the previous message and the previous digest when the procedure is called again.
  !cld                       ;Forward direction for this and the later string copies
  !mov rdi,r8
  !xor eax,eax
  !mov ecx,32                ;32 * 8 = 256 bytes
  !rep stosq

  ;A null pointer is treated as an empty string: scan the freshly zeroed buffer instead
  !test r15,r15
  !cmovz r15,r8

  !mov r10,r8
  !add r10,256               ;Chunks
  !mov r11,r10
  !add r11,8                 ;Chunk_Rest
  !mov r12,r11
  !add r12,8                 ;Size

  ;Len(String)
  ;xmm1 is all zero, so pcmpistri reports in ecx the index of the first zero word of the
  ;16 bytes read (8 if there is none) and sets ZF once a zero word was seen.
  !mov rdx,r15
  !mov rax, -16
  !pxor xmm1,xmm1 
 !@@:
  !add rax,16
  !pcmpistri xmm1,dqword[rdx+rax],00001001b      ;Unicode  Bit0=1 and Bit1=0 -> String-Chars are unsigned Words, Bit2=0 and Bit3=1 -> Test for equal each
 !jnz @b
  !shr rax,1                                     ;Unicode: byte offset -> character count
  !add rax,rcx                                   ;Plus the characters in front of the zero word
  !mov [r12],rax

  ;Chunks = ((Size + 8) / 64) + 1   (room for the 80h marker and the 8 length bytes)
  !mov rdx,rax
  !add rdx,8
  !shr rdx,6
  !add rdx,1
  !mov [r10],rdx             ;Chunks

  ;Chunk_Rest = Size %64   (rax still holds Size)
  !mov rcx,64
  !xor rdx,rdx
  !div rcx
  !mov [r11],rdx

  ;Message length in bits, big endian, widened to one word per byte like the text itself
  !mov rax,[r12]             ;Size
  !shl rax,3                 ;Bits Size
  !bswap rax                 ;To Big Endian
  !movq xmm0,rax             ;Unicode
  !pxor xmm1,xmm1
  !punpcklbw xmm0,xmm1       ;"Blow-Up" to Unicode

  !cmp qword[r10],1
 !je .Only_1Chunk

  ;Copy String-Part: Last_Chunk receives the final two blocks of the message.
  ;For Size <= 64 that is the whole string, otherwise it starts (Chunks-2) blocks in.
  !mov rdi,r8
  !mov rax,r15
  !cmp qword[r12],64
 !jbe @f
  !mov rdx,[r10]
  !sub rdx,2
  !shl rdx,7                 ;*128: one block is 64 characters = 128 bytes of source
  !add rax,rdx
 !@@:
  !mov rsi,rax
  !mov rcx,[r11]
  !cmp rcx,56
 !jae @f                     ;Rest >= 56: the rest alone starts the last two blocks
  !add rcx,64                ;Otherwise one full block precedes the rest
 !@@:
  !rep movsw

  !mov rax,r8
  !movdqu [rax+120*2],xmm0   ;Unicode: bit length into the last 8 message bytes of block 2

 !jmp @f
 !.Only_1Chunk:
  ;Copy String
  !mov rdi,r8
  !mov rsi,r15
  !mov rcx,[r11]
  !rep movsw
  !mov rax,r8

  !movdqu [rax+56*2],xmm0    ;Unicode: bit length into the last 8 message bytes of block 1
 !@@:
  !mov word[rdi],80h         ;Set Bit Unicode (padding marker right behind the copied text)

  !cmp qword[r10],3          ;Chunks
 !jae @f
  !mov r15,r8                ;pSource=Last_Chunk (1 or 2 blocks: everything is in the buffer)

 !@@:
  ;Start_Values
  !movdqu xmm1,dqword[STATE0]
  !movdqu xmm2,dqword[STATE1]

  !pshufd xmm1,xmm1,0b1h     ;CDAB
  !pshufd xmm2,xmm2,1bh      ;EFGH
  !movdqa xmm7,xmm1
  !palignr xmm1,xmm2,8       ;ABEF
  !pblendw xmm2,xmm7,0f0h    ;CDGH

  !movdqa xmm8,dqword[PSHUFFLE_BYTE_FLIP_MASK]
  !lea rax,[K256]

  !mov rsi,r15
  !mov rcx,[r10]

 !.Lloop0:
  ;Save hash values for addition after rounds
  !movdqa xmm9,xmm1          ;Save ABEF
  !movdqa xmm10,xmm2         ;Save CDGH
  ;Rounds 0-3
  !movdqu xmm0,[rsi]         ;String is Unicode, we need ASCII
  !movdqu xmm11,[rsi+16]
  !packuswb xmm0,xmm11       ;ASCII

  !pshufb xmm0,xmm8
  !movdqa xmm3,xmm0
  !paddd xmm0,[rax]
  !sha256rnds2 xmm2,xmm1
  !pshufd xmm0,xmm0,0eh
  !sha256rnds2 xmm1,xmm2
  ;Rounds 4-7
  !movdqu xmm0,[rsi+32]      ;Unicode
  !movdqu xmm11,[rsi+48]
  !packuswb xmm0,xmm11

  !pshufb xmm0,xmm8
  !movdqa xmm4,xmm0
  !paddd xmm0,[rax+16]
  !sha256rnds2 xmm2,xmm1
  !pshufd xmm0,xmm0,0eh
  !sha256rnds2 xmm1,xmm2
  !sha256msg1 xmm3,xmm4
  ;Rounds 8-11
  !movdqu xmm0,[rsi+64]      ;Unicode
  !movdqu xmm11,[rsi+80]
  !packuswb xmm0,xmm11

  !pshufb xmm0,xmm8
  !movdqa xmm5,xmm0
  !paddd xmm0,[rax+32]
  !sha256rnds2 xmm2,xmm1
  !pshufd xmm0,xmm0,0eh
  !sha256rnds2 xmm1,xmm2
  !sha256msg1 xmm4,xmm5
  ;Rounds 12-15
  !movdqu xmm0,[rsi+96]      ;Unicode
  !movdqu xmm11,[rsi+112]
  !packuswb xmm0,xmm11

  !pshufb xmm0,xmm8
  !movdqa xmm6,xmm0
  !paddd xmm0,[rax+48]
  !sha256rnds2 xmm2,xmm1
  !movdqa xmm7,xmm6
  !palignr xmm7,xmm5,4
  !paddd xmm3,xmm7
  !sha256msg2 xmm3,xmm6
  !pshufd xmm0,xmm0,0eh
  !sha256rnds2 xmm1,xmm2
  !sha256msg1 xmm5,xmm6
  ;Rounds 16-19
  !movdqa xmm0,xmm3
  !paddd xmm0,[rax+64]
  !sha256rnds2 xmm2,xmm1
  !movdqa xmm7,xmm3
  !palignr xmm7,xmm6,4
  !paddd xmm4,xmm7
  !sha256msg2 xmm4,xmm3
  !pshufd xmm0,xmm0,0eh
  !sha256rnds2 xmm1,xmm2
  !sha256msg1 xmm6,xmm3
  ;Rounds 20-23
  !movdqa xmm0,xmm4
  !paddd xmm0,[rax+80]
  !sha256rnds2 xmm2,xmm1
  !movdqa xmm7,xmm4
  !palignr xmm7,xmm3,4
  !paddd xmm5,xmm7
  !sha256msg2 xmm5,xmm4
  !pshufd xmm0,xmm0,0eh
  !sha256rnds2 xmm1,xmm2
  !sha256msg1 xmm3,xmm4
  ;Rounds 24-27
  !movdqa xmm0,xmm5
  !paddd xmm0,[rax+96]
  !sha256rnds2 xmm2,xmm1
  !movdqa xmm7,xmm5
  !palignr xmm7,xmm4,4
  !paddd xmm6,xmm7
  !sha256msg2 xmm6,xmm5
  !pshufd xmm0,xmm0,0eh
  !sha256rnds2 xmm1,xmm2
  !sha256msg1 xmm4,xmm5
  ;Rounds 28-31
  !movdqa xmm0,xmm6
  !paddd xmm0,[rax+112]
  !sha256rnds2 xmm2,xmm1
  !movdqa xmm7,xmm6
  !palignr xmm7,xmm5,4
  !paddd xmm3,xmm7
  !sha256msg2 xmm3,xmm6
  !pshufd xmm0,xmm0,0eh
  !sha256rnds2 xmm1,xmm2
  !sha256msg1 xmm5,xmm6
  ;Rounds 32-35
  !movdqa xmm0,xmm3
  !paddd xmm0,[rax+128]
  !sha256rnds2 xmm2,xmm1
  !movdqa xmm7,xmm3
  !palignr xmm7,xmm6,4
  !paddd xmm4,xmm7
  !sha256msg2 xmm4,xmm3
  !pshufd xmm0,xmm0,0eh
  !sha256rnds2 xmm1,xmm2
  !sha256msg1 xmm6,xmm3
  ;Rounds 36-39
  !movdqa xmm0,xmm4
  !paddd xmm0,[rax+144]
  !sha256rnds2 xmm2,xmm1
  !movdqa xmm7,xmm4
  !palignr xmm7,xmm3,4
  !paddd xmm5,xmm7
  !sha256msg2 xmm5,xmm4
  !pshufd xmm0,xmm0,0eh
  !sha256rnds2 xmm1,xmm2
  !sha256msg1 xmm3,xmm4
  ;Rounds 40-43
  !movdqa xmm0,xmm5
  !paddd xmm0,[rax+160]
  !sha256rnds2 xmm2,xmm1
  !movdqa xmm7,xmm5
  !palignr xmm7,xmm4,4
  !paddd xmm6,xmm7
  !sha256msg2 xmm6,xmm5
  !pshufd xmm0,xmm0,0eh
  !sha256rnds2 xmm1,xmm2
  !sha256msg1 xmm4,xmm5
  ;Rounds 44-47
  !movdqa xmm0,xmm6
  !paddd xmm0,[rax+176]
  !sha256rnds2 xmm2,xmm1
  !movdqa xmm7,xmm6
  !palignr xmm7,xmm5,4
  !paddd xmm3,xmm7
  !sha256msg2 xmm3,xmm6
  !pshufd xmm0,xmm0,0eh
  !sha256rnds2 xmm1,xmm2
  !sha256msg1 xmm5,xmm6
  ;Rounds 48-51
  !movdqa xmm0,xmm3
  !paddd xmm0,[rax+192]
  !sha256rnds2 xmm2,xmm1
  !movdqa xmm7,xmm3
  !palignr xmm7,xmm6,4
  !paddd xmm4,xmm7
  !sha256msg2 xmm4,xmm3
  !pshufd xmm0,xmm0,0eh
  !sha256rnds2 xmm1,xmm2
  !sha256msg1 xmm6,xmm3
  ;Rounds 52-55
  !movdqa xmm0,xmm4
  !paddd xmm0,[rax+208]
  !sha256rnds2 xmm2,xmm1
  !movdqa xmm7,xmm4
  !palignr xmm7,xmm3,4
  !paddd xmm5,xmm7
  !sha256msg2 xmm5,xmm4
  !pshufd xmm0,xmm0,0eh
  !sha256rnds2 xmm1,xmm2
  ;Rounds 56-59
  !movdqa xmm0,xmm5
  !paddd xmm0,[rax+224]
  !sha256rnds2 xmm2,xmm1
  !movdqa xmm7,xmm5
  !palignr xmm7,xmm4,4
  !paddd xmm6,xmm7
  !sha256msg2 xmm6,xmm5
  !pshufd xmm0,xmm0,0eh
  !sha256rnds2 xmm1,xmm2
  ;Rounds 60-63
  !movdqa xmm0,xmm6
  !paddd xmm0,[rax+240]
  !sha256rnds2 xmm2,xmm1
  !pshufd xmm0,xmm0,0eh
  !sha256rnds2 xmm1,xmm2
  ;Add current hash values with previously saved
  !paddd xmm1,xmm9
  !paddd xmm2,xmm10
  ;Increment Data pointer and loop if more to process
  !dec rcx                     ;Chunks
 !jz @f
  !add rsi,128                 ;Unicode
  !cmp rcx,2
 !jne .Lloop0
  ;Exactly two blocks left: they are the padded ones prepared in Last_Chunk
  !cmp qword[r10],3
 !jb .Lloop0                  ;rsi already points into Last_Chunk
  !mov rsi,r8                ;Change Source
 !jmp .Lloop0
 !@@:
  ;Write hash values back in the correct order
  !pshufd xmm1,xmm1,1bh
  !pshufd xmm2,xmm2,0b1h
  !movdqa xmm7,xmm1
  !pblendw xmm1,xmm2,0f0h
  !palignr xmm2,xmm7,8

  ;The 32 digest bytes go to the (unaligned) start of Varis, where the PB code below reads them
  !lea rax,[Varis]
  !movdqu [rax],xmm1
  !movdqu [rax+16],xmm2

  ;Restore the registers saved on entry (r8 still holds the aligned base)
  !movdqa xmm6,[r8+336]
  !movdqa xmm7,[r8+352]
  !movdqa xmm8,[r8+368]
  !movdqa xmm9,[r8+384]
  !movdqa xmm10,[r8+400]
  !movdqa xmm11,[r8+416]
  !mov rdi,[r8+328]
  !mov rsi,[r8+320]
  !mov r15,[r8+312]
  !mov r12,[r8+304]
  !mov r11,[r8+296]
  !mov r10,[r8+288]

  ;Eight 32-bit words -> 8 hex digits each, zero padded on the left
  For i = ?Varis To ?Varis + 28 Step 4
    Res$ + RSet(Hex(PeekL(i) & $FFFFFFFF), 8, "0")
  Next
 ProcedureReturn Res$

!Align 16
  ;Constants, old known values
  ;The first 32 bits of the fractional parts of the square roots of the first 8 primes 2..19, Big-Endian!
  ;$6a09e667, $bb67ae85, $3c6ef372, $a54ff53a, $510e527f, $9b05688c, $1f83d9ab, $5be0cd19
  !STATE0 dq 0bb67ae856a09e667h,0a54ff53a3c6ef372h
  !STATE1 dq 9b05688c510e527fh,5be0cd191f83d9abh
  ;pshufb mask that reverses the byte order inside each 32-bit word
  !PSHUFFLE_BYTE_FLIP_MASK dq 0405060700010203h,0c0d0e0f08090a0bh
  ;Round constants, 4 per 16-byte row = 4 rounds per row
  !K256:
  !dd 428a2f98h,71374491h,0b5c0fbcfh,0e9b5dba5h
  !dd 3956c25bh,59f111f1h,923f82a4h,0ab1c5ed5h
  !dd 0d807aa98h,12835b01h,243185beh,550c7dc3h
  !dd 72be5d74h,80deb1feh,9bdc06a7h,0c19bf174h
  !dd 0e49b69c1h,0efbe4786h,0fc19dc6h,240ca1cch
  !dd 2de92c6fh,4a7484aah,5cb0a9dch,76f988dah
  !dd 983e5152h,0a831c66dh,0b00327c8h,0bf597fc7h
  !dd 0c6e00bf3h,0d5a79147h,6ca6351h,14292967h
  !dd 27b70a85h,2e1b2138h,4d2c6dfch,53380d13h
  !dd 650a7354h,766a0abbh,81c2c92eh,92722c85h
  !dd 0a2bfe8a1h,0a81a664bh,0c24b8b70h,0c76c51a3h
  !dd 0d192e819h,0d6990624h,0f40e3585h,106aa070h
  !dd 19a4c116h,1e376c08h,2748774ch,34b0bcb5h
  !dd 391c0cb3h,4ed8aa4ah,5b9cca4fh,682e6ff3h
  !dd 748f82eeh,78a5636fh,84c87814h,8cc70208h
  !dd 90befffah,0a4506cebh,0bef9a3f7h,0c67178f2h

 DataSection
  ;Variables: static working storage (layout see procedure header) and digest output
  Varis:                     ;For Res_CPU$
  !Varis:
  !times 512 db 0
 EndDataSection
EndProcedure

Result for the Test-String:
CPU: 46FFC4A3DA4F60B940A0058764689FE79863F1C77C1A11230F1CF740666C5748 Time: 238ms
PB: 46FFC4A3DA4F60B940A0058764689FE79863F1C77C1A11230F1CF740666C5748 Time: 2281ms
Have fun!
Helle


Top
 Profile  
Reply with quote  
 Post subject: Re: SHA256 with CPU-SHA-Instruction-Set
PostPosted: Tue Dec 05, 2017 9:35 pm 
Offline
PureBasic Bullfrog
PureBasic Bullfrog
User avatar

Joined: Wed Jul 06, 2005 5:42 am
Posts: 7958
Location: Kelowna, BC, Canada
I get an illegal instruction error on line 309 :cry: (Intel i7)

_________________
Veni, vidi, vici.


Top
 Profile  
Reply with quote  
 Post subject: Re: SHA256 with CPU-SHA-Instruction-Set
PostPosted: Tue Dec 05, 2017 11:20 pm 
Offline
Enthusiast
Enthusiast

Joined: Mon Nov 03, 2008 9:56 pm
Posts: 507
You have to compile it with PB x64.


Top
 Profile  
Reply with quote  
 Post subject: Re: SHA256 with CPU-SHA-Instruction-Set
PostPosted: Wed Dec 06, 2017 6:10 am 
Offline
Addict
Addict

Joined: Fri Nov 09, 2012 11:04 pm
Posts: 1465
Location: Uttoxeter, UK
I am using an Intel i7 5960 with PureBasic x64 and get:
Debugger wrote:
PureBasic.asm [426]
sha256rnds2 xmm2,xmm1
error: illegal instruction.

If I switch off the Debugger, 'ere running, the bracketed number changes from 426 to 330.

_________________
DE AA EB


Top
 Profile  
Reply with quote  
 Post subject: Re: SHA256 with CPU-SHA-Instruction-Set
PostPosted: Wed Dec 06, 2017 9:59 am 
Offline
Enthusiast
Enthusiast
User avatar

Joined: Tue May 28, 2013 10:51 pm
Posts: 500
Location: Europe
Download latest FAsm.

https://flatassembler.net/download.php

Quote:
version 1.72 (Oct 10, 2017)

[+] Support for Intel AVX-512, SHA, CLFLUSHOPT, CLWB, PCOMMIT, ADX, RDSEED, SMAP and MPX instruction sets.

_________________
"If you lie to the compiler, it will get its revenge."
Henry Spencer
https://www.pci-z.com/


Top
 Profile  
Reply with quote  
 Post subject: Re: SHA256 with CPU-SHA-Instruction-Set
PostPosted: Wed Dec 06, 2017 11:07 am 
Offline
Enthusiast
Enthusiast
User avatar

Joined: Wed Apr 12, 2006 7:59 pm
Posts: 164
Location: Germany
Sorry, but whenever I install a new PB version, the first thing I do is copy the latest FAsm version into the PB Compilers directory.
SHA support has been in FAsm since version 1.71.40 (Oct 19, 2015) - that is 2 years already :D !


Top
 Profile  
Reply with quote  
Display posts from previous:  Sort by  
Post new topic Reply to topic  [ 6 posts ] 

All times are UTC + 1 hour


Who is online

Users browsing this forum: No registered users and 2 guests


You cannot post new topics in this forum
You cannot reply to topics in this forum
You cannot edit your posts in this forum
You cannot delete your posts in this forum

Search for:
Jump to:  

 


Powered by phpBB © 2008 phpBB Group
subSilver+ theme by Canver Software, sponsor Sanal Modifiye