;Forum artifact ("Code: Alles auswählen" = "Code: select all") - not part of the program
;CRC32-Fingerprint mit PCLMULQDQ. Verwendet wird Polynomial Reversed ($EDB88320)
;Basierend auf: http://stuff.mit.edu/afs/sipb/contrib/linux/arch/x86/crypto/crc32-pclmul_asm.S
;Lizenz beachten!
;PureBasic 5.22 LTS (Windows - x64)
;Helle 13.04.2014
Global.l CRC32
Procedure.l CRC32_CL(Mem, Laenge)
;Test auf PCLMULQDQ
!MOV eax,1
!CPUID
!TEST ecx,2
!JNZ CL_OK ;kann losgehen!
CRC32Fingerprint(Mem, Laenge) ;doch wieder mit PB
ProcedureReturn
!CL_OK:
!MOV [v_CRC32],0 ;falls Laenge < 64
!MOV r9,[p.v_Laenge]
!AND r9,0FFFFFFFFFFFFFFC0h
!JZ less_64
!MOV eax,0FFFFFFFFh ;Initialisierungswert für CRC32
!MOVD xmm0,eax
!MOV r8,[p.v_Mem]
!MOVDQA xmm1,[r8]
!MOVDQA xmm2,[r8+10h]
!MOVDQA xmm3,[r8+20h]
!MOVDQA xmm4,[r8+30h]
!PXOR xmm1,xmm0
!SUB r9,40h
!ADD r8,40h
!MOVDQA xmm0,[Lconstant_R2R1]
!loop_64:
!PREFETCHNTA [r8+0c0h] ;Cache "vorfüllen"
!MOVDQA xmm5,xmm1
!MOVDQA xmm6,xmm2
!MOVDQA xmm7,xmm3
!MOVDQA xmm8,xmm4
!PCLMULQDQ xmm1,xmm0,00h
!PCLMULQDQ xmm2,xmm0,00h
!PCLMULQDQ xmm3,xmm0,00h
!PCLMULQDQ xmm4,xmm0,00h
!PCLMULQDQ xmm5,xmm0,11h
!PCLMULQDQ xmm6,xmm0,11h
!PCLMULQDQ xmm7,xmm0,11h
!PCLMULQDQ xmm8,xmm0,11h
!PXOR xmm1,xmm5
!PXOR xmm2,xmm6
!PXOR xmm3,xmm7
!PXOR xmm4,xmm8
!PXOR xmm1,[r8]
!PXOR xmm2,[r8+10h]
!PXOR xmm3,[r8+20h]
!PXOR xmm4,[r8+30h]
!SUB r9,40h
!ADD r8,40h
!CMP r9,40h
!JGE loop_64
!MOVDQA xmm0,[Lconstant_R4R3]
!PREFETCHNTA [r8]
!MOVDQA xmm5,xmm1
!PCLMULQDQ xmm1,xmm0,00h
!PCLMULQDQ xmm5,xmm0,11h
!PXOR xmm1,xmm5
!PXOR xmm1,xmm2
!MOVDQA xmm5,xmm1
!PCLMULQDQ xmm1,xmm0,00h
!PCLMULQDQ xmm5,xmm0,11h
!PXOR xmm1,xmm5
!PXOR xmm1,xmm3
!MOVDQA xmm5,xmm1
!PCLMULQDQ xmm1,xmm0,00h
!PCLMULQDQ xmm5,xmm0,11h
!PXOR xmm1,xmm5
!PXOR xmm1,xmm4
!PCLMULQDQ xmm0,xmm1,01h
!PSRLDQ xmm1,08h
!PXOR xmm1,xmm0
!MOVDQA xmm2,xmm1
!MOVDQA xmm0,[Lconstant_R5]
!MOVDQA xmm3,[Lconstant_mask32]
!PSRLDQ xmm2,04h
!PAND xmm1,xmm3
!PCLMULQDQ xmm1,xmm0,00h
!PXOR xmm1,xmm2
!MOVDQA xmm0,[Lconstant_RUpoly]
!MOVDQA xmm2,xmm1
!PAND xmm1,xmm3
!PCLMULQDQ xmm1,xmm0,10h
!PAND xmm1,xmm3
!PCLMULQDQ xmm1,xmm0,00h
!PXOR xmm1,xmm2
!PEXTRD eax,xmm1,01h
!NOT eax ;wegen Polynomial Reversed
!MOV [v_CRC32],eax
!less_64: ;Rest mit PB (max.63 Bytes)
CRC32Fingerprint(Mem + Laenge - (Laenge & $3F), Laenge & $3F, CRC32)
ProcedureReturn
;Konstanten
!Align 16
!Lconstant_R2R1:
!dq 0000000154442bd4h
!dq 00000001c6e41596h
!Lconstant_R4R3:
!dq 00000001751997d0h
!dq 00000000ccaa009eh
!Lconstant_R5:
!dq 0000000163cd6124h
!dq 0000000000000000h
!Lconstant_mask32:
!dq 00000000FFFFFFFFh
!dq 0000000000000000h
!Lconstant_RUpoly:
!dq 00000001DB710641h
!dq 00000001F7011641h
EndProcedure
;Test / benchmark: fills a 16-byte aligned buffer with repeated copies of a
;test sentence, then times CRC32Fingerprint() against CRC32_CL() on the same
;data and shows both checksums and durations in a message box.
A$ = "The quick brown fox jumps over the lazy dog."
LA = Len(A$)
Debug LA
Faktor = 9999999                    ;the sentence is stored Faktor + 1 times
Length = LA * (Faktor + 1)
Buffer = AllocateMemory(Length + 16) ;+16: room for aligning the start and for the final string terminator
If Buffer
  BufferA = (Buffer + 15) & (~$0F)  ;round up to the next 16-byte boundary
  For i = 0 To Faktor
    ;Force one byte per character: without the flag a Unicode build writes
    ;2 bytes per character and runs past the end of the buffer
    PokeS(BufferA + (i * LA), A$, LA, #PB_Ascii)
  Next
  Time_PB_A = ElapsedMilliseconds()
  CRC32 = CRC32Fingerprint(BufferA, Length) ;caution: split the data for sizes > 4 GB
  Time_PB_E = ElapsedMilliseconds() - Time_PB_A
  PB$ = "CRC32_PB = " + Hex(CRC32 & $FFFFFFFF) + " in " + Str(Time_PB_E) + " ms"
  Time_CL_A = ElapsedMilliseconds()
  CRC32 = CRC32_CL(BufferA, Length)
  Time_CL_E = ElapsedMilliseconds() - Time_CL_A
  CL$ = "CRC32_CL = " + Hex(CRC32 & $FFFFFFFF) + " in " + Str(Time_CL_E) + " ms"
  FreeMemory(Buffer)
  MessageRequester("CRC32 für " + Str(Length) + " Bytes", PB$ + #CRLF$ + CL$)
EndIf
;PB: 797 ms
;CL: 30 ms.
;Have fun! (Viel Spaß!)
;Helle