here is a timing demo. If i did it right, it is a search in a 512 Char = 1kB String
It seems to be to fast for me. I hope there is no bug!
Code: Select all
EnableExplicit
Procedure.i SSE_FindStr(*String, *StringToFind)
; Attention: This function is in beta state!
; ============================================================================
; NAME: SSE_FindStr
; DESC: Try to find StringToFind in String with SSE operation (PCmpIStrI)
; DESC: Search for the needle in the haystack
; DESC: This Function is for 2Byte Character Strings only
; VAR(*String): Pointer to String (Haystack)
; VAR(*StringToFind): Pointer to StringToFind (Needle)
; RET.i: If found: The startposition in Characters [1..n]. Otherwise 0
; ============================================================================
;DisableDebugger
; TODO! Solve the 16Byte align problem
CompilerIf #PB_Compiler_Backend = #PB_Backend_Asm
CompilerIf #PB_Compiler_64Bit
Protected memRAX, memRDX
; Returns a pointer To the first occurrence of str2 in str1, Or a null pointer If str2 is Not part of str1.
; The matching process does not include the terminating null-characters, but it stops there
; RAX = haystack (Heuhaufen), RDX = needle (Nadel)
; XMM0 XMM1 XMM2 XMM3 XMM4
; XMM1 = [String1] : XMM2=[String2]
!MOV RAX, [p.p_String] ; haystack
!MOV RDX, [p.p_StringToFind] ; needle
!MOVDQU XMM2, DQWORD[RDX] ; load the first 16 bytes of neddle (String to find)
!SUB RAX, 16 ; Avoid extra jump in main loop
; ----------------------------------------------------------------------
; Find the first possible match of 16-byte fragment in haystack
; ----------------------------------------------------------------------
!FindStr_MainLoop:
!ADD RAX, 16 ; Step up Counter
!MOVDQU XMM1, DQWORD[RAX]
;!PCMPISTRI XMM2, XMM1, 1100b ; EQUAL_ORDERED ; for ASCII Strings
!PCMPISTRI XMM2, XMM1, 1101b ; EQUAL_ORDERED + UNSIGNED_WORDS; 11001b
; now RCX contains the offset in WORDS where a match was found
; Loop while ZF=0 and CF=0:
; 1) We find a null in s1(RAX) ZF=1
; 2) We find a char that does not match CF=1
!JA FindStr_MainLoop
; Jump if CF=0, we found only matching chars
!JNC FindStr_StrNotFound
; possible match found at WordOffset in RCX
!ADD RCX, RCX ; Word to Byte
!ADD RAX, RCX ; save the possible match start
!MOV [p.v_memRDX], RDX ; mov edi, edx; save RDX
!MOV [p.v_memRAX], RAX ; mov esi, eax; save RAX
; ----------------------------------------------------------------------
; Compare String, at possible match postion in haystack, with needle
; ----------------------------------------------------------------------
!SUB RDX, RAX
!SUB RAX, 16 ; counter
!PXOR XMM3, XMM3 ; XMM3 = 0
; compare the strings
!FindStr_Compare:
!ADD RAX, 16 ; Counter
!MOVDQU XMM1, DQWORD[RAX+RDX] ; Haystack
; mask out invalid bytes in the haystack
;!PCMPISTRM XMM3, XMM1, 1011000b ; EQUAL_EACH + NEGATIVE_POLARITY + BYTE_MASK ; for ASCII Strings
!PCMPISTRM XMM3, XMM1, 1011001b ; EQUAL_EACH + NEGATIVE_POLARITY + BYTE_MASK + UNSIGNED_WORDS
; PCMPISTRM writes as result a Mask To XMM0, we used BYTE_MASK
!MOVDQU XMM4, DQWORD[RAX] ; haystack
!PAND XMM4, XMM0
;!PCMPISTRI XMM1, XMM4, 0011000b ; EQUAL_EACH + NEGATIVE_POLARITY ; for ASCII Strings
!PCMPISTRI XMM1, XMM4, 0011001b ; EQUAL_EACH + NEGATIVE_POLARITY + UNSIGNED_WORDS
; Loop while ZF=0 and CF=0:
; 1) We find a null in s1(RDX+RCX) ZF=1 {JA CF=0 & ZF=0} {JE : ZF=1)
; 2) We find a char that does not match CF=1 {JC, JNC}
; 3) We find a null in s2 SF=1 {JS, JNS}
;!JS FindStr_StrNotFound
!JA FindStr_Compare ; CF=0 AND ZF=0
!MOV RDX, [p.v_memRDX]
!MOV RAX, [p.v_memRAX]
!JNC FindStr_StrFound
;!SUB RAX, 15 ; for ASCII Strings
!SUB RAX, 14
!JMP FindStr_MainLoop
!FindStr_StrNotFound:
!XOR RAX, RAX
!JMP FindStr_End
!FindStr_StrFound:
; because RAX contains the Pointer we have to calculate the Char-No.
!SUB RAX, [p.p_String] ; Sub the Haystack Start-Pointer
!SHR RAX, 1 ; Byte to Word: not needed for ASCII Strings
!ADD RAX, 1 ; Add 1 to start with 1 as first Char-No.
!FindStr_End:
ProcedureReturn ; !RAX
CompilerElse ; #PB_Compiler_32Bit
Protected memEAX, memEDX
; Returns a pointer To the first occurrence of str2 in str1, Or a null pointer If str2 is Not part of str1.
; The matching process does not include the terminating null-characters, but it stops there
; RAX = haystack (Heuhaufen), EDX = needle (Nadel)
; XMM0 XMM1 XMM2 XMM3 XMM4
; XMM1 = [String1] : XMM2=[String2]
!MOV EAX, [p.p_String] ; haystack
!MOV EDX, [p.p_StringToFind] ; needle
!MOVDQU XMM2, DQWORD[EDX] ; load the first 16 bytes of neddle (String to find)
!SUB EAX, 16 ; Avoid extra jump in main loop
; ----------------------------------------------------------------------
; Find the first possible match of 16-byte fragment in haystack
; ----------------------------------------------------------------------
!FindStr_MainLoop:
!ADD EAX, 16 ; Step up Counter
!MOVDQU XMM1, DQWORD[EAX]
;!PCMPISTRI XMM2, XMM1, 1100b ; EQUAL_ORDERED ; for ASCII Strings
!PCMPISTRI XMM2, XMM1, 1101b ; EQUAL_ORDERED + UNSIGNED_WORDS; 11001b
; now RCX contains the offset in WORDS where a match was found
; Loop while ZF=0 and CF=0:
; 1) We find a null in s1(EAX) ZF=1
; 2) We find a char that does not match CF=1
!JA FindStr_MainLoop
; Jump if CF=0, we found only matching chars
!JNC FindStr_StrNotFound
; possible match found at WordOffset in ECX
!ADD ECX, ECX ; Word to Byte
!ADD EAX, ECX ; save the possible match start
!MOV [p.v_memEDX], EDX ; mov edi, edx; save EDX
!MOV [p.v_memEAX], EAX ; mov esi, eax; save EAX
; ----------------------------------------------------------------------
; Compare String, at possible match postion in haystack, with needle
; ----------------------------------------------------------------------
!SUB EDX, EAX
!SUB EAX, 16 ; counter
!PXOR XMM3, XMM3 ; XMM3 = 0
; compare the strings
!FindStr_Compare:
!ADD EAX, 16 ; Counter
!MOVDQU XMM1, DQWORD[EAX+EDX] ; Haystack
; mask out invalid bytes in the haystack
;!PCMPISTRM XMM3, XMM1, 1011000b ; EQUAL_EACH + NEGATIVE_POLARITY + BYTE_MASK ; for ASCII Strings
!PCMPISTRM XMM3, XMM1, 1011001b ; EQUAL_EACH + NEGATIVE_POLARITY + BYTE_MASK + UNSIGNED_WORDS
; PCMPISTRM writes as result a Mask To XMM0, we used BYTE_MASK
!MOVDQU XMM4, DQWORD[EAX] ; haystack
!PAND XMM4, XMM0
;!PCMPISTRI XMM1, XMM4, 0011000b ; EQUAL_EACH + NEGATIVE_POLARITY ; for ASCII Strings
!PCMPISTRI XMM1, XMM4, 0011001b ; EQUAL_EACH + NEGATIVE_POLARITY + UNSIGNED_WORDS
; Loop while ZF=0 and CF=0:
; 1) We find a null in s1(EDX+ECX) ZF=1 {JA CF=0 & ZF=0} {JE : ZF=1)
; 2) We find a char that does not match CF=1 {JC, JNC}
; 3) We find a null in s2 SF=1 {JS, JNS}
;!JS FindStr_StrNotFound
!JA FindStr_Compare ; CF=0 AND ZF=0
!MOV EDX, [p.v_memEDX]
!MOV EAX, [p.v_memEAX]
!JNC FindStr_StrFound
;!SUB EAX, 15 ; for ASCII Strings
!SUB EAX, 14
!JMP FindStr_MainLoop
!FindStr_StrNotFound:
!XOR EAX, EAX
!JMP FindStr_End
!FindStr_StrFound:
; because EAX contains the Pointer we have to calculate the Char-No.
!SUB EAX, [p.p_String] ; Sub the Haystack Start-Pointer
!SHR EAX, 1 ; Byte to Word: not needed for ASCII Strings
!ADD EAX, 1 ; Add 1 to start with 1 as first Char-No.
!FindStr_End:
ProcedureReturn
CompilerEndIf ; #PB_Compiler_32Bit
CompilerElse ; C-Backend
; for now use PB FindString. So it will work on other Platforms too.
; maybe provide a C optimized version in the future
Protected *pStr.String = *String
Protected *pStrToFind.String = *StringToFind
ProcedureReturn FindString(*pStr\s, *pStrToFind\s)
CompilerEndIf
EndProcedure
Define BigSize = 512 ; 512 Char = 1KB
Define Big$=Space(BigSize)
Define Search$ = "search"
Define LB = Len(Search$)*2
Debug "Len(Big$)= " + Str (Len(Big$))
CopyMemory(@Search$, @Big$ + BigSize-LB-10 , LB)
Debug Search$
Debug "---- Big ----"
Debug Right(Big$, 50)
Debug "-------------"
Define pos
pos = SSE_FindStr(@Big$, @Search$)
Debug "found " + Search$ + " at Character " + pos
; do the timing test only without Debugger
CompilerIf Not #PB_Compiler_Debugger
#Loops = (1024*1024) * 30 ; 1024*1024 => 1 GB * 30 => 30GB to search
Define I, t1
t1 = ElapsedMilliseconds()
For I = 1 To #Loops
pos = SSE_FindStr(@Big$, @Search$)
Next
t1 = ElapsedMilliseconds() - t1
Define Msg$
Msg$ = Str(t1) + " " + "ms"
MessageRequester("Timing", Msg$)
CompilerEndIf