here is a timing demo. If i did it right, it is a search in a 512 Char = 1kB String
It seems to be to fast for me. I hope there is no bug!
Code: Select all
EnableExplicit
Procedure.i SSE_FindStr(*String, *StringToFind)
; Attention: This function is in beta state!
; ============================================================================
; NAME: SSE_FindStr
; DESC: Try to find StringToFind in String with SSE operation (PCmpIStrI)
; DESC: Search for the needle in the haystack
; DESC: This Function is for 2Byte Character Strings only
; VAR(*String): Pointer to String (Haystack)
; VAR(*StringToFind): Pointer to StringToFind (Needle)
; RET.i: If found: The startposition in Characters [1..n]. Otherwise 0
; ============================================================================    
      
  ;DisableDebugger
  
  ; TODO! Solve the 16Byte align problem
  
  CompilerIf #PB_Compiler_Backend = #PB_Backend_Asm
    
    CompilerIf #PB_Compiler_64Bit 
      Protected memRAX, memRDX
      ; Returns a pointer To the first occurrence of str2 in str1, Or a null pointer If str2 is Not part of str1. 
      ; The matching process does not include the terminating null-characters, but it stops there
      ; RAX = haystack (Heuhaufen), RDX = needle (Nadel)
      
      ; XMM0 XMM1 XMM2 XMM3 XMM4
      ; XMM1 = [String1] : XMM2=[String2]
      
      !MOV RAX, [p.p_String]        ; haystack
      !MOV RDX, [p.p_StringToFind]  ; needle
      !MOVDQU XMM2, DQWORD[RDX] ; load the first 16 bytes of neddle (String to find)
  
     	!SUB RAX, 16		; Avoid extra jump in main loop
         
      ; ----------------------------------------------------------------------
      ; Find the first possible match of 16-byte fragment in haystack
      ; ----------------------------------------------------------------------
      !FindStr_MainLoop:
        !ADD RAX, 16      ; Step up Counter
        !MOVDQU XMM1, DQWORD[RAX]
       ;!PCMPISTRI XMM2, XMM1, 1100b ; EQUAL_ORDERED ; for ASCII Strings
        !PCMPISTRI XMM2, XMM1, 1101b ; EQUAL_ORDERED + UNSIGNED_WORDS; 11001b
        ; now RCX contains the offset in WORDS where a match was found
       	; Loop while ZF=0 and CF=0:
      	;	1) We find a null in s1(RAX) ZF=1
        ;	2) We find a char that does not match CF=1 
      !JA FindStr_MainLoop
      ; Jump if CF=0, we found only matching chars  
      !JNC FindStr_StrNotFound
      
      ; possible match found at WordOffset in RCX
      !ADD RCX, RCX ; Word to Byte
      !ADD RAX, RCX ; save the possible match start
              
      !MOV [p.v_memRDX], RDX ; mov edi, edx; save RDX
      !MOV [p.v_memRAX], RAX ; mov esi, eax; save RAX
      
      ; ----------------------------------------------------------------------
      ; Compare String, at possible match postion in haystack, with needle
      ; ----------------------------------------------------------------------
      !SUB RDX, RAX
      !SUB RAX, 16  ; counter
      
      !PXOR XMM3, XMM3          ; XMM3 = 0
      
      ; compare the strings
      !FindStr_Compare:
        !ADD RAX, 16  ; Counter
        !MOVDQU XMM1, DQWORD[RAX+RDX] ; Haystack          
        ; mask out invalid bytes in the haystack
       ;!PCMPISTRM XMM3, XMM1, 1011000b   ; EQUAL_EACH + NEGATIVE_POLARITY + BYTE_MASK  ; for ASCII Strings
        !PCMPISTRM XMM3, XMM1, 1011001b   ; EQUAL_EACH + NEGATIVE_POLARITY + BYTE_MASK + UNSIGNED_WORDS
        ; PCMPISTRM writes as result a Mask To XMM0, we used BYTE_MASK
        !MOVDQU XMM4, DQWORD[RAX] ; haystack  
        !PAND XMM4, XMM0
        
       ;!PCMPISTRI XMM1, XMM4, 0011000b ; EQUAL_EACH + NEGATIVE_POLARITY ; for ASCII Strings
        !PCMPISTRI XMM1, XMM4, 0011001b ; EQUAL_EACH + NEGATIVE_POLARITY + UNSIGNED_WORDS
       	; Loop while ZF=0 and CF=0:
      	;	1) We find a null in s1(RDX+RCX) ZF=1      {JA CF=0 & ZF=0} {JE : ZF=1)
        ;	2) We find a char that does not match CF=1 {JC, JNC}
        ; 3) We find a null in s2 SF=1               {JS, JNS}
        ;!JS FindStr_StrNotFound 
      !JA FindStr_Compare ; CF=0 AND ZF=0
      
      !MOV RDX, [p.v_memRDX]
      !MOV RAX, [p.v_memRAX]
      !JNC FindStr_StrFound
      
      ;!SUB RAX, 15  ; for ASCII Strings
      !SUB RAX, 14
      !JMP FindStr_MainLoop
      
      !FindStr_StrNotFound:
        !XOR RAX, RAX
        !JMP FindStr_End
        
      !FindStr_StrFound:
        ; because RAX contains the Pointer we have to calculate the Char-No.
        !SUB RAX, [p.p_String]    ; Sub the Haystack Start-Pointer
        !SHR RAX, 1  ; Byte to Word: not needed for ASCII Strings
        !ADD RAX, 1  ; Add 1 to start with 1 as first Char-No.
      !FindStr_End:
      ProcedureReturn  ; !RAX
    CompilerElse  ; #PB_Compiler_32Bit
      
      Protected memEAX, memEDX
      ; Returns a pointer To the first occurrence of str2 in str1, Or a null pointer If str2 is Not part of str1. 
      ; The matching process does not include the terminating null-characters, but it stops there
      ; RAX = haystack (Heuhaufen), EDX = needle (Nadel)
      
      ; XMM0 XMM1 XMM2 XMM3 XMM4
      ; XMM1 = [String1] : XMM2=[String2]
      
      !MOV EAX, [p.p_String]        ; haystack
      !MOV EDX, [p.p_StringToFind]  ; needle
      !MOVDQU XMM2, DQWORD[EDX] ; load the first 16 bytes of neddle (String to find)
  
     	!SUB EAX, 16		; Avoid extra jump in main loop
         
      ; ----------------------------------------------------------------------
      ; Find the first possible match of 16-byte fragment in haystack
      ; ----------------------------------------------------------------------
      !FindStr_MainLoop:
        !ADD EAX, 16      ; Step up Counter
        !MOVDQU XMM1, DQWORD[EAX]
       ;!PCMPISTRI XMM2, XMM1, 1100b ; EQUAL_ORDERED ; for ASCII Strings
        !PCMPISTRI XMM2, XMM1, 1101b ; EQUAL_ORDERED + UNSIGNED_WORDS; 11001b
        ; now RCX contains the offset in WORDS where a match was found
       	; Loop while ZF=0 and CF=0:
      	;	1) We find a null in s1(EAX) ZF=1
        ;	2) We find a char that does not match CF=1 
      !JA FindStr_MainLoop
      ; Jump if CF=0, we found only matching chars  
      !JNC FindStr_StrNotFound
      
      ; possible match found at WordOffset in ECX
      !ADD ECX, ECX ; Word to Byte
      !ADD EAX, ECX ; save the possible match start
              
      !MOV [p.v_memEDX], EDX ; mov edi, edx; save EDX
      !MOV [p.v_memEAX], EAX ; mov esi, eax; save EAX
      
      ; ----------------------------------------------------------------------
      ; Compare String, at possible match postion in haystack, with needle
      ; ----------------------------------------------------------------------
      !SUB EDX, EAX
      !SUB EAX, 16  ; counter
      
      !PXOR XMM3, XMM3          ; XMM3 = 0
      
      ; compare the strings
      !FindStr_Compare:
        !ADD EAX, 16  ; Counter
        !MOVDQU XMM1, DQWORD[EAX+EDX] ; Haystack          
        ; mask out invalid bytes in the haystack
       ;!PCMPISTRM XMM3, XMM1, 1011000b   ; EQUAL_EACH + NEGATIVE_POLARITY + BYTE_MASK  ; for ASCII Strings
        !PCMPISTRM XMM3, XMM1, 1011001b   ; EQUAL_EACH + NEGATIVE_POLARITY + BYTE_MASK + UNSIGNED_WORDS
        ; PCMPISTRM writes as result a Mask To XMM0, we used BYTE_MASK
        !MOVDQU XMM4, DQWORD[EAX] ; haystack  
        !PAND XMM4, XMM0
        
       ;!PCMPISTRI XMM1, XMM4, 0011000b ; EQUAL_EACH + NEGATIVE_POLARITY ; for ASCII Strings
        !PCMPISTRI XMM1, XMM4, 0011001b ; EQUAL_EACH + NEGATIVE_POLARITY + UNSIGNED_WORDS
       	; Loop while ZF=0 and CF=0:
      	;	1) We find a null in s1(EDX+ECX) ZF=1      {JA CF=0 & ZF=0} {JE : ZF=1)
        ;	2) We find a char that does not match CF=1 {JC, JNC}
        ; 3) We find a null in s2 SF=1               {JS, JNS}
        ;!JS FindStr_StrNotFound 
      !JA FindStr_Compare ; CF=0 AND ZF=0
      
      !MOV EDX, [p.v_memEDX]
      !MOV EAX, [p.v_memEAX]
      !JNC FindStr_StrFound
      
      ;!SUB EAX, 15  ; for ASCII Strings
      !SUB EAX, 14
      !JMP FindStr_MainLoop
      
      !FindStr_StrNotFound:
        !XOR EAX, EAX
        !JMP FindStr_End
        
      !FindStr_StrFound:
        ; because EAX contains the Pointer we have to calculate the Char-No.
        !SUB EAX, [p.p_String]    ; Sub the Haystack Start-Pointer
        !SHR EAX, 1  ; Byte to Word: not needed for ASCII Strings
        !ADD EAX, 1  ; Add 1 to start with 1 as first Char-No.
      !FindStr_End:
     
      ProcedureReturn 
    CompilerEndIf  ; #PB_Compiler_32Bit
    
  CompilerElse    ; C-Backend
    
    ; for now use PB FindString. So it will work on other Platforms too.
    ; maybe provide a C optimized version in the future
    Protected *pStr.String = *String
    Protected *pStrToFind.String = *StringToFind
    
    ProcedureReturn FindString(*pStr\s, *pStrToFind\s)    
  CompilerEndIf    
  
EndProcedure
Define BigSize = 512 ; 512 Char = 1KB
Define Big$=Space(BigSize) 
Define Search$ = "search"
Define LB = Len(Search$)*2
Debug "Len(Big$)= " + Str (Len(Big$))
CopyMemory(@Search$, @Big$ + BigSize-LB-10 , LB)
Debug Search$
Debug "---- Big ----"
Debug Right(Big$, 50)
Debug "-------------"
Define pos
pos = SSE_FindStr(@Big$, @Search$)
Debug "found " + Search$ + " at Character " + pos
; do the timing test only without Debugger
CompilerIf Not #PB_Compiler_Debugger
  #Loops = (1024*1024) * 30  ; 1024*1024 => 1 GB * 30 => 30GB to search
  
  Define I, t1
  
  t1 = ElapsedMilliseconds()
  For I = 1 To #Loops
    pos = SSE_FindStr(@Big$, @Search$)
  Next
  t1 = ElapsedMilliseconds() - t1
  
  Define Msg$
  
  Msg$ = Str(t1) + " " + "ms"
  
  MessageRequester("Timing", Msg$)
CompilerEndIf