It is currently Fri Nov 22, 2019 5:01 pm

All times are UTC + 1 hour




Post new topic Reply to topic  [ 19 posts ]  Go to page 1, 2  Next
Author Message
 Post subject: FindData module
PostPosted: Wed Jul 01, 2015 2:32 pm 
Offline
PureBasic Expert
PureBasic Expert

Joined: Sun Aug 08, 2004 5:21 am
Posts: 3520
Location: Netherlands
Code:
; FindData module by Wilbert

; last update July 28, 2015

DeclareModule FindData
 
  ; all routines return a zero based index or -1 (not found)
 
  Declare TS(*HayStack, HayStackSize, *Needle, NeedleSize.l)          ; Tailed Substring algorithm
  Declare QS(*HayStack, HayStackSize, *Needle, NeedleSize.l)          ; Quick Search algorithm
  Declare BM(*HayStack, HayStackSize, *Needle, NeedleSize.u)          ; Boyer-Moore algorithm
  Declare FastSearch(*HayStack, HayStackSize, *Needle, NeedleSize)    ; Based on the Python fast search algorithm
  Declare SSE2_Find(*Haystack, HaystackSize, *Needle, NeedleSize, Count = #False)
 
EndDeclareModule

Module FindData
 
  EnableASM
  EnableExplicit
  DisableDebugger
 
  CompilerIf #PB_Compiler_Processor = #PB_Processor_x86
    Macro rax : eax : EndMacro
    Macro rbx : ebx : EndMacro   
    Macro rcx : ecx : EndMacro
    Macro rdx : edx : EndMacro
    Macro rsi : esi : EndMacro
    Macro rdi : edi : EndMacro
    Macro rbp : ebp : EndMacro
    Macro rsp : esp : EndMacro
  CompilerEndIf
 
  Macro M_movdqa(arg1, arg2)
    !movdqa arg1, arg2
  EndMacro
 
 
  ; *** Tailed Substring algorithm code ***
 
  Macro M_TS_Phase(phase)
    !finddata.l_ts_#phase#0:
    movzx rax, byte [rsi + rcx]
    !finddata.l_ts_#phase#1:
    cmp al, [rdi + rcx]
    !je finddata.l_ts_#phase#2
    inc rdi
    cmp rdi, rbp
    !jbe finddata.l_ts_#phase#1
    !jmp finddata.l_ts_exit_notfound
    !finddata.l_ts_#phase#2:
    sub rdx, rdx
    !finddata.l_ts_#phase#3:
    movzx rax, byte [rsi + rdx]
    cmp al, [rdi + rdx]
    !jne finddata.l_ts_#phase#4
    inc rdx
    cmp rdx, rbx
    !jne finddata.l_ts_#phase#3
    mov rax, rdi
    sub rax, [p.p_HayStack]
    !jmp finddata.l_ts_exit_found
    !finddata.l_ts_#phase#4:
    CompilerIf phase = 1
      mov rdx, rcx
      dec rdx
      !js finddata.l_ts_#phase#6
      movzx rax, byte [rsi + rcx]
      !finddata.l_ts_#phase#5:
      cmp al, [rsi + rdx]
      !je finddata.l_ts_#phase#6
      dec rdx
      !jns finddata.l_ts_#phase#5
      !finddata.l_ts_#phase#6:
      mov rax, rcx
      sub rax, rdx
      cmp rax, [rsp - 48]             ; if i-h > dim => {k=i; dim=i-h;}
      !jng finddata.l_ts_#phase#7
      mov [rsp - 40], rcx
      mov [rsp - 48], rax
      !finddata.l_ts_#phase#7:
      add rdi, rcx                    ;  s + i
      sub rdi, rdx                    ;  s - h
      dec rcx     
      cmp rcx, [rsp - 48]
      !jb finddata.l_ts_phase2        ; if i < dim => proceed with phase 2
    CompilerElse
      ; phase 2
      add rdi, [rsp - 48]
    CompilerEndIf
    cmp rdi, rbp
    !jbe finddata.l_ts_#phase#0
    !jmp finddata.l_ts_exit_notfound
   
  EndMacro
 
  Procedure TS(*HayStack, HayStackSize, *Needle, NeedleSize.l)
   
    ; backup some registers
    mov [rsp -  8], rbx
    mov [rsp - 16], rsi
    mov [rsp - 24], rdi
    mov [rsp - 32], rbp
   
    ; init some things
    mov ebx, [p.v_NeedleSize]
    mov ecx, ebx
    dec ecx
    !js finddata.l_ts_exit_notfound   ; exit when NeedleSize < 1
    mov rbp, [p.v_HayStackSize]
    sub rbp, rbx
    !jc finddata.l_ts_exit_notfound   ; exit when NeedleSize > HayStackSize
    mov rsi, [p.p_Needle]
    mov rdi, [p.p_HayStack]
    add rbp, rdi
    mov rax, 1
    mov [rsp - 40], rcx               ; k
    mov [rsp - 48], rax               ; dim
   
    ; search
    M_TS_Phase(1)
    !finddata.l_ts_phase2:
    mov rcx, [rsp - 40]
    M_TS_Phase(2)
   
    ; exit
    !finddata.l_ts_exit_notfound: 
    mov rax, -1
    !finddata.l_ts_exit_found:
    mov rbx, [rsp -  8]
    mov rsi, [rsp - 16]
    mov rdi, [rsp - 24]
    mov rbp, [rsp - 32]
    ProcedureReturn
   
  EndProcedure
 
  ; *** End of Tailed Substring algorithm code ***
 
 
  ; *** Code based on Quick Search / Sunday algorithm ***
 
  Macro M_QS_Search(n = 0)
    !finddata.l_qs_search0#n:
    !xor ecx, ecx
    !finddata.l_qs_search1#n:
    movzx eax, byte [rsi + rcx]
    cmp al, [rdi + rcx]
    !je finddata.l_qs_continue#n
    CompilerIf n = 0
      movzx eax, byte [rdi + rbx]
      mov eax, [rsp + rax * 4 - 1024]
      add rdi, rax
      cmp rdi, rdx
      !jb finddata.l_qs_search0#n
      !je finddata.l_qs_finalcheck
    CompilerEndIf
    !jmp finddata.l_qs_exit_notfound
    !finddata.l_qs_continue#n:
    !inc ecx
    !cmp ecx, ebx
    !jb finddata.l_qs_search1#n
    mov rax, rdi
    sub rax, [p.p_HayStack]
    !jmp finddata.l_qs_exit_found     
  EndMacro
 
  Procedure QS(*HayStack, HayStackSize, *Needle, NeedleSize.l)
   
    ; backup some registers
    mov [rsp - 1032], rbx
    mov [rsp - 1040], rsi
    mov [rsp - 1048], rdi
   
    ; perform some checks
    mov ebx, [p.v_NeedleSize]
    cmp ebx, 1
    !jl finddata.l_qs_exit_notfound
    mov rdx, [p.v_HayStackSize]
    sub rdx, rbx
    !jc finddata.l_qs_exit_notfound   ; exit when NeedleSize > HayStackSize
   
    ; prepare
    lea rdi, [rsp - 1024]
    lea eax, [ebx + 1]
    mov ecx, 256
    cld
    rep stosd
    mov rsi, [p.p_Needle]
    mov rdx, rsi
    mov ecx, ebx
    !finddata.l_qs_prep_table:
    movzx eax, byte [rdx]
    mov [rsp + rax * 4 - 1024], ecx
    inc rdx
    dec ecx
    !jnz finddata.l_qs_prep_table
   
    ; search
    mov rdi, [p.p_HayStack]
    mov rdx, [p.v_HayStackSize]
    sub rdx, rbx
    !jz finddata.l_qs_finalcheck      ; jump to finalcheck if NeedleSize = HayStackSize
    add rdx, rdi
    M_QS_Search(0)
    !finddata.l_qs_finalcheck:
    M_QS_Search(1)
   
    ; exit
    !finddata.l_qs_exit_notfound: 
    mov rax, -1
    !finddata.l_qs_exit_found:
    mov rbx, [rsp - 1032]
    mov rsi, [rsp - 1040]
    mov rdi, [rsp - 1048]
    ProcedureReturn
   
  EndProcedure
 
  ; *** End of code based on Quick Search / Sunday algorithm ***
 
 
  ; *** Code based on Boyer-Moore algorithm ***
 
  Macro M_BM(n=0)
    CompilerIf n=0
      add rsi, rcx     
    CompilerElse
      add rsi, 1
    CompilerEndIf
    mov ecx, ebx
    cmp rsi, rdx
    !jna finddata.l_bm_search1
    !jmp finddata.l_bm_exit
  EndMacro
 
  Procedure BM(*HayStack, HayStackSize, *Needle, NeedleSize.u)
   
    ; Boyer-Moore algorithm
    ; based on code from 'schic'
    ; http://www.purebasic.fr/english/viewtopic.php?p=130032#p130032
   
    ; backup some registers
    mov [rsp - 520], rbx
    mov [rsp - 528], rsi
    mov [rsp - 536], rdi
   
    ; perform some checks
    movzx ebx, word [p.v_NeedleSize]   ; exit if NeedleSize = 0
    cmp ebx, 0
    !je finddata.l_bm_exit
    mov rdx, [p.v_HayStackSize]        ; exit if NeedleSize > HayStackSize
    sub rdx, rbx
    !jc finddata.l_bm_exit
   
    ; prepare
    lea rdi, [rsp - 512]
    mov eax, 0xff
    mov ecx, 512
    cld
    rep stosb
    mov rdi, [p.p_Needle]
    mov ecx, ebx
    mov rsi, rdi
    !finddata.l_bm_table0:
    movzx eax, byte [rsi]
    add rsi, 1
    sub ecx, 1
    mov [rsp + rax * 2 - 512], cx
    !jnz finddata.l_bm_table0
    mov rsi, [p.p_HayStack]
    add rdx, rsi
   
    ; search
    mov ecx, ebx
   
    !finddata.l_bm_search1:
    movzx eax, byte [rsi + rcx - 1]
    cmp al, [rdi + rcx - 1]
    !je finddata.l_bm_search4
   
    movzx eax, word [rsp + rax * 2 - 512]
    cmp ax, 0xffff
    !jne finddata.l_bm_search2
    M_BM()
   
    !finddata.l_bm_search2:
    add ecx, eax
    sub ecx, ebx
    !jna finddata.l_bm_search3
    M_BM()
   
    !finddata.l_bm_search3:
    M_BM(1)
   
    !finddata.l_bm_search4:
    sub ecx, 1
    !jnz finddata.l_bm_search1
   
    mov rax, rsi
    sub rax, *HayStack
    !finddata.l_bm_return:
    mov rbx, [rsp - 520]
    mov rsi, [rsp - 528]
    mov rdi, [rsp - 536]
    ProcedureReturn
   
    ; exit not found
    !finddata.l_bm_exit:
    mov rax, -1
    !jmp finddata.l_bm_return
   
  EndProcedure
 
  ; *** End of code based on Boyer-Moore algorithm ***
 
  ; *** Code based on Python fast search algorithm ***
 
  Procedure FastSearch(*HayStack, HayStackSize, *Needle, NeedleSize)
   
    ; Based on the Python fast search algorithm (Python License)
   
    Protected.i reg_bx, reg_si, reg_di
    Protected.i hs_end, skip
   
    ; backup registers
    mov [p.v_reg_bx], rbx
    mov [p.v_reg_si], rsi
    mov [p.v_reg_di], rdi
   
    ; perform some checks
    mov rbx, [p.v_NeedleSize]
    mov rcx, [p.v_HayStackSize]
    cmp rbx, rcx
    !jg finddata.l_fs_exit                ; exit when NeedleSize > HayStackSize
    cmp rbx, 1
    !jg finddata.l_fs_prep
    !jne finddata.l_fs_exit               ; exit when NeedleSize < 1
   
    ; handle needle size 1
    mov rsi, [p.p_HayStack]   
    mov rdi, [p.p_Needle]
    mov rax, rcx
    mov bl, [rdi]
    !finddata.l_fs_sns0:
    cmp bl, [rsi]
    !je finddata.l_fs_sns1
    inc rsi
    dec rcx
    !jnz finddata.l_fs_sns0
    !jmp finddata.l_fs_exit
    !finddata.l_fs_sns1:
    sub rax, rcx
    !jmp finddata.l_fs_return
   
    ; prepare (skip and register rdx mask)
    !finddata.l_fs_prep:
    mov rdi, [p.p_Needle]
    lea rsi, [rbx - 2]
    mov [p.v_skip], rsi
    mov rdx, 0
    movzx ecx, byte [rdi + rsi + 1]
    bts rdx, rcx
    !finddata.l_fs_prep0:
    movzx eax, byte [rdi + rsi]
    bts rdx, rax
    sub rsi, 1
    !js finddata.l_fs_prep2
    cmp eax, ecx
    !jne finddata.l_fs_prep0
    lea rax, [rsi + 1]
    sub [p.v_skip], rax
    !finddata.l_fs_prep1:
    movzx eax, byte [rdi + rsi]
    bts rdx, rax
    sub rsi, 1
    !jns finddata.l_fs_prep1
    !finddata.l_fs_prep2:
   
    ; search
    mov rax, [p.v_HayStackSize]
    sub rax, rbx
    mov rsi, [p.p_HayStack]
    add rax, rsi
    mov [p.v_hs_end], rax
    movzx ecx, byte [rdi + rbx - 1]
    dec rsi
   
    !finddata.l_fs_search0:
    inc rsi
    cmp rsi, [p.v_hs_end]                 ; check for haystack end boundary
    !jae finddata.l_fs_search7
    cmp cl, [rsi + rbx - 1]               ; compare last needle byte
    !je finddata.l_fs_search2             ; equal => continue with search2
    movzx eax, byte [rsi + rbx]
    bt rdx, rax                           ; 'Sunday' check
    !jc finddata.l_fs_search0
    !finddata.l_fs_search1:
    add rsi, rbx
    !jmp finddata.l_fs_search0
   
    !finddata.l_fs_search2:               ; compare first two needle bytes
    movzx eax, word [rdi]
    cmp ax, [rsi]
    !je finddata.l_fs_search4             ; equal => continue with search4
    !finddata.l_fs_search3:
    movzx eax, byte [rsi + rbx]           ; 'Sunday' check
    bt rdx, rax
    !jnc finddata.l_fs_search1
    add rsi, [p.v_skip]                   ; 'Horspool' skip
    !jmp finddata.l_fs_search0
   
    !finddata.l_fs_search4:               ; compare remaining needle bytes
    mov rax, rbx
    sub rax, 3
    !jna finddata.l_fs_search9            ; no more bytes left => found !
    push rcx                              ; keep a copy of rcx
    lea rcx, [rax + 1]
    shr rcx, 1
    !finddata.l_fs_search5:
    movzx eax, word [rdi + rcx * 2]       ; compare two bytes at a time
    cmp ax, [rsi + rcx * 2]
    !jne finddata.l_fs_search6            ; not equal => restore rcx and back to search3
    dec rcx
    !jnz finddata.l_fs_search5            ; no more bytes left ?
    pop rcx                               ; restore rcx
    !jmp finddata.l_fs_search9            ; => found !
    !finddata.l_fs_search6:
    pop rcx
    !jmp finddata.l_fs_search3
   
    !finddata.l_fs_search7:               ; code to handle haystack exactly on end boundary
    !ja finddata.l_fs_exit                ; boundary exceeded => exit
    !finddata.l_fs_search8:               ; final check
    movzx eax, byte [rdi + rbx - 1]
    cmp al, [rsi + rbx - 1]   
    !jne finddata.l_fs_exit
    dec rbx
    !jnz finddata.l_fs_search8
    !finddata.l_fs_search9:               ; return result
    mov rax, rsi
    sub rax, [p.p_HayStack]   
   
    ; restore registers & return result
    !finddata.l_fs_return:
    mov rbx, [p.v_reg_bx]
    mov rsi, [p.v_reg_si]
    mov rdi, [p.v_reg_di]
    ProcedureReturn
   
    ; exit not found
    !finddata.l_fs_exit:
    mov rax, -1
    !jmp finddata.l_fs_return
   
  EndProcedure
 
  ; *** End of code based on Python fast search algorithm ***
 
  Procedure SSE2_Find(*Haystack, HaystackSize, *Needle, NeedleSize, Count = #False)
   
    ; backup some registers
    mov [rsp -  8], rbx
    mov [rsp - 16], rsi
    mov [rsp - 24], rdi
   
    ; init count
    mov rax, [p.v_Count]
    sub rax, 1
    sbb rax, rax
    mov [rsp - 40], rax
   
    ; perform some checks
    mov rax, [p.v_HaystackSize]
    mov rbx, [p.v_NeedleSize]
    sub rax, rbx
    !jc finddata.l_sse2_search8     ; exit if NeedleSize > HaystackSize
    add rax, [p.p_Haystack] 
    mov [rsp - 32], rax             ; rsp - 32 = *SearchEnd
    cmp rbx, 1
    !jl finddata.l_sse2_search8     ; exit if NeedleSize < 1

    ; load first two needle bytes
    !pcmpeqb xmm4, xmm4
    mov rdi, [p.p_Needle]
    movzx eax, byte [rdi]
    !je finddata.l_sse2_search0
    mov ah, [rdi + 1]
    !pslldq xmm4, 15
    !finddata.l_sse2_search0:
    !movd xmm2, eax
    !punpcklbw xmm2, xmm2
    !punpcklwd xmm2, xmm2
    !pshufd xmm3, xmm2, 01010101b   ; xmm3 = 16 times second needle byte
    !pshufd xmm2, xmm2, 0           ; xmm2 = 16 times first needle byte
   
    ; start search
    mov rsi, [p.p_Haystack]
    mov rcx, rsi
    shr rsi, 4                      ; align Haystack to 16 bytes
    shl rsi, 4
    sub rcx, rsi
    M_movdqa(xmm0, [rsi])           ; handle first 16 bytes
    !movdqa xmm1, xmm0
    !pcmpeqb xmm0, xmm2             ; compare against first needle byte
    !pmovmskb eax, xmm0
    !shr eax, cl                    ; shift off unwanted bytes
    !shl eax, cl
    !test eax, eax
    !jnz finddata.l_sse2_search2
   
    ; main search loop
    !finddata.l_sse2_search1:
    add rsi, 16                     ; next 16 bytes
    cmp rsi, [rsp - 32]
    !ja finddata.l_sse2_search8
    M_movdqa(xmm0, [rsi])
    !movdqa xmm1, xmm0
    !pcmpeqb xmm0, xmm2             ; compare against first needle byte
    !pmovmskb eax, xmm0
    !test eax, eax
    !jz finddata.l_sse2_search1     ; no match ? => search1
    !finddata.l_sse2_search2:
    !pcmpeqb xmm1, xmm3             ; compare against second needle byte
    !psrldq xmm1, 1
    !por xmm1, xmm4
    !pmovmskb ecx, xmm1
    !and eax, ecx                   ; combine both searches
    !jz finddata.l_sse2_search1     ; no match ? => search1
   
    ; compare rest of bytes
    !finddata.l_sse2_search3:
    !bsf ecx, eax                   ; get index of first match
    !jz finddata.l_sse2_search1
    !btr eax, ecx
    lea rdx, [rsi + rcx]            ; create a pointer to it
    cmp rdx, [rsp - 32]
    mov rcx, [p.v_NeedleSize]
    !ja finddata.l_sse2_search8
    sub rcx, 2
    !jb finddata.l_sse2_search5     ; NeedleSize < 2 ? => search5 (already done)
   
    !finddata.l_sse2_search4:
    movzx ebx, word [rdx + rcx]     ; compare rest of needle right-to-left
    cmp bx, [rdi + rcx]             ; two bytes at a time
    !jne finddata.l_sse2_search3
    sub rcx, 2
    !jae finddata.l_sse2_search4
   
    !finddata.l_sse2_search5:
    mov rbx, [rsp - 40]
    cmp rbx, -1
    !je finddata.l_sse2_search6
    add rbx, 1                      ; increase count
    mov [rsp - 40], rbx
    !jmp finddata.l_sse2_search3
    !finddata.l_sse2_search6:
    mov rax, rdx                    ; return result
    sub rax, [p.p_Haystack]
    !finddata.l_sse2_search7:
    mov rbx, [rsp -  8]
    mov rsi, [rsp - 16]
    mov rdi, [rsp - 24] 
    ProcedureReturn
   
    ; not found / return count
    !finddata.l_sse2_search8:
    mov rax, [rsp - 40]
    !jmp finddata.l_sse2_search7
   
  EndProcedure   
 
EndModule

_________________
macOS 10.15 Catalina, PB 5.71 x64


Last edited by wilbert on Tue Jul 28, 2015 6:54 am, edited 6 times in total.

Top
 Profile  
Reply with quote  
 Post subject: Re: FindData module
PostPosted: Wed Jul 01, 2015 2:33 pm 
Offline
PureBasic Expert
PureBasic Expert

Joined: Sun Aug 08, 2004 5:21 am
Posts: 3520
Location: Netherlands
Speed test
Code:
DisableDebugger

HayStackSize = 65536
NeedleSize = 64

*HayStack = AllocateMemory(HayStackSize)
*Needle = AllocateMemory(NeedleSize)

RandomSeed(0)
RandomData(*HayStack, HayStackSize)

CopyMemory(*HayStack + HayStackSize - NeedleSize - 32, *Needle, NeedleSize)

t0 = ElapsedMilliseconds()
For i = 1 To 20000
  x = FindData::TS(*HayStack, HayStackSize, *Needle, NeedleSize)
Next
t1 = ElapsedMilliseconds()
For i = 1 To 20000
  x = FindData::QS(*HayStack, HayStackSize, *Needle, NeedleSize)
Next
t2 = ElapsedMilliseconds()
For i = 1 To 20000
  x = FindData::BM(*HayStack, HayStackSize, *Needle, NeedleSize)
Next
t3 = ElapsedMilliseconds()
For i = 1 To 20000
  x = FindData::FastSearch(*HayStack, HayStackSize, *Needle, NeedleSize)
Next
t4 = ElapsedMilliseconds()
For i = 1 To 20000
  x = FindData::SSE2_Find(*HayStack, HayStackSize, *Needle, NeedleSize)
Next
t5 = ElapsedMilliseconds()

CompilerIf #PB_Compiler_OS = #PB_OS_Windows
  S.s = "Windows "
CompilerElseIf #PB_Compiler_OS = #PB_OS_MacOS
  S.s = "OSX "
CompilerElse
  S.s = "Linux "
CompilerEndIf

CompilerIf #PB_Compiler_Processor = #PB_Processor_x86
  S + "(x86) "
CompilerElse
  S + "(x64) "
CompilerEndIf

S + #CRLF$ + CPUName() + #CRLF$ + #CRLF$

S + "Test 1" + #CRLF$
S + "----------" + #CRLF$
S + "Tailed Substring : " + Str(t1-t0) + "ms" + #CRLF$
S + "QuickSearch : " + Str(t2-t1) + "ms" + #CRLF$
S + "Boyer-Moore : " + Str(t3-t2) + "ms" + #CRLF$
S + "FastSearch : " + Str(t4-t3) + "ms" + #CRLF$
S + "SSE2 Find : " + Str(t5-t4) + "ms" + #CRLF$

PokeS(*HayStack, "A small test with a small haystack and short needle. This will perform differently because some algorithms take some time to initialize", -1, #PB_Ascii)
PokeS(*Needle, "initial", -1, #PB_Ascii)
HayStackSize2 = MemoryStringLength(*HayStack, #PB_Ascii)
NeedleSize2 = MemoryStringLength(*Needle, #PB_Ascii)


t0 = ElapsedMilliseconds()
For i = 1 To 1500000
  x = FindData::TS(*HayStack, HayStackSize2, *Needle, NeedleSize2)
Next
t1 = ElapsedMilliseconds()
For i = 1 To 1500000
  x = FindData::QS(*HayStack, HayStackSize2, *Needle, NeedleSize2)
Next
t2 = ElapsedMilliseconds()
For i = 1 To 1500000
  x = FindData::BM(*HayStack, HayStackSize2, *Needle, NeedleSize2)
Next
t3 = ElapsedMilliseconds()
For i = 1 To 1500000
  x = FindData::FastSearch(*HayStack, HayStackSize2, *Needle, NeedleSize2)
Next
t4 = ElapsedMilliseconds()
For i = 1 To 1500000
  x = FindData::SSE2_Find(*HayStack, HayStackSize2, *Needle, NeedleSize2)
Next
t5 = ElapsedMilliseconds()

S + #CRLF$
S + "Test 2" + #CRLF$
S + "----------" + #CRLF$
S + "Tailed Substring : " + Str(t1-t0) + "ms" + #CRLF$
S + "QuickSearch : " + Str(t2-t1) + "ms" + #CRLF$
S + "Boyer-Moore : " + Str(t3-t2) + "ms" + #CRLF$
S + "FastSearch : " + Str(t4-t3) + "ms" + #CRLF$
S + "SSE2 Find : " + Str(t5-t4) + "ms" + #CRLF$

SetClipboardText(S)
MessageRequester("Test results", S)


My results :
Quote:
OSX (x64)
Intel(R) Core(TM) i5-4570R CPU @ 2.70GHz

Test 1
----------
Tailed Substring : 416ms
QuickSearch : 103ms
Boyer-Moore : 54ms
FastSearch : 204ms
SSE2 Find : 71ms

Test 2
----------
Tailed Substring : 99ms
QuickSearch : 146ms
Boyer-Moore : 86ms
FastSearch : 57ms
SSE2 Find : 34ms

_________________
macOS 10.15 Catalina, PB 5.71 x64


Last edited by wilbert on Wed Jul 29, 2015 5:28 am, edited 3 times in total.

Top
 Profile  
Reply with quote  
 Post subject: Re: FindData module
PostPosted: Wed Jul 01, 2015 3:08 pm 
Offline
Addict
Addict
User avatar

Joined: Tue Nov 09, 2010 10:15 pm
Posts: 1558
Thanks for sharing.

Just curious, did you learn PB first, or asm?


Top
 Profile  
Reply with quote  
 Post subject: Re: FindData module
PostPosted: Wed Jul 01, 2015 3:22 pm 
Offline
PureBasic Expert
PureBasic Expert

Joined: Sun Aug 08, 2004 5:21 am
Posts: 3520
Location: Netherlands
Tenaja wrote:
Thanks for sharing.

Just curious, did you learn PB first, or asm?

My history with asm starts with the Zilog Z80 processor from the early 90's :shock:
It has a bit of similar opcodes but when it comes to Intel x86 asm, I learned it along with PB.
It's very nice to have the ability to embed asm in a Basic source :)

_________________
macOS 10.15 Catalina, PB 5.71 x64


Top
 Profile  
Reply with quote  
 Post subject: Re: FindData module
PostPosted: Wed Jul 01, 2015 7:51 pm 
Offline
Addict
Addict

Joined: Fri Nov 09, 2012 11:04 pm
Posts: 1696
Location: Uttoxeter, UK
@wilbert,
Thank you for sharing.
Excellent. :D

_________________
DE AA EB


Top
 Profile  
Reply with quote  
 Post subject: Re: FindData module
PostPosted: Wed Jul 01, 2015 9:40 pm 
Offline
Addict
Addict
User avatar

Joined: Fri Sep 21, 2007 5:52 am
Posts: 3408
Location: New Zealand
nice thanks for sharing


Top
 Profile  
Reply with quote  
 Post subject: Re: FindData module
PostPosted: Fri Jul 03, 2015 9:59 am 
Offline
Addict
Addict
User avatar

Joined: Sun Nov 05, 2006 11:42 pm
Posts: 4528
Location: Lyon - France
Quote:
It's very nice to have the ability to embed asm in a Basic source
yes... you have right
Basic most simple language and ASM most powerfull language in the same house ??
It's the lion and the mouse, the fire and water, the T-rex and kermit the frog ...
Image
Who can believe that :shock:
Pb is like amazing shoes
All feets can use it together...the big like you, but also the small like me
Thanks to him the best can meet the beginner in the same place, i'm not sure It's a chance for the masters you can say to me :oops:
But obviously, it's an incredible chance for the beginner.

In fact...like pb united teachers and student..It's a very good school
Pb should be recognized public utility 8)

Thanks another time for your nice hieroglyphes sharing :wink:

_________________
ImageThe happiness is a road...
Not a destination


Top
 Profile  
Reply with quote  
 Post subject: Re: FindData module
PostPosted: Wed Jul 08, 2015 8:25 pm 
Offline
Addict
Addict
User avatar

Joined: Thu Jun 04, 2015 7:10 am
Posts: 1673
Excellent wilbert!! and nice and clean implmentations for 32 and 64 bits :)) thankyou very much for sharing your work

_________________
Thankyou to all the coders who generously helped & encouraged me in the nearly 2yrs when i was welcome here,
it was a tremendous privilege. I learned a lot. I wish you and your families all the best and success for the future.


Top
 Profile  
Reply with quote  
 Post subject: Re: FindData module
PostPosted: Fri Aug 28, 2015 9:34 am 
Offline
Addict
Addict
User avatar

Joined: Thu Jun 04, 2015 7:10 am
Posts: 1673
Wilbert today i realized i only had one string to search for and it was exactly 64bits, so I wondered if i could take advantage of 1) 32bit register search (cmp eax, not cmp al!), and 2) hard-coding the needle, as opposed to byte-by-byte searching with variable needle.

I have npt tried x64 as i just wanted to see how it would perform on x86 first, but mine takes ~5 seconds to find the 64bit string compared to your Boyer-Moore's ~4 seconds, even with the string hard-coded! :(

For example to find 64bit string "Micr"+"osof" in 32bit registers...
Code:
Procedure.l Find64(*Haystack, HaystackSize.l)
  EnableASM
  ! MOV ecx, [esp+8]     ;[p.v_HaystackSize]
  ! MOV eax, [esp+4]     ;[p.p_Haystack]
  nextbyte:
   
  ! CMP dword [eax], $7263694D      ;4D696372="Micr"           
  ! JE ll_find64_trydword2
 
  ! INC eax
  ! DEC ecx
  ! JNZ ll_find64_nextbyte
  ! JMP ll_find64_notfound64
 
  trydword2:
  ! CMP dword [eax+4], $666F736F    ;6F736F66="osof"
 
  ! JE ll_find64_found64
  ! INC eax
  ! DEC ecx
  ! JMP ll_find64_nextbyte
 
  found64:
  ! MOV eax, [esp+8]
  ! SUB eax, ecx
  ! ret 8
 
  notfound64: 
  ! MOV eax, -1
  ! ret 8
 DisableASM 
EndProcedure

So perhaps even for 64bit searches using registers the algorithms like Boyer-Moore are still faster? im not sure. I guess it does have to access every byte 4 times. I find it interesting anyway! :)

_________________
Thankyou to all the coders who generously helped & encouraged me in the nearly 2yrs when i was welcome here,
it was a tremendous privilege. I learned a lot. I wish you and your families all the best and success for the future.


Top
 Profile  
Reply with quote  
 Post subject: Re: FindData module
PostPosted: Fri Aug 28, 2015 10:04 am 
Offline
PureBasic Expert
PureBasic Expert

Joined: Sun Aug 08, 2004 5:21 am
Posts: 3520
Location: Netherlands
Keya wrote:
So perhaps even for 64bit searches using registers the algorithms like Boyer-Moore are still faster? im not sure. I guess it does have to access every byte 4 times. I find it interesting anyway! :)

In this case using 64 bit registers will probably not make a big difference. The main issue is that the memory location you are examining is only increased by 1 byte at a time.
The advantage of algorithms like Boyer-Moore is that they can skip a lot of bytes so I'm not really surprised to see that BM was faster.

For small needles like in this case only 64 bits, you might want to give the SSE2_Find procedure a try if you don't mind using SSE2.
From the procedures I added to my module it's the only one with a realistic chance of beating BM in this case.

If you have a very specific needle you always need to search for and know what kind of haystack can be expected you could choose a search algorithm based on that which performs best for that particular case.

_________________
macOS 10.15 Catalina, PB 5.71 x64


Top
 Profile  
Reply with quote  
 Post subject: Re: FindData module
PostPosted: Fri Aug 28, 2015 10:57 am 
Offline
Addict
Addict
User avatar

Joined: Thu Jun 04, 2015 7:10 am
Posts: 1673
I get very similar times searching for 64bit needle with BM and SSE2, maybe SSE2 is about 1% faster on average. I guess it makes sense that those are going to be faster than my full register checks when they're skipping bytes and i'm checking each one 4 times :oops: but it was a fun experiment :)

_________________
Thankyou to all the coders who generously helped & encouraged me in the nearly 2yrs when i was welcome here,
it was a tremendous privilege. I learned a lot. I wish you and your families all the best and success for the future.


Top
 Profile  
Reply with quote  
 Post subject: Re: FindData module
PostPosted: Tue Jul 24, 2018 12:40 pm 
Offline
PureBasic Expert
PureBasic Expert

Joined: Sun Aug 08, 2004 5:21 am
Posts: 3520
Location: Netherlands
Idle mentioned it would be nice to have support for an initial search position.
Here's an updated version which has.

Code:
; FindData module by Wilbert

; last update July 24, 2018

DeclareModule FindData
 
  ; all routines return a zero based index or -1 (not found)
 
  Declare TS(*HayStack, HayStackSize, *Needle, NeedleSize.l, Pos=0)       ; Tailed Substring algorithm
  Declare QS(*HayStack, HayStackSize, *Needle, NeedleSize.l, Pos=0)       ; Quick Search algorithm
  Declare BM(*HayStack, HayStackSize, *Needle, NeedleSize.u, Pos=0)       ; Boyer-Moore algorithm
  Declare FastSearch(*HayStack, HayStackSize, *Needle, NeedleSize, Pos=0) ; Based on the Python fast search algorithm
  Declare SSE2_Find(*HayStack, HayStackSize, *Needle, NeedleSize, Pos=0, Count=#False)
 
EndDeclareModule

Module FindData
 
  EnableASM
  EnableExplicit
  DisableDebugger
 
  CompilerIf #PB_Compiler_Processor = #PB_Processor_x86
    Macro rax : eax : EndMacro
    Macro rbx : ebx : EndMacro   
    Macro rcx : ecx : EndMacro
    Macro rdx : edx : EndMacro
    Macro rsi : esi : EndMacro
    Macro rdi : edi : EndMacro
    Macro rbp : ebp : EndMacro
    Macro rsp : esp : EndMacro
  CompilerEndIf
 
  Macro M_movdqa(arg1, arg2)
    !movdqa arg1, arg2
  EndMacro
 
 
  ; *** Tailed Substring algorithm code ***
 
  Macro M_TS_Phase(phase)
    !.l_ts_#phase#0:
    movzx rax, byte [rsi + rcx]
    !.l_ts_#phase#1:
    cmp al, [rdi + rcx]
    !je .l_ts_#phase#2
    inc rdi
    cmp rdi, rbp
    !jbe .l_ts_#phase#1
    !jmp .l_ts_exit_notfound
    !.l_ts_#phase#2:
    sub rdx, rdx
    !.l_ts_#phase#3:
    movzx rax, byte [rsi + rdx]
    cmp al, [rdi + rdx]
    !jne .l_ts_#phase#4
    inc rdx
    cmp rdx, rbx
    !jne .l_ts_#phase#3
    mov rax, rdi
    sub rax, [p.p_HayStack]
    add rax, [p.v_Pos]   
    !jmp .l_ts_exit_found
    !.l_ts_#phase#4:
    CompilerIf phase = 1
      mov rdx, rcx
      dec rdx
      !js .l_ts_#phase#6
      movzx rax, byte [rsi + rcx]
      !.l_ts_#phase#5:
      cmp al, [rsi + rdx]
      !je .l_ts_#phase#6
      dec rdx
      !jns .l_ts_#phase#5
      !.l_ts_#phase#6:
      mov rax, rcx
      sub rax, rdx
      cmp rax, [rsp - 48]             ; if i-h > dim => {k=i; dim=i-h;}
      !jng .l_ts_#phase#7
      mov [rsp - 40], rcx
      mov [rsp - 48], rax
      !.l_ts_#phase#7:
      add rdi, rcx                    ;  s + i
      sub rdi, rdx                    ;  s - h
      dec rcx     
      cmp rcx, [rsp - 48]
      !jb .l_ts_phase2                ; if i < dim => proceed with phase 2
    CompilerElse
      ; phase 2
      add rdi, [rsp - 48]
    CompilerEndIf
    cmp rdi, rbp
    !jbe .l_ts_#phase#0
    !jmp .l_ts_exit_notfound
   
  EndMacro
 
  Procedure TS(*HayStack, HayStackSize, *Needle, NeedleSize.l, Pos=0)
   
    ; backup some registers
    mov [rsp -  8], rbx
    mov [rsp - 16], rsi
    mov [rsp - 24], rdi
    mov [rsp - 32], rbp
   
    ; init some things
    mov rcx, [p.v_Pos]
    sub [p.v_HayStackSize], rcx
    !jbe .l_ts_exit_notfound          ; exit when HayStackSize <= Pos
    add [p.p_HayStack], rcx
    mov ebx, [p.v_NeedleSize]
    mov ecx, ebx
    dec ecx
    !js .l_ts_exit_notfound           ; exit when NeedleSize < 1
    mov rbp, [p.v_HayStackSize]
    sub rbp, rbx
    !jc .l_ts_exit_notfound           ; exit when NeedleSize > HayStackSize
   
    mov rsi, [p.p_Needle]
    mov rdi, [p.p_HayStack]
    add rbp, rdi
    mov rax, 1
    mov [rsp - 40], rcx               ; k
    mov [rsp - 48], rax               ; dim
   
    ; search
    M_TS_Phase(1)
    !.l_ts_phase2:
    mov rcx, [rsp - 40]
    M_TS_Phase(2)
   
    ; exit
    !.l_ts_exit_notfound: 
    mov rax, -1
    !.l_ts_exit_found:
    mov rbx, [rsp -  8]
    mov rsi, [rsp - 16]
    mov rdi, [rsp - 24]
    mov rbp, [rsp - 32]
    ProcedureReturn
   
  EndProcedure
 
  ; *** End of Tailed Substring algorithm code ***
 
 
  ; *** Code based on Quick Search / Sunday algorithm ***
 
  Macro M_QS_Search(n = 0)
    !.l_qs_search0#n:
    !xor ecx, ecx
    !.l_qs_search1#n:
    movzx eax, byte [rsi + rcx]
    cmp al, [rdi + rcx]
    !je .l_qs_continue#n
    CompilerIf n = 0
      movzx eax, byte [rdi + rbx]
      mov eax, [rsp + rax * 4 - 1024]
      add rdi, rax
      cmp rdi, rdx
      !jb .l_qs_search0#n
      !je .l_qs_finalcheck
    CompilerEndIf
    !jmp .l_qs_exit_notfound
    !.l_qs_continue#n:
    !inc ecx
    !cmp ecx, ebx
    !jb .l_qs_search1#n
    mov rax, rdi
    sub rax, [p.p_HayStack]
    add rax, [p.v_Pos]   
    !jmp .l_qs_exit_found     
  EndMacro
 
  Procedure QS(*HayStack, HayStackSize, *Needle, NeedleSize.l, Pos=0)
   
    ; backup some registers
    mov [rsp - 1032], rbx
    mov [rsp - 1040], rsi
    mov [rsp - 1048], rdi
   
    ; perform some checks
    mov rcx, [p.v_Pos]
    sub [p.v_HayStackSize], rcx
    !jbe .l_qs_exit_notfound          ; exit when HayStackSize <= Pos
    add [p.p_HayStack], rcx   
    mov ebx, [p.v_NeedleSize]
    cmp ebx, 1
    !jl .l_qs_exit_notfound
    mov rdx, [p.v_HayStackSize]
    sub rdx, rbx
    !jc .l_qs_exit_notfound           ; exit when NeedleSize > HayStackSize
   
    ; prepare
    lea rdi, [rsp - 1024]
    lea eax, [ebx + 1]
    mov ecx, 256
    cld
    rep stosd
    mov rsi, [p.p_Needle]
    mov rdx, rsi
    mov ecx, ebx
    !.l_qs_prep_table:
    movzx eax, byte [rdx]
    mov [rsp + rax * 4 - 1024], ecx
    inc rdx
    dec ecx
    !jnz .l_qs_prep_table
   
    ; search
    mov rdi, [p.p_HayStack]
    mov rdx, [p.v_HayStackSize]
    sub rdx, rbx
    !jz .l_qs_finalcheck              ; jump to finalcheck if NeedleSize = HayStackSize
    add rdx, rdi
    M_QS_Search(0)
    !.l_qs_finalcheck:
    M_QS_Search(1)
   
    ; exit
    !.l_qs_exit_notfound: 
    mov rax, -1
    !.l_qs_exit_found:
    mov rbx, [rsp - 1032]
    mov rsi, [rsp - 1040]
    mov rdi, [rsp - 1048]
    ProcedureReturn
   
  EndProcedure
 
  ; *** End of code based on Quick Search / Sunday algorithm ***
 
 
  ; *** Code based on Boyer-Moore algorithm ***
 
  Macro M_BM(n=0)
    CompilerIf n=0
      add rsi, rcx     
    CompilerElse
      add rsi, 1
    CompilerEndIf
    mov ecx, ebx
    cmp rsi, rdx
    !jna .l_bm_search1
    !jmp .l_bm_exit
  EndMacro
 
  Procedure BM(*HayStack, HayStackSize, *Needle, NeedleSize.u, Pos=0)
   
    ; Boyer-Moore algorithm
    ; based on code from 'schic'
    ; http://www.purebasic.fr/english/viewtopic.php?p=130032#p130032
   
    ; backup some registers
    mov [rsp - 520], rbx
    mov [rsp - 528], rsi
    mov [rsp - 536], rdi
   
    ; perform some checks
    mov rcx, [p.v_Pos]
    sub [p.v_HayStackSize], rcx
    !jbe .l_bm_exit            ; exit when HayStackSize <= Pos
    add [p.p_HayStack], rcx   
    movzx ebx, word [p.v_NeedleSize]   ; exit if NeedleSize = 0
    cmp ebx, 0
    !je .l_bm_exit
    mov rdx, [p.v_HayStackSize]        ; exit if NeedleSize > HayStackSize
    sub rdx, rbx
    !jc .l_bm_exit
   
    ; prepare
    lea rdi, [rsp - 512]
    mov eax, 0xff
    mov ecx, 512
    cld
    rep stosb
    mov rdi, [p.p_Needle]
    mov ecx, ebx
    mov rsi, rdi
    !.l_bm_table0:
    movzx eax, byte [rsi]
    add rsi, 1
    sub ecx, 1
    mov [rsp + rax * 2 - 512], cx
    !jnz .l_bm_table0
    mov rsi, [p.p_HayStack]
    add rdx, rsi
   
    ; search
    mov ecx, ebx
   
    !.l_bm_search1:
    movzx eax, byte [rsi + rcx - 1]
    cmp al, [rdi + rcx - 1]
    !je .l_bm_search4
   
    movzx eax, word [rsp + rax * 2 - 512]
    cmp ax, 0xffff
    !jne .l_bm_search2
    M_BM()
   
    !.l_bm_search2:
    add ecx, eax
    sub ecx, ebx
    !jna .l_bm_search3
    M_BM()
   
    !.l_bm_search3:
    M_BM(1)
   
    !.l_bm_search4:
    sub ecx, 1
    !jnz .l_bm_search1
   
    mov rax, rsi
    sub rax, [p.p_HayStack]
    add rax, [p.v_Pos]
    !.l_bm_return:
    mov rbx, [rsp - 520]
    mov rsi, [rsp - 528]
    mov rdi, [rsp - 536]
    ProcedureReturn
   
    ; exit not found
    !.l_bm_exit:
    mov rax, -1
    !jmp .l_bm_return
   
  EndProcedure
 
  ; *** End of code based on Boyer-Moore algorithm ***
 
  ; *** Code based on Python fast search algorithm ***
 
  Procedure FastSearch(*HayStack, HayStackSize, *Needle, NeedleSize, Pos=0)
   
    ; Based on the Python fast search algorithm (Python License)
   
    Protected.i reg_bx, reg_si, reg_di
    Protected.i hs_end, skip
   
    ; backup registers
    mov [p.v_reg_bx], rbx
    mov [p.v_reg_si], rsi
    mov [p.v_reg_di], rdi
   
    ; perform some checks
    mov rcx, [p.v_Pos]
    sub [p.v_HayStackSize], rcx
    !jbe .l_fs_exit                       ; exit when HayStackSize <= Pos
    add [p.p_HayStack], rcx   
    mov rbx, [p.v_NeedleSize]
    mov rcx, [p.v_HayStackSize]
    cmp rbx, rcx
    !jg .l_fs_exit                        ; exit when NeedleSize > HayStackSize
    cmp rbx, 1
    !jg .l_fs_prep
    !jne .l_fs_exit                       ; exit when NeedleSize < 1
   
    ; handle needle size 1
    mov rsi, [p.p_HayStack]   
    mov rdi, [p.p_Needle]
    mov rax, rcx
    mov bl, [rdi]
    !.l_fs_sns0:
    cmp bl, [rsi]
    !je .l_fs_sns1
    inc rsi
    dec rcx
    !jnz .l_fs_sns0
    !jmp .l_fs_exit
    !.l_fs_sns1:
    sub rax, rcx
    add rax, [p.v_Pos]   
    !jmp .l_fs_return
   
    ; prepare (skip and register rdx mask)
    !.l_fs_prep:
    mov rdi, [p.p_Needle]
    lea rsi, [rbx - 2]
    mov [p.v_skip], rsi
    mov rdx, 0
    movzx ecx, byte [rdi + rsi + 1]
    bts rdx, rcx
    !.l_fs_prep0:
    movzx eax, byte [rdi + rsi]
    bts rdx, rax
    sub rsi, 1
    !js .l_fs_prep2
    cmp eax, ecx
    !jne .l_fs_prep0
    lea rax, [rsi + 1]
    sub [p.v_skip], rax
    !.l_fs_prep1:
    movzx eax, byte [rdi + rsi]
    bts rdx, rax
    sub rsi, 1
    !jns .l_fs_prep1
    !.l_fs_prep2:
   
    ; search
    mov rax, [p.v_HayStackSize]
    sub rax, rbx
    mov rsi, [p.p_HayStack]
    add rax, rsi
    mov [p.v_hs_end], rax
    movzx ecx, byte [rdi + rbx - 1]
    dec rsi
   
    !.l_fs_search0:
    inc rsi
    cmp rsi, [p.v_hs_end]                 ; check for haystack end boundary
    !jae .l_fs_search7
    cmp cl, [rsi + rbx - 1]               ; compare last needle byte
    !je .l_fs_search2                     ; equal => continue with search2
    movzx eax, byte [rsi + rbx]
    bt rdx, rax                           ; 'Sunday' check
    !jc .l_fs_search0
    !.l_fs_search1:
    add rsi, rbx
    !jmp .l_fs_search0
   
    !.l_fs_search2:                       ; compare first two needle bytes
    movzx eax, word [rdi]
    cmp ax, [rsi]
    !je .l_fs_search4                     ; equal => continue with search4
    !.l_fs_search3:
    movzx eax, byte [rsi + rbx]           ; 'Sunday' check
    bt rdx, rax
    !jnc .l_fs_search1
    add rsi, [p.v_skip]                   ; 'Horspool' skip
    !jmp .l_fs_search0
   
    !.l_fs_search4:                       ; compare remaining needle bytes
    mov rax, rbx
    sub rax, 3
    !jna .l_fs_search9                    ; no more bytes left => found !
    push rcx                              ; keep a copy of rcx
    lea rcx, [rax + 1]
    shr rcx, 1
    !.l_fs_search5:
    movzx eax, word [rdi + rcx * 2]       ; compare two bytes at a time
    cmp ax, [rsi + rcx * 2]
    !jne .l_fs_search6                    ; not equal => restore rcx and back to search3
    dec rcx
    !jnz .l_fs_search5                    ; no more bytes left ?
    pop rcx                               ; restore rcx
    !jmp .l_fs_search9                    ; => found !
    !.l_fs_search6:
    pop rcx
    !jmp .l_fs_search3
   
    !.l_fs_search7:                       ; code to handle haystack exactly on end boundary
    !ja .l_fs_exit                        ; boundary exceeded => exit
    !.l_fs_search8:                       ; final check
    movzx eax, byte [rdi + rbx - 1]
    cmp al, [rsi + rbx - 1]   
    !jne .l_fs_exit
    dec rbx
    !jnz .l_fs_search8
    !.l_fs_search9:                       ; return result
    mov rax, rsi
    sub rax, [p.p_HayStack]   
    add rax, [p.v_Pos]
   
    ; restore registers & return result
    !.l_fs_return:
    mov rbx, [p.v_reg_bx]
    mov rsi, [p.v_reg_si]
    mov rdi, [p.v_reg_di]
    ProcedureReturn
   
    ; exit not found
    !.l_fs_exit:
    mov rax, -1
    !jmp .l_fs_return
   
  EndProcedure
 
  ; *** End of code based on Python fast search algorithm ***
 
  Procedure SSE2_Find(*HayStack, HayStackSize, *Needle, NeedleSize, Pos=0, Count=#False)
   
    ; backup some registers
    mov [rsp -  8], rbx
    mov [rsp - 16], rsi
    mov [rsp - 24], rdi
   
    ; init count
    mov rax, [p.v_Count]
    sub rax, 1
    sbb rax, rax
    mov [rsp - 40], rax
   
    ; perform some checks
    mov rcx, [p.v_Pos]
    sub [p.v_HayStackSize], rcx
    !jbe .l_sse2_search8            ; exit when HayStackSize <= Pos
    add [p.p_HayStack], rcx   
    mov rax, [p.v_HayStackSize]
    mov rbx, [p.v_NeedleSize]
    sub rax, rbx
    !jc .l_sse2_search8             ; exit if NeedleSize > HaystackSize
    add rax, [p.p_HayStack] 
    mov [rsp - 32], rax             ; rsp - 32 = *SearchEnd
    cmp rbx, 1
    !jl .l_sse2_search8             ; exit if NeedleSize < 1

    ; load first two needle bytes
    !pcmpeqb xmm4, xmm4
    mov rdi, [p.p_Needle]
    movzx eax, byte [rdi]
    !je .l_sse2_search0
    mov ah, [rdi + 1]
    !pslldq xmm4, 15
    !.l_sse2_search0:
    !movd xmm2, eax
    !punpcklbw xmm2, xmm2
    !punpcklwd xmm2, xmm2
    !pshufd xmm3, xmm2, 01010101b   ; xmm3 = 16 times second needle byte
    !pshufd xmm2, xmm2, 0           ; xmm2 = 16 times first needle byte
   
    ; start search
    mov rsi, [p.p_HayStack]
    mov rcx, rsi
    shr rsi, 4                      ; align Haystack to 16 bytes
    shl rsi, 4
    sub rcx, rsi
    M_movdqa(xmm0, [rsi])           ; handle first 16 bytes
    !movdqa xmm1, xmm0
    !pcmpeqb xmm0, xmm2             ; compare against first needle byte
    !pmovmskb eax, xmm0
    !shr eax, cl                    ; shift off unwanted bytes
    !shl eax, cl
    !test eax, eax
    !jnz .l_sse2_search2
   
    ; main search loop
    !.l_sse2_search1:
    add rsi, 16                     ; next 16 bytes
    cmp rsi, [rsp - 32]
    !ja .l_sse2_search8
    M_movdqa(xmm0, [rsi])
    !movdqa xmm1, xmm0
    !pcmpeqb xmm0, xmm2             ; compare against first needle byte
    !pmovmskb eax, xmm0
    !test eax, eax
    !jz .l_sse2_search1             ; no match ? => search1
    !.l_sse2_search2:
    !pcmpeqb xmm1, xmm3             ; compare against second needle byte
    !psrldq xmm1, 1
    !por xmm1, xmm4
    !pmovmskb ecx, xmm1
    !and eax, ecx                   ; combine both searches
    !jz .l_sse2_search1             ; no match ? => search1
   
    ; compare rest of bytes
    !.l_sse2_search3:
    !bsf ecx, eax                   ; get index of first match
    !jz .l_sse2_search1
    !btr eax, ecx
    lea rdx, [rsi + rcx]            ; create a pointer to it
    cmp rdx, [rsp - 32]
    mov rcx, [p.v_NeedleSize]
    !ja .l_sse2_search8
    sub rcx, 2
    !jb .l_sse2_search5             ; NeedleSize < 2 ? => search5 (already done)
   
    !.l_sse2_search4:
    movzx ebx, word [rdx + rcx]     ; compare rest of needle right-to-left
    cmp bx, [rdi + rcx]             ; two bytes at a time
    !jne .l_sse2_search3
    sub rcx, 2
    !jae .l_sse2_search4
   
    !.l_sse2_search5:
    mov rbx, [rsp - 40]
    cmp rbx, -1
    !je .l_sse2_search6
    add rbx, 1                      ; increase count
    mov [rsp - 40], rbx
    !jmp .l_sse2_search3
    !.l_sse2_search6:
    mov rax, rdx                    ; return result
    sub rax, [p.p_HayStack]
    add rax, [p.v_Pos]   
    !.l_sse2_search7:
    mov rbx, [rsp -  8]
    mov rsi, [rsp - 16]
    mov rdi, [rsp - 24] 
    ProcedureReturn
   
    ; not found / return count
    !.l_sse2_search8:
    mov rax, [rsp - 40]
    !jmp .l_sse2_search7
   
  EndProcedure   
 
EndModule

_________________
macOS 10.15 Catalina, PB 5.71 x64


Top
 Profile  
Reply with quote  
 Post subject: Re: FindData module
PostPosted: Tue Jul 24, 2018 10:08 pm 
Offline
Addict
Addict
User avatar

Joined: Fri Sep 21, 2007 5:52 am
Posts: 3408
Location: New Zealand
Thanks Wilbert.


Top
 Profile  
Reply with quote  
 Post subject: Re: FindData module
PostPosted: Wed Jul 25, 2018 8:45 am 
Offline
Addict
Addict
User avatar

Joined: Thu Jun 07, 2007 3:25 pm
Posts: 3704
Location: Berlin, Germany
Many thanks, wilbert! :-)

_________________
Please excuse my flawed English. My native language is PureBasic.
Search
RSBasic's backups


Top
 Profile  
Reply with quote  
 Post subject: Re: FindData module
PostPosted: Wed Jul 25, 2018 9:40 am 
Offline
Addict
Addict
User avatar

Joined: Sun Nov 05, 2006 11:42 pm
Posts: 4528
Location: Lyon - France
Thanks Wilbert 8)

_________________
ImageThe happiness is a road...
Not a destination


Top
 Profile  
Reply with quote  
Display posts from previous:  Sort by  
Post new topic Reply to topic  [ 19 posts ]  Go to page 1, 2  Next

All times are UTC + 1 hour


Who is online

Users browsing this forum: No registered users and 13 guests


You cannot post new topics in this forum
You cannot reply to topics in this forum
You cannot edit your posts in this forum
You cannot delete your posts in this forum

Search for:
Jump to:  

 


Powered by phpBB © 2008 phpBB Group
subSilver+ theme by Canver Software, sponsor Sanal Modifiye