FindData module
Posted: Wed Jul 01, 2015 2:32 pm
				
				Code: Select all
; FindData module by Wilbert
; last update July 28, 2015
DeclareModule FindData
  
  ; all routines return a zero based index or -1 (not found)
  
  Declare TS(*HayStack, HayStackSize, *Needle, NeedleSize.l)          ; Tailed Substring algorithm
  Declare QS(*HayStack, HayStackSize, *Needle, NeedleSize.l)          ; Quick Search algorithm
  Declare BM(*HayStack, HayStackSize, *Needle, NeedleSize.u)          ; Boyer-Moore algorithm
  Declare FastSearch(*HayStack, HayStackSize, *Needle, NeedleSize)    ; Based on the Python fast search algorithm
  Declare SSE2_Find(*Haystack, HaystackSize, *Needle, NeedleSize, Count = #False)
  
EndDeclareModule
Module FindData
  
  EnableASM
  EnableExplicit
  DisableDebugger
  
  CompilerIf #PB_Compiler_Processor = #PB_Processor_x86
    Macro rax : eax : EndMacro
    Macro rbx : ebx : EndMacro   
    Macro rcx : ecx : EndMacro
    Macro rdx : edx : EndMacro
    Macro rsi : esi : EndMacro
    Macro rdi : edi : EndMacro
    Macro rbp : ebp : EndMacro
    Macro rsp : esp : EndMacro
  CompilerEndIf
  
  Macro M_movdqa(arg1, arg2)
    !movdqa arg1, arg2
  EndMacro
  
  
  ; *** Tailed Substring algorithm code ***
  
  Macro M_TS_Phase(phase)
    !finddata.l_ts_#phase#0:
    movzx rax, byte [rsi + rcx] 
    !finddata.l_ts_#phase#1:
    cmp al, [rdi + rcx]
    !je finddata.l_ts_#phase#2
    inc rdi
    cmp rdi, rbp
    !jbe finddata.l_ts_#phase#1
    !jmp finddata.l_ts_exit_notfound
    !finddata.l_ts_#phase#2:
    sub rdx, rdx
    !finddata.l_ts_#phase#3:
    movzx rax, byte [rsi + rdx]
    cmp al, [rdi + rdx]
    !jne finddata.l_ts_#phase#4
    inc rdx
    cmp rdx, rbx
    !jne finddata.l_ts_#phase#3
    mov rax, rdi
    sub rax, [p.p_HayStack]
    !jmp finddata.l_ts_exit_found
    !finddata.l_ts_#phase#4:
    CompilerIf phase = 1
      mov rdx, rcx
      dec rdx
      !js finddata.l_ts_#phase#6
      movzx rax, byte [rsi + rcx]
      !finddata.l_ts_#phase#5:
      cmp al, [rsi + rdx]
      !je finddata.l_ts_#phase#6
      dec rdx
      !jns finddata.l_ts_#phase#5
      !finddata.l_ts_#phase#6:
      mov rax, rcx
      sub rax, rdx
      cmp rax, [rsp - 48]             ; if i-h > dim => {k=i; dim=i-h;}
      !jng finddata.l_ts_#phase#7
      mov [rsp - 40], rcx
      mov [rsp - 48], rax
      !finddata.l_ts_#phase#7:
      add rdi, rcx                    ;  s + i
      sub rdi, rdx                    ;  s - h
      dec rcx      
      cmp rcx, [rsp - 48]
      !jb finddata.l_ts_phase2        ; if i < dim => proceed with phase 2
    CompilerElse
      ; phase 2
      add rdi, [rsp - 48]
    CompilerEndIf
    cmp rdi, rbp
    !jbe finddata.l_ts_#phase#0
    !jmp finddata.l_ts_exit_notfound
    
  EndMacro
  
  Procedure TS(*HayStack, HayStackSize, *Needle, NeedleSize.l)
    
    ; backup some registers
    mov [rsp -  8], rbx
    mov [rsp - 16], rsi
    mov [rsp - 24], rdi
    mov [rsp - 32], rbp
    
    ; init some things
    mov ebx, [p.v_NeedleSize]
    mov ecx, ebx
    dec ecx
    !js finddata.l_ts_exit_notfound   ; exit when NeedleSize < 1
    mov rbp, [p.v_HayStackSize]
    sub rbp, rbx
    !jc finddata.l_ts_exit_notfound   ; exit when NeedleSize > HayStackSize
    mov rsi, [p.p_Needle]
    mov rdi, [p.p_HayStack]
    add rbp, rdi
    mov rax, 1
    mov [rsp - 40], rcx               ; k
    mov [rsp - 48], rax               ; dim
    
    ; search
    M_TS_Phase(1)
    !finddata.l_ts_phase2:
    mov rcx, [rsp - 40]
    M_TS_Phase(2)
    
    ; exit
    !finddata.l_ts_exit_notfound:  
    mov rax, -1
    !finddata.l_ts_exit_found:
    mov rbx, [rsp -  8]
    mov rsi, [rsp - 16]
    mov rdi, [rsp - 24]
    mov rbp, [rsp - 32]
    ProcedureReturn
    
  EndProcedure
  
  ; *** End of Tailed Substring algorithm code ***
  
  
  ; *** Code based on Quick Search / Sunday algorithm ***
  
  Macro M_QS_Search(n = 0)
    !finddata.l_qs_search0#n:
    !xor ecx, ecx
    !finddata.l_qs_search1#n:
    movzx eax, byte [rsi + rcx]
    cmp al, [rdi + rcx]
    !je finddata.l_qs_continue#n
    CompilerIf n = 0
      movzx eax, byte [rdi + rbx]
      mov eax, [rsp + rax * 4 - 1024]
      add rdi, rax
      cmp rdi, rdx
      !jb finddata.l_qs_search0#n
      !je finddata.l_qs_finalcheck
    CompilerEndIf
    !jmp finddata.l_qs_exit_notfound
    !finddata.l_qs_continue#n:
    !inc ecx
    !cmp ecx, ebx
    !jb finddata.l_qs_search1#n
    mov rax, rdi
    sub rax, [p.p_HayStack]
    !jmp finddata.l_qs_exit_found      
  EndMacro
  
  Procedure QS(*HayStack, HayStackSize, *Needle, NeedleSize.l)
    
    ; backup some registers
    mov [rsp - 1032], rbx
    mov [rsp - 1040], rsi
    mov [rsp - 1048], rdi
    
    ; perform some checks
    mov ebx, [p.v_NeedleSize]
    cmp ebx, 1
    !jl finddata.l_qs_exit_notfound
    mov rdx, [p.v_HayStackSize]
    sub rdx, rbx
    !jc finddata.l_qs_exit_notfound   ; exit when NeedleSize > HayStackSize
    
    ; prepare
    lea rdi, [rsp - 1024]
    lea eax, [ebx + 1]
    mov ecx, 256
    cld
    rep stosd
    mov rsi, [p.p_Needle]
    mov rdx, rsi
    mov ecx, ebx
    !finddata.l_qs_prep_table:
    movzx eax, byte [rdx]
    mov [rsp + rax * 4 - 1024], ecx
    inc rdx
    dec ecx
    !jnz finddata.l_qs_prep_table
    
    ; search
    mov rdi, [p.p_HayStack]
    mov rdx, [p.v_HayStackSize]
    sub rdx, rbx
    !jz finddata.l_qs_finalcheck      ; jump to finalcheck if NeedleSize = HayStackSize
    add rdx, rdi
    M_QS_Search(0)
    !finddata.l_qs_finalcheck:
    M_QS_Search(1)
    
    ; exit
    !finddata.l_qs_exit_notfound:  
    mov rax, -1
    !finddata.l_qs_exit_found:
    mov rbx, [rsp - 1032]
    mov rsi, [rsp - 1040]
    mov rdi, [rsp - 1048]
    ProcedureReturn
    
  EndProcedure
  
  ; *** End of code based on Quick Search / Sunday algorithm ***
  
  
  ; *** Code based on Boyer-Moore algorithm ***
  
  Macro M_BM(n=0)
    CompilerIf n=0
      add rsi, rcx      
    CompilerElse
      add rsi, 1
    CompilerEndIf
    mov ecx, ebx
    cmp rsi, rdx
    !jna finddata.l_bm_search1
    !jmp finddata.l_bm_exit
  EndMacro
  
  Procedure BM(*HayStack, HayStackSize, *Needle, NeedleSize.u)
    
    ; Boyer-Moore algorithm
    ; based on code from 'schic'
    ; http://www.purebasic.fr/english/viewtopic.php?p=130032#p130032
    
    ; backup some registers
    mov [rsp - 520], rbx
    mov [rsp - 528], rsi
    mov [rsp - 536], rdi
    
    ; perform some checks
    movzx ebx, word [p.v_NeedleSize]   ; exit if NeedleSize = 0
    cmp ebx, 0
    !je finddata.l_bm_exit
    mov rdx, [p.v_HayStackSize]        ; exit if NeedleSize > HayStackSize
    sub rdx, rbx
    !jc finddata.l_bm_exit
    
    ; prepare
    lea rdi, [rsp - 512]
    mov eax, 0xff
    mov ecx, 512
    cld
    rep stosb
    mov rdi, [p.p_Needle]
    mov ecx, ebx
    mov rsi, rdi
    !finddata.l_bm_table0:
    movzx eax, byte [rsi]
    add rsi, 1
    sub ecx, 1
    mov [rsp + rax * 2 - 512], cx
    !jnz finddata.l_bm_table0
    mov rsi, [p.p_HayStack]
    add rdx, rsi
    
    ; search
    mov ecx, ebx
    
    !finddata.l_bm_search1:
    movzx eax, byte [rsi + rcx - 1]
    cmp al, [rdi + rcx - 1]
    !je finddata.l_bm_search4
    
    movzx eax, word [rsp + rax * 2 - 512]
    cmp ax, 0xffff
    !jne finddata.l_bm_search2
    M_BM()
    
    !finddata.l_bm_search2:
    add ecx, eax
    sub ecx, ebx
    !jna finddata.l_bm_search3
    M_BM()
    
    !finddata.l_bm_search3:
    M_BM(1)
    
    !finddata.l_bm_search4:
    sub ecx, 1
    !jnz finddata.l_bm_search1
    
    mov rax, rsi
    sub rax, *HayStack
    !finddata.l_bm_return:
    mov rbx, [rsp - 520]
    mov rsi, [rsp - 528]
    mov rdi, [rsp - 536]
    ProcedureReturn
    
    ; exit not found
    !finddata.l_bm_exit:
    mov rax, -1
    !jmp finddata.l_bm_return
    
  EndProcedure
  
  ; *** End of code based on Boyer-Moore algorithm ***
  
  ; *** Code based on Python fast search algorithm ***
  
  Procedure FastSearch(*HayStack, HayStackSize, *Needle, NeedleSize)
    
    ; Based on the Python fast search algorithm (Python License)
    
    Protected.i reg_bx, reg_si, reg_di
    Protected.i hs_end, skip
    
    ; backup registers
    mov [p.v_reg_bx], rbx
    mov [p.v_reg_si], rsi
    mov [p.v_reg_di], rdi
    
    ; perform some checks
    mov rbx, [p.v_NeedleSize]
    mov rcx, [p.v_HayStackSize]
    cmp rbx, rcx
    !jg finddata.l_fs_exit                ; exit when NeedleSize > HayStackSize
    cmp rbx, 1
    !jg finddata.l_fs_prep
    !jne finddata.l_fs_exit               ; exit when NeedleSize < 1
    
    ; handle needle size 1
    mov rsi, [p.p_HayStack]    
    mov rdi, [p.p_Needle]
    mov rax, rcx
    mov bl, [rdi]
    !finddata.l_fs_sns0:
    cmp bl, [rsi]
    !je finddata.l_fs_sns1
    inc rsi
    dec rcx
    !jnz finddata.l_fs_sns0
    !jmp finddata.l_fs_exit
    !finddata.l_fs_sns1:
    sub rax, rcx
    !jmp finddata.l_fs_return
    
    ; prepare (skip and register rdx mask)
    !finddata.l_fs_prep:
    mov rdi, [p.p_Needle]
    lea rsi, [rbx - 2]
    mov [p.v_skip], rsi
    mov rdx, 0
    movzx ecx, byte [rdi + rsi + 1]
    bts rdx, rcx
    !finddata.l_fs_prep0:
    movzx eax, byte [rdi + rsi]
    bts rdx, rax
    sub rsi, 1
    !js finddata.l_fs_prep2
    cmp eax, ecx
    !jne finddata.l_fs_prep0
    lea rax, [rsi + 1]
    sub [p.v_skip], rax
    !finddata.l_fs_prep1:
    movzx eax, byte [rdi + rsi]
    bts rdx, rax
    sub rsi, 1
    !jns finddata.l_fs_prep1
    !finddata.l_fs_prep2:
    
    ; search
    mov rax, [p.v_HayStackSize]
    sub rax, rbx
    mov rsi, [p.p_HayStack]
    add rax, rsi
    mov [p.v_hs_end], rax
    movzx ecx, byte [rdi + rbx - 1]
    dec rsi
    
    !finddata.l_fs_search0:
    inc rsi
    cmp rsi, [p.v_hs_end]                 ; check for haystack end boundary
    !jae finddata.l_fs_search7
    cmp cl, [rsi + rbx - 1]               ; compare last needle byte
    !je finddata.l_fs_search2             ; equal => continue with search2
    movzx eax, byte [rsi + rbx]
    bt rdx, rax                           ; 'Sunday' check
    !jc finddata.l_fs_search0
    !finddata.l_fs_search1:
    add rsi, rbx
    !jmp finddata.l_fs_search0
    
    !finddata.l_fs_search2:               ; compare first two needle bytes
    movzx eax, word [rdi]
    cmp ax, [rsi]
    !je finddata.l_fs_search4             ; equal => continue with search4
    !finddata.l_fs_search3:
    movzx eax, byte [rsi + rbx]           ; 'Sunday' check
    bt rdx, rax
    !jnc finddata.l_fs_search1
    add rsi, [p.v_skip]                   ; 'Horspool' skip
    !jmp finddata.l_fs_search0
    
    !finddata.l_fs_search4:               ; compare remaining needle bytes
    mov rax, rbx
    sub rax, 3
    !jna finddata.l_fs_search9            ; no more bytes left => found !
    push rcx                              ; keep a copy of rcx
    lea rcx, [rax + 1]
    shr rcx, 1
    !finddata.l_fs_search5:
    movzx eax, word [rdi + rcx * 2]       ; compare two bytes at a time
    cmp ax, [rsi + rcx * 2]
    !jne finddata.l_fs_search6            ; not equal => restore rcx and back to search3
    dec rcx
    !jnz finddata.l_fs_search5            ; no more bytes left ?
    pop rcx                               ; restore rcx
    !jmp finddata.l_fs_search9            ; => found !
    !finddata.l_fs_search6:
    pop rcx
    !jmp finddata.l_fs_search3
    
    !finddata.l_fs_search7:               ; code to handle haystack exactly on end boundary
    !ja finddata.l_fs_exit                ; boundary exceeded => exit
    !finddata.l_fs_search8:               ; final check
    movzx eax, byte [rdi + rbx - 1]
    cmp al, [rsi + rbx - 1]    
    !jne finddata.l_fs_exit
    dec rbx
    !jnz finddata.l_fs_search8
    !finddata.l_fs_search9:               ; return result
    mov rax, rsi
    sub rax, [p.p_HayStack]    
    
    ; restore registers & return result
    !finddata.l_fs_return:
    mov rbx, [p.v_reg_bx]
    mov rsi, [p.v_reg_si]
    mov rdi, [p.v_reg_di]
    ProcedureReturn
    
    ; exit not found
    !finddata.l_fs_exit:
    mov rax, -1
    !jmp finddata.l_fs_return
    
  EndProcedure
  
  ; *** End of code based on Python fast search algorithm ***
  
  Procedure SSE2_Find(*Haystack, HaystackSize, *Needle, NeedleSize, Count = #False)
    
    ; backup some registers
    mov [rsp -  8], rbx
    mov [rsp - 16], rsi
    mov [rsp - 24], rdi
    
    ; init count
    mov rax, [p.v_Count]
    sub rax, 1
    sbb rax, rax
    mov [rsp - 40], rax
    
    ; perform some checks
    mov rax, [p.v_HaystackSize]
    mov rbx, [p.v_NeedleSize]
    sub rax, rbx
    !jc finddata.l_sse2_search8     ; exit if NeedleSize > HaystackSize
    add rax, [p.p_Haystack]  
    mov [rsp - 32], rax             ; rsp - 32 = *SearchEnd
    cmp rbx, 1
    !jl finddata.l_sse2_search8     ; exit if NeedleSize < 1
    ; load first two needle bytes
    !pcmpeqb xmm4, xmm4
    mov rdi, [p.p_Needle]
    movzx eax, byte [rdi]
    !je finddata.l_sse2_search0
    mov ah, [rdi + 1]
    !pslldq xmm4, 15
    !finddata.l_sse2_search0:
    !movd xmm2, eax
    !punpcklbw xmm2, xmm2
    !punpcklwd xmm2, xmm2
    !pshufd xmm3, xmm2, 01010101b   ; xmm3 = 16 times second needle byte
    !pshufd xmm2, xmm2, 0           ; xmm2 = 16 times first needle byte
    
    ; start search
    mov rsi, [p.p_Haystack]
    mov rcx, rsi
    shr rsi, 4                      ; align Haystack to 16 bytes
    shl rsi, 4
    sub rcx, rsi
    M_movdqa(xmm0, [rsi])           ; handle first 16 bytes
    !movdqa xmm1, xmm0
    !pcmpeqb xmm0, xmm2             ; compare against first needle byte
    !pmovmskb eax, xmm0
    !shr eax, cl                    ; shift off unwanted bytes
    !shl eax, cl
    !test eax, eax
    !jnz finddata.l_sse2_search2
    
    ; main search loop
    !finddata.l_sse2_search1:
    add rsi, 16                     ; next 16 bytes
    cmp rsi, [rsp - 32]
    !ja finddata.l_sse2_search8
    M_movdqa(xmm0, [rsi])
    !movdqa xmm1, xmm0
    !pcmpeqb xmm0, xmm2             ; compare against first needle byte
    !pmovmskb eax, xmm0
    !test eax, eax
    !jz finddata.l_sse2_search1     ; no match ? => search1
    !finddata.l_sse2_search2:
    !pcmpeqb xmm1, xmm3             ; compare against second needle byte
    !psrldq xmm1, 1
    !por xmm1, xmm4
    !pmovmskb ecx, xmm1
    !and eax, ecx                   ; combine both searches
    !jz finddata.l_sse2_search1     ; no match ? => search1
    
    ; compare rest of bytes
    !finddata.l_sse2_search3:
    !bsf ecx, eax                   ; get index of first match
    !jz finddata.l_sse2_search1
    !btr eax, ecx
    lea rdx, [rsi + rcx]            ; create a pointer to it
    cmp rdx, [rsp - 32]
    mov rcx, [p.v_NeedleSize]
    !ja finddata.l_sse2_search8
    sub rcx, 2
    !jb finddata.l_sse2_search5     ; NeedleSize < 2 ? => search5 (already done) 
    
    !finddata.l_sse2_search4:
    movzx ebx, word [rdx + rcx]     ; compare rest of needle right-to-left
    cmp bx, [rdi + rcx]             ; two bytes at a time
    !jne finddata.l_sse2_search3
    sub rcx, 2
    !jae finddata.l_sse2_search4
    
    !finddata.l_sse2_search5:
    mov rbx, [rsp - 40]
    cmp rbx, -1
    !je finddata.l_sse2_search6
    add rbx, 1                      ; increase count
    mov [rsp - 40], rbx
    !jmp finddata.l_sse2_search3
    !finddata.l_sse2_search6:
    mov rax, rdx                    ; return result
    sub rax, [p.p_Haystack]
    !finddata.l_sse2_search7:
    mov rbx, [rsp -  8]
    mov rsi, [rsp - 16]
    mov rdi, [rsp - 24]  
    ProcedureReturn
    
    ; not found / return count
    !finddata.l_sse2_search8:
    mov rax, [rsp - 40]
    !jmp finddata.l_sse2_search7
    
  EndProcedure   
  
EndModule