and done some own on base of that.
Maybe an interesting suggestion to this topic.
Code: Select all
Global GbTmp.l 
Global tmpcount
Structure MemoryArray
  Char.c[0]
EndStructure
Procedure.l Quicksearch(StartPos.l, *MainMem, MainLen.l, *FindMem, FindLen.l)
  ; Quicksearch Boyer-Moore Algorithm im Basic from Paul Dwyer
  
  *MainByteArray.MemoryArray = *MainMem
  *FindByteArray.MemoryArray = *FindMem
  
  ; Build BadChr Array
  Dim BadChar.l(255)
  
  ; set all alphabet to max shift pos (length of find string plus 1)
  For i = 0 To 255
    BadChar(i)  =  FindLen + 1
  Next
  
  ;Update chars that are in the find string to their position from the end.
  For i = 0 To FindLen -1
    BadChar(*FindByteArray\Char[i]) = FindLen - i   
  Next     
  
  MainArrayLoop.l = StartPos
  EndSearchPos.l = MainLen - FindLen
  
  While MainArrayLoop <= EndSearchPos
    
    If CompareMemory(*MainMem + MainArrayLoop, *FindMem, FindLen) = 1
      FoundPos = MainArrayLoop + 1
      Break
    EndIf
    
    ;Didn't find the string so shift as per the table.
    MainArrayLoop + BadChar(*MainByteArray.MemoryArray\Char[MainArrayLoop + FindLen])
    
  Wend
  
  ProcedureReturn FoundPos
  
EndProcedure
Procedure.l MyFind( *Source.l,SrcLen, *Pattern.l,PatLen, StartPos.l)
  
  ;
  ; MyFind routine, who needs Bayer & Moore???
  ;
  ; This uses two tricks I discovered some 12 odd years ago.
  ; First assumption: Find out the length of BOTH strings.
  ; Second test all strings through the instructions "REPNE SCASB" for the length of a string (NULL ending).
  ; Compare strings using the instruction "REPE   CMPSB"
  ; Third test the first character, if found test the LAST character in the string to search,
  ;   If a match TEST BOTH strings,
  ;   if NO match continue searching where we found the last character.
  ; Pro's:
  ;   1) We do ! Not compare ALL found strings.
  ;   2) Easy to implement
  ;   3) No intensive comparing of found first string match in de string.
  ;
  ; Con's: We have to do a liniar search, no 'skipping and sliding' like in Bayer-Moore...
  ;
  ; NEEDS to run as a EXE ! Not from the IDE, or we get false results!!!
  ;
  ! Xor      Eax, Eax      ; EAX = 0
  ! Xor    Ecx, Ecx      ; ECX = 0
  ! Not    Ecx            ; ECX = -1, or 4,294,967,295 max bytes to search, bloody BIG number!!
  ;
  MOV      Edi, *Pattern   ; Adress of the patern in EDI..
  CLD                  ; Looking UP (to the end if the string)..
  ; REPNE   scasb         ; Find the NULL byte..
  ; ! Not      Ecx            ; ECX is strlen (but in minus and +2), invers and included is a -1..
  ; DEC      Ecx            ; ECX -1, ECX now contains the length of the Pattern string..
  MOV Ecx, PatLen
  ; ECX now has the length of the pattern string, test if it is longer than zero. Otherwise we DO have ! Nothing to search for..
  ;
  CMP      Ecx, 1
  JG      label1
  MOV      Eax, 0         ; Return = ! Not FOUND..
  JMP      labelExit
  ;   
  ! label1:
  ; First leg is done, we know length of the search pattern. Keep it in EBX for reference..
  MOV      Ebx, Ecx
  ;
  ; Find the length of the source string, to prevent we search ALL memory (OOPS!)...
  SUB    Ecx, Ecx      ; ECX = 0
  ! Not      Ecx            ; ECX = -1, or 4,294,967,295
  SUB      al, al         ; ZERO AL or AL = 0 the byte we look for..
  MOV      Edi, *Source   ; Adress in EDI..
  CLD                  ; Looking UP (to the end if the string)..
  ; REPNE   scasb         ; Find the NULL byte..
  ; ! Not      Ecx            ; ECX is strlen (but in minus and +2), invers and included a -1..
  ; DEC      Ecx            ; ECX -1, ECX now contains the length of the Source string..
  MOV Ecx, SrcLen
  ; Test if the source is a NULL string?
  CMP      Ecx, 1
  JG      label2
  MOV      Eax, 0         ; Return = ! Not FOUND..
  JMP      labelExit
  ;   
  
  ! label2 :
  MOV      Edx, Ecx      ; Keep a record for the length later on.
  ;
  ; Load into AH the LAST! Character of the search pattern...
  MOV      Esi, *Pattern   ; Adress of the patern in ESI..
  MOV      AH, [Esi+Ebx-1]     ; Now we have the  LAST  char of the search string..
  MOV      al, [Esi]        ; Here we have the FIRST char of the search string..
  ;
  MOV      Edi, *Source   ; Adress of the Source in EDI..
  ! label3:
  CLD                  ; Looking UP (to the end if the Source string)..
  REPNE   scasb         ; Find the FIRST character of the search string (a byte)..
  JECXZ   labelZeroExit   ; ECX is ZERO so ! Not found in the Source, jump exit..
  JMP      labelCont      ; Continue, jump..
  ;
  ! labelZeroExit:
  MOV      Eax, 0         ; ! Not found..
  ProcedureReturn
  
  ! labelCont:
  ;
  ; We have a HIT!!
  ; Look if the LAST char of the Pattern string (which is in AH) is the same as the in the Source string on the same position!!
  ; If so we have a real hit if ! Not, go back and search further..
  CMP      AH, [Edi+Ebx-2]   ; the trick of storing the length of the Pattern string. The -2 is needed because EDI shoots one further AFTER a REPNE..
  JNE      label3         ; IF ! Not jump and search...
  ;
  ; We have the FIRST and LAST position of the Patern string found in the Source string,
  ;   now compare the rest of the characters (from the start+1). Keep the start in the source string AND the count..
  PUSH   Edi            ; save these we need them if we DO ! Not have a match!!
  PUSH   Ecx
  ;
  DEC      Edi            ; Correct the Source pointer..
  MOV      Ecx, Ebx      ; The length of the Pattern string we want to find ! Not of the source string!!..
  CLD
  REPE   cmpsb         ; Compare the two strings (REPE = REPeat while Equal)..
  ;
  POP      Ecx            ; Restore these in case we do ! Not have a match..
  POP      Edi
  ;
  JE      Label4         ; Jump if EQUAL (strings)..
  ;
  ; So ! Not equal ! Not found!! Search further..
  MOV      Esi, *Pattern   ; Reload the adress of the Pattern in ESI..
  JMP      label3
  
  ! Label4:
  ;
  ; YES!!! We have found the string!!! And where is in ECX (coded)..
  MOV      Eax, Edx      ; retrieve the length of the Source string..
  SUB      Eax, Ecx      ; Now we have the length, ready!! Result = EAX register!!
  
  ProcedureReturn
  ; JMP      labelExit
  
  ! labelExit:
  MOV      Eax, 0         ; Error and/or ! Not found..
  ProcedureReturn
EndProcedure
Procedure.l inMemStr5(StartPos.l,*Source.l,SrcLen.l,*Pattern.l,PatLen.l)
  
  ; Boyer-Moore-Algorithmus in Flat-Assembler
  ; von schic nach Ausführungen der Uni Flensburg 
  ; http://www.inf.fh-flensburg.de/lang/algorithmen/pattern/bm2.htm
  ; siehe auch Darstellung von Strother Moore auf 
  ; http://www.cs.utexas.edu/users/moore/best-ideas/string-searching/index.html
  
  If StartPos >=SrcLen Or *Source = 0 Or SrcLen < PatLen Or SrcLen < 2 Or *Pattern < 1
    ProcedureReturn -1
  EndIf
  
  result.l
  PMax.l
  PMore.l
  
  shiftTable.l = AllocateMemory($400)
  
  MOV Ebx, PatLen
  
  CMP Ebx,    1
  JG  _lbl1
  MOV result, -1            ; Muster muss mind. 2 Zeichen sein
  JMP _lblExit
  
  ! _lbl1:
  MOV Esi,    *Source
  ADD Esi,    SrcLen
  SUB Esi,    Ebx
  MOV Edx,    Esi           ; setze Ausstiegslänge auf Source-Ende - Musterlänge
  
  ; ---------------------------------------------------------------------
  ; Tabelle füllen, mit Position für Zeichen die in Findstring vorkommen
  ; ---------------------------------------------------------------------
  MOV Ecx,          Ebx             ; Muster-Länge in ECX
  MOV Esi,          *Pattern        ; Adresse von Muster in ESI
  MOV Edi,          shiftTable      ; Adresse von Shift-Tabelle in Edi
  
  ! Xor Eax,          Eax           ; Eax leeren, da nur al gefüllt, aber EAX zum Weiterarbeiten verwendet wird
  ! _lblFillTable:
  MOV al,           [Esi]           ; Zeichen holen
  INC Esi                           ; Zähler für Muster Eins hoch
  MOV [Edi+Eax*4],  Ecx             ; shift-Wert für Zeichen an ASCII-Stelle in Tabelle
  DEC Ecx                           ; shift-Wert um Eins runter
  JNS _lblFillTable                ; wend while Ecx > 0
  
  ; -------------------------------------------
  ; setup für Schleife "While t<=TMax And p>=0"
  ; -------------------------------------------
  MOV PMax,         Ebx
  DEC PMax                          ; PMax=Len(pattern)-1
  MOV PMore,        Ebx             ; PMore=PMax+1
  MOV Ebx,          shiftTable      ; Zeiger auf shiftTabelle
  MOV Esi,          *Source         ; esi = *Source
  ADD Esi,          StartPos        ; + Startposition
  MOV Edi,          *Pattern        ; Edi = *pattern
  
  ; ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
  ! _lblWhileTMax:
  CMP Edx,          Esi
  JL _lblExit                      ; While t<=TMax
  
  MOV Ecx,          PMax            ; Zähler zurücksetzen p=PMax
  ! Xor Eax,          Eax             ; Eax leeren, da nur al gefüllt, aber Eax als long zum Weiterarbeiten verwendet wird
  ! _lblWhile_p_greater_0:         ; zwischen-Einsprung, da Source-Zeiger nicht verändert, kann obiger Vergleich gespart werden
  MOV al,           [Esi+Ecx]       ; Zeichen von Source nach al
  ; CMP al, 65 
  ; JL doneCase
  ; CMP al, 90 
  ; JG doneCase
  ; ADD al, 32
  ; ! doneCase:
  CMP al,           [Edi+Ecx]       ; mit Zeichen in Muster vergleichen (If PeekB(t+p)<>PeekB(@pattern+p))
  JE _lblElse                      ; ein Zeichen ist gleich -> weitere vergleichen
  
  MOV Eax,          [Ebx+Eax*4]     ; shift-Wert für source-Zeichen, von shift-Tabelle laden
  CMP Eax,          0               ; wenn kein Wert (Zeichen kommt nicht im Muster vor)
  JNZ _lblCalcShift                  ; berechne Verschiebung
  
  ADD Esi,          Ecx
  INC Esi                           ; t=t+p+1
  JMP _lblWhileTMax                ; Wend
  
  ! _lblCalcShift:
  ADD Eax,          Ecx
  SUB Eax,          PMore           ; tmpSkip=tmpSkip+p-PMore
  JNL _lblAdd_Shift                  ; wenn Verschiebung < 0
  
  MOV Eax,          1               ; kleinste Verschiebung ist 1
  
  ! _lblAdd_Shift:
  ADD Esi,          Eax             ; t=t+tmpSkip
  JMP _lblWhileTMax                ; Wend
  
  ! _lblElse:
  DEC Ecx                           ; p-1
  JNS _lblWhile_p_greater_0        ; Wend While p>=0
  ; ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
  
  SUB Esi,          *Source 
  INC Esi                           ; t+1-@text
  MOV result,       Esi
  
  ! _lblExit:
  FreeMemory(shiftTable)
  
  ProcedureReturn result
EndProcedure
Procedure.l inMemStr2(*SrcMem, memSizeSrc, *Pattern, memSizePattern, StartPos)
  
  ; memSizePattern = MemoryStringLength(*Pattern)                                     
  ; memSizeSrc = MemoryStringLength(*SrcMem)
  
  result.l
  
  MOV Esi,*Pattern          
  MOV Edx,memSizePattern
  
  ! Or Edx,Edx
  JZ l_fertig              ;Länge *Pattern = 0
  MOV Edi,*SrcMem
  ADD Edi,StartPos
  DEC Edi
  MOV Ecx,memSizeSrc
  SUB Ecx,StartPos
  ADD Ecx,2
  SUB Ecx,Edx
  JB l_fertig              ;memSizePattern > memSizeSrc     
  
  CLD                      ;clear direction flag -> DF=0
  !l_search:
  LODSB                    ;= mov al, [esi]: if DF=0 esi+1 (else esi-1)
  ; first char of pattern to al
  
  REPNZ scasb              ;Al - [edi]:edi+1:ecx-1 and repeat while not null or ecx>0
  ; compare first char of pattern with source until it matches or ecx is counted down
  
  JNZ l_fertig             ;Al - [edi]<>0 but ecx=0 (end of SrcMem reached)
  
  MOV Eax,Edi
  MOV Ebx,Ecx
  MOV Ecx,Edx
  DEC Ecx                  ;
  REPZ cmpsb               ; [esi] - [edi]: (if DF=0) esi+1: edi+1: ecx-1 and repeat while null or ecx>0
  JZ l_found               ; [esi] - [edi] was 0 and ecx is 0 -> whole pattern matched
    
  ;else ecx is 0 but [esi]-[edi] <> 0
  MOV Edi,Eax
  MOV Ecx,Ebx
  MOV Esi,*Pattern
  JMP l_search
  
  !l_found:        
  SUB Eax,*SrcMem
  MOV result,Eax
  
  !l_fertig:
  
  ProcedureReturn result 
EndProcedure 
Procedure BMBinSearch (StartPos.l,lpSource.l,srcLngth.l,lpSubStr.l,subLngth.l)
  ; Boyer Moore Exact pattern Matching Algorithms
  ; ASM-Code from
  ; Steve Hutchesson
  ; transfered to (PB-)FASM with slight modifications from schic
  
  ;LOCAL cval   :DWORD
  cVal.l
  ;LOCAL shift_table[256]:DWORD
  shift_table = AllocateMemory($400)
  result.l
  
  MOV Ebx, subLngth
  
  CMP Ebx, 1
  JG jump1
  MOV result, -2                 ; string too short, must be > 1
  JMP Cleanup
  ! jump1:
  MOV Esi, lpSource
  ADD Esi, srcLngth
  SUB Esi, Ebx
  MOV Edx, Esi            ; set Exit Length: edx = pointer to end of source - srcLngth
  
  ; ----------------------------------------
  ; load shift table with value in subLngth
  ; ----------------------------------------
  MOV Ecx, 256
  MOV Eax, Ebx
  MOV Edi, shift_table
  REP stosd
  
  ; ----------------------------------------------
  ; load decending count values into shift table
  ; ----------------------------------------------
  MOV Ecx, Ebx                ; SubString length in ECX
  DEC Ecx                     ; correct for zero based index
  MOV Esi, lpSubStr           ; address of SubString in ESI
  MOV Edi, shift_table
  
  ! Xor Eax, Eax
  
  ! Write_Shift_Chars:
  MOV al, [Esi]               ; get the character
  INC Esi
  MOV [Edi+Eax*4], Ecx        ; write shift for each character
  DEC Ecx                     ; to ascii location in table
  JNZ Write_Shift_Chars
  
  ; -----------------------------
  ; set up for main compare loop
  ; -----------------------------
  MOV Ecx, Ebx          ;Ecx = subLngth
  DEC Ecx               ;Ecx - 1
  MOV cVal, Ecx         ;cval = subLngth -1
  
  MOV Esi, lpSource     ;esi = lpSource
  MOV Edi, lpSubStr     ;Edi = lpSubStr
  ADD Esi, StartPos           ; add starting position
  JMP Pre_Loop
  ; %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  
  ! Calc_Suffix_Shift:    ;source-char is in SubStr
  ADD Eax, Ecx            ;add counter in SubStr to value of shift-table
  SUB Eax, cVal               ; sub loop count (cval = subLngth -1)
  JNS Add_Suffix_Shift
  MOV Eax, 1                  ; minimum shift is 1
  
  ! Add_Suffix_Shift:
  ADD Esi, Eax                ; add SUFFIX shift
  MOV Ecx, cVal               ; reset counter in compare loop
  
  ! Test_Length:
  CMP Edx, Esi                ; test exit condition: esi <= pointer to end of source - srcLngth
  JL No_Match            ;if esi > edx (pointer to end of source) then search is ending -> no match
  
  ! Pre_Loop:
  ! Xor Eax, Eax                ; zero EAX for following partial writes
  MOV al, [Esi+Ecx]      ;char from lpSource to al
  
  CMP al, [Edi+Ecx]      ;compare characters from lpSource (esi) to char from lpSubStr (Edi)
  JE Jump2
  ;MOV eax, shift_table[eax*4]
  SHL   Eax,      2h     ;if lpSource (esi) = char from lpSubStr clc shift-value
  ADD Eax,shift_table
  MOV Eax, [Eax]         ;load shift value for source-char from shift-table
  CMP Ebx, Eax           ;compare shift value with subLngth
  JNE Calc_Suffix_Shift;Add_Suffix_Shift   ;if source-char not in SubStr -> bypass SUFFIX calculations
  LEA Esi, [Esi+Ecx+1]   ;if source-char is in SubStr -> add BAD CHAR shift (esi=lpSource+lpSubStr+1)
  JMP Test_Length
  ! Jump2:
  DEC Ecx
  ! Xor Eax, Eax                ; zero EAX for following partial writes
  
  ! Cmp_Loop:
  MOV al, [Esi+Ecx]
  CMP al, [Edi+Ecx]           ; cmp characters in ESI / EDI
  JNE Set_Shift               ; if not equal, get next shift
  DEC Ecx
  JNS Cmp_Loop
  JMP match                   ; fall through on match
  
  ! Set_Shift:
  ;MOV eax, shift_table[eax*4]
  SHL   Eax,      2h 
  ADD Eax,shift_table
  MOV Eax, [Eax]
  CMP Ebx, Eax
  JNE Calc_Suffix_Shift       ;if source-char <> subLngth -> source-char is in SubStr -> run SUFFIX calculations
  LEA Esi, [Esi+Ecx+1]        ; add BAD CHAR shift
  JMP Test_Length
  
  ; %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  
  ! match:
  SUB Esi, lpSource           ; sub source from ESI
  ;MOV eax, esi               ; put length in eax
  INC Esi
  MOV result, Esi
  
  JMP Cleanup
  
  ! No_Match:
  MOV Eax, -1
  
  ! Cleanup:
  FreeMemory(shift_table)
  
  ProcedureReturn result 
EndProcedure
Procedure.l inMemStr(StartPos, *Source, memSizeSrc, strStr, memSizePattern)
  
  ; naiver Algorithmus überprüft ein gesuchtes Muster an 
  ; allen Positionen i eines Speicherbereichs
  ; Ende des Speicherbereiches muß null-terminiert sein
  
  result.l
  
  ;init pointers etc.
  MOV Ebx,strStr    ; Ebx = Pointer to first Char in string
  MOV Ecx,*Source   ; Ecx = Pointer to akt. Char in source
  MOV Edx, Ecx
  ADD Edx, memSizeSrc
  ADD Ecx,StartPos  ; set source-pointer to startposition
  DEC Ecx      ; StartPos - 1
  
  ! rpt_Src:        ; startpoint for loop scanning through the source
  MOV al,byte[Ecx]
  INC Ecx           ; Ecx + 1
  CMP Edx,Ecx          ; if null (end of source-string)
  JS endProc        ; -> end Procedure, Result=0
  CMP al, 65 
  JS endCase1
  CMP al, 90 
  JNS endCase1
  ADD al, 32
  ! endCase1:
  CMP byte[Ebx],al  ; if found first Char of strStr look for the rest
  JNE rpt_Src       ; else go on with next Char in source
  
  ;found the first Char of strStr in source
  ;now look if the rest does match
  ! Xor Esi,Esi             ; esi = 0, esi = Pointer to akt. Char in strStr
  ! rpt_Str:              ; startpoint for loop pounding the string
  MOV Edi, Ecx
  CMP byte[Ebx+Esi],0     ; if 0 (end of string) 
  JE got_it               ; -> got it, all Chars of the string did match
  ADD Edi, Esi
  CMP Edx,Edi               ; if null then end of source 
  JS endProc              ; -> end Procedure, Result=0
  INC Esi                 ; esi + 1 (the first Char in string is already found)
  MOV al,byte[Edi]        ; move actual Char in source to accumulator, have 
  CMP al, 65 
  JS endCase
  CMP al, 90 
  JNS endCase
  ADD al, 32
  ! endCase:
  CMP byte[Ebx+Esi],al    ; if actual Char in source (Ebx+esi) = act Char in string (al)
  JE rpt_Str              ; then take next Char
  JMP rpt_Src             ; and go on with scanning source
  
  ! got_it:
  ;Result = Ecx - Source, to get the place in the source-string
  SUB Ecx,*Source
  MOV result,Ecx
  
  ! endProc:
  ProcedureReturn result
EndProcedure
Macro FindReport()
  PrintN ("pattern found at position: " + Str(a) + " in " + Str(timeGetTime_()-StartTime ) + " milliseconds")
  PrintN (" ")
EndMacro
Text.s="axbbaaxbbaaxbbabbal axbbaaxbbaaxbbabbal axbbaaxbbaaxbbabbalaxbbaaxbbaaxbbabbal This is any text of any number of characters with no content"
       ;123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789
       ;  0        10          20        30         40      50      60          70        80        90        100       110       120         130     140         150     160
findstr.s="gxbba" ; "gxbba" very adverse for Boyer-Moore algorithm with the "axbba..."-text
;findstr.s= "anypattern" 
OpenConsole()
  
For i = 1 To 8
  Text=Text+Text
Next
Text=Text+findstr
  
loopcount = 1500
PrintN ("Quicksearch Boyer-Moore Algorithm im Basic from Paul Dwyer")
lentxt=Len(Text) 
lenfind=Len(findstr)
StartTime = timeGetTime_()
For i = 1 To loopcount
  a=Quicksearch(1, @Text,lentxt,@findstr,lenfind)
Next i
FindReport()
PrintN ("MyFind ASM, from Jan Vooijs")
lentxt=Len(Text) 
lenfind=Len(findstr)
StartTime = timeGetTime_()
For i = 1 To loopcount
  a=MyFind(@Text,lentxt,@findstr,lenfind,1)
Next i
FindReport()
PrintN ("PB FindString")
StartTime = timeGetTime_()
For i = 1 To loopcount
  a=FindString(Text, findstr, 1)
Next i
FindReport()
PrintN ("inMemStr5 ASM - Boyer Moore 2")
lentxt=Len(Text)
lenfind=Len(findstr)
StartTime = timeGetTime_()
For i = 1 To loopcount
  a=inMemStr5(1,@Text, lentxt, @findstr,lenfind)
Next i
FindReport()
PrintN ("inMemStr2 ASM")
lentxt=Len(Text)
lenfindstr=Len(findstr)
StartTime = timeGetTime_()
For i = 1 To loopcount
  a=inMemStr2(@Text, lentxt, @findstr, lenfindstr, 1)
Next i
FindReport()
PrintN ("BMBinSearch ASM from Steve Hutchesson")
lentxt=Len(Text)
lenfindstr=Len(findstr)
StartTime = timeGetTime_()
For i = 1 To loopcount
  a=inMemStr2(@Text, lentxt, @findstr, lenfindstr, 1)
Next i
FindReport()
PrintN ("Search in ASM naive Alg.")
lentxt=Len(Text)
lenfindstr=Len(findstr)
StartTime = timeGetTime_()
For i = 1 To loopcount
  a=inMemStr(1, @Text, lentxt, @findstr, lenfindstr)
Next i
FindReport()
Input()
CloseConsole()
End