and done some own on base of that.
Maybe an interesting suggestion to this topic.
Code: Select all
Global GbTmp.l
Global tmpcount
Structure MemoryArray
Char.c[0]
EndStructure
Procedure.l Quicksearch(StartPos.l, *MainMem, MainLen.l, *FindMem, FindLen.l)
; Quicksearch Boyer-Moore Algorithm im Basic from Paul Dwyer
*MainByteArray.MemoryArray = *MainMem
*FindByteArray.MemoryArray = *FindMem
; Build BadChr Array
Dim BadChar.l(255)
; set all alphabet to max shift pos (length of find string plus 1)
For i = 0 To 255
BadChar(i) = FindLen + 1
Next
;Update chars that are in the find string to their position from the end.
For i = 0 To FindLen -1
BadChar(*FindByteArray\Char[i]) = FindLen - i
Next
MainArrayLoop.l = StartPos
EndSearchPos.l = MainLen - FindLen
While MainArrayLoop <= EndSearchPos
If CompareMemory(*MainMem + MainArrayLoop, *FindMem, FindLen) = 1
FoundPos = MainArrayLoop + 1
Break
EndIf
;Didn't find the string so shift as per the table.
MainArrayLoop + BadChar(*MainByteArray.MemoryArray\Char[MainArrayLoop + FindLen])
Wend
ProcedureReturn FoundPos
EndProcedure
Procedure.l MyFind( *Source.l,SrcLen, *Pattern.l,PatLen, StartPos.l)
;
; MyFind routine, who needs Bayer & Moore???
;
; This uses two tricks I discovered some 12 odd years ago.
; First assumption: Find out the length of BOTH strings.
; Second test all strings through the instructions "REPNE SCASB" for the length of a string (NULL ending).
; Compare strings using the instruction "REPE CMPSB"
; Third test the first character, if found test the LAST character in the string to search,
; If a match TEST BOTH strings,
; if NO match continue searching where we found the last character.
; Pro's:
; 1) We do ! Not compare ALL found strings.
; 2) Easy to implement
; 3) No intensive comparing of found first string match in de string.
;
; Con's: We have to do a liniar search, no 'skipping and sliding' like in Bayer-Moore...
;
; NEEDS to run as a EXE ! Not from the IDE, or we get false results!!!
;
! Xor Eax, Eax ; EAX = 0
! Xor Ecx, Ecx ; ECX = 0
! Not Ecx ; ECX = -1, or 4,294,967,295 max bytes to search, bloody BIG number!!
;
MOV Edi, *Pattern ; Adress of the patern in EDI..
CLD ; Looking UP (to the end if the string)..
; REPNE scasb ; Find the NULL byte..
; ! Not Ecx ; ECX is strlen (but in minus and +2), invers and included is a -1..
; DEC Ecx ; ECX -1, ECX now contains the length of the Pattern string..
MOV Ecx, PatLen
; ECX now has the length of the pattern string, test if it is longer than zero. Otherwise we DO have ! Nothing to search for..
;
CMP Ecx, 1
JG label1
MOV Eax, 0 ; Return = ! Not FOUND..
JMP labelExit
;
! label1:
; First leg is done, we know length of the search pattern. Keep it in EBX for reference..
MOV Ebx, Ecx
;
; Find the length of the source string, to prevent we search ALL memory (OOPS!)...
SUB Ecx, Ecx ; ECX = 0
! Not Ecx ; ECX = -1, or 4,294,967,295
SUB al, al ; ZERO AL or AL = 0 the byte we look for..
MOV Edi, *Source ; Adress in EDI..
CLD ; Looking UP (to the end if the string)..
; REPNE scasb ; Find the NULL byte..
; ! Not Ecx ; ECX is strlen (but in minus and +2), invers and included a -1..
; DEC Ecx ; ECX -1, ECX now contains the length of the Source string..
MOV Ecx, SrcLen
; Test if the source is a NULL string?
CMP Ecx, 1
JG label2
MOV Eax, 0 ; Return = ! Not FOUND..
JMP labelExit
;
! label2 :
MOV Edx, Ecx ; Keep a record for the length later on.
;
; Load into AH the LAST! Character of the search pattern...
MOV Esi, *Pattern ; Adress of the patern in ESI..
MOV AH, [Esi+Ebx-1] ; Now we have the LAST char of the search string..
MOV al, [Esi] ; Here we have the FIRST char of the search string..
;
MOV Edi, *Source ; Adress of the Source in EDI..
! label3:
CLD ; Looking UP (to the end if the Source string)..
REPNE scasb ; Find the FIRST character of the search string (a byte)..
JECXZ labelZeroExit ; ECX is ZERO so ! Not found in the Source, jump exit..
JMP labelCont ; Continue, jump..
;
! labelZeroExit:
MOV Eax, 0 ; ! Not found..
ProcedureReturn
! labelCont:
;
; We have a HIT!!
; Look if the LAST char of the Pattern string (which is in AH) is the same as the in the Source string on the same position!!
; If so we have a real hit if ! Not, go back and search further..
CMP AH, [Edi+Ebx-2] ; the trick of storing the length of the Pattern string. The -2 is needed because EDI shoots one further AFTER a REPNE..
JNE label3 ; IF ! Not jump and search...
;
; We have the FIRST and LAST position of the Patern string found in the Source string,
; now compare the rest of the characters (from the start+1). Keep the start in the source string AND the count..
PUSH Edi ; save these we need them if we DO ! Not have a match!!
PUSH Ecx
;
DEC Edi ; Correct the Source pointer..
MOV Ecx, Ebx ; The length of the Pattern string we want to find ! Not of the source string!!..
CLD
REPE cmpsb ; Compare the two strings (REPE = REPeat while Equal)..
;
POP Ecx ; Restore these in case we do ! Not have a match..
POP Edi
;
JE Label4 ; Jump if EQUAL (strings)..
;
; So ! Not equal ! Not found!! Search further..
MOV Esi, *Pattern ; Reload the adress of the Pattern in ESI..
JMP label3
! Label4:
;
; YES!!! We have found the string!!! And where is in ECX (coded)..
MOV Eax, Edx ; retrieve the length of the Source string..
SUB Eax, Ecx ; Now we have the length, ready!! Result = EAX register!!
ProcedureReturn
; JMP labelExit
! labelExit:
MOV Eax, 0 ; Error and/or ! Not found..
ProcedureReturn
EndProcedure
Procedure.l inMemStr5(StartPos.l,*Source.l,SrcLen.l,*Pattern.l,PatLen.l)
; Boyer-Moore-Algorithmus in Flat-Assembler
; von schic nach Ausführungen der Uni Flensburg
; http://www.inf.fh-flensburg.de/lang/algorithmen/pattern/bm2.htm
; siehe auch Darstellung von Strother Moore auf
; http://www.cs.utexas.edu/users/moore/best-ideas/string-searching/index.html
If StartPos >=SrcLen Or *Source = 0 Or SrcLen < PatLen Or SrcLen < 2 Or *Pattern < 1
ProcedureReturn -1
EndIf
result.l
PMax.l
PMore.l
shiftTable.l = AllocateMemory($400)
MOV Ebx, PatLen
CMP Ebx, 1
JG _lbl1
MOV result, -1 ; Muster muss mind. 2 Zeichen sein
JMP _lblExit
! _lbl1:
MOV Esi, *Source
ADD Esi, SrcLen
SUB Esi, Ebx
MOV Edx, Esi ; setze Ausstiegslänge auf Source-Ende - Musterlänge
; ---------------------------------------------------------------------
; Tabelle füllen, mit Position für Zeichen die in Findstring vorkommen
; ---------------------------------------------------------------------
MOV Ecx, Ebx ; Muster-Länge in ECX
MOV Esi, *Pattern ; Adresse von Muster in ESI
MOV Edi, shiftTable ; Adresse von Shift-Tabelle in Edi
! Xor Eax, Eax ; Eax leeren, da nur al gefüllt, aber EAX zum Weiterarbeiten verwendet wird
! _lblFillTable:
MOV al, [Esi] ; Zeichen holen
INC Esi ; Zähler für Muster Eins hoch
MOV [Edi+Eax*4], Ecx ; shift-Wert für Zeichen an ASCII-Stelle in Tabelle
DEC Ecx ; shift-Wert um Eins runter
JNS _lblFillTable ; wend while Ecx > 0
; -------------------------------------------
; setup für Schleife "While t<=TMax And p>=0"
; -------------------------------------------
MOV PMax, Ebx
DEC PMax ; PMax=Len(pattern)-1
MOV PMore, Ebx ; PMore=PMax+1
MOV Ebx, shiftTable ; Zeiger auf shiftTabelle
MOV Esi, *Source ; esi = *Source
ADD Esi, StartPos ; + Startposition
MOV Edi, *Pattern ; Edi = *pattern
; ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
! _lblWhileTMax:
CMP Edx, Esi
JL _lblExit ; While t<=TMax
MOV Ecx, PMax ; Zähler zurücksetzen p=PMax
! Xor Eax, Eax ; Eax leeren, da nur al gefüllt, aber Eax als long zum Weiterarbeiten verwendet wird
! _lblWhile_p_greater_0: ; zwischen-Einsprung, da Source-Zeiger nicht verändert, kann obiger Vergleich gespart werden
MOV al, [Esi+Ecx] ; Zeichen von Source nach al
; CMP al, 65
; JL doneCase
; CMP al, 90
; JG doneCase
; ADD al, 32
; ! doneCase:
CMP al, [Edi+Ecx] ; mit Zeichen in Muster vergleichen (If PeekB(t+p)<>PeekB(@pattern+p))
JE _lblElse ; ein Zeichen ist gleich -> weitere vergleichen
MOV Eax, [Ebx+Eax*4] ; shift-Wert für source-Zeichen, von shift-Tabelle laden
CMP Eax, 0 ; wenn kein Wert (Zeichen kommt nicht im Muster vor)
JNZ _lblCalcShift ; berechne Verschiebung
ADD Esi, Ecx
INC Esi ; t=t+p+1
JMP _lblWhileTMax ; Wend
! _lblCalcShift:
ADD Eax, Ecx
SUB Eax, PMore ; tmpSkip=tmpSkip+p-PMore
JNL _lblAdd_Shift ; wenn Verschiebung < 0
MOV Eax, 1 ; kleinste Verschiebung ist 1
! _lblAdd_Shift:
ADD Esi, Eax ; t=t+tmpSkip
JMP _lblWhileTMax ; Wend
! _lblElse:
DEC Ecx ; p-1
JNS _lblWhile_p_greater_0 ; Wend While p>=0
; ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
SUB Esi, *Source
INC Esi ; t+1-@text
MOV result, Esi
! _lblExit:
FreeMemory(shiftTable)
ProcedureReturn result
EndProcedure
Procedure.l inMemStr2(*SrcMem, memSizeSrc, *Pattern, memSizePattern, StartPos)
; memSizePattern = MemoryStringLength(*Pattern)
; memSizeSrc = MemoryStringLength(*SrcMem)
result.l
MOV Esi,*Pattern
MOV Edx,memSizePattern
! Or Edx,Edx
JZ l_fertig ;Länge *Pattern = 0
MOV Edi,*SrcMem
ADD Edi,StartPos
DEC Edi
MOV Ecx,memSizeSrc
SUB Ecx,StartPos
ADD Ecx,2
SUB Ecx,Edx
JB l_fertig ;memSizePattern > memSizeSrc
CLD ;clear direction flag -> DF=0
!l_search:
LODSB ;= mov al, [esi]: if DF=0 esi+1 (else esi-1)
; first char of pattern to al
REPNZ scasb ;Al - [edi]:edi+1:ecx-1 and repeat while not null or ecx>0
; compare first char of pattern with source until it matches or ecx is counted down
JNZ l_fertig ;Al - [edi]<>0 but ecx=0 (end of SrcMem reached)
MOV Eax,Edi
MOV Ebx,Ecx
MOV Ecx,Edx
DEC Ecx ;
REPZ cmpsb ; [esi] - [edi]: (if DF=0) esi+1: edi+1: ecx-1 and repeat while null or ecx>0
JZ l_found ; [esi] - [edi] was 0 and ecx is 0 -> whole pattern matched
;else ecx is 0 but [esi]-[edi] <> 0
MOV Edi,Eax
MOV Ecx,Ebx
MOV Esi,*Pattern
JMP l_search
!l_found:
SUB Eax,*SrcMem
MOV result,Eax
!l_fertig:
ProcedureReturn result
EndProcedure
Procedure BMBinSearch (StartPos.l,lpSource.l,srcLngth.l,lpSubStr.l,subLngth.l)
; Boyer Moore Exact pattern Matching Algorithms
; ASM-Code from
; Steve Hutchesson
; transfered to (PB-)FASM with slight modifications from schic
;LOCAL cval :DWORD
cVal.l
;LOCAL shift_table[256]:DWORD
shift_table = AllocateMemory($400)
result.l
MOV Ebx, subLngth
CMP Ebx, 1
JG jump1
MOV result, -2 ; string too short, must be > 1
JMP Cleanup
! jump1:
MOV Esi, lpSource
ADD Esi, srcLngth
SUB Esi, Ebx
MOV Edx, Esi ; set Exit Length: edx = pointer to end of source - srcLngth
; ----------------------------------------
; load shift table with value in subLngth
; ----------------------------------------
MOV Ecx, 256
MOV Eax, Ebx
MOV Edi, shift_table
REP stosd
; ----------------------------------------------
; load decending count values into shift table
; ----------------------------------------------
MOV Ecx, Ebx ; SubString length in ECX
DEC Ecx ; correct for zero based index
MOV Esi, lpSubStr ; address of SubString in ESI
MOV Edi, shift_table
! Xor Eax, Eax
! Write_Shift_Chars:
MOV al, [Esi] ; get the character
INC Esi
MOV [Edi+Eax*4], Ecx ; write shift for each character
DEC Ecx ; to ascii location in table
JNZ Write_Shift_Chars
; -----------------------------
; set up for main compare loop
; -----------------------------
MOV Ecx, Ebx ;Ecx = subLngth
DEC Ecx ;Ecx - 1
MOV cVal, Ecx ;cval = subLngth -1
MOV Esi, lpSource ;esi = lpSource
MOV Edi, lpSubStr ;Edi = lpSubStr
ADD Esi, StartPos ; add starting position
JMP Pre_Loop
; %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
! Calc_Suffix_Shift: ;source-char is in SubStr
ADD Eax, Ecx ;add counter in SubStr to value of shift-table
SUB Eax, cVal ; sub loop count (cval = subLngth -1)
JNS Add_Suffix_Shift
MOV Eax, 1 ; minimum shift is 1
! Add_Suffix_Shift:
ADD Esi, Eax ; add SUFFIX shift
MOV Ecx, cVal ; reset counter in compare loop
! Test_Length:
CMP Edx, Esi ; test exit condition: esi <= pointer to end of source - srcLngth
JL No_Match ;if esi > edx (pointer to end of source) then search is ending -> no match
! Pre_Loop:
! Xor Eax, Eax ; zero EAX for following partial writes
MOV al, [Esi+Ecx] ;char from lpSource to al
CMP al, [Edi+Ecx] ;compare characters from lpSource (esi) to char from lpSubStr (Edi)
JE Jump2
;MOV eax, shift_table[eax*4]
SHL Eax, 2h ;if lpSource (esi) = char from lpSubStr clc shift-value
ADD Eax,shift_table
MOV Eax, [Eax] ;load shift value for source-char from shift-table
CMP Ebx, Eax ;compare shift value with subLngth
JNE Calc_Suffix_Shift;Add_Suffix_Shift ;if source-char not in SubStr -> bypass SUFFIX calculations
LEA Esi, [Esi+Ecx+1] ;if source-char is in SubStr -> add BAD CHAR shift (esi=lpSource+lpSubStr+1)
JMP Test_Length
! Jump2:
DEC Ecx
! Xor Eax, Eax ; zero EAX for following partial writes
! Cmp_Loop:
MOV al, [Esi+Ecx]
CMP al, [Edi+Ecx] ; cmp characters in ESI / EDI
JNE Set_Shift ; if not equal, get next shift
DEC Ecx
JNS Cmp_Loop
JMP match ; fall through on match
! Set_Shift:
;MOV eax, shift_table[eax*4]
SHL Eax, 2h
ADD Eax,shift_table
MOV Eax, [Eax]
CMP Ebx, Eax
JNE Calc_Suffix_Shift ;if source-char <> subLngth -> source-char is in SubStr -> run SUFFIX calculations
LEA Esi, [Esi+Ecx+1] ; add BAD CHAR shift
JMP Test_Length
; %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
! match:
SUB Esi, lpSource ; sub source from ESI
;MOV eax, esi ; put length in eax
INC Esi
MOV result, Esi
JMP Cleanup
! No_Match:
MOV Eax, -1
! Cleanup:
FreeMemory(shift_table)
ProcedureReturn result
EndProcedure
Procedure.l inMemStr(StartPos, *Source, memSizeSrc, strStr, memSizePattern)
; naiver Algorithmus überprüft ein gesuchtes Muster an
; allen Positionen i eines Speicherbereichs
; Ende des Speicherbereiches muß null-terminiert sein
result.l
;init pointers etc.
MOV Ebx,strStr ; Ebx = Pointer to first Char in string
MOV Ecx,*Source ; Ecx = Pointer to akt. Char in source
MOV Edx, Ecx
ADD Edx, memSizeSrc
ADD Ecx,StartPos ; set source-pointer to startposition
DEC Ecx ; StartPos - 1
! rpt_Src: ; startpoint for loop scanning through the source
MOV al,byte[Ecx]
INC Ecx ; Ecx + 1
CMP Edx,Ecx ; if null (end of source-string)
JS endProc ; -> end Procedure, Result=0
CMP al, 65
JS endCase1
CMP al, 90
JNS endCase1
ADD al, 32
! endCase1:
CMP byte[Ebx],al ; if found first Char of strStr look for the rest
JNE rpt_Src ; else go on with next Char in source
;found the first Char of strStr in source
;now look if the rest does match
! Xor Esi,Esi ; esi = 0, esi = Pointer to akt. Char in strStr
! rpt_Str: ; startpoint for loop pounding the string
MOV Edi, Ecx
CMP byte[Ebx+Esi],0 ; if 0 (end of string)
JE got_it ; -> got it, all Chars of the string did match
ADD Edi, Esi
CMP Edx,Edi ; if null then end of source
JS endProc ; -> end Procedure, Result=0
INC Esi ; esi + 1 (the first Char in string is already found)
MOV al,byte[Edi] ; move actual Char in source to accumulator, have
CMP al, 65
JS endCase
CMP al, 90
JNS endCase
ADD al, 32
! endCase:
CMP byte[Ebx+Esi],al ; if actual Char in source (Ebx+esi) = act Char in string (al)
JE rpt_Str ; then take next Char
JMP rpt_Src ; and go on with scanning source
! got_it:
;Result = Ecx - Source, to get the place in the source-string
SUB Ecx,*Source
MOV result,Ecx
! endProc:
ProcedureReturn result
EndProcedure
Macro FindReport()
PrintN ("pattern found at position: " + Str(a) + " in " + Str(timeGetTime_()-StartTime ) + " milliseconds")
PrintN (" ")
EndMacro
Text.s="axbbaaxbbaaxbbabbal axbbaaxbbaaxbbabbal axbbaaxbbaaxbbabbalaxbbaaxbbaaxbbabbal This is any text of any number of characters with no content"
;123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789
; 0 10 20 30 40 50 60 70 80 90 100 110 120 130 140 150 160
findstr.s="gxbba" ; "gxbba" very adverse for Boyer-Moore algorithm with the "axbba..."-text
;findstr.s= "anypattern"
OpenConsole()
For i = 1 To 8
Text=Text+Text
Next
Text=Text+findstr
loopcount = 1500
PrintN ("Quicksearch Boyer-Moore Algorithm im Basic from Paul Dwyer")
lentxt=Len(Text)
lenfind=Len(findstr)
StartTime = timeGetTime_()
For i = 1 To loopcount
a=Quicksearch(1, @Text,lentxt,@findstr,lenfind)
Next i
FindReport()
PrintN ("MyFind ASM, from Jan Vooijs")
lentxt=Len(Text)
lenfind=Len(findstr)
StartTime = timeGetTime_()
For i = 1 To loopcount
a=MyFind(@Text,lentxt,@findstr,lenfind,1)
Next i
FindReport()
PrintN ("PB FindString")
StartTime = timeGetTime_()
For i = 1 To loopcount
a=FindString(Text, findstr, 1)
Next i
FindReport()
PrintN ("inMemStr5 ASM - Boyer Moore 2")
lentxt=Len(Text)
lenfind=Len(findstr)
StartTime = timeGetTime_()
For i = 1 To loopcount
a=inMemStr5(1,@Text, lentxt, @findstr,lenfind)
Next i
FindReport()
PrintN ("inMemStr2 ASM")
lentxt=Len(Text)
lenfindstr=Len(findstr)
StartTime = timeGetTime_()
For i = 1 To loopcount
a=inMemStr2(@Text, lentxt, @findstr, lenfindstr, 1)
Next i
FindReport()
PrintN ("BMBinSearch ASM from Steve Hutchesson")
lentxt=Len(Text)
lenfindstr=Len(findstr)
StartTime = timeGetTime_()
For i = 1 To loopcount
a=inMemStr2(@Text, lentxt, @findstr, lenfindstr, 1)
Next i
FindReport()
PrintN ("Search in ASM naive Alg.")
lentxt=Len(Text)
lenfindstr=Len(findstr)
StartTime = timeGetTime_()
For i = 1 To loopcount
a=inMemStr(1, @Text, lentxt, @findstr, lenfindstr)
Next i
FindReport()
Input()
CloseConsole()
End