Posted: Sun Nov 11, 2007 3:57 pm
a point of interest
myfind fails with search patterns long 1 character
a very informative post
cheers
myfind fails with search patterns long 1 character
a very informative post
cheers
http://www.purebasic.com
https://www.purebasic.fr/english/
Code: Select all
Structure MemoryArray
Byte.c[0]
EndStructure
Procedure.l Quicksearch(StartPos.l, *MainMem, MainLen.l, *FindMem, FindLen.l)
*MainByteArray.MemoryArray = *MainMem
*FindByteArray.MemoryArray = *FindMem
StartPos =- 1
; Build BadChr Array
Dim BadChar.l(255)
; set all alphabet to max shift pos (length of find string plus 1)
For i = 0 To 255
BadChar(i) = FindLen + 1
Next
;Update chars that are in the find string to their position from the end.
For i = 0 To findlen -1
BadChar(*FindByteArray\byte[i]) = findlen - i
Next
MainArrayLoop.l = StartPos
EndSearchPos.l = MainLen - (FindLen -1)
While MainArrayLoop <= EndSearchPos
If CompareMemory(*MainMem + MainArrayLoop, *FindMem, FindLen) = 1
FoundPos = MainArrayLoop + 1
Break
EndIf
;Didn't find the string so shift as per the table.
MainArrayLoop + BadChar(*MainByteArray.MemoryArray\byte[MainArrayLoop + FindLen])
Wend
ProcedureReturn FoundPos
EndProcedure
Debug Quicksearch(1, @"ABCDEF", 6, @"A", 1)
Debug Quicksearch(1, @"ABCDEF", 6, @"F", 1)
Debug Quicksearch(1, @"ABCDEF", 6, @"BC", 2)
Code: Select all
; set all alphabet to max shift pos [...]
For i = 0 To 255
Code: Select all
StartPos =- 1
Code: Select all
StartPos - 1
Code: Select all
Structure MemoryArray
Byte.b[0]
EndStructure
Procedure.l QuickSearch(StartPos, *MainMem, MainLen, *FindMem, FindLen)
Protected MainArrayLoop, EndSearchPos, *MainByteArray.MemoryArray, *FindByteArray.MemoryArray
*MainByteArray = *MainMem
*FindByteArray = *FindMem
StartPos =- 1
; Build BadChr Array
Protected Dim BadChar(255)
; set all alphabet to max shift pos (length of find string plus 1)
For i = 0 To 255
BadChar(i) = FindLen + 1
Next
;Update chars that are in the find string to their position from the end.
For i = 0 To FindLen -1
BadChar(*FindByteArray\Byte[i]) = FindLen - i
Next
MainArrayLoop = StartPos
EndSearchPos = MainLen - (FindLen -1)
While MainArrayLoop <= EndSearchPos
If CompareMemory(*MainMem + MainArrayLoop, *FindMem, FindLen) = 1
FoundPos = MainArrayLoop + 1
Break
EndIf
;Didn't find the string so shift as per the table.
MainArrayLoop + BadChar(*MainByteArray.MemoryArray\Byte[MainArrayLoop + FindLen])
Wend
ProcedureReturn FoundPos
EndProcedure
Procedure QuickStringSearch(StartPos, String$, StringToFind$)
CompilerIf #PB_Compiler_Unicode
ProcedureReturn QuickSearch(StartPos, @String$, StringByteLength(String$), @StringToFind$, StringByteLength(StringToFind$))/2+1
CompilerElse
ProcedureReturn QuickSearch(StartPos, @String$, Len(String$), @StringToFind$, Len(StringToFind$))
CompilerEndIf
EndProcedure
Debug QuickStringSearch(1, "ABCDEF34637347347", "A")
Debug QuickStringSearch(1, "ABCDEF34637347347", "F")
Debug QuickStringSearch(1, "ABCDEF34637347347", "BC")
Code: Select all
Procedure.l StringFind(*String,*Search,StartPos)
Protected MemString=MemoryStringLength(*String); StringLen(*String);
Protected MemSearch=MemoryStringLength(*Search); StringLen(*Search);
MOV Esi,*Search
MOV Edx,MemSearch
OR Edx,Edx
JZ l_FSFertig ; Länge des Suchtexts= 0
MOV Edi,*String
ADD Edi,StartPos
DEC Edi
MOV Ecx,MemString
SUB Ecx,StartPos
ADD Ecx,2
SUB Ecx,Edx
JB l_FSFertig ; Suchtext länger als String
CLD ; clear direction flag -> DF=0
!l_FSSearch:
LODSB ; = mov al, [esi]: if DF=0 esi+1 (else esi-1)
; first char of pattern to al
REPNZ SCASB ; Al - [edi]:edi+1:ecx-1 and repeat while not null or ecx>0
; compare first char of pattern with source until it matches or ecx is counted down
JNZ l_FSFertig ; Al - [edi]<>0 but ecx=0 (end of SrcMem reached)
MOV Eax,Edi
MOV Ebx,Ecx
MOV Ecx,Edx
DEC Ecx
REPZ CMPSB ; [esi] - [edi]: (if DF=0) esi+1: edi+1: ecx-1 and repeat while null or ecx>0
JZ l_FSFound ; [esi] - [edi] was 0 and ecx is 0 -> whole pattern matched
; else ecx is 0 but [esi]-[edi] <> 0
MOV Edi,Eax
MOV Ecx,Ebx
MOV Esi,*Search
JMP l_FSSearch
!l_FSFound:
SUB Eax,*String
ProcedureReturn
!l_FSFertig:
MOV eax,0
ProcedureReturn
EndProcedure
Procedure.l QuickSearch(StartPos,*MainMem,MainLen,*FindMem,FindLen)
Structure MemoryArray
Byte.b[0]
EndStructure
Protected MainArrayLoop,EndSearchPos,*MainByteArray.MemoryArray,*FindByteArray.MemoryArray
*MainByteArray=*MainMem
*FindByteArray=*FindMem
StartPos=- 1
; Build BadChr Array
Protected Dim BadChar(255)
; set all alphabet to max shift pos (length of find string plus 1)
For i=0 To 255
BadChar(i)=FindLen+1
Next
;Update chars that are in the find string to their position from the end.
For i=0 To FindLen-1
BadChar(*FindByteArray\Byte[i])=FindLen-i
Next
MainArrayLoop=StartPos
EndSearchPos=MainLen-(FindLen-1)
While MainArrayLoop<=EndSearchPos
If CompareMemory(*MainMem+MainArrayLoop,*FindMem,FindLen)=1
FoundPos=MainArrayLoop+1
Break
EndIf
;Didn't find the string so shift as per the table.
MainArrayLoop+BadChar(*MainByteArray.MemoryArray\Byte[MainArrayLoop+FindLen])
Wend
ProcedureReturn FoundPos
EndProcedure
Procedure QuickStringSearch(StartPos,String$,StringToFind$)
CompilerIf #PB_Compiler_Unicode
ProcedureReturn QuickSearch(StartPos,@String$,StringByteLength(String$),@StringToFind$,StringByteLength(StringToFind$))/2+1
CompilerElse
ProcedureReturn QuickSearch(StartPos,@String$,Len(String$),@StringToFind$,Len(StringToFind$))
CompilerEndIf
EndProcedure
Global a.s="This is a simple string for testing several find procedures ~ the results will show, if any of the tested functions are faster than the internal FindString() function"
Global f1.s="find"
Global f2.s="Find"
Global f3.s="FIND"
Procedure test(function)
Protected s
#Loop=100000
t=-ElapsedMilliseconds()
For i=0 To #Loop
Select function
Case 1
s1=FindString(a,f1)
s1+FindString(a,f2)
s1+FindString(a,f3)
Case 2
s2=StringFind(@a,@f1,1)
s2+StringFind(@a,@f2,1)
s2+StringFind(@a,@f3,1)
Case 3
s3=QuickSearch(1,@a,Len(a),@f1,4)
s3+QuickSearch(1,@a,Len(a),@f2,4)
s3+QuickSearch(1,@a,Len(a),@f3,4)
EndSelect
Next i
ProcedureReturn t+ElapsedMilliseconds()
EndProcedure
s.s=""
For i=1 To 3
s+"Test #"+Str(i)+": "+Str(test(i))+" ms"+#CR$
Next i
MessageRequester("Results",s)
Are you sure ?Michael Vogel wrote:The assembler routine is the fastest here, quicksearch takes a little bit longer...
Regards, Little JohnLittle John wrote:A big advantage over the built-in FindString() function is, that QuickSearch() can be used for arbitrary chunks of memory, regardless whether or not they contain NULL bytes!
Code: Select all
;- FindString with SSE4.2, 32-Bit-Version. no Unicode
;- "Helle" Klaus Helbing, 28.02.2012, PB 4.60 (x86)
;- Versions for 64-Bit and Unicode exists
Procedure.l FindString_SSE42_32Bit(pStr1, pStr2, StartPos)
!mov eax,[p.v_StartPos]
!cmp eax,1
!jge @f
!mov eax,1 ;or End or Error
!@@:
!mov edx,[p.v_pStr2]
!mov ecx,[p.v_pStr1] ;push ebx!
!push ebx
!push edi
!push esi
!mov ebx,ecx
!sub ebx,1
!add ebx,eax ;eax=StartPos
!movdqu xmm0,[edx] ;first max.16 Bytes of Find$
!xor ecx,ecx
!Search:
!add eax,ecx
!add ebx,ecx
!pcmpistri xmm0,dqword[ebx],00001100b ;Least Significant Index, Positive Polarity (IntRes2=IntRes1), Equal Ordered, Unsigned Bytes (=No Unicode)
!jz Base$End ;Zero-Flag is set if found end of Base$
!js Find$End ;Sign-Flag is set if found end of Find$
!jecxz @f ;jump if ECX is zero
!jmp Search
!@@:
!movdqa xmm1,xmm0 ;save xmm0
!mov edi,ebx
!mov esi,edx
!@@:
!add esi,16
!add edi,16 ;next 16 Bytes of Base$
!movdqu xmm1,[esi] ;next 16 Bytes of Find$
!pcmpistri xmm1,dqword[edi],00001100b ;compare the strings
!jz Base$End
!js Find$End
!jecxz @b
!jmp Search
!Base$End:
!cmp ecx,16
!jne Found
!xor eax,eax
!jmp NoFound
!Find$End:
!or ecx,ecx ;ECX Zero? Conventional
!jnz Search
!Found:
!add eax,ecx
!NoFound:
!pop esi
!pop edi
!pop ebx
ProcedureReturn ;EAX
EndProcedure
Base$ = "This is a simple string for testing several find procedures ~ the results will show, if any of the tested functions are faster than the internal FindString() function"
Find$ = "find"
Pos = 1 ;Start-Position
FindSSE42 = FindString_SSE42_32Bit(@Base$, @Find$, Pos)
SSE42$ = "Found String at Position : " + Str(FindSSE42)
PB$ = "Test with PB : " + Str(FindString(Base$, Find$, Pos))
MessageRequester("FindString with SSE4.2", SSE42$ + #LFCR$ + PB$)