RFindString()

Share your advanced PureBasic knowledge/code with the community.
User avatar
Psychophanta
Always Here
Always Here
Posts: 5153
Joined: Wed Jun 11, 2003 9:33 pm
Location: Anare
Contact:

RFindString()

Post by Psychophanta »

Function similar to FindString(), but this one returns the position of the last string found (starting searching from most right side to left side)

Code: Select all

; PB function in ASM to return the location (search starting from the right side) of a given string inside another given string.

; Author: Psychophanta
; Date: 21. Oct 2003

Procedure.l RFindString(a.s,s.s)
  !;cld          ;clear DF (Direction Flag). (Normally not necessary)
  !mov edi,dword[esp]   ;load edi register with pointer to first string
  !mov esi,dword[esp+4] ;load esi register with pointer to second string (the string to search)
  !xor eax,eax  ;set eax register to NULL
  !mov ebx,eax    ;set ebx register to NULL
  !mov edx,esi  ;save this value in edx to avoid, as much as possible, to have to read data from memory in the main loop.
  !;If any of two strings is empty then end program and matches found is 0:
  !cmp byte[esi],bl  ;test if second string is empty
  !jz near .RFindStringgo  ;if yes, then end
  !.RFindStringmainloop:
  !cmp byte[edi],bl  ;check if end of first string is reached
  !jz near .RFindStringgo   ;if not reached then end
  !mov esi,edx  ;restore this
  !cmpsb  ;what this instruction do is just compare byte[edi] with byte[esi] and increment edi and esi values by 1
  !jnz near .RFindStringmainloop ;if byte[edi]=byte[esi] then goto match label, because a match byte was found...
  !;match: ;here we have got inside a second treatment: We are in a possible total match, lets see if it is a complete match or it is a fake:
  !mov ecx,edi ;save this position
  !@@:
  !cmp byte[esi],bl  ;check if end of second string is reached
  !jz near @f   ;if so, here was a complete match
  !cmpsb   ;compare one more byte
  !jz near @r   ;if equal, lets see if the deceit continues, or rather it could be a real complete match.
  !mov edi,ecx  ;ohhh! it was a deceit! Restore this
  !jmp near .RFindStringmainloop ;What a patient! lets continue searching for another possible match and why not, a possible complete match...
  !;complete match was found:
  !@@:mov eax,ecx    ;capture position
  !jmp near .RFindStringmainloop   ;lets search for another possible complete match!
  !.RFindStringgo:cmp eax,ebx ;Check if there was any
  !jz near @f  ;if not then exit and return 0
  !sub eax,dword[esp] ;some adjust
  !@@:
  ProcedureReturn
EndProcedure

;Prove it:
a$="PurePure"
MessageRequester("Should be 9",Str(RFindString(a$+a$,a$)),0)
MessageRequester("Should be 1",Str(RFindString(a$,"PurePure")),0)
MessageRequester("Should be 0",Str(RFindString(a$,"Pur e")),0)
MessageRequester("Should be 7",Str(RFindString("asdfas"+a$,"PureP")),0)
MessageRequester("Should be 12",Str(RFindString("asdfs"+a$,"r")),0)

;Can use function input parameters as normal pointers:
;Procedure.l RFindString(*a,*s)
;but then you have to call function using pointers too:
;RFindString(@a$,@"Pure")
User avatar
Psychophanta
Always Here
Always Here
Posts: 5153
Joined: Wed Jun 11, 2003 9:33 pm
Location: Anare
Contact:

Post by Psychophanta »

Updated for PB4.31

Code: Select all

; PB function in ASM to return the location (search starting from the right side) of a given string inside another given string.

; Author: Psychophanta
; Date: 21. Oct 2003

Procedure.l RFindString(a.s,s.s)
  !;cld          ;clear DF (Direction Flag). (Normally not necessary)
  !push ebx edi esi
  !mov edi,dword[p.v_a+12]   ;load edi register with pointer to first string
  !mov esi,dword[p.v_s+12] ;load esi register with pointer to second string (the string to search)
  !xor eax,eax  ;set eax register to NULL
  !mov ebx,eax    ;set ebx register to NULL
  !mov edx,esi  ;save this value in edx to avoid, as much as possible, to have to read data from memory in the main loop.
  !;If any of two strings is empty then end program and matches found is 0:
  !cmp byte[esi],bl  ;test if second string is empty
  !jz near .RFindStringgo  ;if yes, then end
  !.RFindStringmainloop:
  !cmp byte[edi],bl  ;check if end of first string is reached
  !jz near .RFindStringgo   ;if not reached then end
  !mov esi,edx  ;restore this
  !cmpsb  ;what this instruction do is just compare byte[edi] with byte[esi] and increment edi and esi values by 1
  !jnz near .RFindStringmainloop ;if byte[edi]=byte[esi] then goto match label, because a match byte was found...
  !;match: ;here we have got inside a second treatment: We are in a possible total match, lets see if it is a complete match or it is a fake:
  !mov ecx,edi ;save this position
  !@@:
  !cmp byte[esi],bl  ;check if end of second string is reached
  !jz near @f   ;if so, here was a complete match
  !cmpsb   ;compare one more byte
  !jz near @r   ;if equal, lets see if the deceit continues, or rather it could be a real complete match.
  !mov edi,ecx  ;ohhh! it was a deceit! Restore this
  !jmp near .RFindStringmainloop ;What a patient! lets continue searching for another possible match and why not, a possible complete match...
  !;complete match was found:
  !@@:mov eax,ecx    ;capture position
  !jmp near .RFindStringmainloop   ;lets search for another possible complete match!
  !.RFindStringgo:cmp eax,ebx ;Check if there was any
  !jz near @f  ;if not then exit and return 0
  !sub eax,dword[p.v_a+12] ;some adjust
  !@@:
  !pop esi edi ebx
  ProcedureReturn
EndProcedure

;Prove it:
a$="PurePure"
MessageRequester("Should be 9",Str(RFindString(a$+a$,a$)),0)
MessageRequester("Should be 1",Str(RFindString(a$,"PurePure")),0)
MessageRequester("Should be 0",Str(RFindString(a$,"Pur e")),0)
MessageRequester("Should be 7",Str(RFindString("asdfas"+a$,"PureP")),0)
MessageRequester("Should be 12",Str(RFindString("asdfs"+a$,"r")),0)

;Can use function input parameters as normal pointers:
;Procedure.l RFindString(*a,*s)
;but then you have to call function using pointers too:
;RFindString(@a$,@"Pure")
http://www.zeitgeistmovie.com

while (world==business) world+=mafia;
hss
User
User
Posts: 69
Joined: Thu Mar 08, 2007 6:02 pm

Post by hss »

w/o asm,

Code: Select all


Procedure.i RFindString2(haystack.s,needle.s)

 For x=(Len(needle)-1) To Len(haystack)

  If(Mid(haystack,(Len(haystack)-x),Len(needle))=needle)
  pos=(Len(haystack)-x):If Not(pos):pos=1:EndIf:Break
  EndIf

 Next

ProcedureReturn(pos)

EndProcedure


a$="PurePure"
MessageRequester("Should be 9",Str(RFindString2(a$+a$,a$)),0)
MessageRequester("Should be 1",Str(RFindString2(a$,"PurePure")),0)
MessageRequester("Should be 0",Str(RFindString2(a$,"Pur e")),0)
MessageRequester("Should be 7",Str(RFindString2("asdfas"+a$,"PureP")),0)
MessageRequester("Should be 12",Str(RFindString2("asdfs"+a$,"r")),0) 


AND51
Addict
Addict
Posts: 1040
Joined: Sun Oct 15, 2006 8:56 pm
Location: Germany
Contact:

Post by AND51 »

Here I sent my procedure into the competition. :)

It also searched from right to left, so the longer the string, the more effective is the procedure.

Bonus 1: My proc does not search from the Last_Prosition, but it starts from Last_Position-StringToSearch_Length!
Bonus 2: It also supports Case-In-Sensivity.

Can you do a speed test? I would be intersted in this, but I'm in a rush now...

Code: Select all

Procedure FindLastString(String$, StringToFind$, CaseInSensitive=0) 
   Protected length=Len(StringToFind$), *position=@String$+(Len(String$)-length)<<#PB_Compiler_Unicode 
   If StringToFind$ 
      While *position >= @String$ 
         If CompareMemoryString(*position, @StringToFind$, CaseInSensitive, length) = #PB_String_Equal 
            ProcedureReturn (*position-@String$)>>#PB_Compiler_Unicode+1 
         EndIf 
         *position-SizeOf(Character) 
      Wend 
   EndIf 
EndProcedure 


Debug FindLastString("The longer the String, the more effective is this procedure, because it searches strings from right to left.", "String")
Debug FindLastString("The longer the String, the more effective is this procedure, because it searches strings from right to left.", "String", #PB_String_NoCase)
PB 4.30

Code: Select all

onErrorGoto(?Fred)
tycoon
New User
New User
Posts: 8
Joined: Sun Oct 23, 2005 1:43 pm

Re: RFindString()

Post by tycoon »

Hi!

Old post, but I've collected some code about "rfindstring" and have done a little speedtest with parameters.

Code: Select all

Procedure.i RFindstring1(string$,stringtofind$,c.b=1)
  ; Author: Tycoon
  ; Date: 30. Jan 2011
  Protected a
  If c=0
    a = FindString(ReverseString(LCase(string$)),ReverseString(LCase(stringtofind$)),1)
  Else
    a = FindString(ReverseString(string$),ReverseString(stringtofind$),1)
  EndIf
  If a
    ProcedureReturn Len(string$)-a-Len(stringtofind$)+2
  Else
    ProcedureReturn 0
  EndIf
EndProcedure

Procedure FindLastString(String$, StringToFind$, CaseInSensitive=0)
  Protected length=Len(StringToFind$), *position=@String$+(Len(String$)-length)<<#PB_Compiler_Unicode
  If StringToFind$
    While *position >= @String$
      If CompareMemoryString(*position, @StringToFind$, CaseInSensitive, length) = #PB_String_Equal
        ProcedureReturn (*position-@String$)>>#PB_Compiler_Unicode+1
      EndIf
      *position-SizeOf(Character)
    Wend
  EndIf
EndProcedure 

Procedure.i RFindString2(haystack.s,needle.s)
  For x=(Len(needle)-1) To Len(haystack)
    If(Mid(haystack,(Len(haystack)-x),Len(needle))=needle)
      pos=(Len(haystack)-x):If Not(pos):pos=1:EndIf:Break
    EndIf
  Next
  ProcedureReturn(pos)
  
EndProcedure

Procedure.l RFindString3(a.s,s.s)
  !;cld          ;clear DF (Direction Flag). (Normally not necessary)
  !push ebx edi esi
  !mov edi,dword[p.v_a+12]   ;load edi register with pointer to first string
  !mov esi,dword[p.v_s+12] ;load esi register with pointer to second string (the string to search)
  !xor eax,eax  ;set eax register to NULL
  !mov ebx,eax    ;set ebx register to NULL
  !mov edx,esi  ;save this value in edx to avoid, as much as possible, to have to read data from memory in the main loop.
  !;If any of two strings is empty then end program and matches found is 0:
  !cmp byte[esi],bl  ;test if second string is empty
  !jz near .RFindStringgo  ;if yes, then end
  !.RFindStringmainloop:
  !cmp byte[edi],bl  ;check if end of first string is reached
  !jz near .RFindStringgo   ;if not reached then end
  !mov esi,edx  ;restore this
  !cmpsb  ;what this instruction do is just compare byte[edi] with byte[esi] and increment edi and esi values by 1
  !jnz near .RFindStringmainloop ;if byte[edi]=byte[esi] then goto match label, because a match byte was found...
  !;match: ;here we have got inside a second treatment: We are in a possible total match, lets see if it is a complete match or it is a fake:
  !mov ecx,edi ;save this position
  !@@:
  !cmp byte[esi],bl  ;check if end of second string is reached
  !jz near @f   ;if so, here was a complete match
  !cmpsb   ;compare one more byte
  !jz near @r   ;if equal, lets see if the deceit continues, or rather it could be a real complete match.
  !mov edi,ecx  ;ohhh! it was a deceit! Restore this
  !jmp near .RFindStringmainloop ;What a patient! lets continue searching for another possible match and why not, a possible complete match...
  !;complete match was found:
  !@@:mov eax,ecx    ;capture position
  !jmp near .RFindStringmainloop   ;lets search for another possible complete match!
  !.RFindStringgo:cmp eax,ebx ;Check if there was any
  !jz near @f  ;if not then exit and return 0
  !sub eax,dword[p.v_a+12] ;some adjust
  !@@:
  !pop esi edi ebx
  ProcedureReturn
EndProcedure

Procedure.l RFindString5(s.s,f.s,p.l)
  p=p-Len(f)+1
  While p>=0
    If PeekS(@s+p,Len(f))=f
      ProcedureReturn p+1
    EndIf
    p-1
  Wend   
  ProcedureReturn 0
EndProcedure

Procedure RFindString6(string$,match$)
  pos=1 : Repeat : a=FindString(string$,match$,pos) : If a<>0 : pos=a+1 : EndIf : Until a=0
  ProcedureReturn pos-1
EndProcedure

; generate search list
m         =1000 ; Max Stringlenght
a         =100  ; List rows
max_len   =100  ; Searchstring length
loops     =10   ; Overall searches

Structure _l
  string$
  tofind$
EndStructure


NewList s._l() ; Searchstrings

For r = 1 To a
  AddElement(s())
  For rr=1 To m/2
    s()\string$+Chr(Random(122-32)+32) ; Fill struct with random chars
  Next rr
  s()\string$+s()\string$+s()\string$+s()\string$ ; Duplicate 4 times
  l=Random(m/2-max_len)+max_len ; at least max_len Chars
  s()\tofind$=Mid(s()\string$,m/2+Random(m/2-l),l)
Next r

; ----------------------------------------------------------------

StartTime = ElapsedMilliseconds()   

For r= 1 To loops
  ForEach s()
    a=RFindstring1(s()\string$,s()\tofind$,1)
  Next
Next r
ElapsedTime = ElapsedMilliseconds()-StartTime

a$= "Time: 1 Reverse Case sensitive > "+Str(ElapsedTime)+" ms"+#LF$

StartTime = ElapsedMilliseconds()   

For r= 1 To loops
  ForEach s()
    a=RFindstring1(s()\string$,s()\tofind$)
  Next
Next r
ElapsedTime = ElapsedMilliseconds()-StartTime

a$+ "Time: 1 Reverse Case Insensitive > "+Str(ElapsedTime)+" ms"+#LF$


StartTime = ElapsedMilliseconds()   
For r= 1 To loops
  ForEach s()
    a=RFindstring3(s()\string$,s()\tofind$)
  Next
Next r
ElapsedTime = ElapsedMilliseconds()-StartTime

a$+ "Time: 4 ASM   > "+Str(ElapsedTime)+" ms"+#LF$

StartTime = ElapsedMilliseconds()   
For r= 1 To 1000
  ForEach s()
    a=FindLastString(s()\string$,s()\tofind$)
  Next
Next r
ElapsedTime = ElapsedMilliseconds()-StartTime

a$+ "Time: 3 FindLastString  Insensitive > "+Str(ElapsedTime)+" ms"+#LF$

StartTime = ElapsedMilliseconds()   
For r= 1 To loops
  ForEach s()
    a=FindLastString(s()\string$,s()\tofind$,1)
  Next
Next r
ElapsedTime = ElapsedMilliseconds()-StartTime

a$+ "Time: 3 FindLastString  Sensitive > "+Str(ElapsedTime)+" ms"+#LF$
StartTime = ElapsedMilliseconds()   
For r= 1 To 1000
  ForEach s()
    a=RFindString5(s()\string$,s()\tofind$,1)
  Next
Next r
ElapsedTime = ElapsedMilliseconds()-StartTime

a$+ "Time: 5 RFindString5 > "+Str(ElapsedTime)+" ms"+#LF$


StartTime = ElapsedMilliseconds()   
For r= 1 To 1000
  ForEach s()
    a=RFindString6(s()\string$,s()\tofind$)
  Next
Next r
ElapsedTime = ElapsedMilliseconds()-StartTime

a$+ "Time: 6 RFindString6 > "+Str(ElapsedTime)+" ms"+#LF$

StartTime = ElapsedMilliseconds()   
For r= 1 To loops
  ForEach s()
    a=RFindstring2(s()\string$,s()\tofind$)
  Next
Next r
ElapsedTime = ElapsedMilliseconds()-StartTime

a$+ "Time: 2 LOOP   > "+Str(ElapsedTime)+" ms"+#LF$
MessageRequester("Result",a$)
Last edited by tycoon on Sun Jan 30, 2011 8:22 pm, edited 1 time in total.
Trond
Always Here
Always Here
Posts: 7446
Joined: Mon Sep 22, 2003 6:45 pm
Location: Norway

Re: RFindString()

Post by Trond »

You can't do speed testing with the debugger on, the results will be completely unreliable.
tycoon
New User
New User
Posts: 8
Joined: Sun Oct 23, 2005 1:43 pm

Re: RFindString()

Post by tycoon »

No difference at all, but changed the code to "no debugger".
This test is only for one system and not against another computer. So all functions run in the same environment and a direkt compare is possible even with the debugger.

The test should only compare the routines on a low level to get an impression.

Time: 1 Reverse Case sensitive > 15 ms
Time: 1 Reverse Case Insensitive > 16 ms
Time: 4 ASM > 15 ms
Time: 3 FindLastString Insensitive > 3766 ms
Time: 3 FindLastString Sensitive > 16 ms
Time: 5 RFindString5 > 531 ms
Time: 6 RFindString6 > 2312 ms
Time: 2 LOOP > 813 ms
Post Reply