Wildcard String Compare

Share your advanced PureBasic knowledge/code with the community.
Johan_Haegg
User
User
Posts: 60
Joined: Wed Apr 30, 2003 2:25 pm
Location: Västerås
Contact:

Wildcard String Compare

Post by Johan_Haegg »

This will allow you to compare a wildcard pattern with a string
Remember to use a * at the end of the pattern or it will not know what to do with the last word.
I am amazed that it worked so quickly, got lucky with the "fulhacks" i guess :>

[EDIT]
ooops, found a bug

Code: Select all

Procedure WCString(pattern.s, string.s, sens.b)
  spos = 0
  If sens.b = 0
    pattern.s = UCase(pattern.s)
    string.s = UCase(string.s)
  EndIf

  If Left(pattern.s, 1) = "*"
    pos = FindString(pattern.s, "*", 2)
    Debug Mid(pattern.s, 2, pos - 2) 
    spos = FindString(string, Mid(pattern.s, 2, pos - 2), spos)
    If spos = 0
      ProcedureReturn 0
    EndIf
  Else
    pos = FindString(pattern.s, "*", 1) - 1
    If Left(string.s, pos) = Left(pattern.s, pos)
      spos = pos + 1
      pos + 1
    EndIf
    If spos = 0
      ProcedureReturn 0
    EndIf
  EndIf
  Repeat
    opos = pos + 1
    pos = FindString(pattern.s, "*", opos)
    If pos = 0
      Break
    EndIf
    spos = FindString(string, Mid(pattern.s, opos, pos-opos), spos)
    If spos = 0
      ProcedureReturn 0
    EndIf
  ForEver
ProcedureReturn 1
EndProcedure
PB
PureBasic Expert
PureBasic Expert
Posts: 7581
Joined: Fri Apr 25, 2003 5:24 pm

Re: Wildcard String Compare

Post by PB »

> This will allow you to compare a wildcard pattern with a string

Can you please post an example of how to use it? Thanks!
Johan_Haegg
User
User
Posts: 60
Joined: Wed Apr 30, 2003 2:25 pm
Location: Västerås
Contact:

Post by Johan_Haegg »

WCString("*like*cookie*", "I like those cookies", 0) <- 1
WCString("*like*cookie*", "I Like those cookies", 1) <- 0
WCString("like*cookie*", "I like those cookies", 0) <- 0
WCString("like*cookie*", "like those cookies", 0) <- 1
User avatar
blueznl
PureBasic Expert
PureBasic Expert
Posts: 6161
Joined: Sat May 17, 2003 11:31 am
Contact:

Post by blueznl »

slightly more complex...

Code: Select all

Procedure.l x_matchpattern(string.s,pattern.s,ulcase.l)
  Protected s_l, s_p, m_l, m_p, nomatch.l, match.l, ff.l, m.s
  ;
  ; *** check if string matches pattern
  ;
  ; in:     string.s   - string to check
  ;         pattern.s  - pattern including wildcards * and ? and multiple patterns seperated by |
  ;         ulcase.l   - 0 check case 1 don't care about case
  ; retval: 0          - no match
  ;         1          - match
  ;
  If ulcase.l = 1
    string.s = LCase(string.s)
    pattern.s = LCase(pattern.s)
  EndIf
  ;
  s_l = Len(string)
  s_p = 1
  m_l = Len(pattern)
  m_p = 0
  ;
  nomatch.l = #False
  match.l = #False
  ff.l = #False
  While m_p < m_l And match = #False
    m_p = m_p+1
    m.s = Mid(pattern,m_p,1)
    If m = "*"
      If m_p = m_l
        match = #True
      Else
        ff= #True
      EndIf
    EndIf
    If m = "|"
      If s_p = s_l+1
        match = #True
      EndIf
    ElseIf m = "?"
      If s_p <= s_l
        s_p = s_p+1
      Else
        nomatch = #True
      EndIf
    ElseIf m <> "*"
      If ff = #True
        ff = #False
        Repeat
          s_p = s_p+1
        Until s_p > s_l Or m=Mid(string,s_p,1)
        If m = Mid(string,s_p,1)
          s_p = s_p+1
        Else
          nomatch = #True
        EndIf
      Else
        If m = Mid(string,s_p,1)
          s_p = s_p+1
        Else
          nomatch = #True
        EndIf
      EndIf
    EndIf
    If nomatch = #False And s_p = s_l+1 And m_p = m_l
      match = #True
    EndIf
    If nomatch = #True
      m_p = FindString(pattern,"|",m_p+1)
      If m_p = 0
        m_p = m_l
      Else
        nomatch = #False
        ff = #False
        s_p = 1
      EndIf
    EndIf
  Wend
  ;
  ProcedureReturn match
EndProcedure
( PB6.00 LTS Win11 x64 Asrock AB350 Pro4 Ryzen 5 3600 32GB GTX1060 6GB)
( The path to enlightenment and the PureBasic Survival Guide right here... )
chen
Enthusiast
Enthusiast
Posts: 338
Joined: Fri Dec 23, 2005 2:20 pm
Location: Quebec, Canada
Contact:

Post by chen »

blueznl;

I saw your code "compare a wildcard pattern with a string" a litle later, because I almost finished mine....

It works ok when I run it alone..... but when I put it as a part of my
application sometimes return 0, when it must be 1...
I use the same data to test it inside and outside the application....

Hope you or some of the many experts here can detect something
I can't.

Modified on feb 03, 2006

Code: Select all

Procedure ToLowerCase(*pt)
  Repeat
    char1.l = Asc(LCase(Chr(PeekB(*pt))))
    PokeB( *pt,char1 )
    *pt = *pt + 1
  Until PeekB(*pt) = 0
EndProcedure

Procedure.l StringRegExpMatch(*string1, *string2 , caseSensitive.l)

match.l = -1
matchChar.l = 0
starAsc = Asc("*")
questionAsc = Asc("?")

If caseSensitive = 0                      ; if 0 convert to lower case 
  ToLowerCase(*string1)
  ToLowerCase(*string2)
EndIf 

Repeat
  
  ; the first "If" tries to find the first valid char of string2 in string1. 
  ; pointers are moved and left them where the match happens
  
  If  PeekB(*string2) = starAsc And matchChar = 0 
    *string2 = *string2 + 1 
    Repeat   
      While PeekB(*string2) = questionAsc ; if a ?'s exist after the beginning *: move both pointers
        *string1 = *string1 + 1
        *string2 = *string2 + 1
      Wend                 

        If PeekB(*string1) = PeekB(*string2)     ;and look where that char is in string1
          matchChar = 1
          match = 1 
        Else
          *string1 = *string1 + 1              
          match = 0 
        EndIf 
      Until PeekB(*string1) = 0  Or matchChar = 1
    EndIf   
    
    
    If PeekB(*string1) = PeekB(*string2)
      match = 1
    Else
      If PeekB(*string2) = questionAsc
        match = 1
      ElseIf PeekB(*string2) = starAsc
        match = 1
        ProcedureReturn 1
      Else
        match = 0
        ProcedureReturn 0
      EndIf 
    EndIf 
    *string1 = *string1 + 1
    *string2 = *string2 + 1
    
    ; these two If's are executed if no ? or *: means-> string must be same size
    If PeekB(*string1) = 0 And PeekB(*string2) <> 0: ProcedureReturn 0: EndIf
    If PeekB(*string1) <> 0 And PeekB(*string2) = 0: ProcedureReturn 0: EndIf
    
  Until PeekB(*string1) = 0 Or PeekB(*string2) = 0
  
  ProcedureReturn match
EndProcedure

Test data

a.s = "<TITLE>COntact Us</TITLE>" 
b.s = "*?????????nt??t Us</TI????*"
c.s = "Strings Match"

res.l = StringRegExpMatch(@a, @b, 1)

If res
  b=a
  new$ = ReplaceString(a,b,c)
EndIf 

Debug res
Debug new$

User avatar
blueznl
PureBasic Expert
PureBasic Expert
Posts: 6161
Joined: Sat May 17, 2003 11:31 am
Contact:

Post by blueznl »

chen, you probably have an older version, here's my last one...

Code: Select all

Procedure.l x_matchpattern(string.s,pattern.s,ulcase.l)              ; check if a string matches one or more patterns  (wildcards *? separator |)
  ;
  ; *** check if string matches one or more patters using * and ? as wildcards
  ;
  ; in:     string.s                             - string to check
  ;         pattern.s                            - pattern including wildcards * and ? and multiple patterns seperated by |
  ;         ulcase.l    = 0 (#x_parse_matchcase) - check case
  ;                     = 1 (#x_parse_anycase)   - don't care about case
  ; retval: 0 or #False                          - no match
  ;         1 or #True                           - match
  ;
  ; notes:
  ;
  ; - ? must be filled by a character, this is different in windows / dos where it can be empty
  ; - note that an 'empty' pattern will matches an empty original
  ; - rewrite number three, and it's still horrible
  ;
  If ulcase = 1
    pattern = LCase(pattern)
    string = LCase(string)
  EndIf
  ;
  p_p = 0
  p_l = Len(pattern)
  s_l = Len(string)
  ;
  If s_l = 0                                            ; 2265 handle emtpy strings
    If p_l = 0
      match = #True
    ElseIf FindString(pattern,"||",1) > 0
      match = #True
    ElseIf Right(pattern,1) = "|"
      match = #True
    Else
      match = #False
    EndIf
  Else
    Repeat
      s_p = 1
      ;
      Repeat
        ff_max = 0
        ff_min = 0
        p_sub.s = ""
        p_char.s = ""
        done = #False
        match = #False
        Repeat
          p_p = p_p+1
          p_char = Mid(pattern,p_p,1)
          If p_char = "*"
            ff_max = s_l
          ElseIf p_char = "?"
            ff_min = ff_min+1
            ff_max = x_max(ff_max,ff_min)
          ElseIf p_char = "|"
            done = #True
          Else
            p_sub = p_sub+p_char
            Repeat
              p_char = Mid(pattern,p_p+1,1)
              If p_char = "*" Or p_char = "?"
                done = #True
              ElseIf p_char = "|"
                done = #True
                ; p_p = p_p+1                               ; 2265 small quickfix... hope it works
              Else
                p_sub = p_sub+p_char
                If p_p < p_l
                  p_p = p_p+1
                Else
                  done = #True
                EndIf
              EndIf
            Until done = #True
          EndIf
          If p_p >= p_l
            done = #True
          EndIf
        Until done = #True
        ;
        If p_sub = ""
          If s_l-s_p+1 <= ff_max And s_l-s_p+1 >= ff_min
            match = #True
          Else
          EndIf
          s_p = s_l+1
        Else
          x = FindString(string,p_sub,s_p)
          If x > 0
            If x-s_p > ff_max
            ElseIf x-s_p < ff_min
            Else
              match = #True
            EndIf
            s_p = x+Len(p_sub)
          Else
            s_p = s_l+1
          EndIf
        EndIf
      Until match = #False Or s_p = s_l+1
      ;
      If p_p < p_l
        If match = false
          While p_p < p_l And p_char <> "|"
            p_p = p_p+1
            p_char = Mid(pattern,p_p,1)
          Wend
        Else
          While p_p < p_l And p_char <> "|"
            p_p = p_p+1
            p_char = Mid(pattern,p_p,1)
            If p_char = "|"
            ElseIf p_char <> "*"
              match = #False
            EndIf
          Wend
        EndIf
      EndIf
    Until match = #True Or p_p >= p_l
  EndIf
  ;
  ProcedureReturn match
EndProcedure
( PB6.00 LTS Win11 x64 Asrock AB350 Pro4 Ryzen 5 3600 32GB GTX1060 6GB)
( The path to enlightenment and the PureBasic Survival Guide right here... )
chen
Enthusiast
Enthusiast
Posts: 338
Joined: Fri Dec 23, 2005 2:20 pm
Location: Quebec, Canada
Contact:

Post by chen »

thanks for your reply...

the old version works OK...

the new fail here.... looking for this function

ff_min = ff_min+1
ff_max = x_max(ff_max,ff_min)
ok I built a simple funtion than return the max

Code: Select all

Procedure.l x_max(first.l,second.l)
max = 0
If first >= Second 
   max = first 
 Else
   max = second 
 EndIf  
 ProcedureReturn max
EndProcedure
if dont know if your version is taken care on something else...
chen
Enthusiast
Enthusiast
Posts: 338
Joined: Fri Dec 23, 2005 2:20 pm
Location: Quebec, Canada
Contact:

Post by chen »

ok....

I did some tests...

the old version failed
the new is ok

data

a.s = " position: relative;"
b.s = " *ion: *;"
c.s = "Strings Match"

res.l = x_matchpattern(a, b, 0)

If res
b=a
new$ = ReplaceString(a,b,c)
EndIf

Debug res
Debug new$
User avatar
blueznl
PureBasic Expert
PureBasic Expert
Posts: 6161
Joined: Sat May 17, 2003 11:31 am
Contact:

Post by blueznl »

chen wrote:thanks for your reply...

the old version works OK...

the new fail here.... looking for this function

ff_min = ff_min+1
ff_max = x_max(ff_max,ff_min)
ok I built a simple funtion than return the max

Code: Select all

Procedure.l x_max(first.l,second.l)
max = 0
If first >= Second 
   max = first 
 Else
   max = second 
 EndIf  
 ProcedureReturn max
EndProcedure
if dont know if your version is taken care on something else...
no that's fine

i'll have to release a new version of my x_lib.pb include file on of these days...
( PB6.00 LTS Win11 x64 Asrock AB350 Pro4 Ryzen 5 3600 32GB GTX1060 6GB)
( The path to enlightenment and the PureBasic Survival Guide right here... )
akj
Enthusiast
Enthusiast
Posts: 665
Joined: Mon Jun 09, 2003 10:08 pm
Location: Nottingham

Post by akj »

I decided to write my own pattern matching routine as I could not understand the logic of the published ones.

Like blueznl's routine it supports "*" and "?" and "|" and with "?" matching exactly one character.

It is written as two procedures: QueryMatch() which handles "?" and Match() which handles "*" and "|".
However, as Match() calls QueryMatch() only once, the two procedures can theoretically be combined into one, though I don't think this would be an easy task.

Code: Select all

; Pattern Matching  AKJ  26-Mar-06

Procedure.l QueryMatch(string$, pattern$, startposn=1)
; Determine whether pattern$ appears within string$ from startposn onwards
; If pattern$ is within string$, return the position within string$ of the first
;   byte just beyond the match [in readiness for the next call to QueryMatch()]
; If pattern$ is not within string$, return 0
; Pattern$ may contain the wildcard "?" that matches a single character in string$
; No other wildcards are recognised in pattern$
; All comparisons are case sensitive
Protected s$ ; A substring of string$
Protected s ; Position within string$ and s$
Protected p ; Position within pattern$
Protected c$ ; Character within pattern$
Protected ls, lp ; Length of string$ and pattern$
ls=Len(string$): lp=Len(pattern$)
If startposn<1: startposn=1: EndIf ; Sanity check
If lp=0: ProcedureReturn startposn: EndIf ; Special case
; Get the first non-wildcard character within pattern$ and it's position
c$=Left(RemoveString(pattern$,"?"),1): p=FindString(pattern$, c$, 1)
If p=0 ; If pattern contains only wildcards
  startposn+lp
  If startposn<=ls+1: ProcedureReturn startposn: Else: ProcedureReturn 0: EndIf
EndIf
Repeat
  ; Get the first position of string$ that might match pattern$ 
  s=FindString(string$, c$, startposn+p-1)
  If s=0: ProcedureReturn 0: EndIf
  s-p+1: startposn=s+1 ; Set new start position for case of no match
  ; Extract in s$ the portion of string$ that might match pattern$
  s$=Mid(string$, s, lp)
  ; Compare s$ with pattern$, looking for a match
  For p=1 To lp
    c$=Mid(pattern$, p, 1)
    If c$<>"?" And c$<>Mid(s$, p, 1): Break: EndIf
  Next p
Until p>lp ; If no match, look for one later in string$
ProcedureReturn s+lp ; Match.  Return start position
EndProcedure

Procedure.l Match(string$, pattern$, casesensitive=#False)
; Returns True if pattern$ appears within string$, otherwise False
; Pattern$ may contain wildcards of "*" (match >=0 chars) and/or "?" (match 1 char)
; Pattern$ may contain "|" meaning 'or'
; The last parameter determines whether the matching will be case sensitive
Protected s ; Position within string$
Protected w$ ; Wildcard string that may contain "?" or "*" but not "|"
Protected w ; Index to wildcard strings in pattern$
Protected q$ ; Query string that may contain "?" but not "*"
Protected q ; Index to query strings in w$
; Try to simplify the pattern
While FindString(pattern$, "**", 1)
  pattern$=ReplaceString(pattern$, "**", "*")
Wend
While FindString(pattern$, "||", 1)
  pattern$=ReplaceString(pattern$, "||", "|")
Wend
If Left(pattern$,1)="|": pattern$=Mid(pattern$, 2, 9999): EndIf
If Right(pattern$,1)="|": pattern$=Left(pattern$, Len(pattern$)-1): EndIf
; Check for string case sensitivity
If Not casesensitive
  string$ = LCase(string$): pattern$ = LCase(pattern$)
EndIf
; Loop for each wildcard string
string$="|"+string$+"|" ; Add start/end anchorages
For w=1 To CountString(pattern$, "|")+1
  w$="|"+StringField(pattern$, w, "|")+"|"  ; Add matching anchorages
  s=1 ; Position at start of string$
  ; Loop for each query string
  For q=1 To CountString(w$, "*")+1
    q$=StringField(w$, q, "*")
    s=QueryMatch(string$, q$, s)
    If s=0: Break: EndIf ; Unmatched so far
  Next q
  If s: ProcedureReturn #True: EndIf ; Match
Next w
ProcedureReturn #False ; No match
EndProcedure

; Examples
Define text$="abracadabra"
Debug "01  "+Str(Match(text$, "*????*"))
Debug "02  "+Str(Match(text$, "*DAB*"))
Debug "03  "+Str(Match(text$, "*DAB*", #True)) ; Case sensitive
Debug "04  "+Str(Match(text$, "ab|*xyz*|abra?|bra"))
Debug "05  "+Str(Match(text$, "???????????"))
Debug "06  "+Str(Match(text$, "???????"))
Debug "07  "+Str(Match(text$, "*"))
Debug "08  "+Str(Match(text$, "|"))
Debug "09  "+Str(Match(text$, "*a*a*a*a*a"))
Debug "10  "+Str(Match(" position: relative;", " *ion: *;"))
Debug "11  "+Str(Match("", ""))

End
Anthony Jordan
Dummy
Enthusiast
Enthusiast
Posts: 162
Joined: Wed Jun 09, 2004 11:10 am
Location: Germany
Contact:

Post by Dummy »

Hey BlueZNL! You have a small mistake in your code ;)

Code: Select all

      Until match = #False Or s_p = s_l+1
      ;
      If p_p < p_l
        If match = false
          While p_p < p_l And p_char <> "|"
            p_p = p_p+1
            p_char = Mid(pattern,p_p,1) 
should be

Code: Select all

      Until match = #False Or s_p = s_l+1
      ;
      If p_p < p_l
        If match = #False
          While p_p < p_l And p_char <> "|"
            p_p = p_p+1
            p_char = Mid(pattern,p_p,1) 
as the variable false is not defined in most codes it won't make a real difference but it should be fixed anyways ;)

here's your full code with that correction and the call to x_max replaced with some proper code:

Code: Select all

Procedure.l x_matchpattern(string.s,pattern.s,ulcase.l)              ; check if a string matches one or more patterns  (wildcards *? separator |)
  ;
  ; *** check if string matches one or more patters using * and ? as wildcards
  ;
  ; in:     string.s                             - string to check
  ;         pattern.s                            - pattern including wildcards * and ? and multiple patterns seperated by |
  ;         ulcase.l    = 0 (#x_parse_matchcase) - check case
  ;                     = 1 (#x_parse_anycase)   - don't care about case
  ; retval: 0 or #False                          - no match
  ;         1 or #True                           - match
  ;
  ; notes:
  ;
  ; - ? must be filled by a character, this is different in windows / dos where it can be empty
  ; - note that an 'empty' pattern will matches an empty original
  ; - rewrite number three, and it's still horrible
  ;
  If ulcase = 1
    pattern = LCase(pattern)
    string = LCase(string)
  EndIf
  ;
  p_p = 0
  p_l = Len(pattern)
  s_l = Len(string)
  ;
  If s_l = 0                                            ; 2265 handle emtpy strings
    If p_l = 0
      match = #True
    ElseIf FindString(pattern,"||",1) > 0
      match = #True
    ElseIf Right(pattern,1) = "|"
      match = #True
    Else
      match = #False
    EndIf
  Else
    Repeat
      s_p = 1
      ;
      Repeat
        ff_max = 0
        ff_min = 0
        p_sub.s = ""
        p_char.s = ""
        done = #False
        match = #False
        Repeat
          p_p = p_p+1
          p_char = Mid(pattern,p_p,1)
          If p_char = "*"
            ff_max = s_l
          ElseIf p_char = "?"
            ff_min = ff_min+1
            If ff_min > ff_max
              ff_max = ff_min
            EndIf
          ElseIf p_char = "|"
            done = #True
          Else
            p_sub = p_sub+p_char
            Repeat
              p_char = Mid(pattern,p_p+1,1)
              If p_char = "*" Or p_char = "?"
                done = #True
              ElseIf p_char = "|"
                done = #True
                ; p_p = p_p+1                               ; 2265 small quickfix... hope it works
              Else
                p_sub = p_sub+p_char
                If p_p < p_l
                  p_p = p_p+1
                Else
                  done = #True
                EndIf
              EndIf
            Until done = #True
          EndIf
          If p_p >= p_l
            done = #True
          EndIf
        Until done = #True
        ;
        If p_sub = ""
          If s_l-s_p+1 <= ff_max And s_l-s_p+1 >= ff_min
            match = #True
          Else
          EndIf
          s_p = s_l+1
        Else
          x = FindString(string,p_sub,s_p)
          If x > 0
            If x-s_p > ff_max
            ElseIf x-s_p < ff_min
            Else
              match = #True
            EndIf
            s_p = x+Len(p_sub)
          Else
            s_p = s_l+1
          EndIf
        EndIf
      Until match = #False Or s_p = s_l+1
      ;
      If p_p < p_l
        If match = #False
          While p_p < p_l And p_char <> "|"
            p_p = p_p+1
            p_char = Mid(pattern,p_p,1)
          Wend
        Else
          While p_p < p_l And p_char <> "|"
            p_p = p_p+1
            p_char = Mid(pattern,p_p,1)
            If p_char = "|"
            ElseIf p_char <> "*"
              match = #False
            EndIf
          Wend
        EndIf
      EndIf
    Until match = #True Or p_p >= p_l
  EndIf
  ;
  ProcedureReturn match
EndProcedure 
Dummy
Enthusiast
Enthusiast
Posts: 162
Joined: Wed Jun 09, 2004 11:10 am
Location: Germany
Contact:

Post by Dummy »

I wrote a new wildcard checking engine... It should perform better the others in ths thread...

So here are the current sources:

Code: Select all

; Author: DaDummy

Structure wc_new1_data ; 2008-12-28
  ; attention: this structure is shared across all recursive calls - marked content musn't be invalidated 
  starCount.l ; preserve
  s_pos.l
  p_pos.l
  s.s         ; preserve
  p.s         ; preserve
EndStructure

Procedure   _WildcardCompare_new1_recursiveSearch(*vars.wc_new1_data) ; 2008-11-12
  Protected *s_c.character, *p_c.character, tmp.l
  
  ; restore last saved position
  With *vars
    *s_c = \s_pos
    *p_c = \p_pos
  EndWith
  
  While *p_c\c = '*' Or *p_c\c = '?' ; move till next non-wildcard char
    If *p_c\c = '?' ; handle questionmarks
      If *s_c\c = 0 ; there should be another char but there is none
        ProcedureReturn #False
      EndIf
      
      *s_c + SizeOf(character)
    EndIf
    *p_c + SizeOf(character)
  Wend
  
  If *p_c\c = 0 ; no regular character after the last star and string did match so far
    ProcedureReturn #True
  EndIf
  
  *vars\p_pos = *p_c
  
  If *s_c\c = 0 ; string finished and there are characters left in the pattern
    ProcedureReturn #False
  EndIf
  
  Repeat
    While *p_c\c <> *s_c\c And *s_c\c ; find candidates for next part
      *s_c + SizeOf(character)
    Wend
    
    If *s_c\c = 0 ; string finished and there are characters left in the pattern
      ProcedureReturn #False
    EndIf
    
    ; save a loop as the current character was checked in the last loop
    *p_c + SizeOf(character)
    *s_c + SizeOf(character)
    
    ; save current position
    With *vars
      \p_pos = *p_c
      \s_pos = *s_c
    EndWith
    
    While (*p_c\c = *s_c\c Or *p_c\c = '?') And *s_c\c
      *p_c + SizeOf(character)
      *s_c + SizeOf(character)
    Wend
    
    Select *p_c\c
      Case 0
        If *s_c\c = 0 ; string and pattern did match
          ProcedureReturn #True
        Else ; pattern is done, string is not
          ProcedureReturn #False
        EndIf
        
      Case '*'
        ; save current position and restore last saved position
        With *vars
          ; Swap \p_pos, *p_c
          tmp    = \p_pos
          \p_pos = *p_c
          *p_c   = tmp
          
          ; Swap \s_pos, *s_c
          tmp    = \s_pos
          \s_pos = *s_c
          *s_c   = tmp
        EndWith
        
        ; next recursion (one per star)
        If _WildcardCompare_new1_recursiveSearch(*vars)
          ProcedureReturn #True
        EndIf
        
      Default
        If *s_c\c = 0 ; string is done, pattern is not
          ProcedureReturn #False
        Else
          ; restore last saved position
          With *vars
            *p_c = \p_pos
            *s_c = \s_pos
          EndWith
        EndIf
    EndSelect
  ForEver
EndProcedure

Procedure   WildcardCompare_new1(string.s, pattern.s) ; 2008-12-28
  ; remarks:
  ; - pattern should not contain multiple stars in a row to optimze performance
  ; - string is not allowed to contain stars
  ; - empty string only matches empty string
  ; - '?' must to be filled with a character
  ; - attention: 'vars' is shared across all recursive calls - marked content musn't be invalidated
  
  Protected vars.wc_new1_data
  Protected *s_c.character, *p_c.character
  
  ; stack is zeroed by pb so the following lines are commented
  ; vars\starCount = 0
  
  With vars
    \s = string
    \p = pattern
  EndWith
  
  *s_c = @vars\s
  *p_c = @vars\p
  
  While (vars\starCount Or *p_c\c = *s_c\c Or *p_c\c = '?' Or *p_c\c = '*') And *p_c\c And *s_c\c
    Select *p_c\c
      Case '*' ; found first wildcard && and strings match so far
        If vars\starCount = 0 ; if this is the first star
          ; save current position
          With vars
            \p_pos = *p_c
            \s_pos = *s_c
          EndWith 
        EndIf
        
        vars\starCount + 1
    EndSelect
    
    *p_c + SizeOf(character)
    *s_c + SizeOf(character)
  Wend
  
  Select vars\starCount
    Case 0 ; pattern does not contain any stars
      If *p_c\c = 0 And *s_c\c = 0 ; string = pattern
        ProcedureReturn #True
      Else ; len(string) < len(pattern) || len(string) > len(pattern) || strinng <> pattern
        ProcedureReturn #False
      EndIf
      
    Case 1 ; special treatment for patterns with only 1 star
      If *p_c\c
        *p_c + SizeOf(character)
        
        If *p_c\c ; len(string) < (len(pattern) - starCount)
          ProcedureReturn #False
        EndIf
      EndIf
      
      While *s_c\c
        *s_c + SizeOf(character)
      Wend
      
      ; save a loop as both are 0 right now
      *p_c - SizeOf(character)
      *s_c - SizeOf(character)
      
      While (*p_c\c = *s_c\c Or *p_c\c = '?') ; there is a star so this will return before the beginning of the string
        *p_c - SizeOf(character)
        *s_c - SizeOf(character)
      Wend
      
      If *p_c\c = '*' ; everything around the star does match
        ProcedureReturn #True
      Else
        ProcedureReturn #False
      EndIf
    Default ; more than 1 star - do recursive search
      ProcedureReturn _WildcardCompare_new1_recursiveSearch(vars)
  EndSelect
EndProcedure

Procedure.s WildcardCompare_new1_optimizePattern(pattern.s) ; 2008-11-12
  Protected retVal.s
  Protected *p_c.character, *r_c.character
  
  *p_c = @pattern
  
  While *p_c\c
    If *p_c\c = '*' And Right(retVal, 1) <> "*"
      retVal + Chr(*p_c\c)
    ElseIf *p_c\c <> '*'
      retVal + Chr(*p_c\c)
    EndIf
    
    *p_c + SizeOf(character)
  Wend
  
  ProcedureReturn retVal
EndProcedure

Procedure   WildcardCompare_new1_checkString(string.s) ; 2008-11-12
  If FindString(string, "*", 1) Or FindString(string, "?", 1)
    ProcedureReturn #False
  Else
    ProcedureReturn #True
  EndIf
EndProcedure

Procedure   WildcardCompare_new1_multiplePatterns(string.s, patterns.s) ; 2008-11-12
  Protected i.l, count.l
  
  count = CountString(patterns, "|")
  
  For i = 1 To count + 1
    If WildcardCompare_new1(string, StringField(patterns, i, "|"))
      ProcedureReturn #True
    EndIf
  Next
  
  ProcedureReturn #False
EndProcedure
2008-12-28:
-Fixed a buffer overflow that could happen in rare cases
-Removed some useless lines
Last edited by Dummy on Fri Nov 28, 2008 2:20 pm, edited 6 times in total.
rsts
Addict
Addict
Posts: 2736
Joined: Wed Aug 24, 2005 8:39 am
Location: Southwest OH - USA

Post by rsts »

thanks. I had used the other one.

will test this one as i have some more time available.

cheers
Dummy
Enthusiast
Enthusiast
Posts: 162
Joined: Wed Jun 09, 2004 11:10 am
Location: Germany
Contact:

Post by Dummy »

I ran some tests to compare speed and find the fastest function...

The one by Johan_Haegg seems to be the fastest but I didn't take it into account as it doesn't support the '?'-symbol.

My testing code and files:
http://www.dashtec.net/wildcards.rar

And my results:

Code: Select all

*/tp-downloads/*/.htaccess: blueznl: 1638ms akj: 1591ms dummy: 405ms 
*avatars: blueznl: 874ms akj: 1201ms dummy: 359ms 
*avatars*: blueznl: 842ms akj: 1264ms dummy: 359ms 
/srv: blueznl: 811ms akj: 1108ms dummy: 312ms 
/srv/www/bfl*: blueznl: 1248ms akj: 1606ms dummy: 344ms 
/srv/*/bfl/*: blueznl: 1154ms akj: 1794ms dummy: 343ms 
*/srv/*/bfl/*: blueznl: 1201ms akj: 2184ms dummy: 359ms 
*srv/*/bfl/*: blueznl: 1124ms akj: 2121ms dummy: 359ms 
*srv/*/bfl?*: blueznl: 1092ms akj: 1981ms dummy: 297ms 
*srv/*/b?l/*: blueznl: 982ms akj: 1763ms dummy: 281ms 
*?rv/*/bfl/*: blueznl: 920ms akj: 1763ms dummy: 281ms 
????: blueznl: 281ms akj: 967ms dummy: 265ms 
User avatar
pcfreak
User
User
Posts: 75
Joined: Sat May 22, 2004 1:38 am

Post by pcfreak »

here is one even more complex ;)
(not for x64)

Code: Select all

#WildCardsStringMatchEscapeCharacter = '/'

Declare.l EvalWildCardsStringMatch(string.s,wildcards.s,flags.l)
Declare.l WildCardsStringMatch(string.s,wildcards.s,flag.l)

Procedure.l EvalWildCardsStringMatch(string.s,wildcards.s,flags.l)
 *char.CHARACTER=@wildcards
 lastChar.c=0
 Part$=""
 Result$=""
 Negate=#False
 While *char\c<>0
  Select *char\c
   Case '|'
    Select lastChar
     Case #WildCardsStringMatchEscapeCharacter
      Part$+Chr(*char\c)
     Default
      Result$+Str(WildCardsStringMatch(string,Part$,flags) ! Negate)+"|"
      Part$=""
      Negate=#False
    EndSelect
    lastChar=0
   Case '&'
    Select lastChar
     Case #WildCardsStringMatchEscapeCharacter
      Part$+Chr(*char\c)
     Default
      Result$+Str(WildCardsStringMatch(string,Part$,flags) ! Negate)+"&"
      Part$=""
      Negate=#False
    EndSelect
    lastChar=0
   Case '~'
    Select lastChar
     Case #WildCardsStringMatchEscapeCharacter
      Part$+Chr(*char\c)
     Default
      If Part$="" And Negate=#False
       Negate=#True
      Else
       Part$+Chr(*char\c)
      EndIf
    EndSelect
    lastChar=0
   Case #WildCardsStringMatchEscapeCharacter
    Select lastChar
     Case #WildCardsStringMatchEscapeCharacter
      Part$+Chr(*char\c)
      lastChar=0
     Default
      lastChar=#WildCardsStringMatchEscapeCharacter
    EndSelect
   Default
    If lastChar=0
     Part$+Chr(*char\c)
    Else
     Part$+Chr(lastChar)+Chr(*char\c)
     lastChar=0
    EndIf
  EndSelect
  *char+1+#PB_Compiler_Unicode
 Wend
 Result$+Str(WildCardsStringMatch(string,Part$,flags) ! Negate)

 If Len(Result$)>1
  DataSection
   !@@wcsm_eval_ORs:
    Data.l 0
   !@@wcsm_eval_ANDs:
    Data.l 0
  EndDataSection
  !MOV esi, dword [p.v_Result$]
  CompilerIf #PB_Compiler_Unicode
  !MOVZX ebx, word [esi]
  CompilerElse
  !MOVZX ebx, byte [esi]
  CompilerEndIf
  !CMP ebx, 0
  !JZ @@wcsm_eval_wend
  !@@wcsm_eval_while:
   !CMP ebx, '|'
   !JNE @@wcsm_eval_case2
    !@@wcsm_eval_case1:;case '|'
     !MOV ecx, dword [@@wcsm_eval_ORs]
     !JECXZ @@wcsm_eval_subwend1
     !@@wcsm_eval_subwhile1:
      !POP eax
      !POP edx
      !OR eax, edx
      !PUSH eax
      !DEC ecx
     !JNZ @@wcsm_eval_subwhile1
     !@@wcsm_eval_subwend1:
     !INC ecx
     !MOV dword [@@wcsm_eval_ORs], ecx
     !JMP @@wcsm_eval_endselect
    !@@wcsm_eval_case2:;case '&'
    !CMP ebx, '&'
    !JNE @@wcsm_eval_default
     !INC dword [@@wcsm_eval_ANDs]
     !JMP @@wcsm_eval_endselect
    !@@wcsm_eval_default:;default
     !SUB ebx, 30h
     !PUSH ebx
     !MOV ecx, dword [@@wcsm_eval_ANDs]
     !JECXZ @@wcsm_eval_subwend2
     !@@wcsm_eval_subwhile2:
      !POP eax
      !POP edx
      !AND eax, edx
      !PUSH eax
      !DEC ecx
     !JNZ @@wcsm_eval_subwhile2
     !@@wcsm_eval_subwend2:
     !MOV dword [@@wcsm_eval_ANDs], ecx
    !@@wcsm_eval_endselect:
   CompilerIf #PB_Compiler_Unicode
   !INC esi
   !INC esi
   !MOVZX ebx, word [esi]
   CompilerElse
   !INC esi
   !MOVZX ebx, byte [esi]
   CompilerEndIf
   !CMP ebx, 0
   !JNZ @@wcsm_eval_while
  !@@wcsm_eval_wend:
  !MOV ecx, dword [@@wcsm_eval_ORs]
  !JECXZ @@wcsm_eval_subwend3
  !@@wcsm_eval_subwhile3:
   !POP eax
   !POP edx
   !OR eax, edx
   !PUSH eax
   !DEC ecx
  !JNZ @@wcsm_eval_subwhile3
  !@@wcsm_eval_subwend3:
  !MOV dword [@@wcsm_eval_ORs], ecx
  !POP eax
  ProcedureReturn
 Else
  ProcedureReturn Val(Result$)
 EndIf
EndProcedure

Procedure.l WildCardsStringMatch(string.s,wildcards.s,flag.l)
 If flag=0
  string=LCase(string)
  wildcards=LCase(wildcards)
 EndIf
 If wildcards=""
  ProcedureReturn #True
 EndIf
 If Left(wildcards,1)<>"*" And Left(wildcards,1)<>"?" And Left(wildcards,1)<>"#" And Right(wildcards,1)<>"*" And Right(wildcards,1)<>"?" And Right(wildcards,1)<>"#" : wildcards="*"+wildcards+"*" : EndIf
 *Wide1.CHARACTER
 *Wide2.CHARACTER
 *pos.CHARACTER=@string
 *char.CHARACTER=@wildcards
 *sPos.CHARACTER
 CompilerIf #PB_Compiler_Unicode
  Macro UnicodeNumberWildCardCompare(var)
   ((var>='0' And var<='9') Or (var>=$FF10 And var<=$FF19) Or (var>=$00BC And var<=$00BE) Or (var>=2153 And var<=$2182) Or (var>=$2070 And var<=$2079) Or (var>=$2080 And var<=$2089) Or (var>=$2460 And var<=$249B) Or (var>=$2776 And var<=$2793) Or (var>=$3220 And var<=$3229) Or (var>=$3280 And var<=$3289))
  EndMacro
 CompilerElse
  Macro UnicodeNumberWildCardCompare(var)
   ((var>='0' And var<='9') Or var=$B9 Or var=$B2 Or var=$B3 Or (var>=$BC And var<=$BE))
  EndMacro
 CompilerEndIf
 Repeat
  Select *char\c
   Case '*'
    *Wide1=*pos
    *Wide2=*char+1+#PB_Compiler_Unicode
    If *Wide2\c=0 Or (*Wide1\c=0 And RemoveString(wildcards,"*")="")
     ProcedureReturn #True
    EndIf
    If *Wide1\c=0
     ProcedureReturn #False
    EndIf
    *sPos=*char+1+#PB_Compiler_Unicode
    While *sPos\c='*'
     *char=*sPos
     *sPos+1+#PB_Compiler_Unicode
    Wend
    If *sPos\c=*pos\c Or *sPos\c='?' Or (*sPos\c='#' And UnicodeNumberWildCardCompare(*pos\c))
     *Wide2=*sPos
     While *Wide2\c<>'*' And *Wide2\c<>0
      If *Wide1\c=*Wide2\c Or *Wide2\c='?' Or (*Wide2\c='#' And UnicodeNumberWildCardCompare(*Wide1\c))
       *Wide1+1+#PB_Compiler_Unicode
       *Wide2+1+#PB_Compiler_Unicode
      Else
       If *sPos\c=*Wide1\c
        *Wide2=*char+1+#PB_Compiler_Unicode
       Else
        *Wide1+1+#PB_Compiler_Unicode
        *Wide2=*sPos
       EndIf
      EndIf
      If *Wide1\c=0
       While *Wide2\c='*'
        *Wide2+1+#PB_Compiler_Unicode
       Wend
       If *Wide2\c=0
        ProcedureReturn #True
       Else
        ProcedureReturn #False
       EndIf
      EndIf
     Wend
     If *Wide2\c='*'
      *pos=*Wide1
      *sPos=*Wide2
     Else
      If *Wide1\c=*Wide2\c And *Wide2\c=0
       ProcedureReturn #True
      Else
       If *Wide2\c=0 And *sPos\c<>'*'
        If *Wide1\c=0
         ProcedureReturn #False
        Else
         *pos+1+#PB_Compiler_Unicode
         *sPos=*char
        EndIf
       Else
        *pos=*Wide1
       EndIf
      EndIf
     EndIf
     *char=*sPos
    Else
     *pos+1+#PB_Compiler_Unicode
    EndIf
   Case '?'
    If *pos\c<>0
     *pos+1+#PB_Compiler_Unicode
     *char+1+#PB_Compiler_Unicode
     If *pos\c<>0 And *char\c=0
      ProcedureReturn #False
     EndIf
    Else
     ProcedureReturn #False
    EndIf
   Case '#'
    If UnicodeNumberWildCardCompare(*pos\c)
     *pos+1+#PB_Compiler_Unicode
     *char+1+#PB_Compiler_Unicode
     If *pos\c<>0 And *char\c=0
      ProcedureReturn #False
     EndIf
    Else
     ProcedureReturn #False
    EndIf
   Default
    If *pos\c=*char\c
     *pos+1+#PB_Compiler_Unicode
     *char+1+#PB_Compiler_Unicode
    Else
     ProcedureReturn #False
    EndIf
  EndSelect
 Until *char\c=0
 ProcedureReturn #True
EndProcedure

Debug "Should be FALSE"
Debug "return = "+Str(EvalWildCardsStringMatch("abc_def-ghi.,;jkl","*??D*",1))
Debug "- - - - -"
Debug "return = "+Str(EvalWildCardsStringMatch("abc_def-ghi.,;jkl","?*a",1))
Debug "- - - - -"
Debug "return = "+Str(EvalWildCardsStringMatch("abc_def-ghi.,;jkl","*1",1))
Debug "- - - - -"
Debug "return = "+Str(EvalWildCardsStringMatch("abc_def-ghi.,;jkl","*?a",1))
Debug "- - - - -"
Debug "return = "+Str(EvalWildCardsStringMatch("abc_def-ghi.,;jkl","defg",1))
Debug "- - - - -"
Debug "return = "+Str(EvalWildCardsStringMatch("w ind ind","wind",0))
Debug "- - - - -"
Debug "return = "+Str(EvalWildCardsStringMatch("w ind ind","wind&",0))
Debug "- - - - -"
Debug "return = "+Str(EvalWildCardsStringMatch("abc","a*???",0))
Debug "- - - - -"
Debug "return = "+Str(EvalWildCardsStringMatch("ab..abc","*.??",0))
Debug "- - - - -"
Debug "return = "+Str(EvalWildCardsStringMatch("ab.abc","*.????",0))
Debug "- - - - -"
Debug "return = "+Str(EvalWildCardsStringMatch("a&b/|c","a/&b&///&c",0))
Debug "- - - - -"
Debug "Should be TRUE"
Debug "return = "+Str(EvalWildCardsStringMatch("abc_def-ghi.,;jkl","*??*D*",0))
Debug "- - - - -"
Debug "return = "+Str(EvalWildCardsStringMatch("abc_def-ghi.,;jkl","?Bc*",0))
Debug "- - - - -"
Debug "return = "+Str(EvalWildCardsStringMatch("abc_def-ghi.,;jkl","*g?i.*",0))
Debug "- - - - -"
Debug "return = "+Str(EvalWildCardsStringMatch("abc_def---ghi...jkl","*-g?i.*k?",0))
Debug "- - - - -"
Debug "return = "+Str(EvalWildCardsStringMatch("abc_.jkldef---ghi..jkl","*.jkl*.jkl",0))
Debug "- - - - -"
Debug "return = "+Str(EvalWildCardsStringMatch("abc_.jkldef---ghi..jkl","~defg&~*?a&*??D*|~*-?-*",0))
Debug "- - - - -"
Debug "return = "+Str(EvalWildCardsStringMatch("ab1c2_","*ab#c#_*",0))
Debug "- - - - -"
Debug "return = "+Str(EvalWildCardsStringMatch("ab.abc","*.???",0))
Debug "- - - - -"
Debug "return = "+Str(EvalWildCardsStringMatch("ab..abc","*.???",0))
Debug "- - - - -"
Debug "return = "+Str(EvalWildCardsStringMatch("ab...abc","*.???",0))
Debug "- - - - -"
Debug "return = "+Str(EvalWildCardsStringMatch("/~b...abc","//~*.???",0))
Debug "- - - - -"
Debug "return = "+Str(EvalWildCardsStringMatch("a&b|c","a/&b/|c",0))
Debug "- - - - -"
Debug "return = "+Str(EvalWildCardsStringMatch("a&b/|c","a&b&///|c",0))
Debug "- - - - -"
Debug "return = "+Str(EvalWildCardsStringMatch("a&b|c","a/&b|c/",0))
Debug "- - - - -"
Debug "return = "+Str(EvalWildCardsStringMatch("a&b|c","~~c",0))
Debug "- - - - -"
Debug "return = "+Str(EvalWildCardsStringMatch("a&b|c","",0))
Post Reply