It is currently Sat May 25, 2013 12:28 am

All times are UTC + 1 hour




Post new topic Reply to topic  [ 17 posts ]  Go to page 1, 2  Next
Author Message
 Post subject: Wildcard String Compare
PostPosted: Mon Dec 01, 2003 11:07 pm 
Offline
User
User

Joined: Wed Apr 30, 2003 2:25 pm
Posts: 60
Location: Västerås
This will allow you to compare a wildcard pattern with a string
Remember to use a * at the end of the pattern or it will not know what to do with the last word.
I am amazed that it worked so quickly, got lucky with the "fulhacks" i guess :>

[EDIT]
ooops, found a bug

Code:
Procedure WCString(pattern.s, string.s, sens.b)
  spos = 0
  If sens.b = 0
    pattern.s = UCase(pattern.s)
    string.s = UCase(string.s)
  EndIf

  If Left(pattern.s, 1) = "*"
    pos = FindString(pattern.s, "*", 2)
    Debug Mid(pattern.s, 2, pos - 2)
    spos = FindString(string, Mid(pattern.s, 2, pos - 2), spos)
    If spos = 0
      ProcedureReturn 0
    EndIf
  Else
    pos = FindString(pattern.s, "*", 1) - 1
    If Left(string.s, pos) = Left(pattern.s, pos)
      spos = pos + 1
      pos + 1
    EndIf
    If spos = 0
      ProcedureReturn 0
    EndIf
  EndIf
  Repeat
    opos = pos + 1
    pos = FindString(pattern.s, "*", opos)
    If pos = 0
      Break
    EndIf
    spos = FindString(string, Mid(pattern.s, opos, pos-opos), spos)
    If spos = 0
      ProcedureReturn 0
    EndIf
  ForEver
ProcedureReturn 1
EndProcedure


Top
 Profile  
 
 Post subject: Re: Wildcard String Compare
PostPosted: Thu Dec 04, 2003 3:21 am 
Offline
PureBasic Expert
PureBasic Expert

Joined: Fri Apr 25, 2003 5:24 pm
Posts: 6561
> This will allow you to compare a wildcard pattern with a string

Can you please post an example of how to use it? Thanks!


Top
 Profile  
 
 Post subject:
PostPosted: Fri Dec 05, 2003 8:20 pm 
Offline
User
User

Joined: Wed Apr 30, 2003 2:25 pm
Posts: 60
Location: Västerås
WCString("*like*cookie*", "I like those cookies", 0) <- 1
WCString("*like*cookie*", "I Like those cookies", 1) <- 0
WCString("like*cookie*", "I like those cookies", 0) <- 0
WCString("like*cookie*", "like those cookies", 0) <- 1


Top
 Profile  
 
 Post subject:
PostPosted: Sun Dec 07, 2003 8:20 am 
Offline
PureBasic Expert
PureBasic Expert
User avatar

Joined: Sat May 17, 2003 11:31 am
Posts: 5808
slightly more complex...

Code:
Procedure.l x_matchpattern(string.s,pattern.s,ulcase.l)
  Protected s_l, s_p, m_l, m_p, nomatch.l, match.l, ff.l, m.s
  ;
  ; *** check if string matches pattern
  ;
  ; in:     string.s   - string to check
  ;         pattern.s  - pattern including wildcards * and ? and multiple patterns seperated by |
  ;         ulcase.l   - 0 check case 1 don't care about case
  ; retval: 0          - no match
  ;         1          - match
  ;
  If ulcase.l = 1
    string.s = LCase(string.s)
    pattern.s = LCase(pattern.s)
  EndIf
  ;
  s_l = Len(string)
  s_p = 1
  m_l = Len(pattern)
  m_p = 0
  ;
  nomatch.l = #False
  match.l = #False
  ff.l = #False
  While m_p < m_l And match = #False
    m_p = m_p+1
    m.s = Mid(pattern,m_p,1)
    If m = "*"
      If m_p = m_l
        match = #True
      Else
        ff= #True
      EndIf
    EndIf
    If m = "|"
      If s_p = s_l+1
        match = #True
      EndIf
    ElseIf m = "?"
      If s_p <= s_l
        s_p = s_p+1
      Else
        nomatch = #True
      EndIf
    ElseIf m <> "*"
      If ff = #True
        ff = #False
        Repeat
          s_p = s_p+1
        Until s_p > s_l Or m=Mid(string,s_p,1)
        If m = Mid(string,s_p,1)
          s_p = s_p+1
        Else
          nomatch = #True
        EndIf
      Else
        If m = Mid(string,s_p,1)
          s_p = s_p+1
        Else
          nomatch = #True
        EndIf
      EndIf
    EndIf
    If nomatch = #False And s_p = s_l+1 And m_p = m_l
      match = #True
    EndIf
    If nomatch = #True
      m_p = FindString(pattern,"|",m_p+1)
      If m_p = 0
        m_p = m_l
      Else
        nomatch = #False
        ff = #False
        s_p = 1
      EndIf
    EndIf
  Wend
  ;
  ProcedureReturn match
EndProcedure

_________________
( PB5.11 Win7 x64 Dell XPS710 Raid 0 VelociRaptor Intel Q6600 nForce 5 NVidia GTS450 )
( You have two options: psychotherapy, or the PureBasic Survival Guide... )


Top
 Profile  
 
 Post subject:
PostPosted: Thu Feb 02, 2006 5:30 am 
Offline
Enthusiast
Enthusiast

Joined: Fri Dec 23, 2005 2:20 pm
Posts: 338
Location: Quebec, Canada
blueznl;

I saw your code "compare a wildcard pattern with a string" a litle later, because I almost finished mine....

It works ok when I run it alone..... but when I put it as a part of my
application sometimes return 0, when it must be 1...
I use the same data to test it inside and outside the application....

Hope you or some of the many experts here can detect something
I can't.

Modified on feb 03, 2006

Code:
Procedure ToLowerCase(*pt)
  Repeat
    char1.l = Asc(LCase(Chr(PeekB(*pt))))
    PokeB( *pt,char1 )
    *pt = *pt + 1
  Until PeekB(*pt) = 0
EndProcedure

Procedure.l StringRegExpMatch(*string1, *string2 , caseSensitive.l)

match.l = -1
matchChar.l = 0
starAsc = Asc("*")
questionAsc = Asc("?")

If caseSensitive = 0                      ; if 0 convert to lower case
  ToLowerCase(*string1)
  ToLowerCase(*string2)
EndIf

Repeat
 
  ; the first "If" tries to find the first valid char of string2 in string1.
  ; pointers are moved and left them where the match happens
 
  If  PeekB(*string2) = starAsc And matchChar = 0
    *string2 = *string2 + 1
    Repeat   
      While PeekB(*string2) = questionAsc ; if a ?'s exist after the beginning *: move both pointers
        *string1 = *string1 + 1
        *string2 = *string2 + 1
      Wend                 

        If PeekB(*string1) = PeekB(*string2)     ;and look where that char is in string1
          matchChar = 1
          match = 1
        Else
          *string1 = *string1 + 1             
          match = 0
        EndIf
      Until PeekB(*string1) = 0  Or matchChar = 1
    EndIf   
   
   
    If PeekB(*string1) = PeekB(*string2)
      match = 1
    Else
      If PeekB(*string2) = questionAsc
        match = 1
      ElseIf PeekB(*string2) = starAsc
        match = 1
        ProcedureReturn 1
      Else
        match = 0
        ProcedureReturn 0
      EndIf
    EndIf
    *string1 = *string1 + 1
    *string2 = *string2 + 1
   
    ; these two If's are executed if no ? or *: means-> string must be same size
    If PeekB(*string1) = 0 And PeekB(*string2) <> 0: ProcedureReturn 0: EndIf
    If PeekB(*string1) <> 0 And PeekB(*string2) = 0: ProcedureReturn 0: EndIf
   
  Until PeekB(*string1) = 0 Or PeekB(*string2) = 0
 
  ProcedureReturn match
EndProcedure

Test data

a.s = "<TITLE>COntact Us</TITLE>"
b.s = "*?????????nt??t Us</TI????*"
c.s = "Strings Match"

res.l = StringRegExpMatch(@a, @b, 1)

If res
  b=a
  new$ = ReplaceString(a,b,c)
EndIf

Debug res
Debug new$



Top
 Profile  
 
 Post subject:
PostPosted: Fri Feb 03, 2006 3:44 pm 
Offline
PureBasic Expert
PureBasic Expert
User avatar

Joined: Sat May 17, 2003 11:31 am
Posts: 5808
chen, you probably have an older version, here's my last one...

Code:
Procedure.l x_matchpattern(string.s,pattern.s,ulcase.l)              ; check if a string matches one or more patterns  (wildcards *? separator |)
  ;
  ; *** check if string matches one or more patters using * and ? as wildcards
  ;
  ; in:     string.s                             - string to check
  ;         pattern.s                            - pattern including wildcards * and ? and multiple patterns seperated by |
  ;         ulcase.l    = 0 (#x_parse_matchcase) - check case
  ;                     = 1 (#x_parse_anycase)   - don't care about case
  ; retval: 0 or #False                          - no match
  ;         1 or #True                           - match
  ;
  ; notes:
  ;
  ; - ? must be filled by a character, this is different in windows / dos where it can be empty
  ; - note that an 'empty' pattern will matches an empty original
  ; - rewrite number three, and it's still horrible
  ;
  If ulcase = 1
    pattern = LCase(pattern)
    string = LCase(string)
  EndIf
  ;
  p_p = 0
  p_l = Len(pattern)
  s_l = Len(string)
  ;
  If s_l = 0                                            ; 2265 handle emtpy strings
    If p_l = 0
      match = #True
    ElseIf FindString(pattern,"||",1) > 0
      match = #True
    ElseIf Right(pattern,1) = "|"
      match = #True
    Else
      match = #False
    EndIf
  Else
    Repeat
      s_p = 1
      ;
      Repeat
        ff_max = 0
        ff_min = 0
        p_sub.s = ""
        p_char.s = ""
        done = #False
        match = #False
        Repeat
          p_p = p_p+1
          p_char = Mid(pattern,p_p,1)
          If p_char = "*"
            ff_max = s_l
          ElseIf p_char = "?"
            ff_min = ff_min+1
            ff_max = x_max(ff_max,ff_min)
          ElseIf p_char = "|"
            done = #True
          Else
            p_sub = p_sub+p_char
            Repeat
              p_char = Mid(pattern,p_p+1,1)
              If p_char = "*" Or p_char = "?"
                done = #True
              ElseIf p_char = "|"
                done = #True
                ; p_p = p_p+1                               ; 2265 small quickfix... hope it works
              Else
                p_sub = p_sub+p_char
                If p_p < p_l
                  p_p = p_p+1
                Else
                  done = #True
                EndIf
              EndIf
            Until done = #True
          EndIf
          If p_p >= p_l
            done = #True
          EndIf
        Until done = #True
        ;
        If p_sub = ""
          If s_l-s_p+1 <= ff_max And s_l-s_p+1 >= ff_min
            match = #True
          Else
          EndIf
          s_p = s_l+1
        Else
          x = FindString(string,p_sub,s_p)
          If x > 0
            If x-s_p > ff_max
            ElseIf x-s_p < ff_min
            Else
              match = #True
            EndIf
            s_p = x+Len(p_sub)
          Else
            s_p = s_l+1
          EndIf
        EndIf
      Until match = #False Or s_p = s_l+1
      ;
      If p_p < p_l
        If match = false
          While p_p < p_l And p_char <> "|"
            p_p = p_p+1
            p_char = Mid(pattern,p_p,1)
          Wend
        Else
          While p_p < p_l And p_char <> "|"
            p_p = p_p+1
            p_char = Mid(pattern,p_p,1)
            If p_char = "|"
            ElseIf p_char <> "*"
              match = #False
            EndIf
          Wend
        EndIf
      EndIf
    Until match = #True Or p_p >= p_l
  EndIf
  ;
  ProcedureReturn match
EndProcedure

_________________
( PB5.11 Win7 x64 Dell XPS710 Raid 0 VelociRaptor Intel Q6600 nForce 5 NVidia GTS450 )
( You have two options: psychotherapy, or the PureBasic Survival Guide... )


Top
 Profile  
 
 Post subject:
PostPosted: Fri Feb 03, 2006 6:52 pm 
Offline
Enthusiast
Enthusiast

Joined: Fri Dec 23, 2005 2:20 pm
Posts: 338
Location: Quebec, Canada
thanks for your reply...

the old version works OK...

the new fail here.... looking for this function

Quote:

ff_min = ff_min+1
ff_max = x_max(ff_max,ff_min)


ok I built a simple funtion than return the max
Code:
Procedure.l x_max(first.l,second.l)
max = 0
If first >= Second
   max = first
 Else
   max = second
 EndIf 
 ProcedureReturn max
EndProcedure


if dont know if your version is taken care on something else...


Top
 Profile  
 
 Post subject:
PostPosted: Fri Feb 03, 2006 7:08 pm 
Offline
Enthusiast
Enthusiast

Joined: Fri Dec 23, 2005 2:20 pm
Posts: 338
Location: Quebec, Canada
ok....

I did some tests...

the old version failed
the new is ok

data

a.s = " position: relative;"
b.s = " *ion: *;"
c.s = "Strings Match"

res.l = x_matchpattern(a, b, 0)

If res
b=a
new$ = ReplaceString(a,b,c)
EndIf

Debug res
Debug new$


Top
 Profile  
 
 Post subject:
PostPosted: Fri Feb 03, 2006 9:12 pm 
Offline
PureBasic Expert
PureBasic Expert
User avatar

Joined: Sat May 17, 2003 11:31 am
Posts: 5808
chen wrote:
thanks for your reply...

the old version works OK...

the new fail here.... looking for this function

Quote:

ff_min = ff_min+1
ff_max = x_max(ff_max,ff_min)


ok I built a simple funtion than return the max
Code:
Procedure.l x_max(first.l,second.l)
max = 0
If first >= Second
   max = first
 Else
   max = second
 EndIf 
 ProcedureReturn max
EndProcedure


if dont know if your version is taken care on something else...


no that's fine

i'll have to release a new version of my x_lib.pb include file on of these days...

_________________
( PB5.11 Win7 x64 Dell XPS710 Raid 0 VelociRaptor Intel Q6600 nForce 5 NVidia GTS450 )
( You have two options: psychotherapy, or the PureBasic Survival Guide... )


Top
 Profile  
 
 Post subject:
PostPosted: Sun Mar 26, 2006 7:32 pm 
Offline
Enthusiast
Enthusiast

Joined: Mon Jun 09, 2003 10:08 pm
Posts: 635
Location: Nottingham
I decided to write my own pattern matching routine as I could not understand the logic of the published ones.

Like blueznl's routine it supports "*" and "?" and "|" and with "?" matching exactly one character.

It is written as two procedures: QueryMatch() which handles "?" and Match() which handles "*" and "|".
However, as Match() calls QueryMatch() only once, the two procedures can theoretically be combined into one, though I don't think this would be an easy task.

Code:
; Pattern Matching  AKJ  26-Mar-06

Procedure.l QueryMatch(string$, pattern$, startposn=1)
; Determine whether pattern$ appears within string$ from startposn onwards
; If pattern$ is within string$, return the position within string$ of the first
;   byte just beyond the match [in readiness for the next call to QueryMatch()]
; If pattern$ is not within string$, return 0
; Pattern$ may contain the wildcard "?" that matches a single character in string$
; No other wildcards are recognised in pattern$
; All comparisons are case sensitive
Protected s$ ; A substring of string$
Protected s ; Position within string$ and s$
Protected p ; Position within pattern$
Protected c$ ; Character within pattern$
Protected ls, lp ; Length of string$ and pattern$
ls=Len(string$): lp=Len(pattern$)
If startposn<1: startposn=1: EndIf ; Sanity check
If lp=0: ProcedureReturn startposn: EndIf ; Special case
; Get the first non-wildcard character within pattern$ and it's position
c$=Left(RemoveString(pattern$,"?"),1): p=FindString(pattern$, c$, 1)
If p=0 ; If pattern contains only wildcards
  startposn+lp
  If startposn<=ls+1: ProcedureReturn startposn: Else: ProcedureReturn 0: EndIf
EndIf
Repeat
  ; Get the first position of string$ that might match pattern$
  s=FindString(string$, c$, startposn+p-1)
  If s=0: ProcedureReturn 0: EndIf
  s-p+1: startposn=s+1 ; Set new start position for case of no match
  ; Extract in s$ the portion of string$ that might match pattern$
  s$=Mid(string$, s, lp)
  ; Compare s$ with pattern$, looking for a match
  For p=1 To lp
    c$=Mid(pattern$, p, 1)
    If c$<>"?" And c$<>Mid(s$, p, 1): Break: EndIf
  Next p
Until p>lp ; If no match, look for one later in string$
ProcedureReturn s+lp ; Match.  Return start position
EndProcedure

Procedure.l Match(string$, pattern$, casesensitive=#False)
; Returns True if pattern$ appears within string$, otherwise False
; Pattern$ may contain wildcards of "*" (match >=0 chars) and/or "?" (match 1 char)
; Pattern$ may contain "|" meaning 'or'
; The last parameter determines whether the matching will be case sensitive
Protected s ; Position within string$
Protected w$ ; Wildcard string that may contain "?" or "*" but not "|"
Protected w ; Index to wildcard strings in pattern$
Protected q$ ; Query string that may contain "?" but not "*"
Protected q ; Index to query strings in w$
; Try to simplify the pattern
While FindString(pattern$, "**", 1)
  pattern$=ReplaceString(pattern$, "**", "*")
Wend
While FindString(pattern$, "||", 1)
  pattern$=ReplaceString(pattern$, "||", "|")
Wend
If Left(pattern$,1)="|": pattern$=Mid(pattern$, 2, 9999): EndIf
If Right(pattern$,1)="|": pattern$=Left(pattern$, Len(pattern$)-1): EndIf
; Check for string case sensitivity
If Not casesensitive
  string$ = LCase(string$): pattern$ = LCase(pattern$)
EndIf
; Loop for each wildcard string
string$="|"+string$+"|" ; Add start/end anchorages
For w=1 To CountString(pattern$, "|")+1
  w$="|"+StringField(pattern$, w, "|")+"|"  ; Add matching anchorages
  s=1 ; Position at start of string$
  ; Loop for each query string
  For q=1 To CountString(w$, "*")+1
    q$=StringField(w$, q, "*")
    s=QueryMatch(string$, q$, s)
    If s=0: Break: EndIf ; Unmatched so far
  Next q
  If s: ProcedureReturn #True: EndIf ; Match
Next w
ProcedureReturn #False ; No match
EndProcedure

; Examples
Define text$="abracadabra"
Debug "01  "+Str(Match(text$, "*????*"))
Debug "02  "+Str(Match(text$, "*DAB*"))
Debug "03  "+Str(Match(text$, "*DAB*", #True)) ; Case sensitive
Debug "04  "+Str(Match(text$, "ab|*xyz*|abra?|bra"))
Debug "05  "+Str(Match(text$, "???????????"))
Debug "06  "+Str(Match(text$, "???????"))
Debug "07  "+Str(Match(text$, "*"))
Debug "08  "+Str(Match(text$, "|"))
Debug "09  "+Str(Match(text$, "*a*a*a*a*a"))
Debug "10  "+Str(Match(" position: relative;", " *ion: *;"))
Debug "11  "+Str(Match("", ""))

End

_________________
Anthony Jordan


Top
 Profile  
 
 Post subject:
PostPosted: Sun May 13, 2007 2:20 am 
Offline
Enthusiast
Enthusiast
User avatar

Joined: Wed Jun 09, 2004 11:10 am
Posts: 162
Location: Germany
Hey BlueZNL! You have a small mistake in your code ;)

Code:
      Until match = #False Or s_p = s_l+1
      ;
      If p_p < p_l
        If match = false
          While p_p < p_l And p_char <> "|"
            p_p = p_p+1
            p_char = Mid(pattern,p_p,1)

should be
Code:
      Until match = #False Or s_p = s_l+1
      ;
      If p_p < p_l
        If match = #False
          While p_p < p_l And p_char <> "|"
            p_p = p_p+1
            p_char = Mid(pattern,p_p,1)


as the variable false is not defined in most codes it won't make a real difference but it should be fixed anyways ;)

here's your full code with that correction and the call to x_max replaced with some proper code:
Code:
Procedure.l x_matchpattern(string.s,pattern.s,ulcase.l)              ; check if a string matches one or more patterns  (wildcards *? separator |)
  ;
  ; *** check if string matches one or more patters using * and ? as wildcards
  ;
  ; in:     string.s                             - string to check
  ;         pattern.s                            - pattern including wildcards * and ? and multiple patterns seperated by |
  ;         ulcase.l    = 0 (#x_parse_matchcase) - check case
  ;                     = 1 (#x_parse_anycase)   - don't care about case
  ; retval: 0 or #False                          - no match
  ;         1 or #True                           - match
  ;
  ; notes:
  ;
  ; - ? must be filled by a character, this is different in windows / dos where it can be empty
  ; - note that an 'empty' pattern will matches an empty original
  ; - rewrite number three, and it's still horrible
  ;
  If ulcase = 1
    pattern = LCase(pattern)
    string = LCase(string)
  EndIf
  ;
  p_p = 0
  p_l = Len(pattern)
  s_l = Len(string)
  ;
  If s_l = 0                                            ; 2265 handle emtpy strings
    If p_l = 0
      match = #True
    ElseIf FindString(pattern,"||",1) > 0
      match = #True
    ElseIf Right(pattern,1) = "|"
      match = #True
    Else
      match = #False
    EndIf
  Else
    Repeat
      s_p = 1
      ;
      Repeat
        ff_max = 0
        ff_min = 0
        p_sub.s = ""
        p_char.s = ""
        done = #False
        match = #False
        Repeat
          p_p = p_p+1
          p_char = Mid(pattern,p_p,1)
          If p_char = "*"
            ff_max = s_l
          ElseIf p_char = "?"
            ff_min = ff_min+1
            If ff_min > ff_max
              ff_max = ff_min
            EndIf
          ElseIf p_char = "|"
            done = #True
          Else
            p_sub = p_sub+p_char
            Repeat
              p_char = Mid(pattern,p_p+1,1)
              If p_char = "*" Or p_char = "?"
                done = #True
              ElseIf p_char = "|"
                done = #True
                ; p_p = p_p+1                               ; 2265 small quickfix... hope it works
              Else
                p_sub = p_sub+p_char
                If p_p < p_l
                  p_p = p_p+1
                Else
                  done = #True
                EndIf
              EndIf
            Until done = #True
          EndIf
          If p_p >= p_l
            done = #True
          EndIf
        Until done = #True
        ;
        If p_sub = ""
          If s_l-s_p+1 <= ff_max And s_l-s_p+1 >= ff_min
            match = #True
          Else
          EndIf
          s_p = s_l+1
        Else
          x = FindString(string,p_sub,s_p)
          If x > 0
            If x-s_p > ff_max
            ElseIf x-s_p < ff_min
            Else
              match = #True
            EndIf
            s_p = x+Len(p_sub)
          Else
            s_p = s_l+1
          EndIf
        EndIf
      Until match = #False Or s_p = s_l+1
      ;
      If p_p < p_l
        If match = #False
          While p_p < p_l And p_char <> "|"
            p_p = p_p+1
            p_char = Mid(pattern,p_p,1)
          Wend
        Else
          While p_p < p_l And p_char <> "|"
            p_p = p_p+1
            p_char = Mid(pattern,p_p,1)
            If p_char = "|"
            ElseIf p_char <> "*"
              match = #False
            EndIf
          Wend
        EndIf
      EndIf
    Until match = #True Or p_p >= p_l
  EndIf
  ;
  ProcedureReturn match
EndProcedure


Top
 Profile  
 
 Post subject:
PostPosted: Fri Nov 07, 2008 10:21 pm 
Offline
Enthusiast
Enthusiast
User avatar

Joined: Wed Jun 09, 2004 11:10 am
Posts: 162
Location: Germany
I wrote a new wildcard checking engine... It should perform better the others in ths thread...

So here are the current sources:
Code:
; Author: DaDummy

Structure wc_new1_data ; 2008-12-28
  ; attention: this structure is shared across all recursive calls - marked content musn't be invalidated
  starCount.l ; preserve
  s_pos.l
  p_pos.l
  s.s         ; preserve
  p.s         ; preserve
EndStructure

Procedure   _WildcardCompare_new1_recursiveSearch(*vars.wc_new1_data) ; 2008-11-12
  Protected *s_c.character, *p_c.character, tmp.l
 
  ; restore last saved position
  With *vars
    *s_c = \s_pos
    *p_c = \p_pos
  EndWith
 
  While *p_c\c = '*' Or *p_c\c = '?' ; move till next non-wildcard char
    If *p_c\c = '?' ; handle questionmarks
      If *s_c\c = 0 ; there should be another char but there is none
        ProcedureReturn #False
      EndIf
     
      *s_c + SizeOf(character)
    EndIf
    *p_c + SizeOf(character)
  Wend
 
  If *p_c\c = 0 ; no regular character after the last star and string did match so far
    ProcedureReturn #True
  EndIf
 
  *vars\p_pos = *p_c
 
  If *s_c\c = 0 ; string finished and there are characters left in the pattern
    ProcedureReturn #False
  EndIf
 
  Repeat
    While *p_c\c <> *s_c\c And *s_c\c ; find candidates for next part
      *s_c + SizeOf(character)
    Wend
   
    If *s_c\c = 0 ; string finished and there are characters left in the pattern
      ProcedureReturn #False
    EndIf
   
    ; save a loop as the current character was checked in the last loop
    *p_c + SizeOf(character)
    *s_c + SizeOf(character)
   
    ; save current position
    With *vars
      \p_pos = *p_c
      \s_pos = *s_c
    EndWith
   
    While (*p_c\c = *s_c\c Or *p_c\c = '?') And *s_c\c
      *p_c + SizeOf(character)
      *s_c + SizeOf(character)
    Wend
   
    Select *p_c\c
      Case 0
        If *s_c\c = 0 ; string and pattern did match
          ProcedureReturn #True
        Else ; pattern is done, string is not
          ProcedureReturn #False
        EndIf
       
      Case '*'
        ; save current position and restore last saved position
        With *vars
          ; Swap \p_pos, *p_c
          tmp    = \p_pos
          \p_pos = *p_c
          *p_c   = tmp
         
          ; Swap \s_pos, *s_c
          tmp    = \s_pos
          \s_pos = *s_c
          *s_c   = tmp
        EndWith
       
        ; next recursion (one per star)
        If _WildcardCompare_new1_recursiveSearch(*vars)
          ProcedureReturn #True
        EndIf
       
      Default
        If *s_c\c = 0 ; string is done, pattern is not
          ProcedureReturn #False
        Else
          ; restore last saved position
          With *vars
            *p_c = \p_pos
            *s_c = \s_pos
          EndWith
        EndIf
    EndSelect
  ForEver
EndProcedure

Procedure   WildcardCompare_new1(string.s, pattern.s) ; 2008-12-28
  ; remarks:
  ; - pattern should not contain multiple stars in a row to optimze performance
  ; - string is not allowed to contain stars
  ; - empty string only matches empty string
  ; - '?' must to be filled with a character
  ; - attention: 'vars' is shared across all recursive calls - marked content musn't be invalidated
 
  Protected vars.wc_new1_data
  Protected *s_c.character, *p_c.character
 
  ; stack is zeroed by pb so the following lines are commented
  ; vars\starCount = 0
 
  With vars
    \s = string
    \p = pattern
  EndWith
 
  *s_c = @vars\s
  *p_c = @vars\p
 
  While (vars\starCount Or *p_c\c = *s_c\c Or *p_c\c = '?' Or *p_c\c = '*') And *p_c\c And *s_c\c
    Select *p_c\c
      Case '*' ; found first wildcard && and strings match so far
        If vars\starCount = 0 ; if this is the first star
          ; save current position
          With vars
            \p_pos = *p_c
            \s_pos = *s_c
          EndWith
        EndIf
       
        vars\starCount + 1
    EndSelect
   
    *p_c + SizeOf(character)
    *s_c + SizeOf(character)
  Wend
 
  Select vars\starCount
    Case 0 ; pattern does not contain any stars
      If *p_c\c = 0 And *s_c\c = 0 ; string = pattern
        ProcedureReturn #True
      Else ; len(string) < len(pattern) || len(string) > len(pattern) || strinng <> pattern
        ProcedureReturn #False
      EndIf
     
    Case 1 ; special treatment for patterns with only 1 star
      If *p_c\c
        *p_c + SizeOf(character)
       
        If *p_c\c ; len(string) < (len(pattern) - starCount)
          ProcedureReturn #False
        EndIf
      EndIf
     
      While *s_c\c
        *s_c + SizeOf(character)
      Wend
     
      ; save a loop as both are 0 right now
      *p_c - SizeOf(character)
      *s_c - SizeOf(character)
     
      While (*p_c\c = *s_c\c Or *p_c\c = '?') ; there is a star so this will return before the beginning of the string
        *p_c - SizeOf(character)
        *s_c - SizeOf(character)
      Wend
     
      If *p_c\c = '*' ; everything around the star does match
        ProcedureReturn #True
      Else
        ProcedureReturn #False
      EndIf
    Default ; more than 1 star - do recursive search
      ProcedureReturn _WildcardCompare_new1_recursiveSearch(vars)
  EndSelect
EndProcedure

Procedure.s WildcardCompare_new1_optimizePattern(pattern.s) ; 2008-11-12
  Protected retVal.s
  Protected *p_c.character, *r_c.character
 
  *p_c = @pattern
 
  While *p_c\c
    If *p_c\c = '*' And Right(retVal, 1) <> "*"
      retVal + Chr(*p_c\c)
    ElseIf *p_c\c <> '*'
      retVal + Chr(*p_c\c)
    EndIf
   
    *p_c + SizeOf(character)
  Wend
 
  ProcedureReturn retVal
EndProcedure

Procedure   WildcardCompare_new1_checkString(string.s) ; 2008-11-12
  If FindString(string, "*", 1) Or FindString(string, "?", 1)
    ProcedureReturn #False
  Else
    ProcedureReturn #True
  EndIf
EndProcedure

Procedure   WildcardCompare_new1_multiplePatterns(string.s, patterns.s) ; 2008-11-12
  Protected i.l, count.l
 
  count = CountString(patterns, "|")
 
  For i = 1 To count + 1
    If WildcardCompare_new1(string, StringField(patterns, i, "|"))
      ProcedureReturn #True
    EndIf
  Next
 
  ProcedureReturn #False
EndProcedure


2008-12-28:
-Fixed a buffer overflow that could happen in rare cases
-Removed some useless lines


Last edited by Dummy on Fri Nov 28, 2008 2:20 pm, edited 6 times in total.

Top
 Profile  
 
 Post subject:
PostPosted: Sat Nov 08, 2008 12:28 am 
Offline
Addict
Addict

Joined: Wed Aug 24, 2005 8:39 am
Posts: 2559
Location: Southwest OH - USA
thanks. I had used the other one.

will test this one as i have some more time available.

cheers


Top
 Profile  
 
 Post subject:
PostPosted: Mon Nov 10, 2008 3:29 am 
Offline
Enthusiast
Enthusiast
User avatar

Joined: Wed Jun 09, 2004 11:10 am
Posts: 162
Location: Germany
I ran some tests to compare speed and find the fastest function...

The one by Johan_Haegg seems to be the fastest but I didn't take it into account as it doesn't support the '?'-symbol.

My testing code and files:
http://www.dashtec.net/wildcards.rar

And my results:
Code:
*/tp-downloads/*/.htaccess: blueznl: 1638ms akj: 1591ms dummy: 405ms
*avatars: blueznl: 874ms akj: 1201ms dummy: 359ms
*avatars*: blueznl: 842ms akj: 1264ms dummy: 359ms
/srv: blueznl: 811ms akj: 1108ms dummy: 312ms
/srv/www/bfl*: blueznl: 1248ms akj: 1606ms dummy: 344ms
/srv/*/bfl/*: blueznl: 1154ms akj: 1794ms dummy: 343ms
*/srv/*/bfl/*: blueznl: 1201ms akj: 2184ms dummy: 359ms
*srv/*/bfl/*: blueznl: 1124ms akj: 2121ms dummy: 359ms
*srv/*/bfl?*: blueznl: 1092ms akj: 1981ms dummy: 297ms
*srv/*/b?l/*: blueznl: 982ms akj: 1763ms dummy: 281ms
*?rv/*/bfl/*: blueznl: 920ms akj: 1763ms dummy: 281ms
????: blueznl: 281ms akj: 967ms dummy: 265ms


Top
 Profile  
 
 Post subject:
PostPosted: Sun Nov 30, 2008 3:11 pm 
Offline
User
User
User avatar

Joined: Sat May 22, 2004 1:38 am
Posts: 69
here is one even more complex ;)
(not for x64)

Code:
#WildCardsStringMatchEscapeCharacter = '/'

Declare.l EvalWildCardsStringMatch(string.s,wildcards.s,flags.l)
Declare.l WildCardsStringMatch(string.s,wildcards.s,flag.l)

Procedure.l EvalWildCardsStringMatch(string.s,wildcards.s,flags.l)
 *char.CHARACTER=@wildcards
 lastChar.c=0
 Part$=""
 Result$=""
 Negate=#False
 While *char\c<>0
  Select *char\c
   Case '|'
    Select lastChar
     Case #WildCardsStringMatchEscapeCharacter
      Part$+Chr(*char\c)
     Default
      Result$+Str(WildCardsStringMatch(string,Part$,flags) ! Negate)+"|"
      Part$=""
      Negate=#False
    EndSelect
    lastChar=0
   Case '&'
    Select lastChar
     Case #WildCardsStringMatchEscapeCharacter
      Part$+Chr(*char\c)
     Default
      Result$+Str(WildCardsStringMatch(string,Part$,flags) ! Negate)+"&"
      Part$=""
      Negate=#False
    EndSelect
    lastChar=0
   Case '~'
    Select lastChar
     Case #WildCardsStringMatchEscapeCharacter
      Part$+Chr(*char\c)
     Default
      If Part$="" And Negate=#False
       Negate=#True
      Else
       Part$+Chr(*char\c)
      EndIf
    EndSelect
    lastChar=0
   Case #WildCardsStringMatchEscapeCharacter
    Select lastChar
     Case #WildCardsStringMatchEscapeCharacter
      Part$+Chr(*char\c)
      lastChar=0
     Default
      lastChar=#WildCardsStringMatchEscapeCharacter
    EndSelect
   Default
    If lastChar=0
     Part$+Chr(*char\c)
    Else
     Part$+Chr(lastChar)+Chr(*char\c)
     lastChar=0
    EndIf
  EndSelect
  *char+1+#PB_Compiler_Unicode
 Wend
 Result$+Str(WildCardsStringMatch(string,Part$,flags) ! Negate)

 If Len(Result$)>1
  DataSection
   !@@wcsm_eval_ORs:
    Data.l 0
   !@@wcsm_eval_ANDs:
    Data.l 0
  EndDataSection
  !MOV esi, dword [p.v_Result$]
  CompilerIf #PB_Compiler_Unicode
  !MOVZX ebx, word [esi]
  CompilerElse
  !MOVZX ebx, byte [esi]
  CompilerEndIf
  !CMP ebx, 0
  !JZ @@wcsm_eval_wend
  !@@wcsm_eval_while:
   !CMP ebx, '|'
   !JNE @@wcsm_eval_case2
    !@@wcsm_eval_case1:;case '|'
     !MOV ecx, dword [@@wcsm_eval_ORs]
     !JECXZ @@wcsm_eval_subwend1
     !@@wcsm_eval_subwhile1:
      !POP eax
      !POP edx
      !OR eax, edx
      !PUSH eax
      !DEC ecx
     !JNZ @@wcsm_eval_subwhile1
     !@@wcsm_eval_subwend1:
     !INC ecx
     !MOV dword [@@wcsm_eval_ORs], ecx
     !JMP @@wcsm_eval_endselect
    !@@wcsm_eval_case2:;case '&'
    !CMP ebx, '&'
    !JNE @@wcsm_eval_default
     !INC dword [@@wcsm_eval_ANDs]
     !JMP @@wcsm_eval_endselect
    !@@wcsm_eval_default:;default
     !SUB ebx, 30h
     !PUSH ebx
     !MOV ecx, dword [@@wcsm_eval_ANDs]
     !JECXZ @@wcsm_eval_subwend2
     !@@wcsm_eval_subwhile2:
      !POP eax
      !POP edx
      !AND eax, edx
      !PUSH eax
      !DEC ecx
     !JNZ @@wcsm_eval_subwhile2
     !@@wcsm_eval_subwend2:
     !MOV dword [@@wcsm_eval_ANDs], ecx
    !@@wcsm_eval_endselect:
   CompilerIf #PB_Compiler_Unicode
   !INC esi
   !INC esi
   !MOVZX ebx, word [esi]
   CompilerElse
   !INC esi
   !MOVZX ebx, byte [esi]
   CompilerEndIf
   !CMP ebx, 0
   !JNZ @@wcsm_eval_while
  !@@wcsm_eval_wend:
  !MOV ecx, dword [@@wcsm_eval_ORs]
  !JECXZ @@wcsm_eval_subwend3
  !@@wcsm_eval_subwhile3:
   !POP eax
   !POP edx
   !OR eax, edx
   !PUSH eax
   !DEC ecx
  !JNZ @@wcsm_eval_subwhile3
  !@@wcsm_eval_subwend3:
  !MOV dword [@@wcsm_eval_ORs], ecx
  !POP eax
  ProcedureReturn
 Else
  ProcedureReturn Val(Result$)
 EndIf
EndProcedure

Procedure.l WildCardsStringMatch(string.s,wildcards.s,flag.l)
 If flag=0
  string=LCase(string)
  wildcards=LCase(wildcards)
 EndIf
 If wildcards=""
  ProcedureReturn #True
 EndIf
 If Left(wildcards,1)<>"*" And Left(wildcards,1)<>"?" And Left(wildcards,1)<>"#" And Right(wildcards,1)<>"*" And Right(wildcards,1)<>"?" And Right(wildcards,1)<>"#" : wildcards="*"+wildcards+"*" : EndIf
 *Wide1.CHARACTER
 *Wide2.CHARACTER
 *pos.CHARACTER=@string
 *char.CHARACTER=@wildcards
 *sPos.CHARACTER
 CompilerIf #PB_Compiler_Unicode
  Macro UnicodeNumberWildCardCompare(var)
   ((var>='0' And var<='9') Or (var>=$FF10 And var<=$FF19) Or (var>=$00BC And var<=$00BE) Or (var>=2153 And var<=$2182) Or (var>=$2070 And var<=$2079) Or (var>=$2080 And var<=$2089) Or (var>=$2460 And var<=$249B) Or (var>=$2776 And var<=$2793) Or (var>=$3220 And var<=$3229) Or (var>=$3280 And var<=$3289))
  EndMacro
 CompilerElse
  Macro UnicodeNumberWildCardCompare(var)
   ((var>='0' And var<='9') Or var=$B9 Or var=$B2 Or var=$B3 Or (var>=$BC And var<=$BE))
  EndMacro
 CompilerEndIf
 Repeat
  Select *char\c
   Case '*'
    *Wide1=*pos
    *Wide2=*char+1+#PB_Compiler_Unicode
    If *Wide2\c=0 Or (*Wide1\c=0 And RemoveString(wildcards,"*")="")
     ProcedureReturn #True
    EndIf
    If *Wide1\c=0
     ProcedureReturn #False
    EndIf
    *sPos=*char+1+#PB_Compiler_Unicode
    While *sPos\c='*'
     *char=*sPos
     *sPos+1+#PB_Compiler_Unicode
    Wend
    If *sPos\c=*pos\c Or *sPos\c='?' Or (*sPos\c='#' And UnicodeNumberWildCardCompare(*pos\c))
     *Wide2=*sPos
     While *Wide2\c<>'*' And *Wide2\c<>0
      If *Wide1\c=*Wide2\c Or *Wide2\c='?' Or (*Wide2\c='#' And UnicodeNumberWildCardCompare(*Wide1\c))
       *Wide1+1+#PB_Compiler_Unicode
       *Wide2+1+#PB_Compiler_Unicode
      Else
       If *sPos\c=*Wide1\c
        *Wide2=*char+1+#PB_Compiler_Unicode
       Else
        *Wide1+1+#PB_Compiler_Unicode
        *Wide2=*sPos
       EndIf
      EndIf
      If *Wide1\c=0
       While *Wide2\c='*'
        *Wide2+1+#PB_Compiler_Unicode
       Wend
       If *Wide2\c=0
        ProcedureReturn #True
       Else
        ProcedureReturn #False
       EndIf
      EndIf
     Wend
     If *Wide2\c='*'
      *pos=*Wide1
      *sPos=*Wide2
     Else
      If *Wide1\c=*Wide2\c And *Wide2\c=0
       ProcedureReturn #True
      Else
       If *Wide2\c=0 And *sPos\c<>'*'
        If *Wide1\c=0
         ProcedureReturn #False
        Else
         *pos+1+#PB_Compiler_Unicode
         *sPos=*char
        EndIf
       Else
        *pos=*Wide1
       EndIf
      EndIf
     EndIf
     *char=*sPos
    Else
     *pos+1+#PB_Compiler_Unicode
    EndIf
   Case '?'
    If *pos\c<>0
     *pos+1+#PB_Compiler_Unicode
     *char+1+#PB_Compiler_Unicode
     If *pos\c<>0 And *char\c=0
      ProcedureReturn #False
     EndIf
    Else
     ProcedureReturn #False
    EndIf
   Case '#'
    If UnicodeNumberWildCardCompare(*pos\c)
     *pos+1+#PB_Compiler_Unicode
     *char+1+#PB_Compiler_Unicode
     If *pos\c<>0 And *char\c=0
      ProcedureReturn #False
     EndIf
    Else
     ProcedureReturn #False
    EndIf
   Default
    If *pos\c=*char\c
     *pos+1+#PB_Compiler_Unicode
     *char+1+#PB_Compiler_Unicode
    Else
     ProcedureReturn #False
    EndIf
  EndSelect
 Until *char\c=0
 ProcedureReturn #True
EndProcedure

Debug "Should be FALSE"
Debug "return = "+Str(EvalWildCardsStringMatch("abc_def-ghi.,;jkl","*??D*",1))
Debug "- - - - -"
Debug "return = "+Str(EvalWildCardsStringMatch("abc_def-ghi.,;jkl","?*a",1))
Debug "- - - - -"
Debug "return = "+Str(EvalWildCardsStringMatch("abc_def-ghi.,;jkl","*1",1))
Debug "- - - - -"
Debug "return = "+Str(EvalWildCardsStringMatch("abc_def-ghi.,;jkl","*?a",1))
Debug "- - - - -"
Debug "return = "+Str(EvalWildCardsStringMatch("abc_def-ghi.,;jkl","defg",1))
Debug "- - - - -"
Debug "return = "+Str(EvalWildCardsStringMatch("w ind ind","wind",0))
Debug "- - - - -"
Debug "return = "+Str(EvalWildCardsStringMatch("w ind ind","wind&",0))
Debug "- - - - -"
Debug "return = "+Str(EvalWildCardsStringMatch("abc","a*???",0))
Debug "- - - - -"
Debug "return = "+Str(EvalWildCardsStringMatch("ab..abc","*.??",0))
Debug "- - - - -"
Debug "return = "+Str(EvalWildCardsStringMatch("ab.abc","*.????",0))
Debug "- - - - -"
Debug "return = "+Str(EvalWildCardsStringMatch("a&b/|c","a/&b&///&c",0))
Debug "- - - - -"
Debug "Should be TRUE"
Debug "return = "+Str(EvalWildCardsStringMatch("abc_def-ghi.,;jkl","*??*D*",0))
Debug "- - - - -"
Debug "return = "+Str(EvalWildCardsStringMatch("abc_def-ghi.,;jkl","?Bc*",0))
Debug "- - - - -"
Debug "return = "+Str(EvalWildCardsStringMatch("abc_def-ghi.,;jkl","*g?i.*",0))
Debug "- - - - -"
Debug "return = "+Str(EvalWildCardsStringMatch("abc_def---ghi...jkl","*-g?i.*k?",0))
Debug "- - - - -"
Debug "return = "+Str(EvalWildCardsStringMatch("abc_.jkldef---ghi..jkl","*.jkl*.jkl",0))
Debug "- - - - -"
Debug "return = "+Str(EvalWildCardsStringMatch("abc_.jkldef---ghi..jkl","~defg&~*?a&*??D*|~*-?-*",0))
Debug "- - - - -"
Debug "return = "+Str(EvalWildCardsStringMatch("ab1c2_","*ab#c#_*",0))
Debug "- - - - -"
Debug "return = "+Str(EvalWildCardsStringMatch("ab.abc","*.???",0))
Debug "- - - - -"
Debug "return = "+Str(EvalWildCardsStringMatch("ab..abc","*.???",0))
Debug "- - - - -"
Debug "return = "+Str(EvalWildCardsStringMatch("ab...abc","*.???",0))
Debug "- - - - -"
Debug "return = "+Str(EvalWildCardsStringMatch("/~b...abc","//~*.???",0))
Debug "- - - - -"
Debug "return = "+Str(EvalWildCardsStringMatch("a&b|c","a/&b/|c",0))
Debug "- - - - -"
Debug "return = "+Str(EvalWildCardsStringMatch("a&b/|c","a&b&///|c",0))
Debug "- - - - -"
Debug "return = "+Str(EvalWildCardsStringMatch("a&b|c","a/&b|c/",0))
Debug "- - - - -"
Debug "return = "+Str(EvalWildCardsStringMatch("a&b|c","~~c",0))
Debug "- - - - -"
Debug "return = "+Str(EvalWildCardsStringMatch("a&b|c","",0))


Top
 Profile  
 
Display posts from previous:  Sort by  
Post new topic Reply to topic  [ 17 posts ]  Go to page 1, 2  Next

All times are UTC + 1 hour


Who is online

Users browsing this forum: No registered users and 1 guest


You cannot post new topics in this forum
You cannot reply to topics in this forum
You cannot edit your posts in this forum
You cannot delete your posts in this forum

Search for:
Jump to:  

 


Powered by phpBB © 2008 phpBB Group
subSilver+ theme by Canver Software, sponsor Sanal Modifiye