Google-type string testing
Posted: Sun Apr 10, 2011 2:18 am
This is some code I've been using for a while. It works, even for complex search parameters (although I'm sure somebody will find a bug!).
It allows you to test whether strings match a given search pattern, in the manner of a Google search. In the example provided, the search pattern is "innocent movie-watcher" -popcorn-, which is quite complex.
Some necessary macros for processing strings are included, which people may find useful for other projects.
Hope someone can use it!
It allows you to test whether strings match a given search pattern, in the manner of a Google search. In the example provided, the search pattern is "innocent movie-watcher" -popcorn-, which is quite complex.
Some necessary macros for processing strings are included, which people may find useful for other projects.
Code: Select all
; One-character string constants shared by the string helpers below.
; Each name carries the ASCII code of the character it holds.
Global c10.s = #LF$      ; line feed       (ASCII 10)
Global c13.s = #CR$      ; carriage return (ASCII 13)
Global c32.s = " "       ; space           (ASCII 32)
Global c34.s = #DQUOTE$  ; double quote    (ASCII 34)
Global c39.s = "'"       ; apostrophe      (ASCII 39)
; Converts a byte flag into display text.
; b : any non-zero value counts as true.
; Returns "Yes" when b is non-zero, otherwise "No".
Procedure.s ByteYesNo(b.b)
  Protected answer.s = "No"
  If b
    answer = "Yes"
  EndIf
  ProcedureReturn answer
EndProcedure
; Replaces every occurrence of a with b inside the string variable t, repeating
; until t no longer contains a (e.g. collapsing runs of spaces to one space).
; t    : string variable, modified in place
; a    : text to search for
; b    : replacement text
; mode : passed straight to ReplaceString() (default 1)
; When b itself contains a, repeating could never finish because every pass
; re-creates the search text; in that case only a single pass is performed.
Macro RecursiveReplaceThisString(t,a,b,mode=1)
  If FindString(b,a,1)
    t = ReplaceString(t,a,b,mode)
  Else
    While FindString(t,a,1)
      t = ReplaceString(t,a,b,mode)
    Wend
  EndIf
EndMacro
; Returns t with every occurrence of a replaced by b, repeating the replacement
; until the result no longer contains a.
; t    : input text
; a    : text to search for
; b    : replacement text
; mode : passed straight to ReplaceString() (default 0)
; When b itself contains a, repeating would never terminate, so a single
; replacement pass is performed instead.
Procedure.s RecursiveReplaceString(t.s,a.s,b.s,mode.b=0)
  If FindString(b,a,1)
    ProcedureReturn ReplaceString(t,a,b,mode)
  EndIf
  While FindString(t,a,1)
    t = ReplaceString(t,a,b,mode)
  Wend
  ProcedureReturn t
EndProcedure
; Prepends start to the string variable t unless t already begins with it.
; An empty start leaves t untouched.
Macro EnsureThisStart(t,start)
  If start <> "" And Left(t,Len(start)) <> start
    t = start + t
  EndIf
EndMacro
; Appends endd to the string variable t unless t already finishes with it.
; An empty endd leaves t untouched.
Macro EnsureThisEnd(t,endd)
  If endd <> "" And Right(t,Len(endd)) <> endd
    t = t + endd
  EndIf
EndMacro
; Strips one leading occurrence of start from the string variable t, if present.
Macro EnsureThisNotStart(t,start)
  If Left(t,Len(start)) = start
    t = Right(t,Len(t)-Len(start))
  EndIf
EndMacro
; Shows t in a message box titled "Report" with a single OK button.
Macro R(t)
  MessageRequester("Report", t, #PB_MessageRequester_Ok)
EndMacro
; --------------------------------------------------------------------------------------------------
; Normalises text for whole-word matching.
; Every separator character (LF, CR, ! ? " . , | ( ) ) becomes a space, the
; text is wrapped in one leading and one trailing space, and runs of spaces
; are collapsed to a single space. A word "foo" can then be located by
; searching for " foo ".
; str : text to normalise
; Returns the normalised text.
Procedure.s BreakIntoWords(str.s)
  ; LF is treated as a separator (not deleted) so that text using LF-only
  ; line endings does not fuse the last word of one line with the first word
  ; of the next. For CR+LF text the result is unchanged because the doubled
  ; space is collapsed below.
  Protected separators.s = c10 + c13 + "!?" + c34 + ".,|()"
  Protected i
  For i = 1 To Len(separators)
    str = ReplaceString(str, Mid(separators, i, 1), c32)
  Next
  str = c32 + str + c32
  RecursiveReplaceThisString(str,c32+c32,c32)
  ProcedureReturn str
EndProcedure
; Parsed form of a search string: filled by ParseGoogleSearchString and
; consumed by GoogleTestString / ReportGoogleSearchParameters.
Structure GoogleStringMatcher
List want.s() ; words/phrases that must all be found in a tested string
List dontwant.s() ; words/phrases that must not be found in a tested string
viable.b ; #True when the parse produced at least one entry; GoogleTestString returns #False when this is not set
EndStructure
; Displays a summary of a parsed search in a message box: the original search
; string, whether the matcher is usable, and the wanted / unwanted entries.
; *g      : parsed matcher to describe
; origstr : the search string the matcher was built from (shown verbatim)
Procedure ReportGoogleSearchParameters(*g.GoogleStringMatcher,origstr.s)
  Protected summary.s

  ; Header: search string and validity
  summary = "SEARCH STRING..." + c13 + origstr + c13 + c13
  summary + "Is valid for matching: " + UCase(ByteYesNo(*g\viable)) + c13 + c13

  ; Wanted entries, one per line
  summary + "Wants: " + Str(ListSize(*g\want())) + c13
  ForEach *g\want()
    summary + " " + *g\want() + c13
  Next

  ; Unwanted entries, one per line
  summary + c13 + "Don't wants: " + Str(ListSize(*g\dontwant())) + c13
  ForEach *g\dontwant()
    summary + " " + *g\dontwant() + c13
  Next

  R(summary)
EndProcedure
; Tests a string against a parsed search.
; *g             : matcher filled by ParseGoogleSearchString
; string         : text to test
; casemode       : #PB_String_NoCase for case-insensitive matching; any other
;                  value matches case-sensitively
; wholewordsonly : #True  - entries must match whole words (text and entries
;                           are normalised with BreakIntoWords)
;                  #False - entries match as plain substrings
; reportstring   : #True shows the normalised text in a message box first
; Returns #True when every wanted entry is found and no unwanted entry is
; found; #False otherwise, or when the matcher is not viable.
Procedure.b GoogleTestString(*g.GoogleStringMatcher,string.s,casemode.b,wholewordsonly.b=#True,reportstring.b=#False)
  If Not *g\viable
    ProcedureReturn #False
  EndIf

  ; --- normalise the text under test ---
  If casemode=#PB_String_NoCase
    string = LCase(string)
  EndIf
  ; Turn simple HTML breaks into spaces BEFORE word-breaking, so the spaces
  ; they introduce are collapsed together with all the others (otherwise
  ; "a<br> b" would keep a double space and never match the phrase "a b").
  string = ReplaceString(string,"<p>",c32,#PB_String_NoCase)
  string = ReplaceString(string,"<br>",c32,#PB_String_NoCase)
  If wholewordsonly
    string = BreakIntoWords(string)
  Else
    string = Trim(string)
  EndIf

  ; --- normalise private copies of the search entries the same way ---
  ; Copies are used so the matcher itself is never modified.
  NewList want.s()
  CopyList(*g\want(),want())
  NewList dontwant.s()
  CopyList(*g\dontwant(),dontwant())

  ForEach want()
    If casemode=#PB_String_NoCase
      want() = LCase(want())
    EndIf
    If wholewordsonly
      want() = BreakIntoWords(want())
    Else
      want() = Trim(want())
    EndIf
  Next

  ; Each copied entry is normalised from its own value. (Re-reading
  ; *g\dontwant() here would fetch whatever element happens to be current in
  ; the source list, not the one matching this iteration.)
  ForEach dontwant()
    If casemode=#PB_String_NoCase
      dontwant() = LCase(dontwant())
    EndIf
    If wholewordsonly
      dontwant() = BreakIntoWords(dontwant())
    Else
      dontwant() = Trim(dontwant())
    EndIf
  Next

  If reportstring : R(string) : EndIf

  ; --- search the text (FindString positions are 1-based) ---
  ForEach want()
    If Not FindString(string,want(),1)
      ProcedureReturn #False
    EndIf
  Next
  ForEach dontwant()
    If FindString(string,dontwant(),1)
      ProcedureReturn #False
    EndIf
  Next
  ProcedureReturn #True
EndProcedure
; Parser states used by ParseGoogleSearchString while scanning the search string.
Enumeration
#GoogleStringParse_ModePositive ; collecting single wanted words (initial state)
#GoogleStringParse_ModePositivePhrase ; inside a quoted wanted phrase
#GoogleStringParse_ModeNegative ; inside a -word- unwanted word
#GoogleStringParse_ModeNegativePhrase ; inside a -"phrase"- unwanted phrase
EndEnumeration
; Parses a Google-style search string into a GoogleStringMatcher.
;
; Syntax handled (single quotes are treated like double quotes):
;   word            wanted word
;   "some phrase"   wanted phrase
;   -word-          unwanted word
;   -"some phrase"- unwanted phrase
; Example: nice "nice person" -bad- -"bad person"-
;
; string : the search string
; *g     : matcher to fill; its lists are replaced and \viable is recomputed
; Returns #True when at least one wanted or unwanted entry was found,
; otherwise #False (also for an empty search string).
Procedure.b ParseGoogleSearchString(string.s,*g.GoogleStringMatcher)
  Protected mode, a, items
  Protected letter.s, token.s, wants$, dontwants$

  ; NOTE(review): InitializeStructure on a structure that is already
  ; initialised (e.g. a declared structured variable) is documented to leak
  ; the old list members. Kept because callers may pass raw memory - confirm.
  InitializeStructure(*g,GoogleStringMatcher)

  ; Always start from a clean result so a previous parse cannot leave a stale
  ; \viable = #True (or stale entries) behind.
  *g\viable = #False
  ClearList(*g\want())
  ClearList(*g\dontwant())

  If string = ""
    ProcedureReturn #False
  EndIf

  ; --- rewrite the markers into single control characters ---
  string = ReplaceString(string,c39,c34)
  EnsureThisEnd(string,c32)
  EnsureThisStart(string,c32)
  string = ReplaceString(string,"-"+c34,"¬") ; negative phrase opener
  string = ReplaceString(string,c34+"-","¬") ; negative phrase closer
  string = ReplaceString(string,c32+"-",c32+"`") ; single negative words (opener)
  string = ReplaceString(string,"-"+c32,"`"+c32) ; single negative words (closer)
  RecursiveReplaceThisString(string,c32+c32,c32)
  EnsureThisNotStart(string,c32)

  ; --- scan character by character, building "|"-separated entry lists ---
  mode = #GoogleStringParse_ModePositive
  For a = 1 To Len(string)
    letter = Mid(string,a,1)
    Select letter
      Case "¬" ; negative phrases
        Select mode
          Case #GoogleStringParse_ModeNegativePhrase
            mode = #GoogleStringParse_ModePositive
            dontwants$+"|"
          Case #GoogleStringParse_ModePositive
            mode = #GoogleStringParse_ModeNegativePhrase
        EndSelect
      Case "`" ; single negative words
        Select mode
          Case #GoogleStringParse_ModeNegative
            mode = #GoogleStringParse_ModePositive
            dontwants$+"|"
          Case #GoogleStringParse_ModePositive
            mode = #GoogleStringParse_ModeNegative
        EndSelect
      Case Chr(34) ; positive phrases
        Select mode
          Case #GoogleStringParse_ModePositivePhrase
            mode = #GoogleStringParse_ModePositive
            wants$+"|"
          Case #GoogleStringParse_ModePositive
            mode = #GoogleStringParse_ModePositivePhrase
        EndSelect
      Case Chr(32)
        ; A space ends a single word but is part of a phrase.
        Select mode
          Case #GoogleStringParse_ModePositive
            wants$+"|"
          Case #GoogleStringParse_ModeNegative
            dontwants$+"|"
          Case #GoogleStringParse_ModeNegativePhrase
            dontwants$+c32
          Case #GoogleStringParse_ModePositivePhrase
            wants$+c32
        EndSelect
      Default ; ordinary character: append to the entry being built
        If mode=#GoogleStringParse_ModeNegative Or mode=#GoogleStringParse_ModeNegativePhrase
          dontwants$+letter
        Else
          wants$+letter
        EndIf
    EndSelect
  Next

  ; Terminate whatever entry is still open (e.g. a phrase whose closing quote
  ; is missing) so it is not silently dropped. For well-formed input this only
  ; adds a redundant separator, which is collapsed / skipped below.
  wants$+"|"
  dontwants$+"|"
  RecursiveReplaceThisString(wants$,"||","|")
  RecursiveReplaceThisString(dontwants$,"||","|")

  ; --- fill the lists, skipping blank entries ---
  ; A leading "|" (e.g. when the search starts with a negative term) yields an
  ; empty field; storing it would make every later match test fail or succeed
  ; regardless of the text.
  items = CountString(wants$,"|")
  For a = 1 To items
    token = StringField(wants$,a,"|")
    If Trim(token) <> ""
      AddElement(*g\want())
      *g\want() = token
    EndIf
  Next a

  items = CountString(dontwants$,"|")
  For a = 1 To items
    token = StringField(dontwants$,a,"|")
    If Trim(token) <> ""
      AddElement(*g\dontwant())
      *g\dontwant() = token
    EndIf
  Next a

  If ListSize(*g\want()) Or ListSize(*g\dontwant())
    *g\viable = #True
  EndIf
  ProcedureReturn *g\viable
EndProcedure
; --- Demo: wants the phrase "innocent movie-watcher", rejects the word popcorn ---
Define params.s = c34 + "innocent movie-watcher" + c34 + " -popcorn-"
Define g.GoogleStringMatcher

ParseGoogleSearchString(params, @g)
ReportGoogleSearchParameters(@g, params)

; Wanted phrase absent
Debug GoogleTestString(@g, "He was watching movies.", 0, #True)
; Wanted phrase present, unwanted word absent
Debug GoogleTestString(@g, "He was an " + c34 + "innocent movie-watcher" + c34 + ".", 0, #True)
; Wanted phrase present, but the unwanted word appears too
Debug GoogleTestString(@g, "He was an " + c34 + "innocent movie-watcher" + c34 + ", according to reports, and he liked popcorn.", 0, #True)