
inspired by this code (Converting a google search query to PostgreSQL tsquery), I've added the code that generate the Lexer :
Code: Select all
; Program : PROCEDURES_ParseQuery.pbi
; Comment : Generates PostgreSql to_tsquery
; Author : Flaith
; Create : 24.11.2013
; Update : 27.11.2013
; Inspired of the PHP code from Peter Bailey
; http://stackoverflow.com/questions/207817/converting-a-google-search-query-to-a-postgresql-tsquery#tab-top
;-Modules declaration
DeclareModule HandleString
; sPrintf
; =======
; From mk-soft
; http://www.purebasic.fr/english/viewtopic.php?p=241016
Declare.s sPrintf(text.s, *value1 = 0, *value2 = 0, *value3 = 0, *value4 = 0, *value5 = 0, *value6 = 0, *value7 = 0, *value8 = 0, *value9 = 0, *value10 = 0, *value11 = 0 )
; L/R/TrimChars
; =============
; From Little John
; http://www.purebasic.fr/english/viewtopic.php?p=411500#p411500
#WHITESPACE$ = " " + #TAB$ + #CRLF$
Declare.s LTrimChars (source$, charlist$=#WHITESPACE$)
Declare.s RTrimChars (source$, charlist$=#WHITESPACE$)
Declare.s TrimChars (source$, charlist$=#WHITESPACE$)
EndDeclareModule
DeclareModule QueryPhrase
; Constants and structures are declare as public
#MODE_DEFAULT = 1
#MODE_OR = 2
#MODE_AND = 3
#MODE_EXCLUDE = 4
#PHRASE = 0
#SUBPHRASE = 1
Structure CS_QUERY_PHRASE
Phrase.s
Mode.i
Type.i ; Phrase or subExpression
EndStructure
Structure CS_QUERY_SUBEXPRESSION
List Phrases.CS_QUERY_PHRASE()
EndStructure
Global NewList Phrases.CS_QUERY_PHRASE()
Global NewList SubExpressions.CS_QUERY_SUBEXPRESSION()
Global TYPE.i = #PHRASE
Declare initiateSubExpression()
EndDeclareModule
DeclareModule QueryExpression
Declare AddQueryPhrase(__phrase.s, __type.i = QueryPhrase::#PHRASE, _Mode.i = QueryPhrase::#MODE_DEFAULT)
Declare AddPhrase(__input.s, __type.i)
Declare AddOrPhrase(__input.s, __type.i)
Declare AddAndPhrase(__input.s, __type.i)
Declare AddExclusionPhrase(__input.s, __type.i)
EndDeclareModule
DeclareModule QueryLexer
; Global declare as public
Global.s NewList TokenStack()
Global.s NewList ErrorStack()
Declare ErrToken(__ErrNum.s)
Declare AddToken(__token.s)
Declare.i Strtok(List __Token.s(), __str.s, __sep.s = " ")
Declare.s Execute(__str.s)
EndDeclareModule
DeclareModule QueryParser
Declare.s Parse(__input.s)
Declare.s ParseQuery(__input.s)
Declare ProcessToken(__token.s)
EndDeclareModule
DeclareModule QueryBuilder
Declare.s GetResult()
Declare.s FormatString(__Expression.s, __Mode.i)
Declare ProcessExpression()
EndDeclareModule
;-Modules creation
Module HandleString
Procedure.s sPrintf( text.s, *value1 = 0, *value2 = 0, *value3 = 0, *value4 = 0, *value5 = 0, *value6 = 0, *value7 = 0, *value8 = 0, *value9 = 0, *value10 = 0, *value11 = 0 )
Protected args, *value
Protected result.s, help.s
Protected *text.character
Protected IsValue, IsString, IsLeft, IsVZ, IsNum2, SetFill.s, num1, num2
args = @*value1
*text = @text
Repeat
Select *text\c
Case 0
Break
Case '\'
*text + SizeOf ( character )
Select *text\c
Case 0 : Break
Case '\' : result + "\"
Case 'n' : result + #LF$
Case 'r' : result + #CR$
Case 't' : result + #HT$
Case 'v' : result + #VT$
Case 39 : result + #DQUOTE$ ; (')
Case 'a' : result + #BEL$
Case 'b' : result + #BS$
Case 'f' : result + #FF$
Case '[' : result + #ESC$
Case '0' : result + #NULL$
EndSelect
*text + SizeOf ( character )
Case '%'
help = "?"
IsValue = #False
IsString = #False
IsLeft = #False
IsVZ = #False
IsNum2 = #False
SetFill = " "
num1 = 0
num2 = 0
*text + SizeOf ( character )
*value = PeekI ( args ) ; get pointer to value
Repeat
Select *text\c
Case 0 : Break
Case '-' : IsLeft = #True
Case '+' : IsVZ = #True
Case '.' : IsNum2 = #True
Case '%' : result + "%" : *text + SizeOf ( character ) : Break
Case 39 : *text + SizeOf ( character ) : If *text\c = 0 : Break : Else : SetFill = Chr(*text\c) : EndIf
Case '0' To '9'
If IsNum2 : num2 = num2 * 10 + *text\c - 48 : Else : num1 = num1 * 10 + *text\c - 48 : EndIf
Case 'b'
If *value : help = Str ( PeekB ( *value ) ) : EndIf : IsValue = #True
Case 'w'
If *value : help = Str ( PeekW ( *value ) ) : EndIf : IsValue = #True
Case 'l'
If *value : help = Str ( PeekL ( *value ) ) : EndIf : IsValue = #True
Case 'q'
If *value : help = Str ( PeekQ ( *value ) ) : EndIf : IsValue = #True
Case 'i'
If *value : help = Str ( PeekI ( *value ) ) : EndIf : IsValue = #True
Case 'f'
If *value : help = StrF ( PeekF ( *value ), num2 ) : EndIf : IsValue = #True
Case 'd'
If *value : help = StrD ( PeekD ( *value ), num2 ) : EndIf : IsValue = #True
Case 's'
If *value : help = PeekS ( *value ) : EndIf
If num2 : help = Left ( help, num2 ) : EndIf : IsString = #True
Case 'c'
If *value : help = Chr ( PeekI ( *value ) ) : EndIf : IsString = #True
Case 'X', 'x'
If num2 = 0 : num2 = num1 : EndIf
If *value
Select num2
Case 0 To 2 : help = RSet ( Hex ( PeekB ( *value ), #PB_Byte), num2, "0" )
Case 3 To 4 : help = RSet ( Hex ( PeekW ( *value ), #PB_Word), num2, "0" )
Case 5 To 8 : help = RSet ( Hex ( PeekL ( *value ), #PB_Long), num2, "0" )
Default : help = RSet ( Hex ( PeekQ ( *value ), #PB_Quad), num2, "0" )
EndSelect
EndIf
If *text\c = 'x' : help = LCase ( help ) : EndIf
IsString = #True
Default
IsString = #True
EndSelect
If IsValue And IsVZ
If Left ( help, 1 ) <> "-"
help = "+" + help
EndIf
EndIf
*text + SizeOf(character)
If IsString Or IsValue
If num1 And Len ( help ) < num1
If IsLeft
result + LSet ( help, num1, SetFill )
Else
result + RSet ( help, num1, SetFill )
EndIf
Else
result + help
EndIf
args + SizeOf ( Integer )
Break
EndIf
ForEver
Default
result + Chr ( *text\c )
*text + SizeOf ( character )
EndSelect
ForEver
ProcedureReturn result
EndProcedure
Procedure.s LTrimChars (source$, charlist$=#WHITESPACE$)
; removes from source$ all leading characters which are contained in charlist$
Protected p, last=Len(source$)
p = 1
While p <= last And FindString(charlist$, Mid(source$,p,1)) <> 0
p + 1
Wend
ProcedureReturn Mid(source$, p)
EndProcedure
Procedure.s RTrimChars (source$, charlist$=#WHITESPACE$)
; removes from source$ all trailing characters which are contained in charlist$
Protected p
p = Len(source$)
While p >= 1 And FindString(charlist$, Mid(source$,p,1)) <> 0
p - 1
Wend
ProcedureReturn Left(source$, p)
EndProcedure
Procedure.s TrimChars(source$, charlist$=#WHITESPACE$)
ProcedureReturn LtrimChars(RtrimChars(source$, charlist$), charlist$)
EndProcedure
EndModule
Module QueryPhrase
Procedure initiateSubExpression()
AddElement(SubExpressions())
EndProcedure
EndModule
Module QueryExpression
Procedure AddQueryPhrase(__phrase.s, __type.i = QueryPhrase::#PHRASE, __Mode.i = QueryPhrase::#MODE_DEFAULT)
Select __type
Case QueryPhrase::#PHRASE
AddElement(QueryPhrase::Phrases())
QueryPhrase::Phrases()\Phrase = __phrase
QueryPhrase::Phrases()\Mode = __Mode
QueryPhrase::Phrases()\Type = __type
Case QueryPhrase::#SUBPHRASE
AddElement(QueryPhrase::SubExpressions()\Phrases())
QueryPhrase::SubExpressions()\Phrases()\Phrase = __phrase
QueryPhrase::SubExpressions()\Phrases()\Mode = __Mode
QueryPhrase::SubExpressions()\Phrases()\Type = __type
EndSelect
EndProcedure
Procedure AddPhrase(__input.s, __type.i)
AddQueryPhrase(__input, __type)
EndProcedure
Procedure AddOrPhrase(__input.s, __type.i)
AddQueryPhrase(__input, __type, QueryPhrase::#MODE_OR)
EndProcedure
Procedure AddAndPhrase(__input.s, __type.i)
AddQueryPhrase(__input, __type, QueryPhrase::#MODE_AND)
EndProcedure
Procedure AddExclusionPhrase(__input.s, __type.i)
AddQueryPhrase(__input, __type, QueryPhrase::#MODE_EXCLUDE)
EndProcedure
EndModule
Module QueryLexer
Procedure ErrToken(__ErrNum.s)
AddElement(ErrorStack())
ErrorStack() = __ErrNum
EndProcedure
Procedure AddToken(__token.s)
AddElement(TokenStack())
TokenStack() = __token
EndProcedure
Procedure.i Strtok(List __Token.s(), __str.s, __sep.s = " ")
Protected.i _strCount = CountString(__str, __sep)
Protected.i _index
For _index = 1 To _strCount + 1
AddElement(__Token())
__Token() = StringField(__str, _index, __sep)
Next
ProcedureReturn _strCount + 1
EndProcedure
Procedure.s Execute(__str.s)
Protected.i _TokenCount, _index, _posQuote
Protected.i _DQUOTE, _PARENTHESE, _POS_QUOTE_START
Protected.s _OneCar = ""
Protected.s _searchString = ""
Protected.s _LogicalToken, _keyToken, _KeepToken, _KeepTokenTmp, _Token, _TokenTmp, _FullToken
Protected.s NewList _token()
; split each token in a Array
_searchString = LCase(__str)
_TokenCount = Strtok(_token(), _searchString, " ")
If _TokenCount > 0
; Get each token
_DQUOTE = 0
_PARENTHESE = 0
ForEach _token()
_TokenTmp = ""
_FullToken = ""
_index = 1
_POS_QUOTE_START = 0 ; Case where we are on another token and quotes are already open
; so when we extract the whole token With _POS_QUOTE_START + 1,
; we are sure to start at 0 (cf line Case '"' when _DQUOTE > 0)
; Check for Or, And, Not, |, &, -
_keyToken = _token()
Select _keyToken
Case "or"
_LogicalToken = "or "
_keyToken = ""
Case "and"
_LogicalToken = "and "
_keyToken = ""
Case "not"
_LogicalToken = "not "
_keyToken = ""
Case "&"
_LogicalToken = "and "
_keyToken = ""
Case "|"
_LogicalToken = "or "
_keyToken = ""
Case "-"
_LogicalToken = "not "
_keyToken = ""
EndSelect
While _index <= Len(_keyToken)
_OneCar = Mid(_keyToken, _index, 1)
Select _OneCar
Case "("
AddToken("(")
_TokenTmp = ""
_PARENTHESE + 1
Case ")"
If _TokenTmp <> ""
If _LogicalToken <> ""
AddToken(_LogicalToken + Trim(_TokenTmp))
_LogicalToken = ""
Else
AddToken(Trim(_TokenTmp))
EndIf
EndIf
_TokenTmp = ""
AddToken(")")
_PARENTHESE - 1
Case "'"
_index + 1
Case #DQUOTE$
If _DQUOTE > 0 ; Means the checked value has already an open quote, so we can add the token
_Token = Mid(_keyToken, _POS_QUOTE_START + 1, _index - _POS_QUOTE_START - 1)
If _KeepToken <> ""
_FullToken = Trim(_KeepToken) + "$[0]" + Trim(_token) ; or " #0 "
If _LogicalToken <> ""
_FullToken = _LogicalToken + _FullToken
EndIf
AddToken(Trim(_FullToken))
_FullToken = ""
_LogicalToken = ""
_KeepToken = ""
Else
If _LogicalToken <> ""
AddToken(_LogicalToken + Trim(_token))
_LogicalToken = ""
Else
AddToken(Trim(_token)) ; We add the only one token we found between double quotes
EndIf
EndIf
_DQUOTE - 1
Else
; Case it's a new quote, we must keep the token to be add with the one we will find
; when the quote will be closed
; check If we have another quote after this one
_POS_QUOTE_START = _index
_DQUOTE + 1
; If no other Quote, we keep it to be added after
If FindString(_keyToken, #DQUOTE$, _index + 1) = 0
; No so we keep it
_KeepToken + Mid(_keyToken, _index + 1)
EndIf
EndIf
_TokenTmp = ""
Default
_TokenTmp + _OneCar
EndSelect
_index+1
Wend
; We are in the case of a new token appears and the previous had a quote
If _DQUOTE > 0
_posQuote = FindString(_keyToken, #DQUOTE$, 1) ; Check to be sure there is no other quote
If _posQuote = 0 ; No so we keep it
_KeepTokenTmp = _TokenTmp
_KeepToken + "$[0]" + _KeepTokenTmp ; or " #0 "
EndIf
Else
If _TokenTmp <> ""
If _LogicalToken <> ""
AddToken(_LogicalToken + Trim(_TokenTmp))
_LogicalToken = ""
Else
_TokenTmp = Trim(_TokenTmp)
; We start With the first token And there is no quote, nor parenthese, just a word
If ListSize(tokenStack()) = 0
; add parentheses To be sure To have the 'and' between each word
If (Left(_TokenTmp, 1) = "-")
AddToken("not " + Mid(_TokenTmp, 2))
Else
AddToken("(")
AddToken(_TokenTmp)
AddToken(")")
EndIf
Else ; Not empty
If Left(_TokenTmp, 1) = "-" ; If left of token = '-' don't add the 'And'
AddToken("not " + Mid(_TokenTmp, 2))
;addToken(_TokenTmp)
Else
AddToken("and " + _TokenTmp) ; automatic 'and' between all
EndIf
EndIf
EndIf
EndIf
EndIf
Next ; End ForEach
If _DQUOTE <> 0 : ErrToken("#ERR001") : EndIf ; ("Error double quotes does not match");
If _PARENTHESE <> 0 : ErrToken("#ERR002") : EndIf ; ("Error parentheses does not match");
Else
ProcedureReturn ""
EndIf
EndProcedure
EndModule
Module QueryParser
Procedure.s Parse(__input.s)
Protected.s _expression
QueryLexer::Execute(__input)
ForEach QueryLexer::TokenStack()
ProcessToken(QueryLexer::TokenStack())
Next
EndProcedure
Procedure.s ParseQuery(__input.s)
; Clear each time
ClearList(QueryLexer::TokenStack())
ClearList(QueryLexer::ErrorStack())
ClearList(QueryPhrase::Phrases())
ClearList(QueryPhrase::SubExpressions())
QueryParser::Parse(__input)
QueryBuilder::ProcessExpression()
ProcedureReturn QueryBuilder::GetResult()
EndProcedure
Procedure ProcessToken(__token.s)
Protected.s _Modifier, _Phrase, _Operator
Protected.i _type = QueryPhrase::TYPE
Select __token
Case "("
QueryPhrase::TYPE = QueryPhrase::#SUBPHRASE
QueryPhrase::initiateSubExpression()
Case ")"
QueryPhrase::TYPE = QueryPhrase::#PHRASE
Default
_Modifier = Left(__token, 1)
_Phrase = Mid(__token, 2)
Select _Modifier
Case "-"
QueryExpression::AddExclusionPhrase(_Phrase, _type)
Case "+"
QueryExpression::AddPhrase(_Phrase, _type)
Default
_Operator = Trim(Mid(__token, 1, FindString(__token, " ")))
_Phrase = Trim(Mid(__token, FindString(__token, " ")))
Select LCase(_Operator)
Case "and"
QueryExpression::AddAndPhrase(_Phrase, _type)
Case "or"
QueryExpression::AddOrPhrase(_Phrase, _type)
Case "not"
QueryExpression::AddExclusionPhrase(_Phrase, _type)
Default:
QueryExpression::AddPhrase(__token, _type)
EndSelect
EndSelect
EndSelect
EndProcedure
EndModule
Module QueryBuilder
; Private var
Global.s _Query
Procedure.s GetResult()
ProcedureReturn _Query
EndProcedure
Procedure.s FormatString(__Expression.s, __Mode.i)
Protected.s _Expression = HandleString::TrimChars(__Expression, " &|")
Protected.s _Format = "''%s'' "
Select __Mode
Case QueryPhrase::#MODE_AND
_Format = "& ''%s'' "
Case QueryPhrase::#MODE_OR
_Format = "| ''%s'' "
Case QueryPhrase::#MODE_EXCLUDE
_Format = "& !''%s'' "
EndSelect
_Expression = ReplaceString(_Expression, "'", "\\'")
ProcedureReturn HandleString::sPrintf(_Format, @_Expression)
EndProcedure
Procedure ProcessExpression()
Protected.s _Expression, _Format, _QueryTMP
_Query = ""
ForEach QueryPhrase::Phrases()
If QueryPhrase::Phrases()\type = QueryPhrase::#PHRASE
_Query + FormatString(QueryPhrase::Phrases()\Phrase, QueryPhrase::Phrases()\Mode)
EndIf
Next
ForEach QueryPhrase::SubExpressions()
_QueryTMP = ""
ForEach QueryPhrase::SubExpressions()\Phrases()
_QueryTMP + FormatString(QueryPhrase::SubExpressions()\Phrases()\Phrase, QueryPhrase::SubExpressions()\Phrases()\Mode)
Next
_Query + "& (" + _QueryTMP + ") "
Next
_Query = HandleString::TrimChars(_Query, " &|")
EndProcedure
EndModule
;-End modules creation
Code: Select all
XIncludeFile "PROCEDURES_ParseQuery.pbi"
Procedure showQuery(__query.s)
Debug "Result for Query : "+__query+ #CRLF$ + QueryParser::ParseQuery(__query)
Debug "-------------------------------------------------"
EndProcedure
str1$ = "("+#DQUOTE$+"sales director"+#DQUOTE$+" Or "+#DQUOTE$+"sale"+#DQUOTE$+") "+#DQUOTE$+"team leader"+#DQUOTE$+" ("+#DQUOTE$+"ho chi minh"+#DQUOTE$+" And male) -executive -fmcg Not representative"
str2$ = "java "+#DQUOTE$+"technical architect"+#DQUOTE$
str3$ = #DQUOTE$+"qa manager"+#DQUOTE$+" -fmcg"
str4$ = #DQUOTE$+"qa manager"+#DQUOTE$+" Or -fmcg"
str5$ = "GENERAL MANAGER"
str6$ = "manager java developer"
str7$ = "-fmcg"
str8$ = "java -fmcg"
str9$ = "("+#DQUOTE$+"used cars"+#DQUOTE$+" Or "+#DQUOTE$+"new cars"+#DQUOTE$+") -ford -mistubishi"
; --- Result for str1$ must be :
; ''team #0 leader'' & !''executive'' & !''fmcg'' & !''representative'' & (''sales #0 director'' | ''sale'' ) & (''ho #0 chi #0 minh'' & ''male'' )
; --- (27/11/2013) result :
; ''team$[0]leader'' & !''executive'' & !''fmcg'' & !''representative'' & (''sales$[0]director'' | ''sale'' ) & (''ho$[0]chi$[0]minh'' & ''male'' )
showQuery(str1$)
showQuery(str2$)
showQuery(str3$)
showQuery(str4$)
showQuery(str5$)
showQuery(str6$)
showQuery(str7$)
showQuery(str8$)
showQuery(str9$)
