PostgreSQL permet de faire des recherches précises dans un texte en utilisant "to_tsquery", dans mon cas sur des CV, donc ayant besoin, pour ce projet actuel, de faire des recherches dans une base de données pour un website, j'ai été inspiré par ce code (Converting a google search query to PostgreSQL tsquery), Il manquait la partie générant les lexers, je l'ai donc rajouté dans mon code PHP, et pour les besoins d'administration de la base de données, j'ai fait un programme en Purebasic, que voici :
Code : Tout sélectionner
; Program : PROCEDURES_ParseQuery.pbi
; Comment : Generates PostgreSql to_tsquery
; Author : Flaith
; Create : 24.11.2013
; Update : 27.11.2013
; Inspired of the PHP code from Peter Bailey
; http://stackoverflow.com/questions/207817/converting-a-google-search-query-to-a-postgresql-tsquery#tab-top
;-Modules declaration
DeclareModule HandleString
; sPrintf
; =======
; From mk-soft
; http://www.purebasic.fr/english/viewtopic.php?p=241016
Declare.s sPrintf(text.s, *value1 = 0, *value2 = 0, *value3 = 0, *value4 = 0, *value5 = 0, *value6 = 0, *value7 = 0, *value8 = 0, *value9 = 0, *value10 = 0, *value11 = 0 )
; L/R/TrimChars
; =============
; From Little John
; http://www.purebasic.fr/english/viewtopic.php?p=411500#p411500
#WHITESPACE$ = " " + #TAB$ + #CRLF$
Declare.s LTrimChars (source$, charlist$=#WHITESPACE$)
Declare.s RTrimChars (source$, charlist$=#WHITESPACE$)
Declare.s TrimChars (source$, charlist$=#WHITESPACE$)
EndDeclareModule
DeclareModule QueryPhrase
; Constants and structures are declare as public
#MODE_DEFAULT = 1
#MODE_OR = 2
#MODE_AND = 3
#MODE_EXCLUDE = 4
#PHRASE = 0
#SUBPHRASE = 1
Structure CS_QUERY_PHRASE
Phrase.s
Mode.i
Type.i ; Phrase or subExpression
EndStructure
Structure CS_QUERY_SUBEXPRESSION
List Phrases.CS_QUERY_PHRASE()
EndStructure
Global NewList Phrases.CS_QUERY_PHRASE()
Global NewList SubExpressions.CS_QUERY_SUBEXPRESSION()
Global TYPE.i = #PHRASE
Declare initiateSubExpression()
EndDeclareModule
DeclareModule QueryExpression
Declare AddQueryPhrase(__phrase.s, __type.i = QueryPhrase::#PHRASE, _Mode.i = QueryPhrase::#MODE_DEFAULT)
Declare AddPhrase(__input.s, __type.i)
Declare AddOrPhrase(__input.s, __type.i)
Declare AddAndPhrase(__input.s, __type.i)
Declare AddExclusionPhrase(__input.s, __type.i)
EndDeclareModule
DeclareModule QueryLexer
; Global declare as public
Global.s NewList TokenStack()
Global.s NewList ErrorStack()
Declare ErrToken(__ErrNum.s)
Declare AddToken(__token.s)
Declare.i Strtok(List __Token.s(), __str.s, __sep.s = " ")
Declare.s Execute(__str.s)
EndDeclareModule
DeclareModule QueryParser
Declare.s Parse(__input.s)
Declare.s ParseQuery(__input.s)
Declare ProcessToken(__token.s)
EndDeclareModule
DeclareModule QueryBuilder
Declare.s GetResult()
Declare.s FormatString(__Expression.s, __Mode.i)
Declare ProcessExpression()
EndDeclareModule
;-Modules creation
Module HandleString
Procedure.s sPrintf( text.s, *value1 = 0, *value2 = 0, *value3 = 0, *value4 = 0, *value5 = 0, *value6 = 0, *value7 = 0, *value8 = 0, *value9 = 0, *value10 = 0, *value11 = 0 )
Protected args, *value
Protected result.s, help.s
Protected *text.character
Protected IsValue, IsString, IsLeft, IsVZ, IsNum2, SetFill.s, num1, num2
args = @*value1
*text = @text
Repeat
Select *text\c
Case 0
Break
Case '\'
*text + SizeOf ( character )
Select *text\c
Case 0 : Break
Case '\' : result + "\"
Case 'n' : result + #LF$
Case 'r' : result + #CR$
Case 't' : result + #HT$
Case 'v' : result + #VT$
Case 39 : result + #DQUOTE$ ; (')
Case 'a' : result + #BEL$
Case 'b' : result + #BS$
Case 'f' : result + #FF$
Case '[' : result + #ESC$
Case '0' : result + #NULL$
EndSelect
*text + SizeOf ( character )
Case '%'
help = "?"
IsValue = #False
IsString = #False
IsLeft = #False
IsVZ = #False
IsNum2 = #False
SetFill = " "
num1 = 0
num2 = 0
*text + SizeOf ( character )
*value = PeekI ( args ) ; get pointer to value
Repeat
Select *text\c
Case 0 : Break
Case '-' : IsLeft = #True
Case '+' : IsVZ = #True
Case '.' : IsNum2 = #True
Case '%' : result + "%" : *text + SizeOf ( character ) : Break
Case 39 : *text + SizeOf ( character ) : If *text\c = 0 : Break : Else : SetFill = Chr(*text\c) : EndIf
Case '0' To '9'
If IsNum2 : num2 = num2 * 10 + *text\c - 48 : Else : num1 = num1 * 10 + *text\c - 48 : EndIf
Case 'b'
If *value : help = Str ( PeekB ( *value ) ) : EndIf : IsValue = #True
Case 'w'
If *value : help = Str ( PeekW ( *value ) ) : EndIf : IsValue = #True
Case 'l'
If *value : help = Str ( PeekL ( *value ) ) : EndIf : IsValue = #True
Case 'q'
If *value : help = Str ( PeekQ ( *value ) ) : EndIf : IsValue = #True
Case 'i'
If *value : help = Str ( PeekI ( *value ) ) : EndIf : IsValue = #True
Case 'f'
If *value : help = StrF ( PeekF ( *value ), num2 ) : EndIf : IsValue = #True
Case 'd'
If *value : help = StrD ( PeekD ( *value ), num2 ) : EndIf : IsValue = #True
Case 's'
If *value : help = PeekS ( *value ) : EndIf
If num2 : help = Left ( help, num2 ) : EndIf : IsString = #True
Case 'c'
If *value : help = Chr ( PeekI ( *value ) ) : EndIf : IsString = #True
Case 'X', 'x'
If num2 = 0 : num2 = num1 : EndIf
If *value
Select num2
Case 0 To 2 : help = RSet ( Hex ( PeekB ( *value ), #PB_Byte), num2, "0" )
Case 3 To 4 : help = RSet ( Hex ( PeekW ( *value ), #PB_Word), num2, "0" )
Case 5 To 8 : help = RSet ( Hex ( PeekL ( *value ), #PB_Long), num2, "0" )
Default : help = RSet ( Hex ( PeekQ ( *value ), #PB_Quad), num2, "0" )
EndSelect
EndIf
If *text\c = 'x' : help = LCase ( help ) : EndIf
IsString = #True
Default
IsString = #True
EndSelect
If IsValue And IsVZ
If Left ( help, 1 ) <> "-"
help = "+" + help
EndIf
EndIf
*text + SizeOf(character)
If IsString Or IsValue
If num1 And Len ( help ) < num1
If IsLeft
result + LSet ( help, num1, SetFill )
Else
result + RSet ( help, num1, SetFill )
EndIf
Else
result + help
EndIf
args + SizeOf ( Integer )
Break
EndIf
ForEver
Default
result + Chr ( *text\c )
*text + SizeOf ( character )
EndSelect
ForEver
ProcedureReturn result
EndProcedure
Procedure.s LTrimChars (source$, charlist$=#WHITESPACE$)
; removes from source$ all leading characters which are contained in charlist$
Protected p, last=Len(source$)
p = 1
While p <= last And FindString(charlist$, Mid(source$,p,1)) <> 0
p + 1
Wend
ProcedureReturn Mid(source$, p)
EndProcedure
Procedure.s RTrimChars (source$, charlist$=#WHITESPACE$)
; removes from source$ all trailing characters which are contained in charlist$
Protected p
p = Len(source$)
While p >= 1 And FindString(charlist$, Mid(source$,p,1)) <> 0
p - 1
Wend
ProcedureReturn Left(source$, p)
EndProcedure
Procedure.s TrimChars(source$, charlist$=#WHITESPACE$)
ProcedureReturn LtrimChars(RtrimChars(source$, charlist$), charlist$)
EndProcedure
EndModule
Module QueryPhrase
Procedure initiateSubExpression()
AddElement(SubExpressions())
EndProcedure
EndModule
Module QueryExpression
Procedure AddQueryPhrase(__phrase.s, __type.i = QueryPhrase::#PHRASE, __Mode.i = QueryPhrase::#MODE_DEFAULT)
Select __type
Case QueryPhrase::#PHRASE
AddElement(QueryPhrase::Phrases())
QueryPhrase::Phrases()\Phrase = __phrase
QueryPhrase::Phrases()\Mode = __Mode
QueryPhrase::Phrases()\Type = __type
Case QueryPhrase::#SUBPHRASE
AddElement(QueryPhrase::SubExpressions()\Phrases())
QueryPhrase::SubExpressions()\Phrases()\Phrase = __phrase
QueryPhrase::SubExpressions()\Phrases()\Mode = __Mode
QueryPhrase::SubExpressions()\Phrases()\Type = __type
EndSelect
EndProcedure
Procedure AddPhrase(__input.s, __type.i)
AddQueryPhrase(__input, __type)
EndProcedure
Procedure AddOrPhrase(__input.s, __type.i)
AddQueryPhrase(__input, __type, QueryPhrase::#MODE_OR)
EndProcedure
Procedure AddAndPhrase(__input.s, __type.i)
AddQueryPhrase(__input, __type, QueryPhrase::#MODE_AND)
EndProcedure
Procedure AddExclusionPhrase(__input.s, __type.i)
AddQueryPhrase(__input, __type, QueryPhrase::#MODE_EXCLUDE)
EndProcedure
EndModule
Module QueryLexer
Procedure ErrToken(__ErrNum.s)
AddElement(ErrorStack())
ErrorStack() = __ErrNum
EndProcedure
Procedure AddToken(__token.s)
AddElement(TokenStack())
TokenStack() = __token
EndProcedure
Procedure.i Strtok(List __Token.s(), __str.s, __sep.s = " ")
Protected.i _strCount = CountString(__str, __sep)
Protected.i _index
For _index = 1 To _strCount + 1
AddElement(__Token())
__Token() = StringField(__str, _index, __sep)
Next
ProcedureReturn _strCount + 1
EndProcedure
Procedure.s Execute(__str.s)
Protected.i _TokenCount, _index, _posQuote
Protected.i _DQUOTE, _PARENTHESE, _POS_QUOTE_START
Protected.s _OneCar = ""
Protected.s _searchString = ""
Protected.s _LogicalToken, _keyToken, _KeepToken, _KeepTokenTmp, _Token, _TokenTmp, _FullToken
Protected.s NewList _token()
; split each token in a Array
_searchString = LCase(__str)
_TokenCount = Strtok(_token(), _searchString, " ")
If _TokenCount > 0
; Get each token
_DQUOTE = 0
_PARENTHESE = 0
ForEach _token()
_TokenTmp = ""
_FullToken = ""
_index = 1
_POS_QUOTE_START = 0 ; Case where we are on another token and quotes are already open
; so when we extract the whole token With _POS_QUOTE_START + 1,
; we are sure to start at 0 (cf line Case '"' when _DQUOTE > 0)
; Check for Or, And, Not, |, &, -
_keyToken = _token()
Select _keyToken
Case "or"
_LogicalToken = "or "
_keyToken = ""
Case "and"
_LogicalToken = "and "
_keyToken = ""
Case "not"
_LogicalToken = "not "
_keyToken = ""
Case "&"
_LogicalToken = "and "
_keyToken = ""
Case "|"
_LogicalToken = "or "
_keyToken = ""
Case "-"
_LogicalToken = "not "
_keyToken = ""
EndSelect
While _index <= Len(_keyToken)
_OneCar = Mid(_keyToken, _index, 1)
Select _OneCar
Case "("
AddToken("(")
_TokenTmp = ""
_PARENTHESE + 1
Case ")"
If _TokenTmp <> ""
If _LogicalToken <> ""
AddToken(_LogicalToken + Trim(_TokenTmp))
_LogicalToken = ""
Else
AddToken(Trim(_TokenTmp))
EndIf
EndIf
_TokenTmp = ""
AddToken(")")
_PARENTHESE - 1
Case "'"
_index + 1
Case #DQUOTE$
If _DQUOTE > 0 ; Means the checked value has already an open quote, so we can add the token
_Token = Mid(_keyToken, _POS_QUOTE_START + 1, _index - _POS_QUOTE_START - 1)
If _KeepToken <> ""
_FullToken = Trim(_KeepToken) + "$[0]" + Trim(_token) ; or " #0 "
If _LogicalToken <> ""
_FullToken = _LogicalToken + _FullToken
EndIf
AddToken(Trim(_FullToken))
_FullToken = ""
_LogicalToken = ""
_KeepToken = ""
Else
If _LogicalToken <> ""
AddToken(_LogicalToken + Trim(_token))
_LogicalToken = ""
Else
AddToken(Trim(_token)) ; We add the only one token we found between double quotes
EndIf
EndIf
_DQUOTE - 1
Else
; Case it's a new quote, we must keep the token to be add with the one we will find
; when the quote will be closed
; check If we have another quote after this one
_POS_QUOTE_START = _index
_DQUOTE + 1
; If no other Quote, we keep it to be added after
If FindString(_keyToken, #DQUOTE$, _index + 1) = 0
; No so we keep it
_KeepToken + Mid(_keyToken, _index + 1)
EndIf
EndIf
_TokenTmp = ""
Default
_TokenTmp + _OneCar
EndSelect
_index+1
Wend
; We are in the case of a new token appears and the previous had a quote
If _DQUOTE > 0
_posQuote = FindString(_keyToken, #DQUOTE$, 1) ; Check to be sure there is no other quote
If _posQuote = 0 ; No so we keep it
_KeepTokenTmp = _TokenTmp
_KeepToken + "$[0]" + _KeepTokenTmp ; or " #0 "
EndIf
Else
If _TokenTmp <> ""
If _LogicalToken <> ""
AddToken(_LogicalToken + Trim(_TokenTmp))
_LogicalToken = ""
Else
_TokenTmp = Trim(_TokenTmp)
; We start With the first token And there is no quote, nor parenthese, just a word
If ListSize(tokenStack()) = 0
; add parentheses To be sure To have the 'and' between each word
If (Left(_TokenTmp, 1) = "-")
AddToken("not " + Mid(_TokenTmp, 2))
Else
AddToken("(")
AddToken(_TokenTmp)
AddToken(")")
EndIf
Else ; Not empty
If Left(_TokenTmp, 1) = "-" ; If left of token = '-' don't add the 'And'
AddToken("not " + Mid(_TokenTmp, 2))
;addToken(_TokenTmp)
Else
AddToken("and " + _TokenTmp) ; automatic 'and' between all
EndIf
EndIf
EndIf
EndIf
EndIf
Next ; End ForEach
If _DQUOTE <> 0 : ErrToken("#ERR001") : EndIf ; ("Error double quotes does not match");
If _PARENTHESE <> 0 : ErrToken("#ERR002") : EndIf ; ("Error parentheses does not match");
Else
ProcedureReturn ""
EndIf
EndProcedure
EndModule
Module QueryParser
Procedure.s Parse(__input.s)
Protected.s _expression
QueryLexer::Execute(__input)
ForEach QueryLexer::TokenStack()
ProcessToken(QueryLexer::TokenStack())
Next
EndProcedure
Procedure.s ParseQuery(__input.s)
; Clear each time
ClearList(QueryLexer::TokenStack())
ClearList(QueryLexer::ErrorStack())
ClearList(QueryPhrase::Phrases())
ClearList(QueryPhrase::SubExpressions())
QueryParser::Parse(__input)
QueryBuilder::ProcessExpression()
ProcedureReturn QueryBuilder::GetResult()
EndProcedure
Procedure ProcessToken(__token.s)
Protected.s _Modifier, _Phrase, _Operator
Protected.i _type = QueryPhrase::TYPE
Select __token
Case "("
QueryPhrase::TYPE = QueryPhrase::#SUBPHRASE
QueryPhrase::initiateSubExpression()
Case ")"
QueryPhrase::TYPE = QueryPhrase::#PHRASE
Default
_Modifier = Left(__token, 1)
_Phrase = Mid(__token, 2)
Select _Modifier
Case "-"
QueryExpression::AddExclusionPhrase(_Phrase, _type)
Case "+"
QueryExpression::AddPhrase(_Phrase, _type)
Default
_Operator = Trim(Mid(__token, 1, FindString(__token, " ")))
_Phrase = Trim(Mid(__token, FindString(__token, " ")))
Select LCase(_Operator)
Case "and"
QueryExpression::AddAndPhrase(_Phrase, _type)
Case "or"
QueryExpression::AddOrPhrase(_Phrase, _type)
Case "not"
QueryExpression::AddExclusionPhrase(_Phrase, _type)
Default:
QueryExpression::AddPhrase(__token, _type)
EndSelect
EndSelect
EndSelect
EndProcedure
EndModule
Module QueryBuilder
; Private var
Global.s _Query
Procedure.s GetResult()
ProcedureReturn _Query
EndProcedure
Procedure.s FormatString(__Expression.s, __Mode.i)
Protected.s _Expression = HandleString::TrimChars(__Expression, " &|")
Protected.s _Format = "''%s'' "
Select __Mode
Case QueryPhrase::#MODE_AND
_Format = "& ''%s'' "
Case QueryPhrase::#MODE_OR
_Format = "| ''%s'' "
Case QueryPhrase::#MODE_EXCLUDE
_Format = "& !''%s'' "
EndSelect
_Expression = ReplaceString(_Expression, "'", "\\'")
ProcedureReturn HandleString::sPrintf(_Format, @_Expression)
EndProcedure
Procedure ProcessExpression()
Protected.s _Expression, _Format, _QueryTMP
_Query = ""
ForEach QueryPhrase::Phrases()
If QueryPhrase::Phrases()\type = QueryPhrase::#PHRASE
_Query + FormatString(QueryPhrase::Phrases()\Phrase, QueryPhrase::Phrases()\Mode)
EndIf
Next
ForEach QueryPhrase::SubExpressions()
_QueryTMP = ""
ForEach QueryPhrase::SubExpressions()\Phrases()
_QueryTMP + FormatString(QueryPhrase::SubExpressions()\Phrases()\Phrase, QueryPhrase::SubExpressions()\Phrases()\Mode)
Next
_Query + "& (" + _QueryTMP + ") "
Next
_Query = HandleString::TrimChars(_Query, " &|")
EndProcedure
EndModule
;-End modules creation
Code : Tout sélectionner
XIncludeFile "PROCEDURES_ParseQuery.pbi"
Procedure showQuery(__query.s)
Debug "Result for Query : "+__query+ #CRLF$ + QueryParser::ParseQuery(__query)
Debug "-------------------------------------------------"
EndProcedure
str1$ = "("+#DQUOTE$+"sales director"+#DQUOTE$+" Or "+#DQUOTE$+"sale"+#DQUOTE$+") "+#DQUOTE$+"team leader"+#DQUOTE$+" ("+#DQUOTE$+"ho chi minh"+#DQUOTE$+" And male) -executive -fmcg Not representative"
str2$ = "java "+#DQUOTE$+"technical architect"+#DQUOTE$
str3$ = #DQUOTE$+"qa manager"+#DQUOTE$+" -fmcg"
str4$ = #DQUOTE$+"qa manager"+#DQUOTE$+" Or -fmcg"
str5$ = "GENERAL MANAGER"
str6$ = "manager java developer"
str7$ = "-fmcg"
str8$ = "java -fmcg"
str9$ = "("+#DQUOTE$+"used cars"+#DQUOTE$+" Or "+#DQUOTE$+"new cars"+#DQUOTE$+") -ford -mistubishi"
; --- Result for str1$ must be :
; ''team #0 leader'' & !''executive'' & !''fmcg'' & !''representative'' & (''sales #0 director'' | ''sale'' ) & (''ho #0 chi #0 minh'' & ''male'' )
; --- (27/11/2013) result :
; ''team$[0]leader'' & !''executive'' & !''fmcg'' & !''representative'' & (''sales$[0]director'' | ''sale'' ) & (''ho$[0]chi$[0]minh'' & ''male'' )
showQuery(str1$)
showQuery(str2$)
showQuery(str3$)
showQuery(str4$)
showQuery(str5$)
showQuery(str6$)
showQuery(str7$)
showQuery(str8$)
showQuery(str9$)