Single-pass command parser
Posted: Mon Dec 29, 2008 2:18 pm
I've made a lot of improvements to my command parser (works with both functions and keywords) but I still think it's fairly slow. Can anyone make any recommendations on improving parser speed?
The code is a single-pass parser with the exception that I do a fast preliminary pass on each line to check for the existence of the string to find before doing a full parse.
One of the optimizations I use is to traverse the string by mapping its pointer to a character array. I also take advantage of the fact that comparing characters instead of strings should be faster, since each comparison is only an integer comparison.
Further optimization might be obtained by using the significant offset trick referenced here by AND51:
http://www.purebasic.fr/english/viewtopic.php?t=35813
Here are some of the older parsers I've written:
http://www.purebasic.fr/english/viewtopic.php?t=33516
http://www.purebasic.fr/english/viewtopic.php?t=30614
The code is a single-pass parser with the exception that I do a fast preliminary pass on each line to check for the existence of the string to find before doing a full parse.
One of the optimizations I use is to traverse the string by mapping its pointer to a character array. I also take advantage of the fact that comparing characters instead of strings should be faster, since each comparison is only an integer comparison.
Further optimization might be obtained by using the significant offset trick referenced here by AND51:
http://www.purebasic.fr/english/viewtopic.php?t=35813
Here are some of the older parsers I've written:
http://www.purebasic.fr/english/viewtopic.php?t=33516
http://www.purebasic.fr/english/viewtopic.php?t=30614
Code: Select all
;/ Forward declarations so the procedures below can call each other regardless of definition order.
;/ Parameter names mirror the procedure definitions (the third argument of
;/ CMDParser_ParseString is the list of ignorable delimiter characters).
Declare ParserCallback(ParserChar.s, ParserPosition, MatchStatus, StatusConstant)
Declare CMDParser_ReportStatus(StatusConstant)
Declare CMDParser_SetStatusCallback(*Callback)
Declare CMDParser_ParseString(String.s, SearchString.s, IgnoreDelimiters.s, MatchKeys.s)
Declare CMDParser_CallParser()
;/ Overlay structure: pointing a *CharField at a string's address lets the
;/ parser read individual characters as c[index] (character-typed values).
Structure CharField
c.c[0] ;/ Zero-sized array declaration: indexed freely, bounds are the caller's responsibility
EndStructure
;/ All parser state shared between CMDParser_ParseString, CMDParser_CallParser
;/ and CMDParser_ReportStatus. A single global instance (Glob_Parser) is used.
Structure ParserGlobstruct
*StringFields.CharField ;/ Character view of the string being searched
StringLength.i ;/ Length of the string being searched
ParserPosition.i ;/ Zero-based index of the character currently being examined
*StatusCallback ;/ Optional status callback address (0 = no reporting)
*SearchStringFields.CharField ;/ Character view of the string to find
SearchStringLength.i ;/ Length of the string to find
Matching.i ;/ Number of search-string characters matched so far (0 = not matching)
MatchCount.i ;/ Not referenced by the code visible in this file
MatchFailed.i ;/ Set when a comment character ends the parse early
*IgnoreDelimiterFields.CharField ;/ Delimiters between a match and a match key or preceding the match string
IgnoreDelimiterLength.i ;/ Number of delimiter characters
*MatchKeyFields.CharField ;/ Characters that may appear after a match (optionally after delimiters) to confirm it
MatchKeyLength.i ;/ Number of match key characters
MatchFound.i ;/ 0 = none, 1 = search string fully matched (key pending), 2 = match confirmed by a key
MatchInvalid.i ;/ A flag to determine if a valid match has been terminated by an invalid character
*SearchStringCharArray.CharField ;/ Not referenced by the code visible in this file
InString.i ;/ Toggled by each double quote; non-zero while inside a quoted string
MatchStartPos.i ;/ ParserPosition+1 of the first matched character (value returned on success)
EndStructure
;/ The one shared parser state instance
Global Glob_Parser.ParserGlobstruct
;- Constants
;/ Status codes handed to the status callback (values 0..9, in this order)
Enumeration 0
  #CMDParser_Status_Default       ;/ 0
  #CMDParser_Status_StringBoolean ;/ 1
  #CMDParser_Status_Comment       ;/ 2
  #CMDParser_Status_BeginMatching ;/ 3
  #CMDParser_Status_MatchFound    ;/ 4
  #CMDParser_Status_Matching      ;/ 5
  #CMDParser_Status_MatchFailed   ;/ 6
  #CMDParser_Status_Delimiter     ;/ 7
  #CMDParser_Status_MatchKey      ;/ 8
  #CMDParser_Status_MatchingByEOL ;/ 9
EndEnumeration
;- Public
Procedure ParserCallback(ParserChar.s, ParserPosition, MatchStatus, StatusConstant)
  ;/ Sample status callback: prints the current character, a label for the
  ;/ status constant and the numeric match status to the debug output.
  ;/ ParserPosition is accepted but not used here.
  Protected Label.s ;/ Stays empty for the default status and for unknown constants

  If StatusConstant=#CMDParser_Status_StringBoolean
    Label=" - String Boolean"
  ElseIf StatusConstant=#CMDParser_Status_Comment
    Label=" - Comment"
  ElseIf StatusConstant=#CMDParser_Status_BeginMatching
    Label=" - Begin Matching"
  ElseIf StatusConstant=#CMDParser_Status_MatchFound
    Label=" - Match Found"
  ElseIf StatusConstant=#CMDParser_Status_Matching
    Label=" - Matching"
  ElseIf StatusConstant=#CMDParser_Status_MatchFailed
    Label=" - Match Failed"
  ElseIf StatusConstant=#CMDParser_Status_Delimiter
    Label=" - Delimiter"
  ElseIf StatusConstant=#CMDParser_Status_MatchKey
    Label=" - Key"
  ElseIf StatusConstant=#CMDParser_Status_MatchingByEOL
    Label=" - Key by end of line"
  EndIf

  Debug ParserChar+Label+" - "+Str(MatchStatus)
EndProcedure
Procedure CMDParser_ReportStatus(StatusConstant)
  ;/ Forwards the current parser state to the registered status callback.
  ;/ Does nothing when no callback has been set.
  ;/ MatchStatus passed to the callback:
  ;/   0 = not matching
  ;/   1 = partial match in progress
  ;/   2 = search string fully matched (MatchFound=1)
  ;/   3 = match confirmed by a match key (MatchFound=2)
  Protected MatchStatus, Char.s

  If Glob_Parser\StatusCallback=0
    ProcedureReturn
  EndIf

  ;/ MatchFound takes precedence over the partial-match counter
  Select Glob_Parser\MatchFound
    Case 1
      MatchStatus=2
    Case 2
      MatchStatus=3
    Default
      If Glob_Parser\Matching
        MatchStatus=1
      EndIf
  EndSelect

  ;/ An end-of-line key has no character in the string, so report a carriage return
  If StatusConstant=#CMDParser_Status_MatchingByEOL
    Char=#CR$
  Else
    Char=Chr(Glob_Parser\StringFields\c[Glob_Parser\ParserPosition])
  EndIf

  CallFunctionFast(Glob_Parser\StatusCallback,Char,Glob_Parser\ParserPosition,MatchStatus,StatusConstant)
EndProcedure
;/ Registers the procedure address that CMDParser_ReportStatus will call.
;/ Passing 0 disables status reporting (ReportStatus returns early when the field is 0).
Procedure CMDParser_SetStatusCallback(*Callback)
Glob_Parser\StatusCallback=*Callback
EndProcedure
Procedure CMDParser_ParseString(String.s, SearchString.s, IgnoreDelimiters.s, MatchKeys.s)
  ;/ Searches String for SearchString used as a command/keyword.
  ;/   String           - the line to search
  ;/   SearchString     - the command or keyword to find
  ;/   IgnoreDelimiters - characters allowed before the match and between the match and its key
  ;/   MatchKeys        - characters that confirm a match when they follow it;
  ;/                      a carriage return (13) in this list means "end of line confirms"
  ;/ Returns the start position of the match (index of the first matched character + 1)
  ;/ or 0 when no confirmed match was found.
  ;/ The Glob_Parser pointer fields reference this procedure's string parameters and
  ;/ are only meaningful while this procedure is running.
  Protected i

  ;/ Fast preliminary pass: skip the full parse when the text is not present at all
  If Not FindString(String.s,SearchString.s,1)
    ProcedureReturn 0
  EndIf

  ;/ Clear the per-parse state
  Glob_Parser\MatchFound=0
  Glob_Parser\Matching=0
  Glob_Parser\MatchInvalid=0
  Glob_Parser\InString=0
  Glob_Parser\MatchFailed=0

  ;/ Don't search if the string to find is empty or longer than the string to search.
  ;/ The lengths live in Glob_Parser; comparing bare local names would always compare 0 with 0.
  Glob_Parser\StringLength=Len(String.s)
  Glob_Parser\SearchStringLength=Len(SearchString.s)
  If Glob_Parser\SearchStringLength=0 Or Glob_Parser\SearchStringLength>Glob_Parser\StringLength
    ProcedureReturn 0
  EndIf

  ;/ Map the strings onto character arrays
  Glob_Parser\StringFields=@String.s
  Glob_Parser\SearchStringFields=@SearchString.s
  Glob_Parser\IgnoreDelimiterFields=@IgnoreDelimiters.s
  Glob_Parser\IgnoreDelimiterLength=Len(IgnoreDelimiters.s)
  Glob_Parser\MatchKeyFields=@MatchKeys.s
  Glob_Parser\MatchKeyLength=Len(MatchKeys.s)

  ;/ Single pass over the string, one character per parser call
  For i=0 To Glob_Parser\StringLength-1
    Glob_Parser\ParserPosition=i
    CMDParser_CallParser()
    If Glob_Parser\MatchFound=2
      Break ;/ Match confirmed by a key
    EndIf
    ;/ End early if the match fails due to a comment character
    If Glob_Parser\MatchFailed
      Break
    EndIf
  Next i

  ;/ Handle match key of #CR$ (carriage return): a complete match that is still
  ;/ waiting for its key at the end of the line is confirmed by the line end itself
  If Glob_Parser\MatchFound=1
    For i=0 To Glob_Parser\MatchKeyLength-1
      If Glob_Parser\MatchKeyFields\c[i]=13 ;/ Carriage return
        CMDParser_ReportStatus(#CMDParser_Status_MatchingByEOL)
        Glob_Parser\MatchFound=2
        Break ;/ Report once, even if the key list contains several carriage returns
      EndIf
    Next i
  EndIf

  Glob_Parser\InString=0 ;/ Reset InString boolean

  If Glob_Parser\MatchFound=2
    ProcedureReturn Glob_Parser\MatchStartPos ;/ success: start position of the match
  EndIf
  ProcedureReturn 0 ;/ failed
EndProcedure
;- Private
Procedure CMDParser_CallParser()
  ;/ Processes the single character at Glob_Parser\ParserPosition and updates the
  ;/ shared match state (Matching, MatchFound, MatchInvalid, MatchFailed, InString,
  ;/ MatchStartPos). Reports one status per character through CMDParser_ReportStatus.
  ;/ No meaningful return value; the caller inspects Glob_Parser afterwards.
  Protected Char.c, i

  With Glob_Parser
    Char=\StringFields\c[\ParserPosition]

    If Char=34 ;/ " toggles the in-string state
      \InString!1
    EndIf

    If Not \InString
      If Not \MatchInvalid
        ;/ Does the character continue (or start) the search string?
        If Char=\SearchStringFields\c[\Matching]
          \Matching+1
          If \Matching=\SearchStringLength
            \MatchFound=1 ;/ Whole search string matched; a match key must still follow
            CMDParser_ReportStatus(#CMDParser_Status_MatchFound)
            ProcedureReturn
          EndIf
          If \Matching=1
            \MatchStartPos=\ParserPosition+1 ;/ Remember where the match began
            CMDParser_ReportStatus(#CMDParser_Status_BeginMatching)
            ProcedureReturn
          EndIf
          CMDParser_ReportStatus(#CMDParser_Status_Default)
          ProcedureReturn
        EndIf

        ;/ If a complete match has been found, check for a match key
        If \MatchFound=1
          For i=0 To \MatchKeyLength-1
            If Char=\MatchKeyFields\c[i]
              \MatchFound=2
              CMDParser_ReportStatus(#CMDParser_Status_MatchKey)
              ProcedureReturn
            EndIf
          Next i
        EndIf
      EndIf

      ;/ An ignore delimiter clears the invalid flag and, unless a complete
      ;/ match is waiting for its key, restarts matching
      For i=0 To \IgnoreDelimiterLength-1
        If Char=\IgnoreDelimiterFields\c[i]
          \MatchInvalid=0
          If Not \MatchFound
            \Matching=0
          EndIf
          CMDParser_ReportStatus(#CMDParser_Status_Delimiter)
          ProcedureReturn
        EndIf
      Next i

      ;/ A ; that is not part of the search string, a key or a delimiter starts a
      ;/ comment: stop the parse even if a partial or complete match was pending,
      ;/ so that text inside the comment can never be matched
      If Char=59 ;/ ;
        \Matching=0
        \MatchFound=0
        \MatchFailed=1
        \MatchInvalid=1
        CMDParser_ReportStatus(#CMDParser_Status_Comment)
        ProcedureReturn
      EndIf
    EndIf

    ;/ Any other character (or any character inside a quoted string) invalidates
    ;/ the current match until the next ignore delimiter
    \MatchInvalid=1
    \Matching=0
    \MatchFound=0
    CMDParser_ReportStatus(#CMDParser_Status_Default)
  EndWith
EndProcedure
;/ Example: look for the command "command" followed by "(" in a lower-cased line,
;/ with space, tab and ":" as ignorable delimiters. Each parsed character is
;/ reported through ParserCallback.
CMDParser_SetStatusCallback(@ParserCallback())
String.s=LCase("Debug This: Command(Something)")
SearchString.s="command"
Delimiters.s=Chr(32)+Chr(9)+":"
If CMDParser_ParseString(String.s,SearchString.s,Delimiters.s,"(")<>0
  Debug "Match!"
EndIf