Single-pass parser slower than multi-pass?

Just starting out? Need help? Post your questions and find answers here.
Mistrel
Addict
Addict
Posts: 3415
Joined: Sat Jun 30, 2007 8:04 pm

Single-pass parser slower than multi-pass?

Post by Mistrel »

I wrote a string parser to find PureBasic commands by reading source code one line at a time. The old parser did multiple passes (it read the line several times) and was very, very slow.

The new version is supposed to be a lot faster but it's not. What am I doing wrong?

Here is my old code:
http://www.purebasic.fr/english/viewtopic.php?t=30614

Code: Select all

Declare ParserCallback(ParserChar.s, ParserPosition, MatchStatus, StatusConstant)
Declare CMDParser_ReportStatus(StatusConstant)
Declare CMDParser_SetStatusCallback(*Callback)
Declare CMDParser_ParseString(String.s, SearchString.s, MatchDelimiters.s, MatchKey.s)
Declare CMDParser_CallParser()

Structure CharField
  StructureUnion
    c.c[0]
    s.s{1}[0]
  EndStructureUnion
EndStructure 

Structure ParserGlobstruct
	String.s
	*StringFields.CharField
	StringLength.l
	ParserPosition.l
	*StatusCallback
	SearchString.s
	*SearchStringFields.CharField
	SearchStringLength.l
	Matching.l
	MatchCount.l
	MatchFailed.l
	MatchDelimiters.s ; Delimiters between a match and a match key
	MatchKey.s ; A unique character that may appear after a match and between a delimiter
	MatchFound.l
	MatchInvalid.l ; A flag to determine if a valid match has been terminated by an invalid character
EndStructure

Global Glob_ParserGlobstruct.ParserGlobstruct

;- Constants

#CMDParser_Status_Default=0
#CMDParser_Status_StringBoolean=1
#CMDParser_Status_Comment=2
#CMDParser_Status_BeginMatching=3
#CMDParser_Status_MatchFound=4
#CMDParser_Status_Matching=5
#CMDParser_Status_MatchFailed=6
#CMDParser_Status_Delimiter=7
#CMDParser_Status_MatchKey=8

;- Public

Procedure ParserCallback(ParserChar.s, ParserPosition, MatchStatus, StatusConstant)
	Select StatusConstant
		Case #CMDParser_Status_Default
			ParserStatus.s=""
		Case #CMDParser_Status_StringBoolean
			ParserStatus.s=" - String Boolean"
		Case #CMDParser_Status_Comment
			ParserStatus.s=" - Comment"
		Case #CMDParser_Status_BeginMatching
			ParserStatus.s=" - Begin Matching"
		Case #CMDParser_Status_MatchFound
			ParserStatus.s=" - Match Found"
		Case #CMDParser_Status_Matching
			ParserStatus.s=" - Matching"
		Case #CMDParser_Status_MatchFailed
			ParserStatus.s=" - Match Failed"
		Case #CMDParser_Status_Delimiter
			ParserStatus.s=" - Delimiter"
		Case #CMDParser_Status_MatchKey
			ParserStatus.s=" - Matching Key"
	EndSelect
	Debug ParserChar.s+ParserStatus.s+" - "+Str(MatchStatus)
EndProcedure

Procedure CMDParser_ReportStatus(StatusConstant)
	; MatchStatus = 0 match valid
	; MatchStatus = 1 match invalid
	; MatchStatus = 2 match found (must be preceded by MatchStatus = 0)
	With Glob_ParserGlobstruct
		If Not \StatusCallback
			ProcedureReturn
		EndIf
		If \MatchFound=1
			MatchStatus=2
		ElseIf \MatchFound=2
			MatchStatus=3
		Else
			MatchStatus=\MatchInvalid
		EndIf
		Char.s=\StringFields\s[\ParserPosition*SizeOf(Character)]
		CallFunctionFast(\StatusCallback,Char.s,\ParserPosition,MatchStatus,StatusConstant)
	EndWith
EndProcedure

Procedure CMDParser_SetStatusCallback(*Callback)
	Glob_ParserGlobstruct\StatusCallback=*Callback
EndProcedure


Procedure CMDParser_ParseString(String.s, SearchString.s, MatchDelimiters.s, MatchKey.s)
	Glob_ParserGlobstruct\String.s=String.s
	Glob_ParserGlobstruct\StringFields.CharField=@String.s
	Glob_ParserGlobstruct\StringLength=Len(String.s)
	Glob_ParserGlobstruct\SearchString.s=SearchString.s
	Glob_ParserGlobstruct\SearchStringFields.CharField=@SearchString.s
	Glob_ParserGlobstruct\SearchStringLength=Len(SearchString.s)
	Glob_ParserGlobstruct\MatchDelimiters.s=MatchDelimiters.s
	Glob_ParserGlobstruct\MatchKey.s=MatchKey.s
	For i=0 To Glob_ParserGlobstruct\StringLength-1
		Glob_ParserGlobstruct\ParserPosition=i
		CMDParser_CallParser()
	Next i
EndProcedure

;- Private

Procedure CMDParser_CallParser()
	Static InString
	With Glob_ParserGlobstruct
		CharString.s=\StringFields\s[\ParserPosition*SizeOf(Character)]
		If \ParserPosition=0
			InString=0
		EndIf
		; Reset MatchInvalid if the line separator ' : ' or not previous character is found
		If Not InString
			Select CharString.s
				Case ":"
					\MatchInvalid=0
				Case Chr(32),Chr(9)
					; Do nothing
				Default
					; Match is always valid for the first position in the string
					If \ParserPosition>0
						\MatchInvalid=1
					EndIf
			EndSelect		
		EndIf	
		Select CharString.s
			Case Chr(34) ; "
					InString!1 ; flip boolean
					If \MatchFound=1 And \MatchKey.s=Chr(34)
						\MatchFound=2
						CMDParser_ReportStatus(#CMDParser_Status_MatchKey)
					Else
						CMDParser_ReportStatus(#CMDParser_Status_StringBoolean)
				EndIf
			Case Chr(59) ; ;
				If Not InString
					; skip
					CMDParser_ReportStatus(#CMDParser_Status_Comment)
					\MatchFailed=1
				EndIf
			Default
				; If no matching is in progress and the current char index is == to the first char of the keyword
				If Not \Matching And CharString.s=\SearchStringFields\s[0]
					CMDParser_ReportStatus(#CMDParser_Status_BeginMatching)
					\Matching=1 ; Begin matching
				; If already matching and the match index is less <= the keyword length and the current char
				; index matches the current keyword index
				ElseIf \Matching And \Matching<=\SearchStringLength And CharString.s=\SearchStringFields\s[\Matching-1]
					; If all characters match the keyword then break
					If \Matching=\SearchStringLength
						\Matching=0
						\MatchFound=1
						CMDParser_ReportStatus(#CMDParser_Status_MatchFound)
					Else
						CMDParser_ReportStatus(#CMDParser_Status_Matching)
					EndIf
				ElseIf \MatchFound And FindString(\MatchDelimiters.s,CharString.s,0)
					CMDParser_ReportStatus(#CMDParser_Status_Delimiter)
				Else
					If \MatchFound And CharString.s=\MatchKey.s And Not \MatchFound=2
						\MatchFound=2
						CMDParser_ReportStatus(#CMDParser_Status_MatchKey)
					Else
						If \MatchFound=1 And \MatchKey.s
							CMDParser_ReportStatus(#CMDParser_Status_MatchFailed)
							\MatchFound=0
						Else
							CMDParser_ReportStatus(#CMDParser_Status_Default)
						EndIf
					EndIf
					\Matching=0
				EndIf
		EndSelect
		; If currently matching then progress the match counter by 1
		If \Matching
			\Matching+1
		EndIf
		ProcedureReturn \MatchFound
	EndWith
EndProcedure
Example 1:

Code: Select all

String.s=LCase("Debug This: Command(Something)")
SearchString.s="command"

CMDParser_SetStatusCallback(@ParserCallback())
CMDParser_ParseString(String.s,SearchString.s,Chr(32)+Chr(9),"(")
Example 2:

Code: Select all

String.s=LCase("Debug This: Command "+Chr(34)+"Something"+Chr(34))
SearchString.s="command"

CMDParser_SetStatusCallback(@ParserCallback())
CMDParser_ParseString(String.s,SearchString.s,Chr(32)+Chr(9),Chr(34))
User avatar
pdwyer
Addict
Addict
Posts: 2813
Joined: Tue May 08, 2007 1:27 pm
Location: Chiba, Japan

Post by pdwyer »

What do you do in the callback?

It's being called a lot (for every char?) and my guess is that if you want to do some sort of interface update or anything with that callback you are going to slow it down.

Can you try to remove the callback just for a speed test, comment out the updates maybe?

I'm just guessing but it's where I would start, perhaps I'm misunderstanding the flow
Paul Dwyer

“In nature, it’s not the strongest nor the most intelligent who survives. It’s the most adaptable to change” - Charles Darwin
“If you can't explain it to a six-year old you really don't understand it yourself.” - Albert Einstein
Mistrel
Addict
Addict
Posts: 3415
Joined: Sat Jun 30, 2007 8:04 pm

Post by Mistrel »

The comment is for debugging. If you comment out CMDParser_SetStatusCallback() then it's never called.
Mistrel
Addict
Addict
Posts: 3415
Joined: Sat Jun 30, 2007 8:04 pm

Post by Mistrel »

I tweaked it a bit to compare character datatypes instead of one character length strings. I also removed two string copies that weren't being used for anything.

The new parser is about four times as fast but I think it should still be faster.

Compile with the debugger off to compare.

Code: Select all

Declare ParserCallback(ParserChar.s, ParserPosition, MatchStatus, StatusConstant)
Declare CMDParser_ReportStatus(StatusConstant)
Declare CMDParser_SetStatusCallback(*Callback)
Declare CMDParser_ParseString(String.s, SearchString.s, MatchDelimiters.s, MatchKey.s)
Declare CMDParser_CallParser()

Structure CharField
	c.c[0]
EndStructure 

Structure ParserGlobstruct
	*StringFields.CharField
	StringLength.l
	ParserPosition.l
	*StatusCallback
	*SearchStringFields.CharField
	SearchStringLength.l
	Matching.l
	MatchCount.l
	MatchFailed.l
	MatchDelimiters.s ; Delimiters between a match and a match key
	MatchKey.s ; A unique character that may appear after a match and between a delimiter
	MatchFound.l
	MatchInvalid.l ; A flag to determine if a valid match has been terminated by an invalid character
EndStructure

Global Glob_ParserGlobstruct.ParserGlobstruct

;- Constants

#CMDParser_Status_Default=0
#CMDParser_Status_StringBoolean=1
#CMDParser_Status_Comment=2
#CMDParser_Status_BeginMatching=3
#CMDParser_Status_MatchFound=4
#CMDParser_Status_Matching=5
#CMDParser_Status_MatchFailed=6
#CMDParser_Status_Delimiter=7
#CMDParser_Status_MatchKey=8

;- Public

Procedure ParserCallback(ParserChar.s, ParserPosition, MatchStatus, StatusConstant)
	Select StatusConstant
		Case #CMDParser_Status_Default
			ParserStatus.s=""
		Case #CMDParser_Status_StringBoolean
			ParserStatus.s=" - String Boolean"
		Case #CMDParser_Status_Comment
			ParserStatus.s=" - Comment"
		Case #CMDParser_Status_BeginMatching
			ParserStatus.s=" - Begin Matching"
		Case #CMDParser_Status_MatchFound
			ParserStatus.s=" - Match Found"
		Case #CMDParser_Status_Matching
			ParserStatus.s=" - Matching"
		Case #CMDParser_Status_MatchFailed
			ParserStatus.s=" - Match Failed"
		Case #CMDParser_Status_Delimiter
			ParserStatus.s=" - Delimiter"
		Case #CMDParser_Status_MatchKey
			ParserStatus.s=" - Matching Key"
	EndSelect
	Debug ParserChar.s+ParserStatus.s+" - "+Str(MatchStatus)
EndProcedure

Procedure CMDParser_ReportStatus(StatusConstant)
	; MatchStatus = 0 match valid
	; MatchStatus = 1 match invalid
	; MatchStatus = 2 match found (must be preceded by MatchStatus = 0)
	With Glob_ParserGlobstruct
		If Not \StatusCallback
			ProcedureReturn
		EndIf
		If \MatchFound=1
			MatchStatus=2
		ElseIf \MatchFound=2
			MatchStatus=3
		Else
			MatchStatus=\MatchInvalid
		EndIf
		Char.s=Chr(\StringFields\c[\ParserPosition])
		CallFunctionFast(\StatusCallback,Char.s,\ParserPosition,MatchStatus,StatusConstant)
	EndWith
EndProcedure

Procedure CMDParser_SetStatusCallback(*Callback)
	Glob_ParserGlobstruct\StatusCallback=*Callback
EndProcedure


Procedure CMDParser_ParseString(String.s, SearchString.s, MatchDelimiters.s, MatchKey.s)
	Glob_ParserGlobstruct\StringFields.CharField=@String.s
	Glob_ParserGlobstruct\StringLength=Len(String.s)
	Glob_ParserGlobstruct\SearchStringFields.CharField=@SearchString.s
	Glob_ParserGlobstruct\SearchStringLength=Len(SearchString.s)
	Glob_ParserGlobstruct\MatchDelimiters.s=MatchDelimiters.s
	Glob_ParserGlobstruct\MatchKey.s=MatchKey.s
	For i=0 To Glob_ParserGlobstruct\StringLength-1
		Glob_ParserGlobstruct\ParserPosition=i
		CMDParser_CallParser()
	Next i
EndProcedure

;- Private

Procedure CMDParser_CallParser()
	Static InString
	With Glob_ParserGlobstruct
		Char.c=\StringFields\c[\ParserPosition]
		If \ParserPosition=0
			InString=0
		EndIf
		; Reset MatchInvalid if the line separator ' : ' or not previous character is found
		If Not InString
			Select Char.c
				Case Asc(":")
					\MatchInvalid=0
				Case 32,9
					; Do nothing
				Default
					; Match is always valid for the first position in the string
					If \ParserPosition>0
						\MatchInvalid=1
					EndIf
			EndSelect		
		EndIf	
		Select Char.c
			Case 34 ; "
					InString!1 ; flip boolean
					If \MatchFound=1 And \MatchKey.s=Chr(34)
						\MatchFound=2
						CMDParser_ReportStatus(#CMDParser_Status_MatchKey)
					Else
						CMDParser_ReportStatus(#CMDParser_Status_StringBoolean)
				EndIf
			Case 59 ; ;
				If Not InString
					; skip
					CMDParser_ReportStatus(#CMDParser_Status_Comment)
					\MatchFailed=1
				EndIf
			Default
				; If no matching is in progress and the current char index is == to the first char of the keyword
				If Not \Matching And Char.c=\SearchStringFields\c[0]
					CMDParser_ReportStatus(#CMDParser_Status_BeginMatching)
					\Matching=1 ; Begin matching
				; If already matching and the match index is less <= the keyword length and the current char
				; index matches the current keyword index
				ElseIf \Matching And \Matching<=\SearchStringLength And Char.c=\SearchStringFields\c[\Matching-1]
					; If all characters match the keyword then break
					If \Matching=\SearchStringLength
						\Matching=0
						\MatchFound=1
						CMDParser_ReportStatus(#CMDParser_Status_MatchFound)
					Else
						CMDParser_ReportStatus(#CMDParser_Status_Matching)
					EndIf
				ElseIf \MatchFound And FindString(\MatchDelimiters.s,Chr(Char.c),0)
					CMDParser_ReportStatus(#CMDParser_Status_Delimiter)
				Else
					If \MatchFound And Char.c=Asc(\MatchKey.s) And Not \MatchFound=2
						\MatchFound=2
						CMDParser_ReportStatus(#CMDParser_Status_MatchKey)
					Else
						If \MatchFound=1 And \MatchKey.s
							CMDParser_ReportStatus(#CMDParser_Status_MatchFailed)
							\MatchFound=0
						Else
							CMDParser_ReportStatus(#CMDParser_Status_Default)
						EndIf
					EndIf
					\Matching=0
				EndIf
		EndSelect
		; If currently matching then progress the match counter by 1
		If \Matching
			\Matching+1
		EndIf
		ProcedureReturn \MatchFound
	EndWith
EndProcedure

String.s=LCase("Debug This: Command(Something)")
SearchString.s="command"

CMDParser_SetStatusCallback(@ParserCallback())
CMDParser_ParseString(String.s,SearchString.s,Chr(32)+Chr(9),"(")

CMDParser_SetStatusCallback(0)
start=ElapsedMilliseconds()
For i=1 To 100000
	CMDParser_ParseString(String.s,SearchString.s,Chr(32)+Chr(9),"(")
Next i
MessageRequester("",Str(ElapsedMilliseconds()-start))
User avatar
pdwyer
Addict
Addict
Posts: 2813
Joined: Tue May 08, 2007 1:27 pm
Location: Chiba, Japan

Post by pdwyer »

Mistrel wrote:The comment is for debugging. If you comment out CMDParser_SetStatusCallback() then it's never called.
Yes :)

Does it have to call every time? Can you have something like (pseudo code)

Code: Select all

If CharNo mod 100 = 0
    ;Call Callback
endif
where charNo you will have to track.
Paul Dwyer

“In nature, it’s not the strongest nor the most intelligent who survives. It’s the most adaptable to change” - Charles Darwin
“If you can't explain it to a six-year old you really don't understand it yourself.” - Albert Einstein
Mistrel
Addict
Addict
Posts: 3415
Joined: Sat Jun 30, 2007 8:04 pm

Post by Mistrel »

The callback is only called once per character in the string. The status callback is for debugging and shouldn't be used when benchmarking for a comparison.
Post Reply