Page 4 of 4

Re: How does PureBasic parse files?

Posted: Tue Jun 02, 2015 3:10 pm
by Zebuddi123
Hi, this one creates a random data file of strings and floats; the number of lines of each is defined by

Code: Select all

number_elements
then processes the data into 2 arrays via regular expressions. I wanted to see how fast it is! On my i3 laptop (8 GB RAM, 120 GB SSD) it processed 1,000,000 elements in 2.137 seconds.
Zebuddi. :D

Code: Select all

; Create some random test data in "test.txt" (written to the current directory).
	;
	; Layout of the generated file:
	;   - number_elements lines, each holding 4 quoted strings of 6 random
	;     uppercase letters, written back to back:  "ABCDEF""GHIJKL"...
	;   - number_elements lines, each holding 4 floats of the form
	;     <1-3 digits>.<1-3 digits>, each followed by a single space.
	; That gives 8 * number_elements elements in total (125000 -> 1,000,000).
	
	number_elements=125000 ; number of lines of each kind; the time taken to process all elements is shown at the bottom of the debug output
	
	If CreateFile(0,"test.txt")
		
		Debug "Creating random data file"
		DisableDebugger ; disable the debugger for testing
		
		For i=1 To number_elements ; create strings
			For x=1 To 4
				For u=1 To 6	
					s$+Chr(Random(90,65)) ; 65..90 = 'A'..'Z' (an upper bound of 92 would also emit '[' and '\')
				Next
				WriteString(0,Chr(34)+s$+Chr(34))
				s$="" ; each quoted string holds exactly 6 letters
			Next
			WriteStringN(0,"")
		Next

		For i=1 To number_elements	; create floats
			For x=1 To 4
				For j=1 To 2 ; j=1: integer part, j=2: fractional part
					r=Random(3,1)
					For u=1 To r	
						s$+Chr(Random(57,48)) ; 48..57 = '0'..'9'
					Next
					If j<2 : s$+"." : EndIf
				Next
				s$+" "
			Next
			WriteStringN(0,s$) : s$=""
		Next
		CloseFile(0)

		; Debug statements are skipped while the debugger is disabled, so
		; re-enable it for the status messages below.
		EnableDebugger
		Debug "creating Random data file finished"
	Else
		Debug "Could not create test.txt"
	EndIf
	DisableDebugger ; keep the debugger off for the timed parsing section that follows
	
	
	; Description
	; Reads the whole file into an allocated memory buffer, converts the buffer
	; into a string, then uses two regular expressions to extract the quoted
	; strings into string$() and the floats into number$().
	; s / e hold the start / end timestamps (milliseconds) of the timed section.
	; *mem is intentionally left allocated here; it is released in the cleanup
	; code at the end of the program.
	
	
	regex_strings=CreateRegularExpression(#PB_Any, #DOUBLEQUOTE$+".+?"+#DOUBLEQUOTE$) ; matches "bla bla"
	regex_numbers=CreateRegularExpression(#PB_Any, "\d+\.\d+")						  ; matches 2334.123 1.234 any combination
	
	s=ElapsedMilliseconds()
	
	If ReadFile(0,"test.txt")
		file_size=Lof(0)
		; One extra byte: AllocateMemory() zero-fills the buffer by default, so the
		; data is always followed by a null terminator for PeekS(). It also keeps
		; the allocation size non-zero for an empty file.
		*mem=AllocateMemory(file_size+1)
		If *mem
			buffer=ReadData(0,*mem,file_size) ; number of bytes actually read
			If buffer
				; The file contains plain ASCII, which is valid UTF-8. Stating the
				; format explicitly keeps this correct in Unicode builds, where the
				; default PeekS() format would be UTF-16.
				file_content$=PeekS(*mem,-1,#PB_UTF8)
				If MatchRegularExpression(regex_strings,file_content$)
					Dim string$(0)
					; extracts the strings based on  [ quote + any characters (non-greedy) + quote ] ie: "ABCDEF"
					Numb_Strings=ExtractRegularExpression(regex_strings,file_content$,string$()) ; Numb_Strings  = number of elements in the string$() array
					For i=0 To Numb_Strings-1 : string$(i)=RemoveString(string$(i),Chr(34)) : Next  ; remove quotemarks from the strings in the array
				EndIf
				If MatchRegularExpression(regex_numbers,file_content$)
					Dim number$(0)
					; extracts the floats based on  [ any number digits + decimal point+ any number of digits ] ie: 2334.1234 45.789987
					Numb_Numbers=ExtractRegularExpression(regex_numbers,file_content$,number$()) ; Numb_Numbers  = number of elements in the number$() array
				EndIf
			EndIf
		EndIf
		CloseFile(0)
	EndIf
	e=ElapsedMilliseconds()
	
	EnableDebugger
	; Show the contents of the arrays.
	; UNCOMMENT the marked lines below to see the output <<<<<<<<<<<<<<<
	
	; Debug "content of string$() array" ;<<<<<<<<<<<<<<<<<<<<<<<<<<<<
	; For i=0 To Numb_Strings-1
	; 	Debug string$(i)
	; Next
	; Debug ""
	; Debug "content of number$() array"
	; For i=0 To Numb_Numbers-1
	; 	Debug number$(i)
	; Next 								;<<<<<<<<<<<<<<<<<<<<<<<<<<<<
	
	; e and s are the end / start timestamps in milliseconds, so (e-s)/1000 is seconds.
	Debug "Processed "+Str(Numb_Strings+Numb_Numbers)+" elements in "+StrF((e-s)/1000,3)+" seconds"
	Debug "sizeof strings array "+Str(ArraySize(string$())+1)
	Debug "sizeof numbers array "+Str(ArraySize(number$())+1)

	; Cleanup -- the memory allocation, regular expressions and arrays are released
	; once they are no longer needed (here: at the end of the program).

	FreeRegularExpression(regex_numbers)
	FreeRegularExpression(regex_strings)
	FreeArray(string$())
	FreeArray(number$())
	; *mem is still 0 if test.txt could not be opened (or the allocation failed);
	; FreeMemory() must only be called with a valid allocation.
	If *mem
		FreeMemory(*mem)
	EndIf