Code: Select all
number_elements
Zebuddi.

Code: Select all
; Create some random test data in a file named test.txt (relative path, so it is
; created in the current directory).
; number_elements controls how many lines of quoted strings and how many lines of
; decimal numbers are written. The total number of extracted elements and the
; elapsed time in seconds are shown at the bottom of the debug output.
number_elements=125000
If CreateFile(0,"test.txt")
  Debug "Creating random data file"
  DisableDebugger ; disable the debugger while the data is generated
  For i=1 To number_elements ; create strings
    For x=1 To 4
      For u=1 To 6
        s$+Chr(Random(92,65)) ; random character code in the range 65..92
      Next
      ; s$ is not cleared between the four writes, so each quoted string on a
      ; line is six characters longer than the one before it
      WriteString(0,Chr(34)+s$+Chr(34))
    Next
    WriteStringN(0,"") : s$=""
  Next
  For i=1 To number_elements ; create floats
    For x=1 To 4
      For j=1 To 2
        r=Random(3,1) ; 1 to 3 digits on each side of the decimal point
        For u=1 To r
          s$+Chr(Random(57,48)) ; random digit "0".."9"
        Next
        If j<2 : s$+"." : EndIf
      Next
      s$+" "
    Next
    WriteString(0,s$)
    WriteStringN(0,"") : s$=""
  Next
  CloseFile(0)
  ; Debug statements placed after DisableDebugger are not expected to be shown,
  ; so switch the debugger back on before reporting
  EnableDebugger
  Debug "creating Random data file finished"
Else
  Debug "Could not create test.txt"
EndIf
DisableDebugger ; keep the debugger off for the timed code that follows
; Description
; Reads the whole file into an allocated memory buffer, converts the buffer to a
; string, and uses two regular expressions to extract the quoted strings and the
; decimal numbers into the string$() and number$() arrays.
; *mem is left allocated here; it is released by the cleanup code at the end of the program.
regex_strings=CreateRegularExpression(#PB_Any, #DOUBLEQUOTE$+".+?"+#DOUBLEQUOTE$) ; matches "bla bla"
regex_numbers=CreateRegularExpression(#PB_Any, "\d+\.\d+") ; matches 2334.123 1.234 any combination
s=ElapsedMilliseconds()
If regex_strings And regex_numbers ; only continue when both expressions compiled
  If ReadFile(0,"test.txt")
    file_size=Lof(0)
    If file_size>0
      ; One extra byte: AllocateMemory zero-fills the block by default, so the
      ; data is always followed by a NUL terminator for PeekS
      *mem=AllocateMemory(file_size+1)
      If *mem
        buffer=ReadData(0,*mem,file_size) ; number of bytes actually read
        If buffer
          ; The generated file holds only single-byte characters; decode it
          ; explicitly instead of relying on the default string format
          file_content$=PeekS(*mem,-1,#PB_UTF8)
          If MatchRegularExpression(regex_strings,file_content$)
            Dim string$(0)
            ; extracts every double-quoted substring, e.g. "bla bla"
            Numb_Strings=ExtractRegularExpression(regex_strings,file_content$,string$()) ; Numb_Strings = number of elements in the string$() array
            For i=0 To Numb_Strings-1 : string$(i)=RemoveString(string$(i),Chr(34)) : Next ; remove quote marks from the strings in the array
          EndIf
          If MatchRegularExpression(regex_numbers,file_content$)
            Dim number$(0)
            ; extracts the floats based on [ any number of digits + decimal point + any number of digits ] ie: 2334.1234 45.789987
            Numb_Numbers=ExtractRegularExpression(regex_numbers,file_content$,number$()) ; Numb_Numbers = number of elements in the number$() array
          EndIf
        EndIf
      EndIf
    EndIf
    CloseFile(0)
  EndIf
EndIf
e=ElapsedMilliseconds()
EnableDebugger
; Show the contents of the arrays
; UNCOMMENT to see the output <<<<<<<<<<<<<<<
; Debug "content of string$() array" ;<<<<<<<<<<<<<<<<<<<<<<<<<<<<
; For i=0 To Numb_Strings-1
; Debug string$(i)
; Next
; Debug ""
; Debug "content of number$() array"
; For i=0 To Numb_Numbers-1
; Debug number$(i)
; Next ;<<<<<<<<<<<<<<<<<<<<<<<<<<<<
; 1000.0 makes the division explicitly floating-point so fractions of a second are kept
Debug "Processed "+Str(Numb_Strings+Numb_Numbers)+" elements in "+StrF((e-s)/1000.0,3)+" seconds"
Debug "sizeof strings array "+Str(ArraySize(string$())+1)
Debug "sizeof numbers array "+Str(ArraySize(number$())+1)
; Cleanup -- memory allocation, regular expressions and arrays are released when
; no longer needed or at the end of the program.
; Each handle is checked first: if the file could not be read, *mem is still 0,
; and a failed CreateRegularExpression leaves its variable at 0.
If IsRegularExpression(regex_numbers) : FreeRegularExpression(regex_numbers) : EndIf
If IsRegularExpression(regex_strings) : FreeRegularExpression(regex_strings) : EndIf
FreeArray(string$())
FreeArray(number$())
If *mem : FreeMemory(*mem) : EndIf