Restored from previous forum. Originally posted by fweil.
Thanks to Art Sentinel's challenge, I am posting here a copy of the code I wrote to parse a file and count its lines and words.
Please see the original thread (viewtopic.php?t=2450) to understand the history.
This code opens a file and loads it into memory for fast processing. It counts lines and words, then opens a text file (result.txt) listing the unique words and the count of each.
I did not use linked lists because I am not used to working with them, and when I tried coding with them the performance seemed a bit slower than with arrays.
The file-to-memory procedures are adapted from Horst's sample code (the link to it is missing from this restored post).
Code: Select all
; --- Global state shared by the file-to-memory helpers and the word counter ---
; AsciiConv() is a per-character translation table indexed by character code:
; each entry holds the string that replaces that character while parsing
; (separators are mapped to a single space by the start-up code).
; It is sized to the character range of the build mode (Unicode or not).
CompilerIf #PB_Compiler_Unicode
Global Dim AsciiConv.s(65535)
CompilerElse
Global Dim AsciiConv.s(255)
CompilerEndIf
; AllWords() receives every word found in the parsed file, in reading order
; (it is sorted later by the main program).
Global Dim AllWords.s(10000000)
; UniqueWords(k) / WordCount(k) hold the k-th distinct word and its number of occurrences.
Global Dim UniqueWords.s(1000000)
Global Dim WordCount.l(1000000)
; NLines / NWords: running line and word counters; EOL: line separator used for display text.
; NOTE(review): AsciiConv, Allwords, UniqueWords and WordCount are declared here as
; plain variables sharing the names of the arrays above; no visible code uses them
; as scalars - confirm whether they are needed.
Global NLines.l, NWords.l, EOL.s, AsciiConv, Allwords, UniqueWords, WordCount
; MemFileOffset: current read position inside the buffer; MemFileSize: byte size of the
; loaded file; CurrentDirectory: directory of the last processed file;
; *FileBuffer: address of the memory copy of the file.
Global MemFileOffset.l, MemFileSize.l, CurrentDirectory.s, *FileBuffer
; Constant passed to FreeMemory() by CloseFileMem().
#FileBufferMem = 0
; Loads the whole file 'fname' into a newly allocated memory buffer.
;   fileID : PureBasic file number used temporarily to read the file
;   fname  : path of the file to load
; Returns the buffer address (*FileBuffer) on success, 0 on failure
; (file cannot be opened, is empty, or the allocation failed).
; Side effects: sets the globals *FileBuffer, MemFileSize and MemFileOffset.
Procedure.l LoadFileToMem(fileID,fname.s)
  Protected BytesRead.l

  ; Release the buffer of a previous load first: otherwise it is leaked, and a
  ; failed load would hand the stale buffer back to the caller as a success.
  If *FileBuffer
    FreeMemory(*FileBuffer)
    *FileBuffer = 0
  EndIf
  MemFileSize = 0
  MemFileOffset = 0 ; reset

  If ReadFile(fileID,fname)
    MemFileSize = Lof(fileID)
    Debug "MemFileSize = " + Str(MemFileSize)
    If MemFileSize > 0
      *FileBuffer = AllocateMemory(MemFileSize)
      If *FileBuffer
        BytesRead = ReadData(fileID, *FileBuffer, MemFileSize)
        ; Never let the readers walk over bytes that were not actually read.
        If BytesRead < MemFileSize
          MemFileSize = BytesRead
        EndIf
      EndIf
    EndIf
    CloseFile(fileID)
  EndIf

  ; Keep the size consistent with the buffer state on every failure path.
  If *FileBuffer = 0
    MemFileSize = 0
  EndIf
  ProcedureReturn *FileBuffer
EndProcedure
; Tells whether unread bytes remain in the in-memory file:
; returns 1 while MemFileOffset is below MemFileSize, 0 otherwise.
Procedure MoreInMem()
  If MemFileSize > MemFileOffset
    ProcedureReturn 1
  EndIf
  ProcedureReturn 0
EndProcedure
; Returns the next line of the in-memory file, without its line terminator,
; and advances MemFileOffset to the first byte of the following line.
; CR LF, a lone LF and a lone CR each count as exactly one terminator, so
; consecutive blank lines are returned one by one.
; The bytes are decoded as ASCII.
; In case EOF (or no buffer loaded): empty line is returned.
Procedure.s ReadLineFromMem()
  Protected *Start, length.l, Byte.b, Result.s

  If *FileBuffer = 0 Or MoreInMem() = 0
    ProcedureReturn ""
  EndIf

  ; MemFileOffset always points at the first byte of the line to read.
  *Start = *FileBuffer + MemFileOffset

  ; Measure the line; the bound is tested before each peek so no byte
  ; outside the buffer is ever read.
  While MemFileOffset + length < MemFileSize
    Byte = PeekB(*Start + length)
    If Byte = 13 Or Byte = 10
      Break
    EndIf
    length + 1
  Wend

  If length > 0
    Result = PeekS(*Start, length, #PB_Ascii)
  EndIf
  MemFileOffset + length

  ; Step over the terminator, if the line did not end at end of buffer.
  If MemFileOffset < MemFileSize
    MemFileOffset + 1
    ; CR immediately followed by LF is a single Windows line ending.
    If Byte = 13 And MemFileOffset < MemFileSize
      If PeekB(*FileBuffer + MemFileOffset) = 10
        MemFileOffset + 1
      EndIf
    EndIf
  EndIf

  ProcedureReturn Result
EndProcedure
; Releases the in-memory copy of the file, if any, and resets the read state.
; Safe to call when nothing is loaded or when called twice.
Procedure CloseFileMem()
  ; FreeMemory() expects the address returned by AllocateMemory(); the
  ; buffer address lives in *FileBuffer, not in the #FileBufferMem constant.
  If *FileBuffer
    FreeMemory(*FileBuffer)
    *FileBuffer = 0
  EndIf
  MemFileSize = 0
  MemFileOffset = 0
EndProcedure
; Integer remainder of a divided by b, computed as a minus b times the
; integer quotient a / b.
; b = 0 is not guarded against (the division is performed as is).
Procedure.l IMod(a.l, b.l)
  Protected quotient.l
  quotient = a / b
  ProcedureReturn a - (quotient * b)
EndProcedure
; Loads FileName into memory and splits it into words.
;   FileName : path of the text file to process
; For every line: each character is translated through AsciiConv() (which
; turns separators into spaces), runs of spaces are collapsed, and the
; remaining space-separated tokens are appended to AllWords().
; Side effects: increments the globals NLines and NWords (the caller resets
; them), sets CurrentDirectory to the file's directory, updates gadget 100
; and the status bar. Does nothing more if the file cannot be loaded.
Procedure ParseFile(FileName.s)
  Protected a$, b$, i.l, lineLen.l, spacePos.l
  Protected maxWordIndex.l

  ; Highest valid index of AllWords(); keep in sync with its Global Dim.
  ; Words found beyond this capacity are not stored and not counted.
  maxWordIndex = 10000000

  Debug "Parsing : " + FileName
  SetGadgetText(100, "Processing file " + FileName)
  CurrentDirectory = GetPathPart(FileName)
  If LoadFileToMem(0, FileName)
    While MoreInMem()
      NLines + 1
      a$ = LTrim(RTrim(ReadLineFromMem()))

      ; Translate every character; separators become spaces.
      b$ = ""
      lineLen = Len(a$)
      For i = 1 To lineLen
        b$ = b$ + AsciiConv(Asc(Mid(a$, i, 1)))
      Next

      ; Collapse runs of spaces into one space. The search pattern must be
      ; TWO spaces: searching for a single space and replacing it by a
      ; single space never terminates.
      While FindString(b$, "  ", 1) <> 0
        b$ = ReplaceString(b$, "  ", " ")
      Wend
      b$ = LTrim(RTrim(b$))

      If Len(b$) <> 0
        ; Peel the words off the front of b$ one at a time.
        spacePos = FindString(b$, " ", 1)
        While spacePos <> 0 And NWords <= maxWordIndex
          AllWords(NWords) = Left(b$, spacePos - 1)
          NWords + 1
          b$ = Mid(b$, spacePos + 1, Len(b$) - spacePos)
          spacePos = FindString(b$, " ", 1)
        Wend
        ; What is left is the last word of the line.
        If NWords <= maxWordIndex
          AllWords(NWords) = b$
          NWords + 1
        EndIf
      EndIf

      ; Refresh the status bar only every 2500 lines to keep parsing fast.
      If IMod(NLines, 2500) = 0
        StatusBarText(0, 0, "Parsing line #" + Str(NLines) + " ... found " + Str(NWords) + " words.", 0)
      EndIf
    Wend
    StatusBarText(0, 0, "Parsing line #" + Str(NLines) + " ... found " + Str(NWords) + " words.", 0)
  EndIf
EndProcedure
;;;
; --- Program start-up: window size, working directory, conversion table ---
Quit.l = #False
WindowXSize.l = 320
WindowYSize.l = 240

; Ask Windows for the current directory; the string is pre-sized so the API
; has room to write into it.
CurrentDirectory = Space(255)
GetCurrentDirectory_(255, @CurrentDirectory)
EOL.s = Chr(13) + Chr(10)

; Highest index of AsciiConv(), matching its Dim for the current build mode.
CompilerIf #PB_Compiler_Unicode
  #AsciiConvLast = 65535
CompilerElse
  #AsciiConvLast = 255
CompilerEndIf

; Start from the identity mapping over the WHOLE table. Entries left
; uninitialised are empty strings, which would silently delete the
; corresponding characters from the parsed text.
For i = 0 To #AsciiConvLast
  AsciiConv(i) = Chr(i)
Next

; Every character listed here is a word separator and is mapped to a space
; (Chr(34) is the double quote, Chr(9) the tab).
Separators.s = ".,:;+-*/()[]'!?{}=<>" + Chr(34) + Chr(9)
For i = 1 To Len(Separators)
  AsciiConv(Asc(Mid(Separators, i, 1))) = " "
Next
; --- Main window and event loop ---
; Menu item 11 lets the user pick a text file, parses it, counts the unique
; words and writes the report to result.txt, which is then opened with the
; application associated to .txt files. Menu item 99 (also bound to Escape)
; or closing the window quits.
hwnd.l = OpenWindow(0, 200, 500, WindowXSize, WindowYSize, "MyWindow", #PB_Window_SystemMenu | #PB_Window_MinimizeGadget | #PB_Window_MaximizeGadget | #PB_Window_SizeGadget | #PB_Window_TitleBar)
If hwnd
  AddKeyboardShortcut(0, #PB_Shortcut_Escape, 99)
  ;fontVerd11.l = LoadFont(0,"Verdana",12)
  If CreateMenu(0, WindowID(0))
    OpenSubMenu("General")
    MenuItem(11, "Open file")
    MenuItem(99, "Quit")
    CloseSubMenu()
  EndIf
  If CreateStatusBar(0, WindowID(0))
    AddStatusBarField(200)
    StatusBarText(0, 0, "Idle ...", 0)
  EndIf
  ;SetGadgetFont(#PB_Default,FontID(fontVerd11.l))
  TextGadget(100, 10, 10, WindowXSize - 20, WindowYSize - 40, "")
  SetGadgetText(100, "Select a file to process ...")

  ; Highest valid index of UniqueWords() / WordCount(); keep in sync with their Dim.
  MaxUniqueIndex.l = 1000000

  Repeat
    Select WaitWindowEvent()
      Case #PB_Event_CloseWindow
        Quit = #True
      Case #PB_Event_Menu
        Select EventMenu()
          Case 11
            ; CurrentDirectory may or may not end with a backslash (it does
            ; after a file has been processed); add one only when missing.
            RequesterPath.s = CurrentDirectory
            If Right(RequesterPath, 1) <> "\"
              RequesterPath + "\"
            EndIf
            ; Single selection: only one file is processed per request.
            FileName.s = OpenFileRequester("Select a file", RequesterPath + "*.txt", "Text files|*.txt|All files|*.*", 0)

            ; An empty name means the requester was cancelled: leave everything untouched.
            If FileName <> ""
              NLines = 0
              NWords = 0
              tz.l = GetTickCount_()
              ParseFile(FileName)

              If NWords > 0
                LastWord.l = NWords - 1
                SetGadgetText(100, "File : " + FileName + EOL + "Lines : " + Str(NLines) + EOL + "Words : " + Str(NWords))

                ; Sorting groups identical words together, so a single pass
                ; comparing neighbours is enough to count them.
                SortArray(AllWords(), 0, 0, LastWord)
                j = 0
                UniqueWords(j) = AllWords(j)
                WordCount(j) = 1
                For i = 1 To LastWord
                  If AllWords(i) <> AllWords(i - 1)
                    If j >= MaxUniqueIndex
                      Break ; unique-word table is full: stop rather than write past its end
                    EndIf
                    j + 1
                    UniqueWords(j) = AllWords(i)
                    WordCount(j) = 1
                  Else
                    WordCount(j) + 1
                  EndIf
                Next
                NUniqueWords.l = j

                SetGadgetText(100, "File : " + FileName + EOL + "Lines : " + Str(NLines) + EOL + "Words : " + Str(NWords) + EOL + "Unique words : " + Str(NUniqueWords + 1) + EOL + "Done in " + Str(GetTickCount_() - tz) + "ms")

                If CreateFile(0, "result.txt")
                  For z = 0 To NUniqueWords
                    WriteStringN(0, Str(z) + " " + UniqueWords(z) + Chr(9) + Chr(9) + Str(WordCount(z)))
                  Next
                  CloseFile(0)
                EndIf
                ShellExecute_(hwnd,"open","result.txt","","",#SW_SHOWNORMAL)
              Else
                ; File unreadable, empty, or without any word: report it
                ; instead of sorting and counting stale array contents.
                SetGadgetText(100, "File : " + FileName + EOL + "Lines : " + Str(NLines) + EOL + "Words : " + Str(NWords))
              EndIf
            EndIf
          Case 99
            Quit = #True
        EndSelect
    EndSelect
  Until Quit
EndIf

; Free the file buffer only if a file was actually loaded.
If *FileBuffer
  FreeMemory(*FileBuffer)
  *FileBuffer = 0
EndIf
End
14, rue Douer
F64100 Bayonne