Posted: Fri Nov 01, 2002 6:24 pm
Restored from previous forum. Originally posted by fweil.
Thanks Pupil ... I am checking your code now, but the first difference in counts is probably caused by which 'valid chars' each of us accepts.
Using the chr(39) and A-Z a-z range as you do, I find 792079 words, 14480 unique words on 100117 lines.
Don't know exactly where the differences are then ...
Concerning performance, I will look at how to load the whole file into memory, which is certainly faster to process than reading it line by line with ReadString().
Here is my code :
Nice case study ...
Rgrds
Francois Weil
14, rue Douer
F64100 Bayonne
Thanks Pupil ... I am checking your code now, but the first difference in counts is probably caused by which 'valid chars' each of us accepts.
Using the chr(39) and A-Z a-z range as you do, I find 792079 words, 14480 unique words on 100117 lines.
Don't know exactly where the differences are then ...
Concerning performance, I will look at how to load the whole file into memory, which is certainly faster to process than reading it line by line with ReadString().
Here is my code :
Code: Select all
; Running totals: lines read and words stored, both incremented by ParseFile().
NLines.l
NWords.l
; Path part of the most recently parsed file (assigned in ParseFile()).
CurrentDirectory.s
; Line separator used when composing multi-line gadget text.
EOL.s
; Lookup table indexed by character code (0-255); ParseFile() passes every
; input character through it.
Dim AsciiConv.s(255)
; Every word found; ParseFile() appends at index NWords.
Dim AllWords.s(10000000)
; Parallel arrays sharing one index: a distinct word and its occurrence count.
Dim UniqueWords.s(1000000)
Dim WordCount.l(1000000)
; Make the variables and arrays above visible inside procedures.
Global NLines, NWords, EOL, CurrentDirectory, AsciiConv, Allwords, UniqueWords, WordCount
; Returns the remainder of a divided by b, computed as a minus b times the
; integer quotient of a and b. b must not be zero.
Procedure.l IMod(a.l, b.l)
  Quotient.l = a / b
  ProcedureReturn a - (Quotient * b)
EndProcedure
; Parses the text file FileName and appends every word it contains to the
; global AllWords() array.
;
; Each line is trimmed, translated character by character through the
; AsciiConv() lookup table, and then split on single spaces.
; NOTE(review): AsciiConv() is filled outside this procedure; presumably it
; maps accepted characters to themselves and everything else to a space -
; confirm against the initialisation code.
;
; Side effects: sets CurrentDirectory to the path part of FileName, updates
; gadget 100 and status bar 0, and - when the file can be opened - increments
; the globals NLines and NWords. If the file cannot be opened, no parsing
; takes place.
;
; Fixes relative to the posted listing: the "<>" comparison operators and the
; two-space search string had been stripped when the post was restored, which
; left the code uncompilable (and, with a one-space search string, the
; collapse loop would never terminate).
Procedure ParseFile(FileName.s)
  Debug "Parsing : " + FileName
  SetGadgetText(100, "Processing file " + FileName)
  CurrentDirectory = GetPathPart(FileName)
  If ReadFile(0, FileName)
    Repeat
      NLines + 1
      a$ = LTrim(RTrim(ReadString()))
      ; Translate every character of the line through the lookup table.
      b$ = ""
      For i = 1 To Len(a$)
        b$ = b$ + AsciiConv(Asc(Mid(a$, i, 1)))
      Next
      ; Collapse runs of spaces so consecutive separators never produce
      ; empty words. One pass halves each run, hence the loop.
      While FindString(b$, "  ", 1) <> 0
        b$ = ReplaceString(b$, "  ", " ")
      Wend
      b$ = LTrim(RTrim(b$))
      If Len(b$) <> 0
        ; Peel words off the front of the line, one per separating space.
        SpacePos = FindString(b$, " ", 1)
        While SpacePos <> 0
          AllWords(NWords) = Mid(b$, 1, SpacePos - 1)
          NWords + 1
          b$ = Mid(b$, SpacePos + 1, Len(b$) - SpacePos)
          SpacePos = FindString(b$, " ", 1)
        Wend
        ; Whatever remains is the last (or only) word of the line.
        AllWords(NWords) = b$
        NWords + 1
      EndIf
      ; Refresh the status bar only every 2500 lines to keep the UI cheap.
      If IMod(NLines, 2500) = 0
        StatusBarText(0, 0, "Parsing line #" + Str(NLines) + " ... found " + Str(NWords) + " words.", 0)
      EndIf
    Until Eof(0)
    CloseFile(0)
    ; Final totals, in case the line count is not a multiple of 2500.
    StatusBarText(0, 0, "Parsing line #" + Str(NLines) + " ... found " + Str(NWords) + " words.", 0)
  EndIf
EndProcedure
;
;
;
; Main program.
; NOTE(review): this part of the listing is damaged - see the note above the
; "For i = 0 To 255" loop. The openers matching the EndSelect / Until / EndIf
; lines at the bottom are not present in the visible text.
Quit.l = #FALSE
WindowXSize.l = 320
WindowYSize.l = 240
; Pre-size the string to 255 characters so it can serve as the output buffer
; of the GetCurrentDirectory_ API call.
CurrentDirectory = Space(255)
GetCurrentDirectory_(255, @CurrentDirectory)
; Carriage return + line feed.
EOL.s = Chr(13) + Chr(10)
; NOTE(review): the "If" line below is truncated. Everything between a "<"
; and a later ">" appears to have been lost when the post was restored, so the
; rest of the character-range test, the code that opens the window and runs
; the event loop, and the start of the word-counting loop are missing. The
; tail "AllWords(i - 1)" presumably belongs to a comparison of AllWords(i)
; with its predecessor - confirm against the original post.
For i = 0 To 255
If (i >= 'A' And i = 'a' And i AllWords(i - 1)
; This branch starts a new unique-word slot with a count of 1.
j + 1
UniqueWords(j) = AllWords(i)
WordCount(j) = 1
Else
; Otherwise the word is counted against the current slot. This only yields
; correct totals if equal words are adjacent in AllWords() - presumably the
; array is sorted in the missing code; verify.
WordCount(j) + 1
EndIf
Next
; j is the index of the last unique-word slot written.
NUniqueWords.l = j
; NOTE(review): ParseFile() increments NWords after every stored word, so
; NWords is already a count; "NWords + 1" looks like it over-reports by one.
; Whether "NUniqueWords + 1" is right depends on the starting value of j,
; which is not visible here - confirm both.
; FileName and tz are assigned in the missing portion of the listing.
SetGadgetText(100, "File : " + FileName + EOL + "Lines : " + Str(NLines) + EOL + "Words : " + Str(NWords + 1) + EOL + "Unique words : " + Str(NUniqueWords + 1) + EOL + "Done in " + Str(GetTickCount_() - tz) + "ms")
; Write one line per slot: index, a space, the word, two tabs, the count.
If CreateFile(0, "result.txt")
For z = 0 To NUniqueWords
WriteStringN(Str(z) + " " + UniqueWords(z) + Chr(9) + Chr(9) + Str(WordCount(z)))
Next
CloseFile(0)
EndIf
; Ask the shell to open the result file. hWnd is not assigned anywhere in the
; visible code.
ShellExecute_(hWnd,"open","result.txt","","",#SW_SHOWNORMAL)
; Case 99 ends the program by setting the loop's exit flag.
Case 99
Quit = #TRUE
EndSelect
EndSelect
Until Quit
EndIf
End
Rgrds
Francois Weil
14, rue Douer
F64100 Bayonne