I needed to search files for certain text and return the full line containing it, so I modified this code by Little John.
It now returns the line in which the match was found. There is no noticeable slowdown compared to the original (I was searching 300 MB text files in under 8 seconds).
Code: Select all
; tested with PB 5.31 / PB 5.41 LTS
; DESC: little john search in files
; INFO: http://www.purebasic.fr/english/viewtopic.php?p=256625#p256625
; added a way to get the current line
; change eline$ to your OS linefeed, currently set to windows
EnableExplicit
; Overlay structure: lets a typed pointer (*p.ByteArray) address raw memory
; one byte at a time as *p\byte[index].
Structure ByteArray
; .b is a signed byte, which is why QuickSearch masks values with & #FF
; before using them as a table index.
byte.b[0]
EndStructure
; Result record filled in by FindInFile: position of the match plus the
; start and end of the line that contains it. All values are byte offsets.
Structure foundpos ; added this structure to return position of match, start and end of line.
; Offset of the match; the demo also checks this field for -1 (not found) and -2 (error).
found.q
; Offset of the first byte of the line containing the match.
lstart.q
; Offset of the line ending that follows the match (one past the last byte of the line).
lend.q
EndStructure
Procedure.i QuickSearch (*mainMem.ByteArray, mainSize.i, *findMem.ByteArray, findSize.i, startOff.i=0)
  ; -- Simplification of the Boyer-Moore algorithm ("Quick Search");
  ;    searches for a sequence of bytes in memory
  ;    (not for characters, so it works in ASCII mode and Unicode mode)
  ; in : *mainMem: pointer to memory area where to search
  ;      mainSize: size of memory area where to search (bytes)
  ;      *findMem: pointer to byte sequence to search for
  ;      findSize: number of bytes to search for
  ;      startOff: offset in <mainMem>, where the search begins (bytes);
  ;                a negative value is treated as 0
  ; out: offset in <mainMem>, where <findMem> was found (bytes);
  ;      -1 if not found, or if a pointer is null or <findSize> is < 1
  ; Note: The first offset is 0 (not 1)!
  ;
  ; after <http://www-igm.univ-mlv.fr/~lecroq/string/node19.html#SECTION00190>, 31.8.2008
  ; (translated from C to PureBasic by Little John)
  Protected i.i, lastStart.i
  Protected Dim shift.i(255)

  ; Reject input that cannot produce a match.
  If *mainMem = 0 Or *findMem = 0 Or findSize < 1
    ProcedureReturn -1
  EndIf
  If startOff < 0
    startOff = 0
  EndIf

  ; Last offset at which the pattern still fits completely into the data.
  lastStart = mainSize - findSize
  If startOff > lastStart
    ProcedureReturn -1
  EndIf

  ; Preprocessing: for every byte value, how far the window may be moved
  ; when that value is the byte directly behind the current window.
  For i = 0 To 255
    shift(i) = findSize + 1
  Next
  For i = 0 To findSize - 1
    ; .b is signed, so mask to get a table index in 0..255
    shift(*findMem\byte[i] & #FF) = findSize - i
  Next

  ; Searching
  While startOff <= lastStart
    If CompareMemory(*mainMem + startOff, *findMem, findSize)
      ProcedureReturn startOff
    EndIf
    If startOff = lastStart
      ; The window already touches the end of the data: the byte behind it
      ; lies outside <mainMem> and must not be read.
      Break
    EndIf
    startOff + shift(*mainMem\byte[startOff + findSize] & #FF) ; shift
  Wend

  ProcedureReturn -1 ; not found
EndProcedure
Procedure.i FindInFile (infile.i, *find, findSize.i, *eline, elineSize.i, startOff.q=0, bufferSize.i=8192)
  ; -- Looks in <infile> for the byte sequence at *find and determines the
  ;    boundaries of the line that contains the first match;
  ;    works in ASCII mode and Unicode mode.
  ; in : infile    : number of a file, that was opened for reading
  ;      *find     : pointer to byte sequence to search for
  ;      findSize  : number of bytes to search for
  ;      *eline    : pointer to the byte sequence that terminates a line
  ;      elineSize : number of bytes of the line terminator
  ;      startOff  : offset in the file where the search begins (bytes);
  ;                  a negative value is treated as 0
  ;      bufferSize: size of used memory buffer (bytes); must be at least
  ;                  as large as <findSize> and <elineSize>
  ; out: pointer to a newly allocated <foundpos> structure, which the caller
  ;      has to release with FreeMemory(); 0 if it could not be allocated.
  ;        \found : offset in the file where the byte sequence was found (bytes),
  ;                 -1 if it was not found, -2 on error
  ;        \lstart: offset of the first byte of the line containing the match
  ;        \lend  : offset of the line terminator following the match, or the
  ;                 offset of the end of the data if the last line has none
  ;      \lstart and \lend are -1 unless \found is >= 0.
  ; Note: The first offset is 0 (not 1)!
  ;       The file position of <infile> is changed by this procedure.
  Protected *result.foundpos
  Protected *buffer
  Protected move.i, bytes.i, hit.i, lastEol.i, scanFrom.i
  Protected matchPos.q, chunkStart.q, chunkEnd.q

  *result = AllocateMemory(SizeOf(foundpos))
  If *result = 0
    ProcedureReturn 0
  EndIf
  ; Report "error" until the search has actually been carried out.
  *result\found  = -2
  *result\lstart = -1
  *result\lend   = -1

  ; The buffer has to be big enough to hold the search sequence and the
  ; line terminator, otherwise the overlapping reads below make no progress.
  move = bufferSize - findSize + 1
  If *find = 0 Or *eline = 0 Or findSize < 1 Or elineSize < 1 Or move < 1 Or bufferSize < elineSize
    ProcedureReturn *result
  EndIf
  If startOff < 0
    startOff = 0
  EndIf

  ; One spare byte guards against a one-byte look-ahead past the data
  ; while searching a completely filled buffer.
  *buffer = AllocateMemory(bufferSize + 1)
  If *buffer = 0
    ProcedureReturn *result
  EndIf

  ; --- Pass 1: locate the byte sequence.
  ; Consecutive reads overlap by findSize-1 bytes, so a match that straddles
  ; two reads is not missed.
  matchPos = -1
  Repeat
    FileSeek(infile, startOff)
    bytes = ReadData(infile, *buffer, bufferSize)
    hit = QuickSearch(*buffer, bytes, *find, findSize)
    If hit <> -1
      matchPos = startOff + hit
      Break
    EndIf
    startOff + move
  Until bytes < bufferSize

  *result\found = matchPos

  If matchPos <> -1
    ; --- Pass 2: walk backwards from the match to the end of the previous
    ; line terminator. Each chunk ends where the previous one began, plus
    ; an overlap of elineSize-1 bytes for terminators that straddle chunks.
    *result\lstart = 0 ; no terminator before the match: line starts at offset 0
    chunkEnd = matchPos
    While chunkEnd > 0
      chunkStart = chunkEnd - bufferSize
      If chunkStart < 0
        chunkStart = 0
      EndIf
      FileSeek(infile, chunkStart)
      bytes = ReadData(infile, *buffer, chunkEnd - chunkStart)

      ; Remember the last terminator inside this chunk.
      lastEol = -1
      scanFrom = 0
      Repeat
        hit = QuickSearch(*buffer, bytes, *eline, elineSize, scanFrom)
        If hit <> -1
          lastEol = hit
          scanFrom = hit + elineSize
        EndIf
      Until hit = -1

      If lastEol <> -1
        *result\lstart = chunkStart + lastEol + elineSize
        Break
      EndIf
      If chunkStart = 0
        Break ; reached the beginning of the file
      EndIf
      chunkEnd = chunkStart + elineSize - 1
    Wend

    ; --- Pass 3: walk forwards from the match to the next line terminator.
    chunkStart = matchPos
    Repeat
      FileSeek(infile, chunkStart)
      bytes = ReadData(infile, *buffer, bufferSize)
      hit = QuickSearch(*buffer, bytes, *eline, elineSize)
      If hit <> -1
        *result\lend = chunkStart + hit
        Break
      EndIf
      If bytes < bufferSize
        ; End of data without a terminator: the line ends where the data ends.
        *result\lend = chunkStart + bytes
        Break
      EndIf
      chunkStart + (bufferSize - elineSize + 1)
    ForEver
  EndIf

  FreeMemory(*buffer)
  ProcedureReturn *result
EndProcedure
; -- Demo
; Searches <file$> for <search$> and prints the complete line that contains
; the first match.
Define file$, search$, eline$
Define ifn.i, format.i, numBytes.i, elineBytes.i
Define bomBytes.q, lineStart.q, lineBytes.q
Define *searchBuffer, *elBuffer, *readline, *found.foundpos

file$ = "largefiletosearch.txt"
search$ = "findThisText"
eline$ = Chr(13) + Chr(10) ; "\r\n" - change this to the line ending used by your files

ifn = ReadFile(#PB_Any, file$)
If ifn
  format = ReadStringFormat(ifn)
  ; ReadStringFormat() leaves the file pointer behind the BOM (if any),
  ; so the current position is the size of the BOM.
  bomBytes = Loc(ifn)
  ; If the above line does not work (because there is no BOM in the file), select the file format manually:
  ; format = #PB_Ascii
  ; format = #PB_UTF8
  ; format = #PB_Unicode

  ; Both the search text and the line ending are converted to the byte
  ; sequences they have in the file's encoding (+2 bytes for the terminating null).
  elineBytes = StringByteLength(eline$, format)
  numBytes = StringByteLength(search$, format)
  *elBuffer = AllocateMemory(elineBytes + 2)
  *searchBuffer = AllocateMemory(numBytes + 2)

  If *elBuffer And *searchBuffer
    PokeS(*elBuffer, eline$, -1, format)
    PokeS(*searchBuffer, search$, -1, format)

    *found = FindInFile(ifn, *searchBuffer, numBytes, *elBuffer, elineBytes)
    If *found = 0 Or *found = -2
      ; No result structure was returned, so there is nothing to inspect or free.
      Debug "Error."
    Else
      Select *found\found
        Case -2
          Debug "Error."
        Case -1
          Debug "'" + search$ + "' not found in file '" + file$ + "'."
        Default
          Debug "'" + search$ + "' found in file '" + file$ + "' at offset " + Str(*found\found) + "."
          Debug "line starts at: "+Str(*found\lstart)+" and ends at: " + Str(*found\lend)

          ; Do not print the BOM as part of the first line.
          lineStart = *found\lstart
          If lineStart < bomBytes
            lineStart = bomBytes
          EndIf
          lineBytes = *found\lend - lineStart

          If lineBytes > 0
            ; AllocateMemory() zero-fills the block, so the 2 extra bytes
            ; terminate the string for PeekS() in every supported format.
            *readline = AllocateMemory(lineBytes + 2)
            If *readline
              FileSeek(ifn, lineStart)
              ReadData(ifn, *readline, lineBytes)
              Debug PeekS(*readline, -1, format)
              FreeMemory(*readline)
            Else
              Debug "Error allocating memory for the line."
            EndIf
          Else
            Debug "Could not determine the boundaries of the line."
          EndIf
      EndSelect
      FreeMemory(*found)
    EndIf
  Else
    Debug "Error allocating memory for search string."
  EndIf

  ; Release only what was actually allocated.
  If *searchBuffer
    FreeMemory(*searchBuffer)
  EndIf
  If *elBuffer
    FreeMemory(*elBuffer)
  EndIf
  CloseFile(ifn)
Else
  Debug "Error reading from file '" + file$ + "'."
EndIf