in File UnicodeSearch [SOLVED]
Posted: Thu Sep 10, 2015 2:55 pm
Hi,
below the searchcode which works normally in nonUnicode. Is this the problem of Purebasic or the code?
How it is possible to get normal working similar code in Unicode?
below the searchcode which works normally in nonUnicode. Is this the problem of Purebasic or the code?
How it is possible to get normal working similar code in Unicode?
Code: Select all
;{ searchINfile
Structure ByteArray
byte.b[0]
EndStructure
Declare.i QuickSearch (*mainMem.ByteArray, mainSize.i, *findMem.ByteArray, findSize.i, startOff.i=0)
Declare.q FindInFile (infile.i, *find, findSize.i, startOff.q=0, bufferSize.i=4096)
Declare.s GetFileString(File$,tagStart$, tagEnd$)
Procedure.i QuickSearch (*mainMem.ByteArray, mainSize.i, *findMem.ByteArray, findSize.i, startOff.i=0)
; -- Simplification of the Boyer-Moore algorithm;
; searches for a sequence of bytes in memory
; (not for characters, so it works in ASCII mode and Unicode mode)
; in : *mainMem: pointer to memory area where to search
; mainSize: size of memory area where to search (bytes)
; *findMem: pointer to byte sequence to search for
; findSize: size of byte sequence to search for
; startOff: offset in <mainMem>, where the search begins (bytes)
; Note: The first offset is 0 (not 1)!
; out: offset in <mainMem>, where <findMem> was found (bytes);
; -1 if not found
;
; after <http://www-igm.univ-mlv.fr/~lecroq/string/node19.html#SECTION00190>, 31.8.2008
; (translated from C to PureBasic by Little John)
Protected i.i, diff.i
Dim badByte.i(255)
; Preprocessing
For i = 0 To 255
badByte(i) = findSize + 1
Next
For i = 0 To findSize - 1
badByte(*findMem\byte[i] & #FF) = findSize - i
Next
; Searching
diff = mainSize - findSize
While startOff <= diff
If CompareMemory(*mainMem + startOff, *findMem, findSize) = 1
ProcedureReturn startOff
EndIf
startOff + badByte(*mainMem\byte[startOff + findSize] & #FF) ; shift
Wend
ProcedureReturn -1 ; not found
EndProcedure
Procedure.q FindInFile (infile.i, *find, findSize.i, startOff.q=0, bufferSize.i=4096)
; -- Looks in <infile> for byte sequence at *find;
; works in ASCII mode and Unicode mode.
; in : infile : number of a file, that was opened for reading
; *find : pointer to byte sequence to search for
; findSize : number of bytes to search for
; startOff : offset in the file where the search begins (bytes)
; bufferSize: size of used memory buffer (bytes)
; Note: The first offset is 0 (not 1)!
; out: offset in the file, where byte sequence at *find was found (bytes),
; -1 if byte sequence at *find was not found in <infile>,
; -2 if <findSize> is bigger than <bufferSize>
Protected *buffer
Protected offset.q, move.i, bytes.i
move = bufferSize - findSize + 1
If move < 1
ProcedureReturn -2 ; error
EndIf
*buffer = AllocateMemory(bufferSize)
Repeat
FileSeek(infile, startOff)
bytes = ReadData(infile, *buffer, bufferSize)
; QuickSearch returns the offset in the buffer (bytes),
; or -1 if not found:
offset = QuickSearch(*buffer, bytes, *find, findSize)
If offset <> -1 ; found
offset + startOff
Break
EndIf
startOff + move
Until bytes < bufferSize
FreeMemory(*buffer)
ProcedureReturn offset
EndProcedure
Procedure.s GetFileString(File$,tagStart$, tagEnd$)
ifn = ReadFile(#PB_Any, File$)
If ifn
RETStart = FindInFile(ifn, @tagStart$, StringByteLength(tagStart$))
Debug "RETStart:"+Str(RETStart)
If RETStart < 0
CloseFile(ifn)
ProcedureReturn ""
EndIf
If tagEnd$<>""
RETEnd = FindInFile(ifn, @tagEnd$, StringByteLength(tagEnd$))
Debug "RETEnd:"+Str(RETEnd)
If RETend<=0
RetVal$=""
CloseFile(ifn)
ProcedureReturn RetVal$
EndIf
FileSeek(ifn,RETStart+StringByteLength(tagStart$))
RetVal$ = ReadString(ifn,#PB_UTF8,RETEnd-RETStart-StringByteLength(tagStart$)):RetVal$=Left(RetVal$,Len(RetVal$)-1)
Else
FileSeek(ifn,RETStart+StringByteLength(tagStart$)+1)
RetVal$ = ReadString(ifn,#PB_UTF8,17)
EndIf
CloseFile(ifn)
ProcedureReturn RetVal$
Else
ProcedureReturn "0"
EndIf
EndProcedure
;}