Below is the search code, which works normally in non-Unicode (ASCII) mode. Is this a problem with PureBasic or with the code?
How can I get similar code to work normally in Unicode mode?
Code: Select all
;{ searchINfile
; Overlay type used to address raw memory byte by byte through a typed
; pointer: with *p.ByteArray, *p\byte[n] is the byte at offset n from *p.
; The field type .b is a signed byte, which is why the code that indexes
; lookup tables with these values masks them with "& #FF".
Structure ByteArray
byte.b[0]
EndStructure
; Forward declarations, so the procedures below can call each other
; regardless of the order in which they are defined.
; Each line must stay in sync with the corresponding Procedure header.
Declare.i QuickSearch (*mainMem.ByteArray, mainSize.i, *findMem.ByteArray, findSize.i, startOff.i=0)
Declare.q FindInFile (infile.i, *find, findSize.i, startOff.q=0, bufferSize.i=4096)
Declare.s GetFileString(File$,tagStart$, tagEnd$)
Procedure.i QuickSearch (*mainMem.ByteArray, mainSize.i, *findMem.ByteArray, findSize.i, startOff.i=0)
   ; -- "Quick Search", a simplification of the Boyer-Moore algorithm;
   ;    searches for a sequence of bytes in memory
   ;    (bytes, not characters, so the result does not depend on the
   ;    string mode the program is compiled in).
   ; in : *mainMem: pointer to memory area where to search
   ;      mainSize: size of memory area where to search (bytes)
   ;      *findMem: pointer to byte sequence to search for
   ;      findSize: size of byte sequence to search for (bytes, >= 1)
   ;      startOff: offset in <mainMem>, where the search begins (bytes)
   ;                Note: The first offset is 0 (not 1)!
   ; out: offset in <mainMem>, where <findMem> was found (bytes);
   ;      -1 if not found, or if the arguments are unusable
   ;      (null pointer, findSize < 1, startOff < 0)
   ;
   ; after <http://www-igm.univ-mlv.fr/~lecroq/string/node19.html#SECTION00190>, 31.8.2008
   ; (translated from C to PureBasic by Little John)
   Protected i.i, lastOff.i
   Protected Dim shift.i(255)   ; local table: shift distance for every possible byte value

   ; Reject arguments that would make the code below read outside the buffers.
   If *mainMem = 0 Or *findMem = 0 Or findSize < 1 Or startOff < 0
      ProcedureReturn -1
   EndIf

   ; Preprocessing:
   ; a byte that does not occur in the pattern lets the window jump
   ; completely past that byte ...
   For i = 0 To 255
      shift(i) = findSize + 1
   Next
   ; ... otherwise align the rightmost occurrence of that byte in the pattern
   ; with it (.b is signed, so mask to get an index in 0..255).
   For i = 0 To findSize - 1
      shift(*findMem\byte[i] & #FF) = findSize - i
   Next

   ; Searching
   lastOff = mainSize - findSize   ; last offset at which the pattern still fits
   While startOff <= lastOff
      If CompareMemory(*mainMem + startOff, *findMem, findSize)
         ProcedureReturn startOff
      EndIf
      ; The shift is chosen from the byte directly behind the current window.
      ; For the last window that byte would be mainMem[mainSize], i.e. outside
      ; the searched area (the C original relies on a terminating null byte
      ; there), so stop instead of reading it.
      If startOff = lastOff
         Break
      EndIf
      startOff + shift(*mainMem\byte[startOff + findSize] & #FF)   ; shift
   Wend

   ProcedureReturn -1              ; not found
EndProcedure
Procedure.q FindInFile (infile.i, *find, findSize.i, startOff.q=0, bufferSize.i=4096)
   ; -- Looks in <infile> for the byte sequence at *find.
   ;    The comparison is done on raw bytes, so the sequence must be given
   ;    in the same encoding as the file content.
   ; in : infile    : number of a file, that was opened for reading
   ;      *find     : pointer to byte sequence to search for
   ;      findSize  : number of bytes to search for
   ;      startOff  : offset in the file where the search begins (bytes)
   ;      bufferSize: size of used memory buffer (bytes)
   ;      Note: The first offset is 0 (not 1)!
   ; out: offset in the file, where byte sequence at *find was found (bytes),
   ;      -1 if byte sequence at *find was not found in <infile>
   ;         (also returned if *find is null or <findSize> is < 1),
   ;      -2 if <findSize> is bigger than <bufferSize>,
   ;      -3 if the search buffer could not be allocated
   ; Side effect: the file pointer of <infile> is moved by this procedure.
   Protected *buffer
   Protected offset.q, move.i, bytes.i

   If *find = 0 Or findSize < 1
      ProcedureReturn -1              ; nothing to search for
   EndIf

   ; Consecutive buffers overlap by findSize-1 bytes, so that a match lying
   ; across a buffer boundary is still seen completely in the next buffer.
   move = bufferSize - findSize + 1
   If move < 1
      ProcedureReturn -2              ; error
   EndIf

   ; One spare byte behind the data: QuickSearch may inspect the byte
   ; directly after the searched area when choosing its shift distance,
   ; and that access has to stay inside the allocation.
   *buffer = AllocateMemory(bufferSize + 1)
   If *buffer = 0
      ProcedureReturn -3              ; out of memory
   EndIf

   Repeat
      FileSeek(infile, startOff)
      bytes = ReadData(infile, *buffer, bufferSize)

      ; QuickSearch returns the offset in the buffer (bytes),
      ; or -1 if not found:
      offset = QuickSearch(*buffer, bytes, *find, findSize)
      If offset <> -1                 ; found
         offset + startOff            ; buffer offset -> file offset
         Break
      EndIf

      startOff + move
   Until bytes < bufferSize           ; a short read means the end of the file was reached

   FreeMemory(*buffer)
   ProcedureReturn offset
EndProcedure
Procedure.s GetFileString(File$,tagStart$, tagEnd$)
   ; -- Extracts a piece of text from the file <File$>, located by tags.
   ; in : File$    : name of the file to read (its content is read as UTF-8)
   ;      tagStart$: text that precedes the wanted data
   ;      tagEnd$  : text that follows the wanted data; if empty, a fixed
   ;                 number of characters behind <tagStart$> is returned
   ; out: the extracted text,
   ;      ""  if a tag was not found,
   ;      "0" if the file could not be opened
   ;
   ; FindInFile() compares raw bytes. In a Unicode executable the strings are
   ; held in memory as UTF-16, so passing @tagStart$ searches the UTF-8 file
   ; for UTF-16 bytes and never matches. The tags are therefore converted to
   ; UTF-8 (the encoding the file is read with) before searching. For pure
   ; ASCII tags the resulting bytes are the same as in an ASCII executable.
   Protected ifn.i, RetVal$
   Protected RETStart.q, RETEnd.q, dataPos.q   ; file offsets are quads, like FindInFile's result
   Protected *tagStart, *tagEnd
   Protected startBytes.i, endBytes.i

   ifn = ReadFile(#PB_Any, File$)
   If ifn = 0
      ProcedureReturn "0"
   EndIf

   ; Start tag as UTF-8 byte sequence (+1 byte for the null PokeS appends)
   startBytes = StringByteLength(tagStart$, #PB_UTF8)
   If startBytes > 0
      *tagStart = AllocateMemory(startBytes + 1)
   EndIf
   If *tagStart = 0                   ; empty start tag or out of memory
      CloseFile(ifn)
      ProcedureReturn ""
   EndIf
   PokeS(*tagStart, tagStart$, -1, #PB_UTF8)
   RETStart = FindInFile(ifn, *tagStart, startBytes)
   FreeMemory(*tagStart)
   Debug "RETStart:"+Str(RETStart)
   If RETStart < 0
      CloseFile(ifn)
      ProcedureReturn ""
   EndIf
   dataPos = RETStart + startBytes    ; first byte behind the start tag

   If tagEnd$<>""
      ; End tag as UTF-8 byte sequence; only look behind the start tag, so an
      ; earlier occurrence of the end tag cannot yield a negative length.
      endBytes = StringByteLength(tagEnd$, #PB_UTF8)
      *tagEnd = AllocateMemory(endBytes + 1)
      If *tagEnd
         PokeS(*tagEnd, tagEnd$, -1, #PB_UTF8)
         RETEnd = FindInFile(ifn, *tagEnd, endBytes, dataPos)
         FreeMemory(*tagEnd)
      Else
         RETEnd = -1                  ; out of memory: treat like "not found"
      EndIf
      Debug "RETEnd:"+Str(RETEnd)
      If RETEnd < 0
         CloseFile(ifn)
         ProcedureReturn ""
      EndIf
      FileSeek(ifn, dataPos)
      ; NOTE(review): the length passed here is a byte count, while ReadString()
      ; counts characters; this is only the same for single-byte (ASCII)
      ; content - confirm against the real data.
      RetVal$ = ReadString(ifn, #PB_UTF8, RETEnd - dataPos)
      ; NOTE(review): the last character is dropped, as in the original code;
      ; presumably specific to the file format - confirm.
      RetVal$ = Left(RetVal$, Len(RetVal$) - 1)
   Else
      ; No end tag: skip one byte behind the start tag and read up to 17
      ; characters (values kept from the original code).
      FileSeek(ifn, dataPos + 1)
      RetVal$ = ReadString(ifn, #PB_UTF8, 17)
   EndIf

   CloseFile(ifn)
   ProcedureReturn RetVal$
EndProcedure
;}