here is my Version of SplitStringArray and SplitStringList inspired by mk-soft version with DQuote.
I post it here, because I used a different way of implementation.
It supports Multicharacter Separator with DQuote at a very high speed.
I used a few special tricks to get this done.
- Prototype the Function, so PB pass directly the Stringpointers instead of copy of the Strings
- use a special version of pChar Structure with conventional Character access and indexed Character access
I did a quick test with the original TestCode from mk-soft. Seems to be o.k.
maybe for someone is usefull!
Code: Select all
EnableExplicit
; An adapted version of pAny especally For character use
Structure pChar ; ATTENTION! Only use as Pointer Strukture! Do not define as a normal Var!
StructureUnion
a.a ; ASCII : 8 Bit unsigned [0..255]
c.c ; CHAR : 1 Byte for Ascii Chars 2 Bytes for unicode
u.u ; UNICODE : 2 Byte unsigned [0..65535]
aa.a[0] ; ASCII : 8 Bit unsigned [0..255]
cc.c[0] ; CHAR : 1 Byte for Ascii Chars 2 Bytes for unicode
uu.u[0] ; UNICODE : 2 Byte unsigned [0..65535]
EndStructureUnion
EndStructure
; with the Prototype trick, PB pass directly the Stringpointer and do not copy the String!
Prototype SplitStringArray(String$, Separator$, Array Out.s(1), DQuote=#False, ArrayRedimStep=10)
Global SplitStringArray.SplitStringArray
Prototype SplitStringList(String$, Separator$, List Out.s(), DQuote=#False)
Global SplitStringList.SplitStringList
Procedure.i _SplitStringArray(*String, *Separator, Array Out.s(1), DQuote=#False, ArrayRedimStep=10)
; ============================================================================
; NAME: _SplitStringArray
; DESC: Split a String into multiple Strings
; DESC:
; VAR(*String) : Pointer to String
; VAR(*Separator) : Pointer to Separator String
; VAR(Out.s()) : Array to return the Substrings (ArraySize >= Substrings)
; VAR(DQuote) : #True=skip search for Separator in quoted text, #Flase=search everywhere
; VAR(ArrayRedimStep) : How may entries are added to the String each ReDim
; if you know the exact No of Substrings before, use it here!
; this prevents from ReDim.
; RET.i : No of Substrings
; ============================================================================
Protected lsep, I, xDo, N, ASize
Protected *pStart
Protected *pRead.pChar
Protected *pSep.pChar
If Not *String
ProcedureReturn 0
EndIf
*pRead = *String ; ReadPointer
*pStart = *String ; Pointer of search Start
*pSep = *Separator ; Pointer of Separator
If *Separator
; lsep = Len(Separator)
While *pSep\cc[lsep] ; Trick to get length with indexed pChar
lsep + 1
Wend
EndIf
ASize = ArraySize(Out())
If *Separator=0 Or lsep=0
If ASize = -1 ; not Dim
Dim Out(0)
EndIf
Out(0) = PeekS(*String)
ProcedureReturn 1
EndIf
If ASize = -1 ; not Dim
ASize = ArrayRedimStep
Dim Out(ASize)
EndIf
Repeat
; ----------------------------------------------------------------------
If DQuote ; skip qutoed Text for search of Separator -> do not split in quotes
; ----------------------------------------------------------------------
; move *pRead to first matching Character
While *pRead\c <> *pSep\c
Select *pRead\c
Case 0 ; Break if EndOfString
Break
Case '"' ; DoubleQuote
; move *pRead to 2nd " to ignore Text in Quotes
*pRead + SizeOf(Character) ; Char after "
While *pRead\c <> '"'
*pRead + SizeOf(Character)
If *pRead\c = 0
Break 2
EndIf
Wend
EndSelect
*pRead + SizeOf(Character)
Wend
; ----------------------------------------------------------------------
Else ; do not check for Text in Quotes
; ----------------------------------------------------------------------
; move *pRead to first matching Character
While *pRead\c <> *pSep\c
If *pRead\c = 0 ; Break if EndOfString
Break
EndIf
*pRead + SizeOf(Character)
Wend
EndIf
; ----------------------------------------------------------------------
If *pRead\c ; If Not EndOfString -> we found a Separator
If lsep = 1
; if Len(Separator) = 1 -> split here
xDo = #True
Else
xDo = #True
; Check if all Charactes matching?
For I = 1 To lsep-1 ; we can start at 2nd char because 1st char is steill checked for equal
If *pRead\cc[I] <> *pSep\cc[I]
xDo = #False ; Character do not Match -> Separator not found!
EndIf
Next
EndIf
; If CompareMemoryString(*pRead, *Separator, #PB_String_CaseSensitive, lsep)= #PB_String_Equal
If xDo
If ASize < N
ASize + ArrayRedimStep
ReDim Out(ASize)
EndIf
Out(N) = PeekS(*pStart, (*pRead - *pStart)/SizeOf(Character))
N+1
*pRead + lsep * SizeOf(Character)
*pStart = *pRead
EndIf
Else
Break
EndIf
ForEver ; Until *pRead\c = 0
; return last Element
If ASize < N
ASize + 1
ReDim Out(ASize)
; ElseIf Asize > N
; ReDim Out(N)
EndIf
Out(N) = PeekS(*pStart)
N+1
ProcedureReturn N ; Number of Substrings
EndProcedure
SplitStringArray = @_SplitStringArray() ; Bind ProcedureAddress to Prototype
Procedure.i _SplitStringList(*String, *Separator, List Out.s(), DQuote = #False)
; ============================================================================
; NAME: _SplitStringList
; DESC: Split a String into multiple Strings
; DESC:
; VAR(*String) : Pointer to String
; VAR(*Separator): Pointer to Separator String
; VAR(Out.s()) : List to return the Substrings
; VAR(DQuote) : #True=skip search for Separator in quoted text, #Flase=search everywhere
; VAR(clrList) : #False: Append Splits to List; #True: Clear List first
; RET.i : No of Substrings
; ============================================================================
Protected lsep, I, xDo
Protected *pStart
Protected *pRead.pChar
Protected *pSep.pChar
If Not *String
ProcedureReturn 0
EndIf
ClearList(Out())
*pRead = *String ; ReadPointer
*pStart = *String ; Pointer of search Start
*pSep = *Separator ; Pointer of Separator
If *Separator
; lsep = Len(Separator)
While *pSep\cc[lsep] ; Trick to get length with indexed pChar
lsep + 1
Wend
EndIf
If *Separator=0 Or lsep=0
AddElement(Out())
Out() = PeekS(*String)
ProcedureReturn 1
EndIf
Repeat
; ----------------------------------------------------------------------
If DQuote ; skip qutoed Text for search of Separator -> do not split in quotes
; ----------------------------------------------------------------------
; move *pRead to first matching Character
While *pRead\c <> *pSep\c
Select *pRead\c
Case 0 ; Break if EndOfString
Break
Case '"' ; DoubleQuote
; move *pRead to 2nd " to ignore Text in Quotes
*pRead + SizeOf(Character) ; Char after "
While *pRead\c <> '"'
*pRead + SizeOf(Character)
If *pRead\c = 0
Break 2
EndIf
Wend
EndSelect
*pRead + SizeOf(Character)
Wend
; ----------------------------------------------------------------------
Else ; do not check for Text in Quotes
; ----------------------------------------------------------------------
; move *pRead to first matching Character
While *pRead\c <> *pSep\c
If *pRead\c = 0 ; Break if EndOfString
Break
EndIf
*pRead + SizeOf(Character)
Wend
EndIf
; ----------------------------------------------------------------------
If *pRead\c ; If Not EndOfString -> we found a Separator
If lsep = 1
; if Len(Separator) = 1 -> split here
xDo = #True
Else
xDo = #True
; Check if all Charactes matching?
For I = 1 To lsep-1 ; we can start at 2nd char because 1st char is steill checked for equal
If *pRead\cc[I] <> *pSep\cc[I]
xDo = #False ; Character do not Match -> Separator not found!
EndIf
Next
EndIf
; If CompareMemoryString(*pRead, *Separator, #PB_String_CaseSensitive, lsep)= #PB_String_Equal
If xDo
AddElement(Out())
Out() = PeekS(*pStart, (*pRead - *pStart)/SizeOf(Character))
*pRead + lsep * SizeOf(Character)
*pStart = *pRead
EndIf
Else
Break
EndIf
ForEver ; Until *pRead\c = 0
; return last Element
AddElement(Out())
Out() = PeekS(*pStart)
ProcedureReturn ListSize(Out()) ; Number of Substrings
EndProcedure
SplitStringList = @_SplitStringList() ; Bind ProcedureAddress to Prototype
CompilerIf #PB_Compiler_IsMainFile
Global Dim a1.s(0)
Global NewList l1.s()
Global Dim p1.s(0)
Global NewList r1.s()
Global text.s, count, index
text.s = "0;1x;2xx;'Text with separator (;)';4xxxx;5xxxxx;'Text with linefeed " + #LF$ + "and separator (;)';End"
text = ReplaceString(text, "'", #DQUOTE$)
Debug "Text to List = " + text
Global count = SplitStringList(text, ";", l1(), #True)
Debug "Count = " + count
ForEach l1()
Debug "Index " + ListIndex(l1()) + " = [" + l1() + "]"
Next
Debug "--------"
Debug "Text to Array = " + text
Global count = SplitStringArray(text, ";", a1(), #True)
Debug "Count = " + count
For index = 0 To count - 1
Debug "Index " + index + " = [" + a1(index) + "]"
Next
Debug "--------"
text.s = "0|-|1x|-|2xx|-|'Text with separator (|-|)'|-|4xxxx|-|5xxxxx|-|'Text with linefeed " + #LF$ + "and separator (|-|)'|-|End" ; Use 1x|-2xx for testing instead of 1x|-|2xx)
text = ReplaceString(text, "'", #DQUOTE$)
Debug "Text to List (multi-characters separator: " +#DQUOTE$+ "|-|" +#DQUOTE$+ ") = " + text
Global count = SplitStringList(text, "|-|", l1(), #True)
Debug "Count = " + count
ForEach l1()
Debug "Index " + ListIndex(l1()) + " = [" + l1() + "]"
Next
Debug "--------"
text.s = "0|-|1x|-|2xx|-|'Text with separator (|-|)'|-|4xxxx|-|5xxxxx|-|'Text with linefeed " + #LF$ + "and separator (|-|)'|-|End" ; Use 1x|-2xx for testing instead of 1x|-|2xx)
text = ReplaceString(text, "'", #DQUOTE$)
Debug "Text to Array (multi-characters separator: " +#DQUOTE$+ "|-|" +#DQUOTE$+ ") = " + text
Global count = SplitStringArray(text, "|-|", a1(), #True)
Debug "Count = " + count
For index = 0 To count - 1
Debug "Index " + index + " = [" + a1(index) + "]"
Next
Debug "--------"
CompilerEndIf