Some small string functions

Got an idea for enhancing PureBasic? New command(s) you'd like to see?
User avatar
Jac de Lad
Enthusiast
Enthusiast
Posts: 106
Joined: Wed Jul 15, 2020 7:10 am
Contact:

Some small string functions

Post by Jac de Lad »

Hello,
First of all: yes, I know I can easily write these myself as very small functions (and that's what I'm doing right now). But I think they should be implemented natively.

newstring=Del(string,position,length)
Cuts a string out of a string and returns the rest, not to be confused with DeleteString().

position=FindMemoryString(*memory, string)
Returns the position of a string within a memory buffer. A native implementation would be much more comfortable. An optional parameter for case sensitivity could be added as well.
User avatar
skywalk
Addict
Addict
Posts: 4211
Joined: Wed Dec 23, 2009 10:14 pm
Location: Boston, MA

Re: Some small string functions

Post by skywalk »

See mid()...
See below...

Code: Select all

Structure ScanAllTypes  ;-!PB Datatypes
  ; Overlay structure: one typed pointer gives access to the same memory as any
  ; of the basic datatypes listed below.
  ; Usage: Define *p.ScanAllTypes = @somevariable or *somememory
  ; Every field is declared as a zero-length array ([0]), so treat the whole
  ; thing like a StructureUnion: it takes no memory and the index is NOT
  ; bounds-checked.
  ; Ex. *p\d[i] ; 'i' can be incremented in a loop without compiler error.
  ; NOTE(review): the sizes/ranges in the table are informational only; the
  ; Character and Integer rows depend on compile mode / target - confirm.
  ;          ; Type,   Bytes,  Min,                     Max,                     C Type
  b.b[0]     ; Byte,       1, -128,                     127,                     char, bool(C++)
  a.a[0]     ; Ascii,      1,  0,                       255,                     unsigned char, UCHAR, BYTE
  c.c[0]     ; Character,  2,  0,                       65535,                   unsigned short, USHORT
  u.u[0]     ; Unicode,    2,  0,                       65535,                   unsigned short, USHORT
  w.w[0]     ; Word,       2, -32768,                   32767,                   short, VB6 integer
  l.l[0]     ; Long,       4, -2147483648,              2147483647,              long, int (long & $FFFFFFFF = unsigned)
  ;;ul.ul[0] ; ULong,      4,  0,                       4294967295,              unsigned long, unsigned int, DWORD(C++)   (not declared)
  i.i[0]     ; Integer,    4, -2147483648,              2147483647,              long, int(x86 or 32-bit),sizeof*
  ;i.i[0]    ; Integer,    8, -9223372036854775808,     9223372036854775807,     long long(x64 or 64-bit),sizeof*
  q.q[0]     ; Quad,       8, -9223372036854775808,     9223372036854775807,     long long
  ;;uq.uq[0] ; UQuad,      8,  0,                       18446744073709551615,    unsigned long long, ULONGLONG             (not declared)
  f.f[0]     ; Float,      4,           -1.175494e-38,            3.402823e+38,  float      (6 decimal places)
  d.d[0]     ; Double,     8, -2.2250738585072013e-308, 1.7976931348623157e+308, double     (15 decimal places)
  ;;ld.ld[0] ; LDouble,   10,                -3.4e-4932,               1.1e+4932,long double(19 decimal places)            (not declared)
  ;;s.s      ; String$,    2/char + 2 for chr(0),       ,                        wchar_t, TCHAR
  ; Author's notes on string access through this overlay (IMA = invalid memory access):
  ;s.s[0]    ; FAIL, -> *p = @x$, *p\s[0] = IMA ERROR.
  s.s{1}[0]  ; {FixedLen}, 2/char,                      ,                        char *, char var[] <-- Convert to Ascii
  ss.s[0]    ; Scan String array.
  ; Notes below are kept as recorded by the author; which field to use depends
  ; on whether *p points at a single string (@x$) or a string array (@x$()).
  ;s.s[0]    ; FAIL, -> *p = @x$(), *p\s[1],  *p\s[2],  etc.
  ;s.s[0]    ; OK,   -> *p = @x$,   *p\s[1],  *p\s[2],  etc.
  ;ss.s[0]   ; OK,   -> *p = @x$(), *p\ss[1], *p\ss[2], etc.
EndStructure
Procedure.i SF_ToMem(Unicode$, Enc.i=#PB_Ascii)
  ; Copy Unicode$ into a newly allocated memory buffer, encoded as Enc
  ; (#PB_Ascii, #PB_UTF8 or #PB_Unicode).
  ; Useful as 1 function with Enc parameter vs multiple functions; Ascii(), Unicode().
  ; RETURN:  pointer to the buffer, or 0 if the allocation failed.
  ;          The caller owns the buffer and must release it with FreeMemory().
  ;
  ; The size is computed for the TARGET encoding: a UTF-8 character can need
  ; more bytes than its 2-byte Unicode form, so sizing by the default
  ; (Unicode) byte length could under-allocate.
  ; Room for two terminating characters is reserved; AllocateMemory() zero-fills
  ; the buffer, so the result always ends in a double null.
  Protected.i nBytes = StringByteLength(Unicode$, Enc) + 2 * SizeOf(Character)
  Protected *b = AllocateMemory(nBytes)
  If *b
    PokeS(*b, Unicode$, -1, Enc)  ;|#PB_String_NoZero)  ; Not dropping trailing Zero.
  EndIf
  ProcedureReturn *b
EndProcedure
Procedure.i SF_CharReplace(*p.ScanAllTypes, StrFrom$="|", StrTo$=#Empty$, enc.i=#PB_Unicode)
  ; REV:  111227, skywalk
  ; SYNTAX:   nNulls = SF_CharReplace(@x$,"|",#Empty$,0)
  ; PURPOSE:  In-place replacement of one character by another inside a string
  ;           buffer. With the defaults, every '|' becomes Chr(0), i.e. nulls
  ;           are embedded into the string.
  ; RETURN:   Number of characters that were replaced.
  ; NOTES:    - Only the first character of StrFrom$ / StrTo$ is used.
  ;           - StrFrom$ = #Empty$ means "replace nulls": the end of the buffer
  ;             is then taken to be the first double null (scan capped at 64000).
  ;           - enc = #PB_Ascii walks the buffer in 1-byte units; any other
  ;             value walks it in Character (.c) units.
  Protected.i needle     = Asc(StrFrom$)
  Protected.i substitute = Asc(StrTo$)
  Protected.i wide       = Bool(enc <> #PB_Ascii)
  Protected.i last, idx, hits
  ; --- Step 1: find the index of the last character to inspect ---
  If needle <> #Null
    ; Ordinary case: length in characters (not bytes) up to the first null.
    last = MemoryStringLength(*p, enc) - 1
  Else
    ; Looking for nulls: advance until two consecutive nulls are seen.
    If wide
      While last < 64000
        If *p\c[last] <> 0 Or *p\c[last+1] <> 0
          last + 1
        Else
          Break
        EndIf
      Wend
    Else
      While last < 64000
        If *p\a[last] <> 0 Or *p\a[last+1] <> 0
          last + 1
        Else
          Break
        EndIf
      Wend
    EndIf
    last - 1              ; The terminating null itself is not touched
  EndIf
  ; --- Step 2: walk unit by unit and substitute ---
  If wide
    While idx <= last
      If *p\c[idx] = needle
        *p\c[idx] = substitute
        hits + 1
      EndIf
      idx + 1
    Wend
  Else
    While idx <= last
      If *p\a[idx] = needle
        *p\a[idx] = substitute
        hits + 1
      EndIf
      idx + 1
    Wend
  EndIf
  ProcedureReturn hits
EndProcedure
Procedure.i FindStringMem(*String, LenStr.i, Srch$, UseCase.i=#PB_String_CaseSensitive, Enc.i=#PB_Unicode)
  ; REV:  140905, skywalk
  ; PURPOSE:  Search a memory buffer (which may contain embedded nulls) for Srch$.
  ;           Useful to search an Ascii string in Unicode app(default as of v5.4).
  ; PARAMS:   *String  - start of the buffer to search.
  ;           LenStr   - buffer length in units of the encoding: characters for
  ;                      #PB_Unicode, bytes for #PB_Ascii / #PB_UTF8.
  ;           Srch$    - text to look for; converted to Enc before comparing.
  ;           UseCase  - #PB_String_CaseSensitive or #PB_String_NoCase.
  ;           Enc      - encoding of the buffer contents.
  ; RETURN:   0-based BYTE offset of the first match, or -1 when there is no
  ;           match, an argument is empty/null, or the needle buffer could not
  ;           be allocated. (0 is a valid hit at the very start of the buffer,
  ;           so it cannot double as "not found".)
  Protected.i lenSrch = Len(Srch$)
  Protected.i lenChar = 1         ; #PB_Ascii = 1 byte/char, #PB_UTF8 = variable bytes/char
  Protected.i found = -1
  Protected.i *Srch, *pos, *last
  If *String = #Null Or LenStr <= 0 Or lenSrch = 0
    ProcedureReturn -1
  EndIf
  If Enc = #PB_Unicode            ; 2 bytes/char
    lenChar = 2
  EndIf
  *Srch = SF_ToMem(Srch$, Enc)
  If *Srch = #Null
    ProcedureReturn -1
  EndIf
  ; Last start position at which the whole needle still fits in the buffer.
  ; NOTE(review): for #PB_UTF8 with #PB_String_NoCase the matched text could
  ; differ in byte length from the needle - confirm if that case matters.
  *last = *String + LenStr * lenChar - StringByteLength(Srch$, Enc)
  *pos = *String
  While *pos <= *last
    If CompareMemoryString(*pos, *Srch, UseCase, lenSrch, Enc) = #PB_String_Equal
      found = *pos - *String
      Break                       ; Leave via the common exit so *Srch is freed
    EndIf
    *pos + lenChar
  Wend
  FreeMemory(*Srch)
  ProcedureReturn found
EndProcedure
If 1  ;-! TRY FindStringMem()
  ; Demo: build a buffer with embedded nulls, then locate "123" inside it,
  ; once as Unicode and once as Ascii. Each buffer is released after use.
  Define.s haystack$, needle$
  Define.i lenhaystack, posmem, *haystack
  Debug "-- FindStringMem() --"
  Debug "-- Unicode --"
  haystack$ = "ABCDEF"+"|"+"GHIJKLMNO" + "|" + "123"  ; '|' is placeholder for #Null.
  lenhaystack = Len(haystack$)                        ; Remember len of this string. Adding #Null's will "shorten" it by 1st Null appearance.
  *haystack = SF_ToMem(haystack$, #PB_Unicode)        ; Convert string to mem. Note this = Unicode.
  If *haystack
    SF_CharReplace(*haystack)                         ; '|' converted to Null here.
    ShowMemoryViewer(*haystack, MemorySize(*haystack)) ; Show only the allocated bytes in debugger
    needle$ = "123"                                   ; Do not mix Unicode and Ascii strings! Here we search for a Unicode string.
    posmem = FindStringMem(*haystack, lenhaystack, needle$, #PB_String_NoCase, #PB_Unicode)
    Debug PeekS(*haystack + posmem, Len(needle$))
    FreeMemory(*haystack)                             ; Release before the pointer is reused below
  EndIf
  Debug "-- Ascii --"
  *haystack = SF_ToMem(haystack$, #PB_Ascii)          ; Convert string to mem. Note this = Ascii.
  If *haystack
    ShowMemoryViewer(*haystack, MemorySize(*haystack)) ; Show it in debugger ('|' still present at this point)
    SF_CharReplace(*haystack, "|", #Empty$, #PB_Ascii) ; '|' converted to Null here.
    needle$ = "123"                                   ; Do not mix Unicode and Ascii strings! Here we search for an Ascii string.
    posmem = FindStringMem(*haystack, lenhaystack, needle$, #PB_String_NoCase, #PB_Ascii)
    Debug PeekS(*haystack + posmem, Len(needle$), #PB_Ascii)
    FreeMemory(*haystack)
  EndIf
EndIf
The nice thing about standards is there are so many to choose from. ~ Andrew Tanenbaum
User avatar
Jac de Lad
Enthusiast
Enthusiast
Posts: 106
Joined: Wed Jul 15, 2020 7:10 am
Contact:

Re: Some small string functions

Post by Jac de Lad »

Well, yes. Like I said, I know how it's done, I just think it should be natively implemented.

But thanks for the code, this looks versatile.
Post Reply