Advanced String parsing techniques

Share your advanced PureBasic knowledge/code with the community.
SMaag
Enthusiast
Enthusiast
Posts: 324
Joined: Sat Jan 14, 2023 6:55 pm
Location: Bavaria/Germany

Advanced String parsing techniques

Post by SMaag »

Here I show some advanced string parsing techniques using a universal character pointer (pChar) and my PureBasic Extension Module PX.

- Skip Spaces and Tabs
- Find end of an Expression or Word and get the length
- Check next character

Please download the additional PX module here
https://github.com/Maagic7/PureBasicFrameWork/blob/main/Modules/PbFw_Module_PX.pb

Code: Select all


; A very simple Demo for advanced String parsing with pChar Structure

; pChar is part of PX::
; An adapted version of pAny especially for character use
;   Structure pChar   ; ATTENTION! Only use as Pointer Structure! Do not define as a normal Var!
;     StructureUnion
;       a.a         ; ASCII   : 8 Bit unsigned  [0..255] 
;       c.c         ; CHAR    : 1 Byte for Ascii Chars 2 Bytes for unicode
;       u.u         ; UNICODE : 2 Byte unsigned [0..65535]
;       aa.a[0]     ; ASCII   : 8 Bit unsigned  [0..255] 
;       cc.c[0]     ; CHAR    : 1 Byte for Ascii Chars 2 Bytes for unicode
;       uu.u[0]     ; UNICODE : 2 Byte unsigned [0..65535]
;     EndStructureUnion
;   EndStructure 

; Last modified: 2025/08/14

EnableExplicit

; please download Module PX from:
; https://github.com/Maagic7/PureBasicFrameWork/blob/main/Modules/PbFw_Module_PX.pb

XIncludeFile "PbFw_Module_PX.pb"          ; PX::      Purebasic Extention Module

UseModule PX


; ------------------------------------------------------------
; Demo script: walk through a few PureBasic source lines with a
; PX::pChar pointer and print what was recognized via Debug.
;
; For every test line it
;   1. counts the characters and the leading Spaces/Tabs without
;      moving the pointer (indexed access through \cc[]),
;   2. moves the pointer to the first non-blank character,
;   3. checks for the keywords Procedure / Protected / Prototype,
;   4. for Procedure: prints return type, name and parameter list.
; ------------------------------------------------------------

; Commands the demo can recognize at the start of a line
Enumeration
  #PbCmd_unknown
  #PbCmd_Procedure
  #PbCmd_Protected
  #PbCmd_Prototype
EndEnumeration

; Sized to exactly the number of samples below, so the loop can run
; to ArraySize(txt()) instead of a hard-coded upper bound.
Dim txt.s(2)

Define *pC.PX::pChar  ; character pointer into the line currently parsed
Define cnt            ; character offset / length counter
Define cmd            ; command recognized for the current line (#PbCmd_...)
Define depth          ; bracket nesting depth while scanning the parameter list
Define I              ; index of the current test line

txt(0) = "  Procedure.l MyTestProc (*out.TTestSruct, Testvalue.i, config=#True)"
txt(1) = "    Procedure.d MyTestProc *out.TTestSruct, Testvalue.i, config=#True)"
txt(2) = "      Procedure.d MyTestProc (*out.TTestSruct, Testvalue.i, config=#True"

For I = 0 To ArraySize(txt())
  *pC = @txt(I)
  Debug "***** Test " + Str(I) + " *****"
  Debug txt(I)
  Debug ""
  
  ; ------------------------------------------------------------
  ; first check Len(txt) with *pC - don't move the Pointer
  ; ------------------------------------------------------------
  cnt = 0
  While *pC\cc[cnt]     ; run until the terminating 0 character
    cnt + 1
  Wend
  Debug "pChar Len(txt) = " + cnt + " : PB Len() = " + Len(txt(I))
  
  Debug ""
  ; ------------------------------------------------------------
  ; Count Spaces and Tabs - don't move the Pointer
  ; ------------------------------------------------------------
  cnt = 0
  While IsSpaceTabChar(*pC\cc[cnt])
    cnt + 1
  Wend
  Debug "leading Spaces and Tabs = " +cnt
  
  ; ------------------------------------------------------------
  ; Skip Spaces and Tabs - move the Pointer
  ; ------------------------------------------------------------
  Debug ""
  Debug "Pointer to Text " + Str(*pC)
  Debug "Now skip Space and Tab -> move Pointer to first character"
  While IsSpaceTabChar(*pC\c)
    INCC(*pC)  ; INCC is the Macro to Increase a CharPointer 'INCCharPointer' 
  Wend
  Debug "Pointer of first Letter = " + Str(*pC) + " : Char = " + Chr(*pC\c)
  
  Debug ""
  Debug "Now we check for Procedure, Protected or Prototype"
  
  ; cmd has to be reset for every line; otherwise a line without a known
  ; keyword would be handled with the command found for the previous line.
  cmd = #PbCmd_unknown
  
  Select LCaseChar(*pC\c) 
    Case 'p'
      ; all three keywords have 9 characters, compared without case
      If CompareMemoryString(*pC, @"procedure", #PB_String_NoCase, 9) = #PB_String_Equal
        ; ----------
        ; Procedure
        ; ----------
        cmd = #PbCmd_Procedure
        Debug "First word is " + PeekS(*pC, 9)
        INCC(*pC,9)    ; set Pointer to first Char after Procedure
        
      ElseIf CompareMemoryString(*pC, @"protected", #PB_String_NoCase, 9) = #PB_String_Equal
        ; ----------
        ; Protected
        ; ----------
        cmd = #PbCmd_Protected
        Debug "First word is " + PeekS(*pC, 9)
        INCC(*pC,9) ; set Pointer to first Char after Protected
         
      ElseIf CompareMemoryString(*pC, @"prototype", #PB_String_NoCase, 9) = #PB_String_Equal
        ; ----------
        ; Prototype
        ; ----------
        cmd = #PbCmd_Prototype
        Debug "First word is " + PeekS(*pC, 9)
        INCC(*pC,9) ; set Pointer to first Char after Prototype
        
      Else    ; no PB command found 
        
      EndIf
      
    ; Case 'e' 
      
    Default
     
  EndSelect
  
  Debug ""
  Select cmd
    Case #PbCmd_Procedure
      
      ; Character directly behind the keyword decides about the return type.
      ; Space, Tab and '.' have no upper/lower case, so the raw character is used.
      Select *pC\c
        Case 32, 9 ; Space or Tab -> no explicit type
          Debug "Procedure retrun type = i" 
          INCC(*pC) ; set Pointer after the separator
        Case '.'
          If *pC\cc[1]
            Debug "Procedure retrun type = " + Chr(*pC\cc[1]) 
            INCC(*pC,2) ; set Pointer after Type
          Else
            ; '.' is the last character: step only onto the terminating 0,
            ; never beyond it, so the following scans stay inside the string
            Debug "Error: missing return type after '.'"
            INCC(*pC)
          EndIf
      EndSelect
      
      ; now Skip Space and Tab, so we are at first character of ProcedureName    
      While IsSpaceTabChar(*pC\c)
        INCC(*pC)
      Wend
      
      Debug "First Char of ProcedureName = " + Chr(*pC\c)
      
      ; now find end of ProcedureName
      ; stop at '(' or Space or Tab - or at the end of the String.
      ; A missing '(' is reported once by the bracket check further down.
      cnt = 0
      While *pC\cc[cnt] <> 0 And *pC\cc[cnt] <> '(' And (Not IsSpaceTabChar(*pC\cc[cnt]))
        cnt + 1
      Wend
      Debug "ProcedureName = " + PeekS(*pC, cnt)
      INCC(*pC,cnt)
      
      ; now Skip Space and Tab, so we are at first character of Parameter '('   
      While IsSpaceTabChar(*pC\c)
        INCC(*pC)
      Wend
      
      ; if we skipped Space and Tab, the first Char now should be the bracket '('
      If *pC\c <> '('
        Debug "Error missing bracket '('"
      Else
        INCC(*pC) ; move to first Char after '('
        
        ; Search for the ')' that closes the parameter list.
        ; Parameters may contain brackets themselves (List x(), Array a(1),
        ; bracketed default values), so the nesting depth is tracked and only
        ; the ')' that brings the depth back to 0 ends the list.
        depth = 1
        cnt = 0
        While *pC\cc[cnt]
          If *pC\cc[cnt] = '('
            depth + 1
          ElseIf *pC\cc[cnt] = ')'
            depth - 1
            If depth = 0
              Break       ; cnt = length of the parameter list
            EndIf
          EndIf
          cnt + 1
        Wend
        
        If depth > 0      ; EndOfString reached before the closing bracket
          Debug "Error: missing ')'" 
        EndIf
        
        Debug "Parameter list : " + PeekS(*pC, cnt)
        
      EndIf
      
    Case #PbCmd_Protected
      
    Case #PbCmd_Prototype
      
    Default
      
  EndSelect
  
  Debug ""
  
Next