There are already some procedures here on the forum for splitting strings, but I'm not aware of a general-purpose procedure that splits a string by a Regular Expression.
Although PureBasic has the built-in function ExtractRegularExpression(), it only retrieves the parts of the source string which match the Regular Expression; the parts which don't match are not retrieved. Sometimes this is exactly what is needed.
However, often we want to get all parts of the source string as the result. The following function splits a string into parts that either match or don't match the given Regular Expression. In other words, the generated list of parts, taken together, contains the whole string.
Code: Select all
; -- Split a string by a Regular Expression
; tested with PB 5.60
; by Little John, <http://www.purebasic.fr/english/viewtopic.php?f=12&t=69071>
; Turn on EnableExplicit only when this file is the main file being compiled
; (presumably so that including it from another program does not change
; that program's declaration rules).
CompilerIf #PB_Compiler_IsMainFile
   EnableExplicit
CompilerEndIf
; One element of the list filled by SplitByRegEx(): a piece of the source
; string together with a flag telling whether it matched the Regular Expression.
Structure SplitString
   ; text of this piece
   content$
   ; #True if this piece is a match of the Regular Expression, #False otherwise
   match.i
EndStructure
Procedure.i SplitByRegEx (regEx.i, source$, List part.SplitString())
   ; -- Split a string into parts that match or don't match a Regular Expression.
   ;    The parts are stored in the order in which they occur in source$;
   ;    text between two matches (and before the first / after the last match)
   ;    is stored as a non-matching part.
   ; in : regEx  : number of a Regular Expression generated by CreateRegularExpression()
   ;      source$: string to be split into parts
   ; out: part()      : resulting list of parts; it is always cleared first,
   ;                    so it is empty when -1 is returned
   ;      return value: number of elements in part():
   ;                    0 if source$ = "", > 0 otherwise;
   ;                   -1 on error (regEx is not a valid Regular Expression
   ;                    number, or ExamineRegularExpression() failed)
   Protected.i left, right
   
   ; Drop stale content up front, so that the caller never sees the parts
   ; of a previous call next to an error result.
   ClearList(part())
   
   ; Reject numbers that don't refer to an existing Regular Expression
   ; instead of passing them on to the RegularExpression library.
   If IsRegularExpression(regEx) = 0
      ProcedureReturn -1              ; error
   EndIf
   
   If ExamineRegularExpression(regEx, source$) = 0
      ProcedureReturn -1              ; error
   EndIf
   
   left = 1                           ; position of the first character not yet stored in part()
   While NextRegularExpressionMatch(regEx)
      right = RegularExpressionMatchPosition(regEx)
      If left < right
         ; text between the previous match (or the start of the string) and this match
         AddElement(part())
         part()\content$ = Mid(source$, left, right-left)
         part()\match = #False
      EndIf
      AddElement(part())
      part()\content$ = RegularExpressionMatchString(regEx)
      part()\match = #True
      left = right + RegularExpressionMatchLength(regEx)   ; continue behind this match
   Wend
   
   If left <= Len(source$)
      ; remaining text behind the last match (or the whole string, if there was no match)
      AddElement(part())
      part()\content$ = Mid(source$, left)
      part()\match = #False
   EndIf
   
   ProcedureReturn ListSize(part())   ; success
EndProcedure
CompilerIf #PB_Compiler_IsMainFile
   ; -- Demo
   
   Procedure SplitDemo (rex.i, source$)
      ; Split source$ by the Regular Expression rex and write every resulting
      ; part to the debug output: " [+] " marks matching parts, " [-] " marks
      ; non-matching ones. Ends the program if SplitByRegEx() returns -1.
      Protected NewList part.SplitString()
      Protected count.i, marker$
      
      count = SplitByRegEx(rex, source$, part())
      If count = -1
         Debug "Error with ExamineRegularExpression()"
         End
      EndIf
      
      Debug "'" + source$ + "' split into " + count + " part(s):"
      ForEach part()
         ; choose the prefix first, then emit the line with a single Debug call
         If part()\match = #False
            marker$ = " [-] "
         Else
            marker$ = " [+] "
         EndIf
         Debug marker$ + part()\content$
      Next
      Debug ""
   EndProcedure
   
   
   ; Compile a pattern that matches anything enclosed in angle brackets,
   ; then run the demo on three sample strings.
   Define pattern$ = "<[^>]*>"
   Define rex.i = CreateRegularExpression(#PB_Any, pattern$)
   
   If rex = 0
      Debug "Error: " + RegularExpressionError()
      End
   EndIf
   
   ; matches mixed with plain text
   SplitDemo(rex, "I like <b>bold</b> and <i>italic</i> fonts.")
   
   ; matches that directly follow each other
   SplitDemo(rex, "<b></b> and <i></i>")
   
   ; no match at all
   SplitDemo(rex, "plain text")
CompilerEndIf





