Page 1 of 1

Nesting NextRegularExpressionMatch()

Posted: Tue Sep 12, 2017 7:35 pm
by mestnyi

Code: Select all

; This expression matches every 3-letter word which begins with a lower-case letter,
  ; is followed by the character 'b' and ends with an upper-case letter, e.g. abC.
  ; Each match is printed with its position in the original string.
  ;
  ; NOTE(review): the inner block examines regular expression 0 again while the
  ; outer While loop is still iterating over it. A reply in this thread states
  ; that a second ExamineRegularExpression() on the same number resets the match
  ; index, so the outer loop presumably does not continue over the first string
  ; as intended - confirm against the PureBasic documentation.
  If CreateRegularExpression(0, "[a-z]b[A-Z]")
    If ExamineRegularExpression(0, "abC ABc zbA abc")
      While NextRegularExpressionMatch(0)
        Debug "Match: " + RegularExpressionMatchString(0)
        Debug "    Position: " + Str(RegularExpressionMatchPosition(0))
        Debug "    Length: " + Str(RegularExpressionMatchLength(0))
        
        ; Nested examination of a second string using the same object (0).
        If ExamineRegularExpression(0, "SDc abW abc zbR")
          While NextRegularExpressionMatch(0)
            Debug "2Match: " + RegularExpressionMatchString(0)
            Debug "    2Position: " + Str(RegularExpressionMatchPosition(0))
            Debug "    2Length: " + Str(RegularExpressionMatchLength(0))
          Wend
        EndIf
      Wend
    EndIf
  Else
    ; The pattern could not be compiled: print the reason.
    Debug RegularExpressionError()
  EndIf

Re: Nesting NextRegularExpressionMatch()

Posted: Tue Sep 12, 2017 8:57 pm
by normeus
from instructions:
Starts matching the #RegularExpression against the given String$. Individual matches can be iterated using the NextRegularExpressionMatch() function. From each match, the matching string, its position/length and any groups within the match can be extracted with the appropriate function.
ExamineRegularExpression(#RegularExpression, String$) binds the #RegularExpression to the examined string, and that #RegularExpression then acts as the index for NextRegularExpressionMatch().

Would you use the same index for 2 nested For statements?:

Code: Select all

; Deliberate counter-example: both loops use the same counter variable i,
; so the inner loop overwrites the counter the outer loop depends on.
For i = 1 To 10
   For i = 10 To 20
      Debug i
   Next
Next
use a different expression even if you have to match the same thing:

Code: Select all

  ; Nested matching with two independent regular expression objects:
  ; object 0 drives the outer loop and object 1 the inner loop, so examining
  ; the second string does not disturb the iteration over the first one.
  ;
  ; Both objects are created in a single condition so that the Else branch
  ; reports a failure of either creation (previously the Else belonged only
  ; to the creation of object 1 and a failure of object 0 went unreported).
  If CreateRegularExpression(0, "[a-z]b[A-Z]") And
     CreateRegularExpression(1, "[a-z]b[A-Z]")
    If ExamineRegularExpression(0, "abC ABc zbA abc")
      While NextRegularExpressionMatch(0)
        Debug "Match: " + RegularExpressionMatchString(0)
        Debug "    Position: " + Str(RegularExpressionMatchPosition(0))
        Debug "    Length: " + Str(RegularExpressionMatchLength(0))

        ; The inner examination runs once per outer match and uses object 1 only.
        If ExamineRegularExpression(1, "SDc abW abc zbR")
          While NextRegularExpressionMatch(1)
            Debug "2Match: " + RegularExpressionMatchString(1)
            Debug "    2Position: " + Str(RegularExpressionMatchPosition(1))
            Debug "    2Length: " + Str(RegularExpressionMatchLength(1))
          Wend
        EndIf
      Wend
    EndIf
  Else
    Debug RegularExpressionError()
  EndIf

but, you probably don't want this result either.
Norm.

Re: Nesting NextRegularExpressionMatch()

Posted: Tue Sep 12, 2017 9:36 pm
by mestnyi
This is how I would like it to work, I guess:

Code: Select all

; Scores every "name(arguments)" token found in String$ and returns the sum.
;   "a" counts +5, "b" counts -3, "c" counts +20; other names are ignored.
; The arguments of a scored token are scored recursively and added to the sum.
;
; Each call compiles its own regular expression object with #PB_Any. A shared
; object number would be re-examined by the nested call, which would overwrite
; the match iteration of the calling level; with a private object the loop of
; each level keeps its own state. The object is freed before returning.
;
; Returns 0 (after printing the error) if the pattern cannot be compiled.
Procedure Get(String$)
  Protected Result, Regex, Inner$

  Regex = CreateRegularExpression(#PB_Any, "(\w+)\((.*)\)")
  If Regex = 0
    Debug RegularExpressionError()
    ProcedureReturn 0
  EndIf

  If ExamineRegularExpression(Regex, String$)
    While NextRegularExpressionMatch(Regex)
      ; Group 1 is the name, group 2 is everything between the parentheses.
      Inner$ = RegularExpressionGroup(Regex, 2)

      Select RegularExpressionGroup(Regex, 1)
        Case "a"
          Result = Result + 5 + Get(Inner$)
        Case "b"
          Result = Result - 3 + Get(Inner$)
        Case "c"
          Result = Result + 20 + Get(Inner$)
      EndSelect
    Wend
  EndIf

  FreeRegularExpression(Regex)
  ProcedureReturn Result
EndProcedure


Debug Get("c(b(a()))")
And this is the version written the way you propose, normeus:

Code: Select all

; Deepest nesting level that has its own regular expression object.
; Objects 0 .. #Get_MaxLevel are created at the bottom of this snippet.
#Get_MaxLevel = 2

; Helper: scores the "name(arguments)" tokens of String$ using the regular
; expression object whose number equals Level, and descends into the
; arguments of every scored token with the object of the next level.
; Tokens nested deeper than #Get_MaxLevel are not inspected.
Procedure Get_AtLevel(String$, Level)
  Protected Result, Known, Inner$

  If ExamineRegularExpression(Level, String$)
    While NextRegularExpressionMatch(Level)
      ; Group 1 is the name, group 2 is everything between the parentheses.
      Inner$ = RegularExpressionGroup(Level, 2)
      Known = #True

      Select RegularExpressionGroup(Level, 1)
        Case "a"
          Result + 5
        Case "b"
          Result - 3
        Case "c"
          Result + 20
        Default
          ; Unknown names score nothing and are not descended into.
          Known = #False
      EndSelect

      ; One object per level: the nested examination never touches the
      ; object that the current loop is iterating.
      If Known And Level < #Get_MaxLevel
        Result + Get_AtLevel(Inner$, Level + 1)
      EndIf
    Wend
  EndIf

  ProcedureReturn Result
EndProcedure

; Scores String$ ("a" = +5, "b" = -3, "c" = +20) including nested tokens up to
; three levels deep, and returns the sum.
; Requires regular expression objects 0, 1 and 2 to exist.
Procedure Get(String$)
  ProcedureReturn Get_AtLevel(String$, 0)
EndProcedure


If CreateRegularExpression(0, "(\w+)\((.*)\)") And
   CreateRegularExpression(1, "(\w+)\((.*)\)") And
   CreateRegularExpression(2, "(\w+)\((.*)\)")
  Debug Get("c(b(a()))")
Else
  Debug RegularExpressionError()
EndIf
With a string like "c(b(a()))" it works because the outer loop runs only once. But what if the string is "c(b(a())) c(b(a()))"?

Re: Nesting NextRegularExpressionMatch()

Posted: Tue Sep 12, 2017 10:45 pm
by normeus
there is a program by @Didelphodon to check your regular expressions

http://www.purebasic.fr/english/viewtop ... 39#p284139

You changed the example code, and the regex in the new code matches the whole input:

Code: Select all

"(\w+)\((.*)\)"
matches all of this string:

Code: Select all

"c(b(a())) c(b(a()))"
so on the first match you are getting "b(a())) c(b(a())" for the second group and not "b(a())" as you expect.


Norm.

Re: Nesting NextRegularExpressionMatch()

Posted: Wed Sep 13, 2017 6:04 am
by mestnyi
No, that is not the problem; you are missing the point :(

Code: Select all

"((\w+)(?>[^()]|(?R))|[^,\s])+"
https://regex101.com/

Re: Nesting NextRegularExpressionMatch()

Posted: Wed Sep 13, 2017 7:42 am
by Marc56us
I do not understand exactly what you want to do :?
Why nest two ExamineRegularExpression() calls (and with the same number)?
(That resets the index counter.)
If there are two strings to test, this works:

Code: Select all

EnableExplicit

Enumeration 
     #RegEx
EndEnumeration

; Prints every match of "[a-z]b[A-Z]" in Input_String together with its
; position and length.
;
; The regular expression object #RegEx is created at the start of each call
; and freed on every return path. If the pattern cannot be compiled the
; program ends. If the examination cannot be started, or it yields no match
; at all, a "Nothing to extract" line is printed instead of an empty section.
Procedure Extract_All(Input_String.s)
     Protected MatchCount, HasMatch

     If Not CreateRegularExpression(#RegEx, "[a-z]b[A-Z]")
          Debug "Error creating #RegEx"
          End
     EndIf

     ; A successful ExamineRegularExpression() only means that matching was
     ; started; the first NextRegularExpressionMatch() tells whether there is
     ; anything to report.
     If ExamineRegularExpression(#RegEx, Input_String)
          HasMatch = NextRegularExpressionMatch(#RegEx)
     EndIf

     If HasMatch = 0
          Debug "Nothing to extract from: " + Input_String
          FreeRegularExpression(#RegEx)
          ProcedureReturn
     EndIf

     Debug ~"\n--- String [" + Input_String + "]" + #CRLF$
     Repeat
          MatchCount + 1
          Debug "  Match " + Str(MatchCount) + "   : " + RegularExpressionMatchString(#RegEx)
          Debug "    Position: " + Str(RegularExpressionMatchPosition(#RegEx))
          Debug "    Length  : " + Str(RegularExpressionMatchLength(#RegEx))
          Debug ""
     Until NextRegularExpressionMatch(#RegEx) = 0

     FreeRegularExpression(#RegEx)
EndProcedure

Extract_All("abC ABc zbA abc")
Extract_All("SDc abW abc zbR")

End

Code: Select all

--- String [abC ABc zbA abc]

  Match 1   : abC
    Position: 1
    Length  : 3

  Match 2   : zbA
    Position: 9
    Length  : 3


--- String [SDc abW abc zbR]

  Match 1   : abW
    Position: 5
    Length  : 3

  Match 2   : zbR
    Position: 13
    Length  : 3

Re: Nesting NextRegularExpressionMatch()

Posted: Thu Sep 14, 2017 8:50 am
by #NULL
there are recursive regular expressions, the PCRE part is the relevant one for pb: https://stackoverflow.com/a/35271017
also you should use dynamic pb object IDs (#PB_ANY) if you use a function recursively.

Code: Select all

; Prints every "name(inner)" match found in str and then recurses into the
; inner part of each match, indenting the output by 8 spaces per nesting level.
;
;   str     - text to scan
;   nesting - current recursion depth (0 for the top-level call)
;
; Each call creates its own regular expression object with #PB_Any, so the
; recursive call does not disturb the match iteration of its caller. The
; object is freed before the procedure returns.
Procedure f(str.s, nesting = 0)
	Protected indent.s = Space(8 * nesting)
	Protected regex, groupCount
	
	Debug indent + "-------------"
	
	; https://stackoverflow.com/a/35271017
	;Protected regex = CreateRegularExpression(#PB_Any, "\((?>[^)(]+|(?R))*\)")
	
	; added '(' after first '\' and ')' after * to create a group of only inner part, without outer braces
	; added '(.?)' in front to get function name
	regex = CreateRegularExpression(#PB_Any, "(.?)\(((?>[^)(]+|(?R))*)\)")
	
	If regex
		If ExamineRegularExpression(regex, str)
			While NextRegularExpressionMatch(regex)
				groupCount = CountRegularExpressionGroups(regex)
				
				Debug indent + "Match: " + RegularExpressionMatchString(regex)
				Debug indent + "- Position: " + Str(RegularExpressionMatchPosition(regex))
				Debug indent + "- Length: " + Str(RegularExpressionMatchLength(regex))
				Debug indent + "- Groups: " + Str(groupCount)
				
				If groupCount = 2
					Debug indent + "- name: " + RegularExpressionGroup(regex, 1)
					Debug indent + "- inner: " + RegularExpressionGroup(regex, 2)
					
					; Only descend when there is something between the braces.
					If RegularExpressionGroupLength(regex, 2) > 0
						f(RegularExpressionGroup(regex, 2), nesting + 1)
					EndIf
				EndIf
			Wend
		EndIf
		
		; Release the per-call object; without this every call leaks one.
		FreeRegularExpression(regex)
	Else
		Debug RegularExpressionError()
	EndIf
EndProcedure

;f("some text(text here(possible text)text(possible text(more text)))end text")
f("c(b(a())) c(b(a())) c(a(), b())")
output for "c(b(a())) c(b(a())) c(a(), b())":

Code: Select all

-------------
Match: c(b(a()))
- Position: 1
- Length: 9
- Groups: 2
- name: c
- inner: b(a())
        -------------
        Match: b(a())
        - Position: 1
        - Length: 6
        - Groups: 2
        - name: b
        - inner: a()
                -------------
                Match: a()
                - Position: 1
                - Length: 3
                - Groups: 2
                - name: a
                - inner: 
Match: c(b(a()))
- Position: 11
- Length: 9
- Groups: 2
- name: c
- inner: b(a())
        -------------
        Match: b(a())
        - Position: 1
        - Length: 6
        - Groups: 2
        - name: b
        - inner: a()
                -------------
                Match: a()
                - Position: 1
                - Length: 3
                - Groups: 2
                - name: a
                - inner: 
Match: c(a(), b())
- Position: 21
- Length: 11
- Groups: 2
- name: c
- inner: a(), b()
        -------------
        Match: a()
        - Position: 1
        - Length: 3
        - Groups: 2
        - name: a
        - inner: 
        Match: b()
        - Position: 6
        - Length: 3
        - Groups: 2
        - name: b
        - inner: