Regular expressions with links to groups

Everything else that doesn't fall into one of the other PB categories.
AZJIO
Addict
Addict
Posts: 2141
Joined: Sun May 14, 2017 1:48 am

Regular expressions with links to groups

Post by AZJIO »

Using other people's examples, I made changes
+Once
module by eddy

Code: Select all

EnableExplicit ; require every variable to be declared before it is used

#RegExp = 0 ; fixed ID under which the regular expressions of this example are created

; Replaces every match of the regular expression RgEx in *Result\s with
; Replace0$ and stores the new text back into *Result\s.
;
; Back-references recognised in Replace0$:
;   \0       - the whole match
;   \1 .. \9 - the capture group with that number, if the expression has it
; Everything else, including a reference to a group the expression does not
; have, is copied unchanged.
;
; Parameters:
;   RgEx      - ID of an already created regular expression
;   *Result   - pointer to a String structure; \s is the input text and is
;               overwritten with the output text
;   Replace0$ - replacement template
;
; Returns the text that is left in *Result\s.
Procedure.s RegexReplace2(RgEx, *Result.string, Replace0$)
	Protected i, CountGr, Pos, Offset = 1
	Protected Replace$, Char$
	Protected TemplateLen, Code, GroupNo
	Protected NewList item.s()
	Protected LenT, *Point

	CountGr = CountRegularExpressionGroups(RgEx)
	; group limit: only the single-digit back-references \1 .. \9 are supported
	If CountGr > 9
		CountGr = 9
	EndIf
	TemplateLen = Len(Replace0$)

	If ExamineRegularExpression(RgEx, *Result\s)
		While NextRegularExpressionMatch(RgEx)
			Pos = RegularExpressionMatchPosition(RgEx)

			; Expand the template in a single left-to-right pass. Inserted text is
			; never scanned again, so a match or group that itself contains
			; something like "\2" is not mistaken for another back-reference.
			Replace$ = ""
			i = 1
			While i <= TemplateLen
				Char$ = Mid(Replace0$, i, 1)
				GroupNo = -1 ; -1 = the current character does not start a back-reference
				If Char$ = "\" And i < TemplateLen
					Code = Asc(Mid(Replace0$, i + 1, 1))
					If Code >= '0' And Code <= '9'
						GroupNo = Code - '0'
					EndIf
				EndIf

				If GroupNo = 0
					Replace$ + RegularExpressionMatchString(RgEx) ; back-reference \0
					i + 2
				ElseIf GroupNo >= 1 And GroupNo <= CountGr
					Replace$ + RegularExpressionGroup(RgEx, GroupNo)
					i + 2
				Else
					Replace$ + Char$ ; ordinary character or reference to a missing group
					i + 1
				EndIf
			Wend

			; item() = text between the previous match (or the start) and this match
			;          + the expanded replacement
			If AddElement(item())
				item() = Mid(*Result\s, Offset, Pos - Offset) + Replace$
			EndIf
			Offset = Pos + RegularExpressionMatchLength(RgEx)
		Wend
		; text after the last match
		If AddElement(item())
			item() = Mid(*Result\s, Offset)
		EndIf

		; Join the parts: compute the total length first, then copy every part
		; into one pre-sized buffer.
		LenT = 0
		ForEach item()
			LenT + Len(item())
		Next

		*Result\s = Space(LenT) ; allocate the result, filled with spaces
		If LenT
			; Only copy when there is something to write: an empty string may
			; have no buffer that CopyMemoryString() could write into.
			*Point = @*Result\s ; write position, advanced by CopyMemoryString()
			ForEach item()
				CopyMemoryString(item(), @*Point)
			Next
		EndIf

		FreeList(item())
	EndIf

	ProcedureReturn *Result\s
EndProcedure


#RegExp = 0
Define Text.string

; Demo 1: abbreviate a long path by keeping groups 1 and 3 and putting "..."
; in place of the middle part (group 2).
Text\s = "C:\ProgramData\Microsoft\Windows\Start Menu\Programs\7-Zip\7-Zip File Manager"
If CreateRegularExpression(#RegExp , "(^.{3,11}/|.{11})(.*)(/.{6,27}|.{27})$" )
	RegexReplace2(#RegExp, @Text, "\1...\3" )
	FreeRegularExpression(#RegExp)
	Debug Text\s
Else
	; the pattern did not compile: report why instead of using an invalid ID
	Debug RegularExpressionError()
EndIf

; Demo 2: append a space to every matched group of digits so the number is
; split into blocks of three counted from the right.
Text\s = "56868797689645"
If CreateRegularExpression(#RegExp , "(\A\d{1,3}(?=(\d{3})+\z)|\d{3}(?=\d))" )
	RegexReplace2(#RegExp, @Text, "\1 " )
	FreeRegularExpression(#RegExp)
	Debug Text\s
Else
	Debug RegularExpressionError()
EndIf
AZJIO
Addict
Addict
Posts: 2141
Joined: Sun May 14, 2017 1:48 am

Re: Regular expressions with links to groups

Post by AZJIO »

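The previous example has a problem: the last two tests below do not work correctly with it, because text inserted for one group can itself be mistaken for a reference to another group. The new code first records the positions of the group references in the replacement string and then substitutes them by position, working from the end to the beginning so that the earlier positions stay valid.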

I also want to add support for group references written as \{11}, which would allow more than nine groups and make the group number unambiguous. The idea is that if references in this form are found, the \1 syntax is ignored.

Code: Select all

; AZJIO
; https://www.purebasic.fr/english/viewtopic.php?p=575871
; One back-reference found in the replacement template.
Structure ReplaceGr
	pos.i   ; position of the reference inside the template
	ngr.i   ; group number taken from the reference (0 = the whole match)
	group.s ; the reference text itself, e.g. "\1"
EndStructure

; Replaces every match of the regular expression RgEx in *Result\s with
; Replace0$ and stores the new text back into *Result\s.
;
; Back-references recognised in Replace0$:
;   \0       - the whole match
;   \1 .. \9 - the capture group with that number
; A reference to a group the expression does not have is left as literal text.
;
; Parameters:
;   RgEx      - ID of an already created regular expression
;   *Result   - pointer to a String structure; \s is the input text and is
;               overwritten with the output text
;   Replace0$ - replacement template
;   Escaped   - when non-zero, the sequences \r \n \t \f in Replace0$ are
;               replaced by CR, LF, TAB and FF before the substitution
Procedure RegexReplace2(RgEx, *Result.string, Replace0$, Escaped = 0)
	Protected CountGr, GroupNo, Pos, Offset = 1
	Protected Replace$, Value$
	Protected NewList item.s()
	Protected LenT, *Point
	Protected RE2
	Protected NewList ReplaceGr.ReplaceGr()

	; Number of groups the expression really has. The references are single
	; digits, so no extra clamping to 9 is needed.
	CountGr = CountRegularExpressionGroups(RgEx)

	If ExamineRegularExpression(RgEx, *Result\s)

		; Translate escape sequences in the replacement template
		If Escaped
			Replace0$ = ReplaceString(Replace0$, "\r", #CR$)
			Replace0$ = ReplaceString(Replace0$, "\n", #LF$)
			Replace0$ = ReplaceString(Replace0$, "\t", #TAB$)
			Replace0$ = ReplaceString(Replace0$, "\f", #FF$)
		EndIf

		; Collect the group references of the replacement template
		RE2 = CreateRegularExpression(#PB_Any, "\\\d")
		If RE2
			If ExamineRegularExpression(RE2, Replace0$)
				While NextRegularExpressionMatch(RE2)
					GroupNo = Val(Right(RegularExpressionMatchString(RE2), 1))
					; Skip references to groups that do not exist so that
					; RegularExpressionGroup() is never asked for an invalid index.
					If GroupNo <= CountGr
						If AddElement(ReplaceGr())
							ReplaceGr()\pos = RegularExpressionMatchPosition(RE2)
							ReplaceGr()\ngr = GroupNo
							ReplaceGr()\group = RegularExpressionMatchString(RE2)
						EndIf
					EndIf
				Wend
			EndIf
			FreeRegularExpression(RE2)
		EndIf

		; No usable reference: a plain replacement is enough
		If ListSize(ReplaceGr()) = 0
			*Result\s = ReplaceRegularExpression(RgEx, *Result\s, Replace0$)
			ProcedureReturn
		EndIf

		; Sort by position, descending: substituting from the end keeps the
		; recorded positions of the references further left valid.
		SortStructuredList(ReplaceGr(), #PB_Sort_Descending, OffsetOf(ReplaceGr\pos), TypeOf(ReplaceGr\pos))

		While NextRegularExpressionMatch(RgEx)
			Pos = RegularExpressionMatchPosition(RgEx)
			Replace$ = Replace0$

			ForEach ReplaceGr()
				If ReplaceGr()\ngr
					Value$ = RegularExpressionGroup(RgEx, ReplaceGr()\ngr)
				Else
					Value$ = RegularExpressionMatchString(RgEx) ; back-reference \0
				EndIf
				; replace exactly one occurrence, starting at the recorded position
				Replace$ = ReplaceString(Replace$, ReplaceGr()\group, Value$, #PB_String_CaseSensitive, ReplaceGr()\pos, 1)
			Next

			; item() = text between the previous match (or the start) and this match
			;          + the expanded replacement
			If AddElement(item())
				item() = Mid(*Result\s, Offset, Pos - Offset) + Replace$
			EndIf
			Offset = Pos + RegularExpressionMatchLength(RgEx)
		Wend
		; text after the last match
		If AddElement(item())
			item() = Mid(*Result\s, Offset)
		EndIf

		; Join the parts: compute the total length first, then copy every part
		; into one pre-sized buffer.
		LenT = 0
		ForEach item()
			LenT + Len(item())
		Next

		*Result\s = Space(LenT) ; allocate the result, filled with spaces
		If LenT
			; Only copy when there is something to write: an empty string may
			; have no buffer that CopyMemoryString() could write into.
			*Point = @*Result\s ; write position, advanced by CopyMemoryString()
			ForEach item()
				CopyMemoryString(item(), @*Point)
			Next
		EndIf

		FreeList(item())
	EndIf
EndProcedure


#RegExp = 0
Define Text.string
Define Pattern$

; Demo 1: abbreviate a long path by keeping groups 1 and 3 and putting "..."
; in place of the middle part (group 2). The pattern depends on the path
; separator; every target other than Windows uses the "/" variant so that a
; pattern is always defined.
Text\s = "C:\ProgramData\Microsoft\Windows\Start Menu\Programs\7-Zip\7-Zip File Manager"
CompilerSelect #PB_Compiler_OS
	CompilerCase #PB_OS_Windows
		Pattern$ = "^(.{3,11}\\|.{11})(.*)(\\.{6,27}|.{27})$"
	CompilerDefault
		Pattern$ = "^(.{3,11}/|.{11})(.*)(/.{6,27}|.{27})$"
CompilerEndSelect
If CreateRegularExpression(#RegExp, Pattern$)
	RegexReplace2(#RegExp, @Text, "\1...\3" )
	FreeRegularExpression(#RegExp)
	Debug Text\s
Else
	; the pattern did not compile: report why instead of using an invalid ID
	Debug RegularExpressionError()
EndIf

; Demo 2: append a space to every matched group of digits so the number is
; split into blocks of three counted from the right.
Text\s = "56868797689645"
If CreateRegularExpression(#RegExp , "(\A\d{1,3}(?=(\d{3})+\z)|\d{3}(?=\d))" )
	RegexReplace2(#RegExp, @Text, "\1 " )
	FreeRegularExpression(#RegExp)
	Debug Text\s
Else
	Debug RegularExpressionError()
EndIf

; Demo 3: swap the two groups; group 1 captures the text "\2", which must not
; be treated as a back-reference after it has been inserted.
Text\s = "\2aa"
If CreateRegularExpression(#RegExp , "(\D+\d+)(\D+)" )
	RegexReplace2(#RegExp, @Text, "\2\1" )
	FreeRegularExpression(#RegExp)
	Debug Text\s
Else
	Debug RegularExpressionError()
EndIf

; Demo 4: whole match followed by group 2; the match contains the text "\2".
Text\s = "\2aa"
If CreateRegularExpression(#RegExp , "(\D+\d+)(\D+)" )
	RegexReplace2(#RegExp, @Text, "\0\2" )
	FreeRegularExpression(#RegExp)
	Debug Text\s
Else
	Debug RegularExpressionError()
EndIf


AZJIO
Addict
Addict
Posts: 2141
Joined: Sun May 14, 2017 1:48 am

Re: Regular expressions with links to groups

Post by AZJIO »

I have added support for group numbers greater than 9. You can now reference a group as \15 or \{15}. If the reference is immediately followed by digits that would otherwise be read as part of the group number, put the number in curly brackets, for example \{1}5: this is group 1 followed by the literal character 5, not group 15.

Code: Select all

; AZJIO
; https://www.purebasic.fr/english/viewtopic.php?p=575871
; One back-reference found in the replacement template.
Structure ReplaceGr
	pos.i   ; position of the reference inside the template
	ngr.i   ; group number taken from the reference (0 = the whole match)
	group.s ; the reference text itself, e.g. "\1" or "\{12}"
EndStructure

; Replaces every match of the regular expression RgEx in *Result\s with
; Replace0$ and stores the new text back into *Result\s.
;
; Back-references recognised in Replace0$:
;   \0          - the whole match
;   \N, \{N}    - capture group N (one or more digits)
; A reference whose number is larger than the number of groups in RgEx is
; left as literal text.
;
; Parameters:
;   RgEx      - ID of an already created regular expression
;   *Result   - pointer to a String structure; \s is the input text and is
;               overwritten with the output text
;   Replace0$ - replacement template
;   Escaped   - when non-zero, the sequences \r \n \t \f in Replace0$ are
;               replaced by CR, LF, TAB and FF before the substitution
Procedure RegexReplace2(RgEx, *Result.string, Replace0$, Escaped = 0)
	Protected CountGr, GroupNo, Pos, Offset = 1
	Protected Replace$, Value$
	Protected NewList item.s()
	Protected LenT, *Point
	Protected RE, k
	Protected Dim RefPattern.s(1)
	Protected NewList ReplaceGr.ReplaceGr()

	CountGr = CountRegularExpressionGroups(RgEx)

	If ExamineRegularExpression(RgEx, *Result\s)

		; Translate escape sequences in the replacement template
		If Escaped
			Replace0$ = ReplaceString(Replace0$, "\r", #CR$)
			Replace0$ = ReplaceString(Replace0$, "\n", #LF$)
			Replace0$ = ReplaceString(Replace0$, "\t", #TAB$)
			Replace0$ = ReplaceString(Replace0$, "\f", #FF$)
		EndIf

		; Collect the group references of the replacement template. Both
		; notations are scanned with the same loop; in each pattern group 1
		; captures the digits of the group number.
		RefPattern(0) = "\\\{(\d+)\}" ; braced form  \{N}
		RefPattern(1) = "\\(\d+)"     ; plain form   \N
		For k = 0 To 1
			RE = CreateRegularExpression(#PB_Any, RefPattern(k))
			If RE
				If ExamineRegularExpression(RE, Replace0$)
					While NextRegularExpressionMatch(RE)
						GroupNo = Val(RegularExpressionGroup(RE, 1))
						; keep only references to groups that exist
						If GroupNo <= CountGr
							If AddElement(ReplaceGr())
								ReplaceGr()\ngr = GroupNo
								ReplaceGr()\pos = RegularExpressionMatchPosition(RE)
								ReplaceGr()\group = RegularExpressionMatchString(RE)
							EndIf
						EndIf
					Wend
				EndIf
				FreeRegularExpression(RE)
			EndIf
		Next

		; No usable reference: a plain replacement is enough
		If ListSize(ReplaceGr()) = 0
			*Result\s = ReplaceRegularExpression(RgEx, *Result\s, Replace0$)
			ProcedureReturn
		EndIf

		; Sort by position, descending: substituting from the end keeps the
		; recorded positions of the references further left valid.
		SortStructuredList(ReplaceGr(), #PB_Sort_Descending, OffsetOf(ReplaceGr\pos), TypeOf(ReplaceGr\pos))

		While NextRegularExpressionMatch(RgEx)
			Pos = RegularExpressionMatchPosition(RgEx)
			Replace$ = Replace0$

			ForEach ReplaceGr()
				If ReplaceGr()\ngr
					Value$ = RegularExpressionGroup(RgEx, ReplaceGr()\ngr)
				Else
					Value$ = RegularExpressionMatchString(RgEx) ; back-reference \0
				EndIf
				; replace exactly one occurrence, starting at the recorded position
				Replace$ = ReplaceString(Replace$, ReplaceGr()\group, Value$, #PB_String_CaseSensitive, ReplaceGr()\pos, 1)
			Next

			; item() = text between the previous match (or the start) and this match
			;          + the expanded replacement
			If AddElement(item())
				item() = Mid(*Result\s, Offset, Pos - Offset) + Replace$
			EndIf
			Offset = Pos + RegularExpressionMatchLength(RgEx)
		Wend
		; text after the last match
		If AddElement(item())
			item() = Mid(*Result\s, Offset)
		EndIf

		; Join the parts: compute the total length first, then copy every part
		; into one pre-sized buffer.
		LenT = 0
		ForEach item()
			LenT + Len(item())
		Next

		*Result\s = Space(LenT) ; allocate the result, filled with spaces
		If LenT
			; Only copy when there is something to write: an empty string may
			; have no buffer that CopyMemoryString() could write into.
			*Point = @*Result\s ; write position, advanced by CopyMemoryString()
			ForEach item()
				CopyMemoryString(item(), @*Point)
			Next
		EndIf

		FreeList(item())
	EndIf
EndProcedure


#RegExp = 0
Define Text.string


; Demo 1: thirteen single-character groups written out in reverse order,
; using the braced form for the two-digit group numbers.
Text\s = "abcdefghijklm"
If CreateRegularExpression(#RegExp , "(\w)(\w)(\w)(\w)(\w)(\w)(\w)(\w)(\w)(\w)(\w)(\w)(\w)" )
	RegexReplace2(#RegExp, @Text, "\{13}-\{12}-\{11}-\{10}-\9-\8-\7-\6-\5-\4-\3-\2-\1-" )
	FreeRegularExpression(#RegExp)
	Debug Text\s
Else
	; the pattern did not compile: report why instead of using an invalid ID
	Debug RegularExpressionError()
EndIf


; Demo 2: "\{1}2" is group 1 followed by a literal "2"; the commented-out
; variant "\12" would reference group 12 instead.
Text\s = "abcdefghijklm"
If CreateRegularExpression(#RegExp , "(\w)(\w)(\w)(\w)(\w)(\w)(\w)(\w)(\w)(\w)(\w)(\w)(\w)" )
	RegexReplace2(#RegExp, @Text, "\{1}2" )
	; RegexReplace2(#RegExp, @Text, "\12" )
	FreeRegularExpression(#RegExp)
	Debug Text\s
Else
	Debug RegularExpressionError()
EndIf
Post Reply