Below are some procedures for using Perl 5 regular expressions from within PureBasic (4.00 or later). The procedures require that RegExpServices.exe (on Windows) or RegExpServices.pl (on non-Windows platforms, once PB 4.x becomes available for them) be present in the same directory as the executable that uses the procedures. You can get RegExpServices.pl, RegExpServices.exe, and the associated readme file here:
http://www.pebblesoft.com/RegExpServices/
Here are the procedures, followed by an example using them:
Code: Select all
;;;;
; RegExpServices.pb: Procedures for using Perl 5 regular expressions.
; See RegExpSplit(), RegExpExec(), RegExpTest(), and RegExpSub() below.
;
; RegExpServices.exe (Windows) or RegExpServices.pl (Linux / Mac OS X) must
; be present in the folder containing a program that uses this module. On
; Linux or Mac OS X, the default version of Perl must be 5.6 or 5.8 (for
; example, 5.8.6). Use "perl -v" at the command line (or "terminal") to
; determine the Perl version.
;
; When specifying a regular expression pattern, do not enclose it within
; any delimiters (such as forward slashes or quotation marks) as you would
; in Perl code, do not include any literal line-break characters within
; the pattern (use \r?\n to match a line break), and do not specify any
; regular expression options (modifiers)--such as /s or /m--at the end of
; the pattern.
;
; Regular expressions in these procedures always use the /s modifier behind
; the scenes; that is, the dot (.) matches any character (except within a
; character class, where it is simply a period). Regular expressions in
; these procedures *do not* use the /m modifier, the /i modifier, or any
; modifier other than /s. Therefore, the following practices
; are recommended:
;
; * For "beginning of input", use this: \A
;
; * For "end of input", use this: \z
;
; * For "beginning of line", use this: (?:\A|(?<=\n))
;
; * For "end of line", use this: (?=\r|\n|\z)
;
; * For "any character except a line-break character", use this: [^\r\n]
;
; * For ignoring case, use this at the beginning of the pattern: (?i)
;;;;
EnableExplicit
; The GetTabletPath() procedure returns the path (including the file name)
; of a temporary file named tablet.txt. The directory is taken from the
; first non-empty environment variable among tmpdir, TMPDIR, tmp, TMP,
; temp, and TEMP (both spellings are tried because environment variable
; names are case-sensitive on Linux and Mac OS X, where the conventional
; name is the upper-case TMPDIR). If none of them is set, the current
; directory (".") is used. The procedure does not create the file.
;
; Returns: the directory, one path separator, and "tablet.txt".
Procedure.s GetTabletPath()
  Define .s Tmpdir
  Define .s Separator
  Define .s LastChar

  ; Pick the path separator for the platform this code is compiled for.
  If #PB_Compiler_OS = #PB_OS_Windows
    Separator = "\"
  Else
    Separator = "/"
  EndIf

  ; Probe the candidate variables in priority order; stop at the first hit.
  Tmpdir = GetEnvironmentVariable("tmpdir")
  If Len(Tmpdir) = 0
    Tmpdir = GetEnvironmentVariable("TMPDIR")
  EndIf
  If Len(Tmpdir) = 0
    Tmpdir = GetEnvironmentVariable("tmp")
  EndIf
  If Len(Tmpdir) = 0
    Tmpdir = GetEnvironmentVariable("TMP")
  EndIf
  If Len(Tmpdir) = 0
    Tmpdir = GetEnvironmentVariable("temp")
  EndIf
  If Len(Tmpdir) = 0
    Tmpdir = GetEnvironmentVariable("TEMP")
  EndIf
  If Len(Tmpdir) = 0
    Tmpdir = "."
  EndIf

  ; Do not double the separator when the directory already ends with one.
  ; A forward slash counts as a separator everywhere; a backslash counts
  ; only on Windows, because it is an ordinary file-name character elsewhere.
  LastChar = Right(Tmpdir, 1)
  If LastChar = "/" Or LastChar = Separator
    ProcedureReturn Tmpdir + "tablet.txt"
  EndIf

  ProcedureReturn Tmpdir + Separator + "tablet.txt"
EndProcedure
; EncodeString() escapes a raw string so that it fits on a single line:
; every backslash becomes \\, every carriage return becomes \r, and every
; line feed becomes \n. The escaped copy is returned; the argument itself
; is left untouched.
Procedure.s EncodeString(UnencodedString.s)
  Define .s Escaped

  ; Backslashes are doubled first so that the backslashes introduced by
  ; the \r and \n escapes below are not doubled again.
  Escaped = ReplaceString(UnencodedString, "\", "\\")

  ; Carriage returns are handled before line feeds, as in the original order.
  Escaped = ReplaceString(ReplaceString(Escaped, Chr(13), "\r"), Chr(10), "\n")

  ProcedureReturn Escaped
EndProcedure
; DecodeString() reverses the escaping scheme in which \r stands for a
; carriage return, \n for a line feed, and \\ for a single backslash. It
; returns the string with those three characters appearing literally.
Procedure.s DecodeString(CodedString.s)
  Define .l Counter
  Define .s Marker
  Define .s Text

  ; Find a marker of the form "[N]" that does not occur anywhere in the
  ; input, starting with "[0]" and counting upward.
  Counter = 0
  Marker = "[0]"
  While FindString(CodedString, Marker, 1) > 0
    Counter + 1
    Marker = "[" + Str(Counter) + "]"
  Wend

  ; Park every escaped backslash behind the marker so that its second
  ; backslash cannot be mistaken for the start of a \r or \n escape.
  Text = ReplaceString(CodedString, "\\", Marker)
  Text = ReplaceString(Text, "\r", Chr(13))
  Text = ReplaceString(Text, "\n", Chr(10))

  ; Finally turn each marker into the single backslash it stands for.
  ProcedureReturn ReplaceString(Text, Marker, "\")
EndProcedure
; The GetElementsFromInput() procedure takes a string and a list to hold
; the results. The procedure clears the list and then stores in the list
; the individual elements from the string.
;
; Each element must be terminated with \z in the string. The \z is not
; stored in the list. An empty element (a \z that immediately follows
; another \z or the start of the string) is stored as an empty string.
; Text after the last \z, if any, is stored as a final element. Escaped
; backslashes (\\) are left escaped in the stored elements.
;
; The string must not contain line-break characters: carriage returns are
; discarded, and a literal line feed is treated like a \z terminator.
;
; Parameters:
;   ZString    - the \z-terminated elements, concatenated.
;   ResultList - receives one list element per element found.
Procedure GetElementsFromInput(ZString.s, List ResultList.s())
  Define .s Work
  Define .s Placeholder
  Define .l PlaceholderNum
  Define .l WorkLen
  Define .l StartPos
  Define .l BreakPos

  ; Choose a placeholder of the form "[N]" that does not occur in the input.
  PlaceholderNum = 0
  While FindString(ZString, "[" + Str(PlaceholderNum) + "]", 1) > 0
    PlaceholderNum + 1
  Wend
  Placeholder = "[" + Str(PlaceholderNum) + "]"

  ; Hide escaped backslashes so that the z in "\\z" is not taken for a
  ; terminator, turn every real \z into a line feed, then restore the
  ; escaped backslashes unchanged.
  Work = ReplaceString(ZString, "\\", Placeholder)
  Work = ReplaceString(Work, "\z", Chr(10))
  Work = ReplaceString(Work, Placeholder, "\\")

  ; Carriage returns never become part of an element.
  Work = RemoveString(Work, Chr(13))

  ClearList(ResultList())

  ; Cut the text at each line feed instead of walking it one character
  ; at a time.
  WorkLen = Len(Work)
  StartPos = 1
  While StartPos <= WorkLen
    BreakPos = FindString(Work, Chr(10), StartPos)
    If BreakPos = 0
      ; Unterminated trailing text still counts as an element.
      AddElement(ResultList())
      ResultList() = Mid(Work, StartPos, WorkLen - StartPos + 1)
      Break
    EndIf
    AddElement(ResultList())
    If BreakPos > StartPos
      ResultList() = Mid(Work, StartPos, BreakPos - StartPos)
    Else
      ; A terminator with nothing in front of it is an empty element.
      ResultList() = ""
    EndIf
    StartPos = BreakPos + 1
  Wend
EndProcedure
; The RegExpSplit() procedure takes a regular expression pattern, a target
; string, and a list to hold the results. The procedure clears the list,
; performs a Perl split() operation using the pattern and the target string,
; and stores the results in the list. The procedure includes any trailing
; empty fields in the results of the split operation.
;
; The work is done by the external RegExpServices program, which is run
; with the "split" command and the path of a temporary file describing the
; operation. If the temporary file cannot be written, the program cannot
; be run, the program prints nothing, or the program reports a nonzero exit
; code, a message box is shown and the whole application ends.
;
; For more information, refer to documentation on the Perl split() function.
Procedure RegExpSplit(Pattern.s, Target.s, List ResultList.s())
  Define .s TabletPath
  Define .s TabletContents
  Define .s AppFilePath
  Define .s AppDirPath
  Define .s AppFileName
  Define .s StandardInput
  Define .s PerlExtension
  ; File and program identifiers can be pointer-sized, so they are kept
  ; in native integers (.i) rather than in 32-bit longs.
  Define .i TabletFile
  Define .i Program
  Define .l ExitCode
  NewList InputLines.s()

  ; Get AppFilePath, AppDirPath, AppFileName, and PerlExtension:
  AppFilePath = ProgramFilename()
  AppDirPath = GetPathPart(AppFilePath)
  AppFileName = GetFilePart(AppFilePath)
  If #PB_Compiler_OS = #PB_OS_Windows
    PerlExtension = ".exe"
  Else
    PerlExtension = ".pl"
  EndIf

  ; Write the regular-expression operation's specifications to tablet.txt.
  ; #PB_Any is used so that a file the caller has open as #0 is not
  ; closed behind its back.
  TabletPath = GetTabletPath()
  TabletContents = ""
  TabletContents + "Pattern=" + Pattern + Chr(10)
  TabletContents + "TargetString=" + EncodeString(Target) + Chr(10)
  TabletFile = CreateFile(#PB_Any, TabletPath)
  If TabletFile
    WriteString(TabletFile, TabletContents)
    CloseFile(TabletFile)
  Else
    MessageRequester(AppFileName, "ERROR: Couldn't open " + Chr(34) + TabletPath + Chr(34) + " for writing! Quitting now.", #PB_MessageRequester_Ok)
    End
  EndIf

  ; Run the Perl program and capture its output and exit code:
  If #PB_Compiler_OS = #PB_OS_Windows
    Program = RunProgram(AppDirPath + "RegExpServices.exe", "split " + Chr(34) + TabletPath + Chr(34), "", #PB_Program_Open|#PB_Program_Read)
  Else
    Program = RunProgram("perl", Chr(34) + AppDirPath + "RegExpServices.pl" + Chr(34) + " " + "split " + Chr(34) + TabletPath + Chr(34), "", #PB_Program_Open|#PB_Program_Read)
  EndIf
  If Not Program
    MessageRequester(AppFileName, "ERROR: Couldn't run " + Chr(34) + AppDirPath + "RegExpServices" + PerlExtension + Chr(34) + "! Quitting now.", #PB_MessageRequester_Ok)
    End
  EndIf
  StandardInput = ""
  While ProgramRunning(Program)
    StandardInput + ReadProgramString(Program)
  Wend
  If Len(StandardInput) = 0
    MessageRequester(AppFileName, "ERROR: Couldn't capture the output from " + Chr(34) + AppDirPath + "RegExpServices" + PerlExtension + Chr(34) + "! Quitting now.", #PB_MessageRequester_Ok)
    End
  EndIf
  ExitCode = ProgramExitCode(Program)
  CloseProgram(Program)

  ; Process the exit code:
  Select ExitCode
    Case 0
      ; Success; nothing to report.
    Case 1
      MessageRequester(AppFileName, "ERROR: Invalid list of " + "command-line arguments passed to the RegExpServices " + "Perl program! Quitting now.", #PB_MessageRequester_Ok)
      End
    Case 2
      MessageRequester(AppFileName, "ERROR: Couldn't read " + "tablet.txt! Quitting now.", #PB_MessageRequester_Ok)
      End
    Case 3
      MessageRequester(AppFileName, "ERROR: Invalid contents in " + "tablet.txt! Quitting now.", #PB_MessageRequester_Ok)
      End
    Case 4
      ; For this exit code the captured output is shown as the error text.
      MessageRequester(AppFileName, "ERROR: Invalid " + "regular expression:" + Chr(10) + Chr(10) + " " + StandardInput + Chr(10) + Chr(10) + "Quitting now.", #PB_MessageRequester_Ok)
      End
    Default
      MessageRequester(AppFileName, "ERROR: An unknown " + "problem occurred during execution of the " + "RegExpServices Perl program! Quitting now.", #PB_MessageRequester_Ok)
      End
  EndSelect

  ; Process StandardInput and place the decoded results in ResultList():
  GetElementsFromInput(StandardInput, InputLines())
  ClearList(ResultList())
  ForEach InputLines()
    AddElement(ResultList())
    ResultList() = DecodeString(InputLines())
  Next
EndProcedure
; The RegExpExec() procedure takes a regular expression pattern, a target
; string, and a list to hold the results. The procedure clears the list,
; executes the regular expression against the target string, and stores the
; resulting matches in the list. The list does not include extra elements
; to hold the results of capturing parentheses.
;
; The work is done by the external RegExpServices program, which is run
; with the "exec" command and the path of a temporary file describing the
; operation. If the program's entire output is "0", the list is left
; empty. If the temporary file cannot be written, the program cannot be
; run, or the program reports a nonzero exit code, a message box is shown
; and the whole application ends.
Procedure RegExpExec(Pattern.s, Target.s, List ResultList.s())
  Define .s TabletPath
  Define .s TabletContents
  Define .s AppFilePath
  Define .s AppDirPath
  Define .s AppFileName
  Define .s StandardInput
  Define .s PerlExtension
  ; File and program identifiers can be pointer-sized, so they are kept
  ; in native integers (.i) rather than in 32-bit longs.
  Define .i TabletFile
  Define .i Program
  Define .l ExitCode
  NewList InputLines.s()

  ; Get AppFilePath, AppDirPath, AppFileName, and PerlExtension:
  AppFilePath = ProgramFilename()
  AppDirPath = GetPathPart(AppFilePath)
  AppFileName = GetFilePart(AppFilePath)
  If #PB_Compiler_OS = #PB_OS_Windows
    PerlExtension = ".exe"
  Else
    PerlExtension = ".pl"
  EndIf

  ; Write the regular-expression operation's specifications to tablet.txt.
  ; #PB_Any is used so that a file the caller has open as #0 is not
  ; closed behind its back.
  TabletPath = GetTabletPath()
  TabletContents = ""
  TabletContents + "Pattern=" + Pattern + Chr(10)
  TabletContents + "TargetString=" + EncodeString(Target) + Chr(10)
  TabletFile = CreateFile(#PB_Any, TabletPath)
  If TabletFile
    WriteString(TabletFile, TabletContents)
    CloseFile(TabletFile)
  Else
    MessageRequester(AppFileName, "ERROR: Couldn't open " + Chr(34) + TabletPath + Chr(34) + " for writing! Quitting now.", #PB_MessageRequester_Ok)
    End
  EndIf

  ; Run the Perl program and capture its output and exit code:
  If #PB_Compiler_OS = #PB_OS_Windows
    Program = RunProgram(AppDirPath + "RegExpServices.exe", "exec " + Chr(34) + TabletPath + Chr(34), "", #PB_Program_Open|#PB_Program_Read)
  Else
    Program = RunProgram("perl", Chr(34) + AppDirPath + "RegExpServices.pl" + Chr(34) + " " + "exec " + Chr(34) + TabletPath + Chr(34), "", #PB_Program_Open|#PB_Program_Read)
  EndIf
  If Not Program
    MessageRequester(AppFileName, "ERROR: Couldn't run " + Chr(34) + AppDirPath + "RegExpServices" + PerlExtension + Chr(34) + "! Quitting now.", #PB_MessageRequester_Ok)
    End
  EndIf
  StandardInput = ""
  While ProgramRunning(Program)
    StandardInput + ReadProgramString(Program)
  Wend
  ExitCode = ProgramExitCode(Program)
  CloseProgram(Program)

  ; Process the exit code:
  Select ExitCode
    Case 0
      ; Success; nothing to report.
    Case 1
      MessageRequester(AppFileName, "ERROR: Invalid list of " + "command-line arguments passed to the RegExpServices " + "Perl program! Quitting now.", #PB_MessageRequester_Ok)
      End
    Case 2
      MessageRequester(AppFileName, "ERROR: Couldn't read " + "tablet.txt! Quitting now.", #PB_MessageRequester_Ok)
      End
    Case 3
      MessageRequester(AppFileName, "ERROR: Invalid contents in " + "tablet.txt! Quitting now.", #PB_MessageRequester_Ok)
      End
    Case 4
      ; For this exit code the captured output is shown as the error text.
      MessageRequester(AppFileName, "ERROR: Invalid " + "regular expression:" + Chr(10) + Chr(10) + " " + StandardInput + Chr(10) + Chr(10) + "Quitting now.", #PB_MessageRequester_Ok)
      End
    Default
      MessageRequester(AppFileName, "ERROR: An unknown " + "problem occurred during execution of the " + "RegExpServices Perl program! Quitting now.", #PB_MessageRequester_Ok)
      End
  EndSelect

  ; Process StandardInput and place the decoded results in ResultList().
  ; An output of exactly "0" yields an empty list.
  ClearList(ResultList())
  If StandardInput = "0"
    ProcedureReturn
  EndIf
  GetElementsFromInput(StandardInput, InputLines())
  ForEach InputLines()
    AddElement(ResultList())
    ResultList() = DecodeString(InputLines())
  Next
EndProcedure
; The RegExpTest() procedure takes a regular expression pattern and a target
; string. The procedure returns a Boolean value (0 or 1) indicating whether
; the target string contains a match for the regular expression.
;
; The work is done by the external RegExpServices program, which is run
; with the "test" command and the path of a temporary file describing the
; operation. An output of exactly "0" gives the result 0; any other output
; gives the result 1. If the temporary file cannot be written, the program
; cannot be run, the program prints nothing, or the program reports a
; nonzero exit code, a message box is shown and the whole application ends.
Procedure.b RegExpTest(Pattern.s, Target.s)
  Define .s TabletPath
  Define .s TabletContents
  Define .s AppFilePath
  Define .s AppDirPath
  Define .s AppFileName
  Define .s StandardInput
  Define .s PerlExtension
  ; File and program identifiers can be pointer-sized, so they are kept
  ; in native integers (.i) rather than in 32-bit longs.
  Define .i TabletFile
  Define .i Program
  Define .l ExitCode
  Define .b Result

  ; Get AppFilePath, AppDirPath, AppFileName, and PerlExtension:
  AppFilePath = ProgramFilename()
  AppDirPath = GetPathPart(AppFilePath)
  AppFileName = GetFilePart(AppFilePath)
  If #PB_Compiler_OS = #PB_OS_Windows
    PerlExtension = ".exe"
  Else
    PerlExtension = ".pl"
  EndIf

  ; Write the regular-expression operation's specifications to tablet.txt.
  ; #PB_Any is used so that a file the caller has open as #0 is not
  ; closed behind its back.
  TabletPath = GetTabletPath()
  TabletContents = ""
  TabletContents + "Pattern=" + Pattern + Chr(10)
  TabletContents + "TargetString=" + EncodeString(Target) + Chr(10)
  TabletFile = CreateFile(#PB_Any, TabletPath)
  If TabletFile
    WriteString(TabletFile, TabletContents)
    CloseFile(TabletFile)
  Else
    MessageRequester(AppFileName, "ERROR: Couldn't open " + Chr(34) + TabletPath + Chr(34) + " for writing! Quitting now.", #PB_MessageRequester_Ok)
    End
  EndIf

  ; Run the Perl program and capture its output and exit code:
  If #PB_Compiler_OS = #PB_OS_Windows
    Program = RunProgram(AppDirPath + "RegExpServices.exe", "test " + Chr(34) + TabletPath + Chr(34), "", #PB_Program_Open|#PB_Program_Read)
  Else
    Program = RunProgram("perl", Chr(34) + AppDirPath + "RegExpServices.pl" + Chr(34) + " " + "test " + Chr(34) + TabletPath + Chr(34), "", #PB_Program_Open|#PB_Program_Read)
  EndIf
  If Not Program
    MessageRequester(AppFileName, "ERROR: Couldn't run " + Chr(34) + AppDirPath + "RegExpServices" + PerlExtension + Chr(34) + "! Quitting now.", #PB_MessageRequester_Ok)
    End
  EndIf
  StandardInput = ""
  While ProgramRunning(Program)
    StandardInput + ReadProgramString(Program)
  Wend
  If Len(StandardInput) = 0
    MessageRequester(AppFileName, "ERROR: Couldn't capture the output from " + Chr(34) + AppDirPath + "RegExpServices" + PerlExtension + Chr(34) + "! Quitting now.", #PB_MessageRequester_Ok)
    End
  EndIf
  ExitCode = ProgramExitCode(Program)
  CloseProgram(Program)

  ; Process the exit code:
  Select ExitCode
    Case 0
      ; Success; nothing to report.
    Case 1
      MessageRequester(AppFileName, "ERROR: Invalid list of " + "command-line arguments passed to the RegExpServices " + "Perl program! Quitting now.", #PB_MessageRequester_Ok)
      End
    Case 2
      MessageRequester(AppFileName, "ERROR: Couldn't read " + "tablet.txt! Quitting now.", #PB_MessageRequester_Ok)
      End
    Case 3
      MessageRequester(AppFileName, "ERROR: Invalid contents in " + "tablet.txt! Quitting now.", #PB_MessageRequester_Ok)
      End
    Case 4
      ; For this exit code the captured output is shown as the error text.
      MessageRequester(AppFileName, "ERROR: Invalid " + "regular expression:" + Chr(10) + Chr(10) + " " + StandardInput + Chr(10) + Chr(10) + "Quitting now.", #PB_MessageRequester_Ok)
      End
    Default
      MessageRequester(AppFileName, "ERROR: An unknown " + "problem occurred during execution of the " + "RegExpServices Perl program! Quitting now.", #PB_MessageRequester_Ok)
      End
  EndSelect

  ; Process StandardInput and return the result:
  If StandardInput = "0"
    Result = 0
  Else
    Result = 1
  EndIf
  ProcedureReturn Result
EndProcedure
; The RegExpSub() procedure takes a regular expression pattern, a target
; string, and a replacement string. The procedure performs a substitution
; operation on the target string using the given regular expression pattern
; and replacement string, and returns the resulting string. The procedure
; does not modify the target string.
;
; The work is done by the external RegExpServices program, which is run
; with the "sub" command and the path of a temporary file describing the
; operation. If the temporary file cannot be written, the program cannot
; be run, or the program reports a nonzero exit code, a message box is
; shown and the whole application ends.
;
; NOTE: This procedure treats dollar signs within the replacement string
; literally. Variables such as $1, $2, and so forth are not supported.
Procedure.s RegExpSub(Pattern.s, Target.s, Replace.s)
  Define .s TabletPath
  Define .s TabletContents
  Define .s AppFilePath
  Define .s AppDirPath
  Define .s AppFileName
  Define .s StandardInput
  Define .s PerlExtension
  ; File and program identifiers can be pointer-sized, so they are kept
  ; in native integers (.i) rather than in 32-bit longs.
  Define .i TabletFile
  Define .i Program
  Define .l ExitCode

  ; Get AppFilePath, AppDirPath, AppFileName, and PerlExtension:
  AppFilePath = ProgramFilename()
  AppDirPath = GetPathPart(AppFilePath)
  AppFileName = GetFilePart(AppFilePath)
  If #PB_Compiler_OS = #PB_OS_Windows
    PerlExtension = ".exe"
  Else
    PerlExtension = ".pl"
  EndIf

  ; Write the regular-expression operation's specifications to tablet.txt.
  ; #PB_Any is used so that a file the caller has open as #0 is not
  ; closed behind its back.
  TabletPath = GetTabletPath()
  TabletContents = ""
  TabletContents + "Pattern=" + Pattern + Chr(10)
  TabletContents + "TargetString=" + EncodeString(Target) + Chr(10)
  TabletContents + "ReplacementString=" + EncodeString(Replace) + Chr(10)
  TabletFile = CreateFile(#PB_Any, TabletPath)
  If TabletFile
    WriteString(TabletFile, TabletContents)
    CloseFile(TabletFile)
  Else
    MessageRequester(AppFileName, "ERROR: Couldn't open " + Chr(34) + TabletPath + Chr(34) + " for writing! Quitting now.", #PB_MessageRequester_Ok)
    End
  EndIf

  ; Run the Perl program and capture its output and exit code:
  If #PB_Compiler_OS = #PB_OS_Windows
    Program = RunProgram(AppDirPath + "RegExpServices.exe", "sub " + Chr(34) + TabletPath + Chr(34), "", #PB_Program_Open|#PB_Program_Read)
  Else
    Program = RunProgram("perl", Chr(34) + AppDirPath + "RegExpServices.pl" + Chr(34) + " " + "sub " + Chr(34) + TabletPath + Chr(34), "", #PB_Program_Open|#PB_Program_Read)
  EndIf
  If Not Program
    MessageRequester(AppFileName, "ERROR: Couldn't run " + Chr(34) + AppDirPath + "RegExpServices" + PerlExtension + Chr(34) + "! Quitting now.", #PB_MessageRequester_Ok)
    End
  EndIf
  StandardInput = ""
  While ProgramRunning(Program)
    StandardInput + ReadProgramString(Program)
  Wend
  ExitCode = ProgramExitCode(Program)
  CloseProgram(Program)

  ; Process the exit code:
  Select ExitCode
    Case 0
      ; Success; nothing to report.
    Case 1
      MessageRequester(AppFileName, "ERROR: Invalid list of " + "command-line arguments passed to the RegExpServices " + "Perl program! Quitting now.", #PB_MessageRequester_Ok)
      End
    Case 2
      MessageRequester(AppFileName, "ERROR: Couldn't read " + "tablet.txt! Quitting now.", #PB_MessageRequester_Ok)
      End
    Case 3
      MessageRequester(AppFileName, "ERROR: Invalid contents in " + "tablet.txt! Quitting now.", #PB_MessageRequester_Ok)
      End
    Case 4
      ; For this exit code the captured output is shown as the error text.
      MessageRequester(AppFileName, "ERROR: Invalid " + "regular expression:" + Chr(10) + Chr(10) + " " + StandardInput + Chr(10) + Chr(10) + "Quitting now.", #PB_MessageRequester_Ok)
      End
    Default
      MessageRequester(AppFileName, "ERROR: An unknown " + "problem occurred during execution of the " + "RegExpServices Perl program! Quitting now.", #PB_MessageRequester_Ok)
      End
  EndSelect

  ; Decode the captured output and return it as the result:
  ProcedureReturn DecodeString(StandardInput)
EndProcedure
Code: Select all
;;;;
; ContinueLines.pb: A PureBasic program that processes PureBasic source
; code (stored either in a file or in the system clipboard) in order to
; restore the continuity of long lines that have been split by the
; convention of using an underscore (after at least one space) at the end of
; a line as a line-continuation character. The program removes each such
; underscore together with all immediately following consecutive whitespace
; characters including line breaks.
;
; This program takes an optional command-line argument specifying the path
; to a PureBasic source file. If the file name contains an underscore
; immediately preceding the file-name extension (for example,
; MyProgram_.pb), the program writes the processed text to a file with that
; name minus the underscore (for example, MyProgram.pb). Otherwise, the
; program overwrites the original file.
;
; If there is no command-line argument, the program processes and overwrites
; the text content of the system clipboard.
;;;;
EnableExplicit
IncludeFile "RegExpServices.pb"

Define .s Pattern, LineBreak, SourceFilePath, InputText, OutputText, OutputFilePath
Define .l LengthOfLineBreak, LengthOfFile

; An underscore preceded by a space, followed by at least one line-break
; character and then any further whitespace.
Pattern = "(?<= )_[\r\n]+\s*"

; The line break appended after every line read from the source file.
CompilerIf #PB_Compiler_OS = #PB_OS_Windows
  LineBreak = Chr(13) + Chr(10)
  LengthOfLineBreak = 2
CompilerElse
  LineBreak = Chr(10)
  LengthOfLineBreak = 1
CompilerEndIf

SourceFilePath = ProgramParameter()

If Len(SourceFilePath) = 0
  ; No command-line argument: process the clipboard text in place.
  InputText = GetClipboardText()
  OutputText = RegExpSub(Pattern, InputText, "")
  SetClipboardText(OutputText)
Else
  ; Read the whole source file, one line at a time.
  If ReadFile(0, SourceFilePath) = 0
    MessageRequester("ContinueLines", "ERROR: Couldn't open " + Chr(34) + SourceFilePath + Chr(34) + " for reading! Quitting now.", #PB_MessageRequester_Ok)
    End
  EndIf
  LengthOfFile = Lof(0)
  InputText = ""
  While Eof(0) = 0
    InputText + ReadString(0) + LineBreak
  Wend
  CloseFile(0)

  ; If the rebuilt text is longer than the file was, shorten it by the
  ; length of one line break (intended to drop the break appended after
  ; the last line).
  If Len(InputText) > LengthOfFile
    InputText = RSet(InputText, Len(InputText) - LengthOfLineBreak)
  EndIf

  OutputText = RegExpSub(Pattern, InputText, "")

  ; Remove an underscore that sits directly before the file-name extension,
  ; if there is one; otherwise the output path equals the source path.
  OutputFilePath = RegExpSub("_(?=\.[^\.]+\z)", SourceFilePath, "")

  If CreateFile(0, OutputFilePath) = 0
    MessageRequester("ContinueLines", "ERROR: Couldn't open " + Chr(34) + OutputFilePath + Chr(34) + " for writing! Quitting now.", #PB_MessageRequester_Ok)
    End
  EndIf
  WriteString(0, OutputText)
  CloseFile(0)
EndIf

MessageRequester("ContinueLines", "ContinueLines is finished!")
End