findInString(), ignore string literals and comments
Posted: Sun Jul 15, 2018 3:13 pm
A procedure that searches for a substring in a string which might be a line of PureBasic code, so it has options to ignore occurrences inside string literals and comments.
For example the string "IncludeFile" will by default be found only in the last of the following 3 lines:
It does not handle case-insensitivity or word boundaries, but I don't want to create a jack-of-all-trades device (if that's proper English).
For example the string "IncludeFile" will by default be found only in the last of the following 3 lines:
Code: Select all
; IncludeFile "MyFile"
myString.s = ~"IncludeFile \"MyFile\""
IncludeFile "MyFile"
Code: Select all
EnableExplicit
; Public interface of module "common".
DeclareModule common
EnableExplicit
; findInString(): search the null-terminated string at *inStr for the null-terminated
; string at *findStr and return the 0-based character index of the first accepted
; match, or -1 if there is none (also -1 for null pointers or an empty *findStr).
;   searchInStrings  - #False (default): occurrences inside '..', ".." and ~".." literals are ignored
;   searchInComments - #False (default): the search stops at the first ';' that starts a comment
Declare.i findInString(*inStr.Character, *findStr.Character, searchInStrings = #False, searchInComments = #False)
EndDeclareModule
Module common
  EnableExplicit

  ; findInString()
  ;
  ; Searches the null-terminated string at *inStr for the null-terminated string at
  ; *findStr, treating *inStr as a line of PureBasic code.
  ;
  ; Parameters:
  ;   *inStr            - string to search in (may be 0)
  ;   *findStr          - string to search for (may be 0)
  ;   searchInStrings   - #False: a match may not start inside a string literal
  ;                       ('..', ".." or ~".."), and not on its opening or closing delimiter.
  ;                       #True : string content and delimiters are searched like any other text.
  ;   searchInComments  - #False: the search ends (returning -1) at the first ';' that is
  ;                       outside of a string literal.
  ;                       #True : the comment text, including the ';' itself, is searched too.
  ;
  ; Returns:
  ;   0-based character index of the first accepted match, or -1 if nothing was found,
  ;   if a pointer is 0, or if *findStr is empty.
  ;
  ; Notes:
  ;   - String literals are always tracked, independent of searchInStrings, so that a ';'
  ;     inside a literal is never mistaken for the start of a comment.
  ;   - Only the start position of a match is checked against string/comment state; the
  ;     character-by-character comparison itself does not look at that state.
  ;   - The comparison is case-sensitive and does not consider word boundaries.
  Procedure.i findInString(*inStr.Character, *findStr.Character, searchInStrings = #False, searchInComments = #False)
    #_DQ  = '"' ; "
    #_SQ  = 39  ; '
    #_BSL = '\' ; \

    Protected inStringDQ        ; inside "x"
    Protected inStringSQ        ; inside 'x'
    Protected inStringDQEsc     ; inside ~"x"
    Protected inString          ; inside any of the above
    Protected inStringPrevious  ; value of inString before the current character was processed
    Protected inComment         ; at or behind the ';' that starts a comment
    Protected i                 ; 0-based index of the current character
    Protected *c.Character      ; current character
    Protected *cBefore.Character; character before *c (0 at the first character)
    Protected *cTmp.Character
    Protected *cFind.Character
    Protected cntBSL            ; number of backslashes directly in front of a double quote

    If *inStr And *findStr And *findStr\c <> 0
      *c = *inStr
      *cBefore = 0
      i = 0

      While *c\c <> 0
        inStringPrevious = inString

        ; --- track string literals ------------------------------------------------
        ; This runs regardless of searchInStrings: the comment detection below needs
        ; to know whether a ';' is inside a literal.
        If Not inString
          If Not inComment
            If *c\c = #_SQ
              ; opening '..
              inString = #True
              inStringSQ = #True
            ElseIf *c\c = #_DQ
              ; opening ".. or ~"..
              inString = #True
              If *cBefore And *cBefore\c = '~'
                inStringDQEsc = #True ; escape string ~"..
              Else
                inStringDQ = #True    ; normal string "..
              EndIf
            EndIf
          EndIf
        ElseIf inStringDQEsc
          If *c\c = #_DQ
            ; A double quote inside ~".." is escaped by a preceding backslash, and a
            ; backslash can itself be escaped by a backslash. So the quote is escaped
            ; only if it is preceded by an odd number of consecutive backslashes.
            cntBSL = 0
            *cTmp = *c - SizeOf(Character)
            While (*cTmp >= *inStr) And (*cTmp\c = #_BSL)
              cntBSL + 1
              *cTmp - SizeOf(Character)
            Wend
            If cntBSL % 2 = 0
              ; even number of backslashes: quote is not escaped, literal ends here
              inStringDQEsc = #False
              inString = #False
            EndIf
          EndIf
        ElseIf inStringDQ
          If *c\c = #_DQ
            inStringDQ = #False
            inString = #False
          EndIf
        ElseIf inStringSQ
          If *c\c = #_SQ
            inStringSQ = #False
            inString = #False
          EndIf
        EndIf

        ; --- detect start of comment ----------------------------------------------
        If Not inString
          If *c\c = ';'
            inComment = #True
            If Not searchInComments
              ; comment reached and comments are not searched: nothing more to find
              ProcedureReturn -1
            EndIf
          EndIf
        EndIf

        ; --- try to match at the current position ---------------------------------
        If ((Not inString) Or (searchInStrings)) And ((Not inComment) Or (searchInComments))
          If *c\c = *findStr\c
            ; If findStr starts with " or ' and strings are not searched, a closing
            ; delimiter must not match. inString is already reset at this point, so
            ; the closing delimiter is recognized by inStringPrevious still being set.
            If ((*findStr\c = #_DQ) Or (*findStr\c = #_SQ)) And (Not inString And inStringPrevious And Not searchInStrings)
              ; (ignore)
            Else
              ; compare the remaining characters
              *cTmp = *c
              *cFind = *findStr
              While (*cTmp\c = *cFind\c) And (*cTmp\c <> 0)
                *cTmp + SizeOf(Character)
                *cFind + SizeOf(Character)
              Wend
              If *cFind\c = 0
                ; end of findStr reached, all characters matched
                ProcedureReturn i
              EndIf
            EndIf
          EndIf
        EndIf

        *cBefore = *c
        *c + SizeOf(Character)
        i + 1
      Wend
    EndIf

    ProcedureReturn -1
  EndProcedure
EndModule
CompilerIf #PB_Compiler_IsMainFile
; DQ: macro whose body is a single double-quote character. In the code shown here it is
; referenced only by the commented-out DebuggerError line inside the assert macro.
Macro DQ
"
EndMacro
; assert(exp1, exp2): if the two expressions are not equal, print both values to the
; debug output and raise a debugger error that names the current source line.
; Being a macro, exp1 and exp2 are pasted textually, so each is evaluated more than
; once (in the comparison and again in the Debug statements).
Macro assert(exp1, exp2)
If Not ((exp1) = (exp2))
Debug exp1
Debug exp2
DebuggerError("assert failed at line " + Str(#PB_Compiler_Line))
;DebuggerError(DQ#exp1#DQ + " = " + Str(exp1))
EndIf
EndMacro
; Self-test for common::findInString(). Every assert compares the returned value with
; the expected 0-based position, or with -1 for "not found".
; Argument order: (inStr, findStr, searchInStrings, searchInComments).
If 1 ;{
Define s.s = ""
Define p
; --- null / empty input: always -1
s = #Null$ : assert(common::findInString(@ s, @ ";"), -1)
s = "" : assert(common::findInString(@ s, @ ";"), -1)
s = "" : assert(common::findInString(@ s, @ ""), -1)
s = "a" : assert(common::findInString(@ s, @ ""), -1)
; if not searching in comments then even the comment opener ';' won't be found
s = ";" : assert(common::findInString(@ s, @ ";"), -1)
; --- searching in comments: the ';' itself and the comment text can be found
s = "abc ; comment" : assert(common::findInString(@ s, @ ";", #True, #True), 4)
s = "abc ; comment" : assert(common::findInString(@ s, @ "com", #True, #True), 6)
s = "abc ; comment and ; more comment" : assert(common::findInString(@ s, @ ";", #True, #True), 4)
s = ";" : assert(common::findInString(@ s, @ ";", #True, #True), 0)
; --- null pointers and empty find string with both options enabled
s = "" : assert(common::findInString( 0, @ "", #True, #True), -1)
s = "" : assert(common::findInString(@ s, 0, #True, #True), -1)
s = "" : assert(common::findInString( 0, 0, #True, #True), -1)
s = "" : assert(common::findInString(@ s, @ "", #True, #True), -1)
; --- all four option combinations on a line with a '..' literal and a comment
s = " 'x' ; x" : assert(common::findInString(@ s, @ "x", #False, #False), -1)
s = " 'x' ; x" : assert(common::findInString(@ s, @ "x", #False, #True), 7)
s = " 'x' ; x" : assert(common::findInString(@ s, @ "x", #True, #True), 2)
s = " 'x' ; x" : assert(common::findInString(@ s, @ "x", #True, #False), 2)
; --- quoted text behind the ';' belongs to the comment, not to a string literal
s = " y ; 'x' " : assert(common::findInString(@ s, @ "x", #True, #False), -1)
s = ~" y ; \"x\" " : assert(common::findInString(@ s, @ "x", #True, #False), -1)
; if not searching in strings then even a string opener won't be found
s = ~"\"" : assert(common::findInString(@ s, @ ~"\"", #False), -1)
s = ~"\"" : assert(common::findInString(@ s, @ ~"\"", #True), 0)
; if not searching in strings then a string closing delimiter won't be found
s = ~"\"abc\" + str" : assert(common::findInString(@ s, @ ~"\""), -1)
; except if searching and occurring in comments
s = ~"\"abc\" + str ; \"xyz\"" : assert(common::findInString(@ s, @ ~"\"", #False, #True), 14)
; --- 1: ';' inside a ".." literal versus the ';' that starts the comment
Debug "##################### 1"
s = ~"abc + \"xyz;123\" ; comment"
Debug s
p = common::findInString(@ s, @";", #False, #True)
Debug p
assert(p, 16)
Debug Mid(s, p+1) ; (mid is 1-based, not 0-based)
p = common::findInString(@ s, @";", #True, #True)
Debug p
assert(p, 10)
Debug Mid(s, p+1) ; (mid is 1-based, not 0-based)
Debug ""
; --- 2: ';' inside a '..' literal
Debug "##################### 2"
s = "abc + ';' + xyz ; comment"
Debug s
p = common::findInString(@ s, @";", #False, #True)
Debug p
assert(p, 16)
Debug Mid(s, p+1) ; (mid is 1-based, not 0-based)
Debug ""
p = common::findInString(@ s, @";", #True, #True)
Debug p
assert(p, 7)
Debug Mid(s, p+1) ; (mid is 1-based, not 0-based)
Debug ""
; --- 3: single quotes nested inside a ".." literal
Debug "##################### 3"
s = ~"abc + \"';'\" + xyz ; comment"
Debug s
p = common::findInString(@ s, @";", #False, #True)
Debug p
assert(p, 18)
Debug Mid(s, p+1) ; (mid is 1-based, not 0-based)
Debug ""
p = common::findInString(@ s, @";", #True, #True)
Debug p
assert(p, 8)
Debug Mid(s, p+1) ; (mid is 1-based, not 0-based)
Debug ""
; --- 4: escape string ~".." containing backslash-escaped double quotes and single quotes
Debug "##################### 4"
s = ~"abc + ~\"\\\"';'\\\"\" + xyz ; comment"
Debug s
p = common::findInString(@ s, @";", #False, #True)
Debug p
assert(p, 23)
Debug Mid(s, p+1) ; (mid is 1-based, not 0-based)
Debug ""
p = common::findInString(@ s, @";", #True, #True)
Debug p
assert(p, 11)
Debug Mid(s, p+1) ; (mid is 1-based, not 0-based)
Debug ""
; --- 5: escape string ~".." containing backslash-escaped double quotes around the ';'
Debug "##################### 5"
s = ~"abc + ~\"\\\";\\\"\" + xyz ; comment"
Debug s
p = common::findInString(@ s, @";", #False, #True)
Debug p
assert(p, 21)
Debug Mid(s, p+1) ; (mid is 1-based, not 0-based)
Debug ""
p = common::findInString(@ s, @";", #True, #True)
Debug p
assert(p, 10)
Debug Mid(s, p+1) ; (mid is 1-based, not 0-based)
Debug ""
; --- 6: '..' literal at the very start of the line, comment containing another ';'
Debug "##################### 6"
s = ~"';' ; comment ; more comment"
Debug s
p = common::findInString(@ s, @";", #False, #True)
Debug p
assert(p, 4)
Debug Mid(s, p+1) ; (mid is 1-based, not 0-based)
Debug ""
p = common::findInString(@ s, @";", #True, #True)
Debug p
assert(p, 1)
Debug Mid(s, p+1) ; (mid is 1-based, not 0-based)
Debug ""
; --- 7: search text occurs only inside a comment, default options
Debug "##################### 7"
s = ~"; IncludeFile \"MyFile\""
p = common::findInString(@ s, @ "IncludeFile")
Debug s ; ; IncludeFile "MyFile"
Debug p ; -1
assert(p, -1)
; --- 8: search text occurs only inside an escape string literal, default options
Debug "##################### 8"
s = ~"myString.s = ~\"IncludeFile \\\"MyFile\\\"\""
p = common::findInString(@ s, @ "IncludeFile")
Debug s ; myString.s = ~"IncludeFile \"MyFile\""
Debug p ; -1
assert(p, -1)
; --- 9: search text occurs as plain code, default options
Debug "##################### 9"
s = ~"IncludeFile \"MyFile\""
p = common::findInString(@ s, @ "IncludeFile")
Debug s ; IncludeFile "MyFile"
Debug p ; 0
assert(p, 0)
If 1
; --- 10: read this source file line by line and report every line in which "x" is
; found with the default options; nothing is asserted here, output only
Debug "##################### 10"
; find x in this file outside of strings and comments
If ReadFile(0, #PB_Compiler_File)
; return value (detected string format) is not used here
; NOTE(review): presumably called to skip a byte order mark at the file start - confirm
ReadStringFormat(0)
While Not Eof(0)
Define line.s = ReadString(0)
Define pos = common::findInString(@line, @"x")
If pos >= 0
Debug "found x at pos " + Str(pos) + " in line: " + line
EndIf
Wend
CloseFile(0)
Else
Debug "can't read file: " + #PB_Compiler_File
EndIf
EndIf
;}
EndIf
CompilerEndIf
;