; Code: Select all  (forum paste header, not part of the program)
EnableExplicit
; Searches an HTML text for <a ... href=...> ... </a> elements and extracts the
; URL of each one.
;   Links$() : array that receives the result; on return, elements
;              0 .. (return value - 1) hold one URL per anchor found, in
;              document order. An anchor whose href value is not enclosed in
;              double quotes yields an empty string.
;   HTML$    : any HTML text
; RETURN the number of anchors found (0 if there are none or a regular
;        expression could not be created). When 0 is returned the contents of
;        Links$() should not be relied upon.
; Limitations: matching is regex based, not a real HTML parser. "." does not
; match line breaks unless #PB_RegularExpression_DotAll is used, so anchors that
; span several lines are not found, and only double-quoted href values are read.
Procedure ExtractLinks (Array Links$(1), HTML$)
  ; Pass 1: collect every complete <a ...href=...>...</a> element into Links$().
  Protected anchorExp = CreateRegularExpression(#PB_Any, ~"<a.*?href=.*?<\\/a>", #PB_RegularExpression_NoCase)
  If anchorExp = 0
    ProcedureReturn 0
  EndIf
  Protected count = ExtractRegularExpression(anchorExp, HTML$, Links$())
  FreeRegularExpression(anchorExp)

  ; Report the real match count. Deriving it from ArraySize() + 1 cannot yield 0
  ; for an allocated array, so "no links" would be reported as at least one.
  If count <= 0
    ProcedureReturn 0
  EndIf

  ; Pass 2: reduce each stored anchor element to the text between the first pair
  ; of double quotes that follows "href=".
  Protected hrefExp = CreateRegularExpression(#PB_Any, ~"href=.*?\\\".*?\\\"", #PB_RegularExpression_NoCase)
  If hrefExp = 0
    ProcedureReturn 0
  EndIf

  Protected Dim T$(0)
  Protected i
  For i = count - 1 To 0 Step -1 ; same last-to-first order as before, keeps Debug output order
    If ExtractRegularExpression(hrefExp, Links$(i), T$()) > 0
      Links$(i) = StringField(T$(0), 2, #DOUBLEQUOTE$) ; field 2 = text between the first two quotes
    Else
      ; No double-quoted href in this anchor: do not reuse the URL left in T$(0)
      ; by a previously processed anchor.
      Links$(i) = ""
    EndIf
    Debug Links$(i)
  Next
  FreeRegularExpression(hrefExp)

  ProcedureReturn count
EndProcedure
;; Usage example: run ExtractLinks() on a single anchor and show how many links it reports
Dim Out$(0) ; receives the extracted URLs
Define HTML$ = ~"<A pff \"attributes\" before href=\"./ucp.php?mode=logout&sid=abcdfe\"><img src=\"./styles/subsilver2/theme/images/icon_mini_login.gif\" width=\"12\" height=\"13\" alt=\"*\" /> Logout [ ]</a>"
Debug "Links found: " + Str(ExtractLinks(Out$(), HTML$))