In den Tags wird ein Platzhalter (*) unterstützt, für den im zu parsenden Text beliebig viele Zeichen stehen können. Als Beispiel dienen hier drei HTML-Tags für Titel, Bilder und Links auf einer Webseite.
Code: Alles auswählen
EnableExplicit

; Describes one kind of element to extract: the text between a start
; marker and an end marker. A marker may contain a single "*" placeholder.
Structure tagstruct
  begin.s ; start marker
  stop.s  ; end marker
EndStructure

; One extracted value together with the tag definition that produced it.
Structure parsedstruct
  tag.l  ; index of the matching entry in the linked list tag()
  wert.s ; value taken from the parsed text
EndStructure

Global NewList tag.tagstruct()       ; tag definitions to search for
Global NewList parsed.parsedstruct() ; extraction results

; Page title: the text between <title> and </title>.
AddElement(tag())
With tag()
  \begin = "<title>"
  \stop = "</title>"
EndWith

; Image source: the text between <img src=" and the closing ">.
AddElement(tag())
With tag()
  \begin = "<img src="+Chr(34)
  \stop = Chr(34)+">"
EndWith

; Link text: the text between an opening <a href=...> (any attribute
; value, hence the placeholder) and </a>.
AddElement(tag())
With tag()
  \begin = "<a href=*>"
  \stop = "</a>"
EndWith
; Selectors for the returnpart parameter of placeholder().
#CompletePart = 1
#StartingPart = 2
#EndingPart = 3

Procedure.s placeholder(str.s, cpltagb.s, returnpart.l)
  ; Resolves a tag pattern that may contain one "*" placeholder.
  ;
  ; str        : text to search, e.g. <a href="xyz">link</a>
  ; cpltagb    : complete tag pattern, e.g. <a href=*>
  ; returnpart : which part to return:
  ;   #CompletePart  the tag as it occurs in str, with the placeholder
  ;                  replaced by the characters found there,
  ;                  e.g. <a href="xyz">. Empty string if str contains
  ;                  no match.
  ;   #StartingPart  the part of the pattern left of the placeholder,
  ;                  e.g. <a href=
  ;   #EndingPart    the part of the pattern right of the placeholder,
  ;                  e.g. >
  ;
  ; A pattern without "*" is treated as a literal tag: the starting part
  ; is the whole pattern, the ending part is empty, and the complete part
  ; is the pattern itself if it occurs in str.
  ; Any other returnpart value yields an empty string.
  Protected starpos.l, pospb.l, pospe.l
  Protected prefix.s, suffix.s

  ; Split the pattern once instead of recomputing it in every branch.
  starpos = FindString(cpltagb, "*", 1)
  If starpos > 0
    prefix = Left(cpltagb, starpos - 1)
    suffix = Right(cpltagb, Len(cpltagb) - starpos)
  Else
    ; No placeholder: avoid Left() with a negative length.
    prefix = cpltagb
    suffix = ""
  EndIf

  Select returnpart
    Case #CompletePart
      If starpos = 0
        If FindString(str, cpltagb, 1) > 0
          ProcedureReturn cpltagb
        EndIf
      Else
        pospb = FindString(str, prefix, 1)
        If pospb > 0
          ; Search for the suffix only behind the prefix, so that it can
          ; never match characters that belong to the prefix itself.
          pospe = FindString(str, suffix, pospb + Len(prefix))
          If pospe > 0
            ProcedureReturn Mid(str, pospb, pospe - pospb + Len(suffix))
          EndIf
        EndIf
      EndIf
    Case #StartingPart
      ProcedureReturn prefix
    Case #EndingPart
      ProcedureReturn suffix
  EndSelect

  ProcedureReturn ""
EndProcedure
Procedure analyzestring(str.s)
  ; Extracts the values of all tags defined in the global list tag() from
  ; str and appends them to the global list parsed().
  ;
  ; str : text to parse (a local copy; matched elements are cut out of it
  ;       while parsing)
  ;
  ; For every tag definition, in list order, each occurrence is handled
  ; in turn: the text between start and end marker is stored in
  ; parsed()\wert together with the index of the tag definition, printed
  ; with Debug, and the whole element (start marker, value, end marker)
  ; is removed from str. Results are therefore grouped by tag definition.
  ;
  ; The search for one tag stops as soon as no complete start/end pair is
  ; left. Every match shortens str, so the loop always terminates, even
  ; for text containing a start marker without a matching end marker.
  Protected posb.l, pose.l
  Protected tagbeg.s, tagend.s

  If Len(str) = 0
    ProcedureReturn
  EndIf

  ForEach tag()
    Repeat
      ; Markers containing a placeholder are resolved against the current
      ; text, e.g. <a href=*> becomes <a href="xyz">.
      If FindString(tag()\begin, "*", 1) > 0
        tagbeg = placeholder(str, tag()\begin, #CompletePart)
      Else
        tagbeg = tag()\begin
      EndIf
      If FindString(tag()\stop, "*", 1) > 0
        tagend = placeholder(str, tag()\stop, #CompletePart)
      Else
        tagend = tag()\stop
      EndIf

      posb = 0
      pose = 0
      ; An empty marker means the placeholder could not be resolved.
      If tagbeg <> "" And tagend <> ""
        posb = FindString(str, tagbeg, 1)
        If posb > 0
          ; Look for the end marker behind the start marker only.
          pose = FindString(str, tagend, posb + Len(tagbeg))
        EndIf
      EndIf

      If pose > 0
        AddElement(parsed())
        parsed()\tag = ListIndex(tag())
        parsed()\wert = Mid(str, posb + Len(tagbeg), pose - posb - Len(tagbeg))
        ; Cut the complete element out of the text so that the next
        ; iteration finds the following occurrence.
        str = Left(str, posb - 1) + Mid(str, pose + Len(tagend), Len(str))
        Debug parsed()\wert
      EndIf
    Until pose = 0
  Next
EndProcedure
; Read the sample file into one string and parse it.
; ReadString() returns each line without its line break, so the lines are
; joined directly.
Define buff.s = ""
; ReadFile() opens the file read-only and returns 0 on failure; the result
; is checked before the file is read from.
If ReadFile(0, "testhtml.html")
  While Eof(0) = 0
    buff = buff + ReadString(0)
  Wend
  CloseFile(0)
  analyzestring(buff)
Else
  Debug "Could not open testhtml.html"
EndIf
Code: Alles auswählen
<!-- Sample input for the tag parser: one page title, six links and five images. -->
<html>
<head>
<title>Testdocument</title>
</head>
<body>
<a href="testlinka.html">Testlink1</a>
<a href="testlinkgfd.html">Testlink2</a>
<img src="bild1.jpg">
<img src="bild2.bmp">
<img src="bild3.png">
<a href="testlinkvbx.html">Testlink3</a>
<a href="testlinktre.html">Testlink4</a>
<a href="test.html">Testlink5</a>
<a href="testlinklangerhtmldateiname.html">Testlink6</a>
<img src="bild3.png">
<img src="bild3.png">
</body>
</html>