Seite 1 von 1

libXML-TextReader mit PB

Verfasst: 02.12.2005 16:21
von Kiffi
Tach,

nachfolgend möchte ich kurz erläutern, wie man von PB aus auf die
XmlTextReader-Funktionen der libXML zugreifen kann.

Was ist libXML?

libXML ist ein XML-Parser in Form einer in C geschriebenen DLL und
zeichnet sich durch seine extreme Schnelligkeit aus. Mit dem eingebauten
XmlTextReader ist man in der Lage, vorwärts durch eine XML-Datei zu
browsen. Dabei meldet der XmlTextReader jedes Element, das gefunden
wird. Diese Art der XML-Verarbeitung hat den Vorteil, dass beliebig grosse
XML-Dateien verarbeitet werden können.

Was brauche ich dafür?

Zunächst braucht man die libXML2.DLL. Diese DLL benötigt noch die
iconv.dll und die zlib.dll.

Diese drei Zips (libxml2, iconv und zlib; zu finden unter
http://www.zlatkovic.com/pub/libxml/) bitte herunterladen und entpacken.
Die darin enthaltenen DLLs in den System32-Ordner kopieren.

iconv.dll und zlib.dll sind beispielsweise auch im GTK enthalten. Wer das
GTK (beispielsweise für GIMP) installiert hat, braucht also nur die
libXML2-DLL herunterzuladen.

Danach kann man folgenden Code laden und ausführen:

(bitte den Pfad zur XML-Datei anpassen!)

Code: Alles auswählen


; Node type codes that the demo receives from xmlTextReaderNodeType() and
; translates to text in XmlNodeType(). The values run from 0 to 17 without gaps.
#XmlNodeType_None                  =  0
#XmlNodeType_Element               =  1
#XmlNodeType_Attribute             =  2
#XmlNodeType_Text                  =  3
#XmlNodeType_CDATA                 =  4
#XmlNodeType_EntityReference       =  5
#XmlNodeType_Entity                =  6
#XmlNodeType_ProcessingInstruction =  7
#XmlNodeType_Comment               =  8
#XmlNodeType_Document              =  9
#XmlNodeType_DocumentType          = 10
#XmlNodeType_DocumentFragment      = 11
#XmlNodeType_Notation              = 12
#XmlNodeType_Whitespace            = 13
#XmlNodeType_SignificantWhitespace = 14
#XmlNodeType_EndElement            = 15
#XmlNodeType_EndEntity             = 16
#XmlNodeType_XmlDeclaration        = 17

; Addresses of the libxml2.dll entry points. libxml2_Init() stores the result
; of GetProcAddress_() in them, and the wrapper procedure of the same name
; hands the address to CallCFunctionFast().

; Reader life cycle
Global xmlReaderForFile.l, xmlTextReaderRead.l, xmlTextReaderClose.l

; Information about the current node
Global xmlTextReaderNodeType.l, xmlTextReaderDepth.l, xmlTextReaderByteConsumed.l
Global xmlTextReaderConstName.l, xmlTextReaderConstValue.l
Global xmlTextReaderIsEmptyElement.l, xmlTextReaderHasValue.l

; Attribute access
Global xmlTextReaderHasAttributes.l, xmlTextReaderAttributeCount.l
Global xmlTextReaderGetAttribute.l, xmlTextReaderGetAttributeNo.l

; Loads libxml2.dll and stores the address of every XmlTextReader entry point
; in the global variable of the same name.
;
; Returns the library handle when the DLL was loaded and all entry points were
; found, otherwise 0. If only some entry points are missing, the DLL stays
; loaded so that libxml2_End() can still release it.
Procedure libxml2_Init()
  Shared DLL.l
  Protected Complete.l
  
  DLL = LoadLibrary_("libxml2.dll")
  If DLL = 0
    ; DLL (or one of the DLLs it depends on) not found
    ProcedureReturn 0
  EndIf
  
  xmlReaderForFile = GetProcAddress_(DLL, "xmlReaderForFile")
  xmlTextReaderRead = GetProcAddress_(DLL, "xmlTextReaderRead")
  xmlTextReaderConstName = GetProcAddress_(DLL, "xmlTextReaderConstName")
  xmlTextReaderConstValue = GetProcAddress_(DLL, "xmlTextReaderConstValue")
  xmlTextReaderDepth = GetProcAddress_(DLL, "xmlTextReaderDepth")
  xmlTextReaderNodeType = GetProcAddress_(DLL, "xmlTextReaderNodeType")
  xmlTextReaderIsEmptyElement = GetProcAddress_(DLL, "xmlTextReaderIsEmptyElement")
  xmlTextReaderHasValue = GetProcAddress_(DLL, "xmlTextReaderHasValue")
  xmlTextReaderClose = GetProcAddress_(DLL, "xmlTextReaderClose")
  xmlTextReaderByteConsumed = GetProcAddress_(DLL, "xmlTextReaderByteConsumed")
  xmlTextReaderHasAttributes = GetProcAddress_(DLL, "xmlTextReaderHasAttributes")
  xmlTextReaderAttributeCount = GetProcAddress_(DLL, "xmlTextReaderAttributeCount")
  xmlTextReaderGetAttribute = GetProcAddress_(DLL, "xmlTextReaderGetAttribute")
  xmlTextReaderGetAttributeNo = GetProcAddress_(DLL, "xmlTextReaderGetAttributeNo")
  
  ; GetProcAddress_() yields 0 for an export that does not exist, e.g. when an
  ; older libxml2.dll is installed. Report that instead of failing later.
  Complete = 1
  If xmlReaderForFile = 0 Or xmlTextReaderRead = 0 Or xmlTextReaderClose = 0
    Complete = 0
  EndIf
  If xmlTextReaderConstName = 0 Or xmlTextReaderConstValue = 0 Or xmlTextReaderDepth = 0
    Complete = 0
  EndIf
  If xmlTextReaderNodeType = 0 Or xmlTextReaderIsEmptyElement = 0 Or xmlTextReaderHasValue = 0
    Complete = 0
  EndIf
  If xmlTextReaderByteConsumed = 0 Or xmlTextReaderHasAttributes = 0 Or xmlTextReaderAttributeCount = 0
    Complete = 0
  EndIf
  If xmlTextReaderGetAttribute = 0 Or xmlTextReaderGetAttributeNo = 0
    Complete = 0
  EndIf
  
  If Complete
    ProcedureReturn DLL
  EndIf
  ProcedureReturn 0
EndProcedure

; Calls xmlReaderForFile() in libxml2.dll and returns the reader handle it
; delivers (0 on failure).
;   a - pointer to the file name (the demo passes @XML$)
;   b, c - passed through unchanged (the demo passes 0 for both)
; Returns 0 without calling anything when the entry point was not resolved,
; because CallCFunctionFast() must not be given address 0.
Procedure.l xmlReaderForFile(a.l, b.l, c.l)
  If xmlReaderForFile = 0
    ProcedureReturn 0
  EndIf
  ProcedureReturn CallCFunctionFast(xmlReaderForFile, a, b, c)
EndProcedure

; Calls xmlTextReaderRead() in libxml2.dll for reader handle a and returns its
; result (libxml2: 1 = node read, 0 = no more nodes, -1 = error).
; Returns -1 without calling anything when the entry point was not resolved,
; because CallCFunctionFast() must not be given address 0.
Procedure.l xmlTextReaderRead(a.l)
  If xmlTextReaderRead = 0
    ProcedureReturn -1
  EndIf
  ProcedureReturn CallCFunctionFast(xmlTextReaderRead, a)
EndProcedure

; Calls xmlTextReaderConstName() in libxml2.dll for reader handle a and returns
; the string pointer it delivers (the demo reads it with PeekS()).
; Returns 0 without calling anything when the entry point was not resolved,
; because CallCFunctionFast() must not be given address 0.
Procedure.l xmlTextReaderConstName(a.l)
  If xmlTextReaderConstName = 0
    ProcedureReturn 0
  EndIf
  ProcedureReturn CallCFunctionFast(xmlTextReaderConstName, a)
EndProcedure

; Calls xmlTextReaderConstValue() in libxml2.dll for reader handle a and returns
; the string pointer it delivers (the demo reads it with PeekS()).
; Returns 0 without calling anything when the entry point was not resolved,
; because CallCFunctionFast() must not be given address 0.
Procedure.l xmlTextReaderConstValue(a.l)
  If xmlTextReaderConstValue = 0
    ProcedureReturn 0
  EndIf
  ProcedureReturn CallCFunctionFast(xmlTextReaderConstValue, a)
EndProcedure

; Calls xmlTextReaderDepth() in libxml2.dll for reader handle a and returns its
; result (libxml2 reports -1 on error).
; Returns -1 without calling anything when the entry point was not resolved,
; because CallCFunctionFast() must not be given address 0.
Procedure.l xmlTextReaderDepth(a.l)
  If xmlTextReaderDepth = 0
    ProcedureReturn -1
  EndIf
  ProcedureReturn CallCFunctionFast(xmlTextReaderDepth, a)
EndProcedure

; Calls xmlTextReaderNodeType() in libxml2.dll for reader handle a and returns
; its result; the demo maps that value to text with XmlNodeType().
; Returns -1 (libxml2's error value) without calling anything when the entry
; point was not resolved, because CallCFunctionFast() must not be given address 0.
Procedure.l xmlTextReaderNodeType(a.l)
  If xmlTextReaderNodeType = 0
    ProcedureReturn -1
  EndIf
  ProcedureReturn CallCFunctionFast(xmlTextReaderNodeType, a)
EndProcedure

; Calls xmlTextReaderIsEmptyElement() in libxml2.dll for reader handle a and
; returns its result (libxml2: 1 = empty, 0 = not empty, -1 = error).
; Returns -1 without calling anything when the entry point was not resolved,
; because CallCFunctionFast() must not be given address 0.
Procedure.l xmlTextReaderIsEmptyElement(a.l)
  If xmlTextReaderIsEmptyElement = 0
    ProcedureReturn -1
  EndIf
  ProcedureReturn CallCFunctionFast(xmlTextReaderIsEmptyElement, a)
EndProcedure

; Calls xmlTextReaderHasValue() in libxml2.dll for reader handle a and returns
; its result (libxml2: 1 = has a value, 0 = no value, -1 = error).
; Returns -1 without calling anything when the entry point was not resolved,
; because CallCFunctionFast() must not be given address 0.
Procedure.l xmlTextReaderHasValue(a.l)
  If xmlTextReaderHasValue = 0
    ProcedureReturn -1
  EndIf
  ProcedureReturn CallCFunctionFast(xmlTextReaderHasValue, a)
EndProcedure

; Calls xmlTextReaderClose() in libxml2.dll for reader handle a and returns its
; result (libxml2: 0 = ok, -1 = error).
; Returns -1 without calling anything when the entry point was not resolved,
; because CallCFunctionFast() must not be given address 0.
Procedure.l xmlTextReaderClose(a.l)
  If xmlTextReaderClose = 0
    ProcedureReturn -1
  EndIf
  ProcedureReturn CallCFunctionFast(xmlTextReaderClose, a)
EndProcedure

; Calls xmlTextReaderByteConsumed() in libxml2.dll for reader handle a and
; returns its result (libxml2 reports -1 on error).
; Returns -1 without calling anything when the entry point was not resolved,
; because CallCFunctionFast() must not be given address 0.
Procedure.l xmlTextReaderByteConsumed(a.l)
  If xmlTextReaderByteConsumed = 0
    ProcedureReturn -1
  EndIf
  ProcedureReturn CallCFunctionFast(xmlTextReaderByteConsumed, a)
EndProcedure

; Calls xmlTextReaderHasAttributes() in libxml2.dll for reader handle a and
; returns its result (libxml2: 1 = has attributes, 0 = none, -1 = error).
; Returns -1 without calling anything when the entry point was not resolved,
; because CallCFunctionFast() must not be given address 0.
Procedure.l xmlTextReaderHasAttributes(a.l)
  If xmlTextReaderHasAttributes = 0
    ProcedureReturn -1
  EndIf
  ProcedureReturn CallCFunctionFast(xmlTextReaderHasAttributes, a)
EndProcedure

; Calls xmlTextReaderAttributeCount() in libxml2.dll for reader handle a and
; returns its result (libxml2 reports -1 on error).
; Returns -1 without calling anything when the entry point was not resolved,
; because CallCFunctionFast() must not be given address 0.
Procedure.l xmlTextReaderAttributeCount(a.l)
  If xmlTextReaderAttributeCount = 0
    ProcedureReturn -1
  EndIf
  ProcedureReturn CallCFunctionFast(xmlTextReaderAttributeCount, a)
EndProcedure

; Calls xmlTextReaderGetAttribute() in libxml2.dll and returns the string
; pointer it delivers (0 when there is no such attribute).
;   a - reader handle
;   b - pointer to the attribute name (the demo passes @"this")
; Returns 0 without calling anything when the entry point was not resolved,
; because CallCFunctionFast() must not be given address 0.
; NOTE: according to the libxml2 documentation the returned string has to be
; deallocated by the caller.
Procedure.l xmlTextReaderGetAttribute(a.l, b.l)
  If xmlTextReaderGetAttribute = 0
    ProcedureReturn 0
  EndIf
  ProcedureReturn CallCFunctionFast(xmlTextReaderGetAttribute, a, b)
EndProcedure

; Calls xmlTextReaderGetAttributeNo() in libxml2.dll and returns the string
; pointer it delivers (0 on failure).
;   a - reader handle
;   b - zero-based attribute index (the demo counts from 0 to AttributeCount - 1)
; Returns 0 without calling anything when the entry point was not resolved,
; because CallCFunctionFast() must not be given address 0.
; NOTE: according to the libxml2 documentation the returned string has to be
; deallocated by the caller.
Procedure.l xmlTextReaderGetAttributeNo(a.l, b.l)
  If xmlTextReaderGetAttributeNo = 0
    ProcedureReturn 0
  EndIf
  ProcedureReturn CallCFunctionFast(xmlTextReaderGetAttributeNo, a, b)
EndProcedure

; Unloads the library that libxml2_Init() loaded. Does nothing when no library
; is loaded, and clears the shared handle afterwards so that a second call
; cannot free the same handle twice.
Procedure libxml2_End()
  Shared DLL.l
  If DLL
    FreeLibrary_(DLL)
    DLL = 0
  EndIf
EndProcedure

; Translates a numeric node type into the name of the matching
; #XmlNodeType_ constant (without the leading '#').
; Any value that matches none of the constants yields "Unknown XmlNodeType".
Procedure.s XmlNodeType(NodeType.l)
  Protected TypeName.s
  
  ; Cases are listed in ascending numeric order of the constants
  Select NodeType
    Case #XmlNodeType_None
      TypeName = "XmlNodeType_None"
    Case #XmlNodeType_Element
      TypeName = "XmlNodeType_Element"
    Case #XmlNodeType_Attribute
      TypeName = "XmlNodeType_Attribute"
    Case #XmlNodeType_Text
      TypeName = "XmlNodeType_Text"
    Case #XmlNodeType_CDATA
      TypeName = "XmlNodeType_CDATA"
    Case #XmlNodeType_EntityReference
      TypeName = "XmlNodeType_EntityReference"
    Case #XmlNodeType_Entity
      TypeName = "XmlNodeType_Entity"
    Case #XmlNodeType_ProcessingInstruction
      TypeName = "XmlNodeType_ProcessingInstruction"
    Case #XmlNodeType_Comment
      TypeName = "XmlNodeType_Comment"
    Case #XmlNodeType_Document
      TypeName = "XmlNodeType_Document"
    Case #XmlNodeType_DocumentType
      TypeName = "XmlNodeType_DocumentType"
    Case #XmlNodeType_DocumentFragment
      TypeName = "XmlNodeType_DocumentFragment"
    Case #XmlNodeType_Notation
      TypeName = "XmlNodeType_Notation"
    Case #XmlNodeType_Whitespace
      TypeName = "XmlNodeType_Whitespace"
    Case #XmlNodeType_SignificantWhitespace
      TypeName = "XmlNodeType_SignificantWhitespace"
    Case #XmlNodeType_EndElement
      TypeName = "XmlNodeType_EndElement"
    Case #XmlNodeType_EndEntity
      TypeName = "XmlNodeType_EndEntity"
    Case #XmlNodeType_XmlDeclaration
      TypeName = "XmlNodeType_XmlDeclaration"
    Default
      TypeName = "Unknown XmlNodeType"
  EndSelect
  
  ProcedureReturn TypeName
EndProcedure

; Demo: walks through an XML file node by node and prints everything the
; reader reports about each node to the debug window.

libxml2_Init()

XML$ = "c:\test1.xml" ; Adjust this path!

Z1 = ElapsedMilliseconds()

; Only call into the DLL when its entry point was actually resolved;
; otherwise myXmlTextReader stays 0 and the loop below is skipped.
If xmlReaderForFile
  myXmlTextReader = xmlReaderForFile(@XML$, 0, 0 )
Else
  Debug "libxml2.dll could not be loaded"
EndIf

If myXmlTextReader
  
  ; xmlTextReaderRead() returns 1 for every node, 0 at the end of the document
  ; and -1 on a parse error. Testing for "= 1" ends the loop in both cases;
  ; a bare truth test would keep looping on -1.
  While xmlTextReaderRead(myXmlTextReader) = 1
    
    ElementCounter + 1
    
    ReturnValue = xmlTextReaderNodeType(myXmlTextReader)
    Debug "NodeType: " + XmlNodeType(ReturnValue)
    
    ; The string functions return 0 when nothing is available;
    ; PeekS() must not be called on address 0.
    ReturnValue = xmlTextReaderConstName(myXmlTextReader)
    If ReturnValue
      Debug "Name: " + PeekS(ReturnValue)
    EndIf
    
    ; The predicates return -1 on error, so test explicitly for 1
    If xmlTextReaderIsEmptyElement(myXmlTextReader) = 1
      
      Debug "Element is empty"
      
    ElseIf xmlTextReaderHasValue(myXmlTextReader) = 1
      
      ReturnValue = xmlTextReaderConstValue(myXmlTextReader)
      If ReturnValue
        Debug "Value: " + PeekS(ReturnValue)
      EndIf
      
    EndIf
    
    If xmlTextReaderHasAttributes(myXmlTextReader) = 1
      
      ; NOTE: libxml2 documents that the strings returned by
      ; xmlTextReaderGetAttribute() and xmlTextReaderGetAttributeNo() must be
      ; deallocated by the caller. No free function is imported in this
      ; listing, so they leak here - acceptable for a demo, not for big files.
      
      ; receive attributes by number
      AttributeCount = xmlTextReaderAttributeCount(myXmlTextReader)
      Debug "AttributeCount: " + Str(AttributeCount)
      For lCounter = 0 To AttributeCount - 1
        ReturnValue = xmlTextReaderGetAttributeNo(myXmlTextReader, lCounter)
        If ReturnValue
          Debug "Attribute by number: " + PeekS(ReturnValue)
        EndIf
      Next
      
      ; receive attributes by name
      ReturnValue = xmlTextReaderGetAttribute(myXmlTextReader, @"this")
      If ReturnValue
        Debug "Attribute by name: " + PeekS(ReturnValue)
      EndIf
      
    EndIf
    
    ReturnValue = xmlTextReaderDepth(myXmlTextReader)
    Debug "Depth: " + Str(ReturnValue)
    
    Debug "BytesConsumed: " + Str(xmlTextReaderByteConsumed(myXmlTextReader))
    
    Debug "-------------------------"
    
  Wend
  
  ; NOTE: xmlTextReaderClose() closes the input; libxml2 provides
  ; xmlFreeTextReader() to deallocate the reader itself (not imported here).
  xmlTextReaderClose(myXmlTextReader)
  
EndIf
  
libxml2_End()

Z2 = ElapsedMilliseconds()

Debug "Elapsed Milliseconds: " + Str(Z2-Z1) 
Debug "Counted Elements: " + Str(ElementCounter)

Have fun ... Kiffi

// Edit:

hier noch mal ein Test-Xml:

Code: Alles auswählen

<nodes>
<node this="is an attribute">This is just a simple XML-File</node>
<node><![CDATA[This is just a simple XML-File]]></node>
<!-- This is a comment -->
</nodes>

Verfasst: 26.05.2006 13:46
von mar
Super funktioniert!

Ich hatte es etwas anders versucht, aber bin gescheitert.

Vielen Dank!!!!!!!

mar :D

Verfasst: 26.05.2006 15:30
von MVXA
HTTP Status 404 - /pub/libxml/libxml2-2.6.20.win32.zip
:(

Verfasst: 26.05.2006 15:44
von Kiffi
MVXA hat geschrieben:HTTP Status 404 - /pub/libxml/libxml2-2.6.20.win32.zip
:(
nicht traurig sein, sondern lieber mal eine Ebene höher suchen:

http://www.zlatkovic.com/pub/libxml/

Grüße ... Kiffi

Verfasst: 26.05.2006 15:49
von MVXA
Danke, werde ich sofort testen ;).

Verfasst: 26.05.2006 16:24
von winduff
super arbeit kiffi :allright:

Darf ich dir die Schuhe putzen? :mrgreen:

Verfasst: 09.06.2006 13:47
von Kiffi
> Darf ich dir die Schuhe putzen? :mrgreen:

mh, mein Auto müsste mal wieder gewaschen werden... ;-)

OK, hier noch mal der Code von oben in der
ImportC-Variante (ist neu ab PureBasic 4).

Code: Alles auswählen

; Node type codes that the demo receives from xmlTextReaderNodeType() and
; translates to text in XmlNodeType(). The values run from 0 to 17 without gaps.
#XmlNodeType_None                  =  0
#XmlNodeType_Element               =  1
#XmlNodeType_Attribute             =  2
#XmlNodeType_Text                  =  3
#XmlNodeType_CDATA                 =  4
#XmlNodeType_EntityReference       =  5
#XmlNodeType_Entity                =  6
#XmlNodeType_ProcessingInstruction =  7
#XmlNodeType_Comment               =  8
#XmlNodeType_Document              =  9
#XmlNodeType_DocumentType          = 10
#XmlNodeType_DocumentFragment      = 11
#XmlNodeType_Notation              = 12
#XmlNodeType_Whitespace            = 13
#XmlNodeType_SignificantWhitespace = 14
#XmlNodeType_EndElement            = 15
#XmlNodeType_EndEntity             = 16
#XmlNodeType_XmlDeclaration        = 17

; Binds the XmlTextReader functions of libxml2 through its import library.
;
; libxml2 exports its functions with the C (cdecl) calling convention, so the
; block has to be ImportC. Plain Import declares the functions as stdcall on
; 32-bit Windows, in which case nobody removes the arguments from the stack
; after a call.
;
; In every function, a is the reader handle returned by xmlReaderForFile()
; (for xmlReaderForFile() itself it is the pointer to the file name).
; b is a pointer to the attribute name for xmlTextReaderGetAttribute() and a
; zero-based index for xmlTextReaderGetAttributeNo().
ImportC "libxml2\lib\libxml2.lib" ; Adjust the path if necessary
  
  xmlReaderForFile(a.l, b.l, c.l)       As "_xmlReaderForFile"
  xmlTextReaderRead(a.l)                As "_xmlTextReaderRead"
  xmlTextReaderConstName(a.l)           As "_xmlTextReaderConstName"
  xmlTextReaderConstValue(a.l)          As "_xmlTextReaderConstValue"
  xmlTextReaderDepth(a.l)               As "_xmlTextReaderDepth"
  xmlTextReaderNodeType(a.l)            As "_xmlTextReaderNodeType"
  xmlTextReaderIsEmptyElement(a.l)      As "_xmlTextReaderIsEmptyElement"
  xmlTextReaderHasValue(a.l)            As "_xmlTextReaderHasValue"
  xmlTextReaderClose(a.l)               As "_xmlTextReaderClose"
  xmlTextReaderByteConsumed(a.l)        As "_xmlTextReaderByteConsumed"
  xmlTextReaderHasAttributes(a.l)       As "_xmlTextReaderHasAttributes"
  xmlTextReaderAttributeCount(a.l)      As "_xmlTextReaderAttributeCount"
  xmlTextReaderGetAttribute(a.l, b.l)   As "_xmlTextReaderGetAttribute"
  xmlTextReaderGetAttributeNo(a.l, b.l) As "_xmlTextReaderGetAttributeNo"
  
EndImport

; Translates a numeric node type into the name of the matching
; #XmlNodeType_ constant (without the leading '#').
; Any value that matches none of the constants yields "Unknown XmlNodeType".
Procedure.s XmlNodeType(NodeType.l)
  Protected TypeName.s
  
  ; Cases are listed in ascending numeric order of the constants
  Select NodeType
    Case #XmlNodeType_None
      TypeName = "XmlNodeType_None"
    Case #XmlNodeType_Element
      TypeName = "XmlNodeType_Element"
    Case #XmlNodeType_Attribute
      TypeName = "XmlNodeType_Attribute"
    Case #XmlNodeType_Text
      TypeName = "XmlNodeType_Text"
    Case #XmlNodeType_CDATA
      TypeName = "XmlNodeType_CDATA"
    Case #XmlNodeType_EntityReference
      TypeName = "XmlNodeType_EntityReference"
    Case #XmlNodeType_Entity
      TypeName = "XmlNodeType_Entity"
    Case #XmlNodeType_ProcessingInstruction
      TypeName = "XmlNodeType_ProcessingInstruction"
    Case #XmlNodeType_Comment
      TypeName = "XmlNodeType_Comment"
    Case #XmlNodeType_Document
      TypeName = "XmlNodeType_Document"
    Case #XmlNodeType_DocumentType
      TypeName = "XmlNodeType_DocumentType"
    Case #XmlNodeType_DocumentFragment
      TypeName = "XmlNodeType_DocumentFragment"
    Case #XmlNodeType_Notation
      TypeName = "XmlNodeType_Notation"
    Case #XmlNodeType_Whitespace
      TypeName = "XmlNodeType_Whitespace"
    Case #XmlNodeType_SignificantWhitespace
      TypeName = "XmlNodeType_SignificantWhitespace"
    Case #XmlNodeType_EndElement
      TypeName = "XmlNodeType_EndElement"
    Case #XmlNodeType_EndEntity
      TypeName = "XmlNodeType_EndEntity"
    Case #XmlNodeType_XmlDeclaration
      TypeName = "XmlNodeType_XmlDeclaration"
    Default
      TypeName = "Unknown XmlNodeType"
  EndSelect
  
  ProcedureReturn TypeName
EndProcedure

; Demo: walks through an XML file node by node and prints everything the
; reader reports about each node to the debug window.

XML$ = "c:\test1.xml" ; Adjust this path!

Z1 = ElapsedMilliseconds()

; NOTE(review): @XML$ and @"this" hand PureBasic's internal string buffer to
; libxml2; in a Unicode build that buffer is not a byte string - confirm the
; program is compiled in ASCII mode or convert the strings first.
myXmlTextReader = xmlReaderForFile(@XML$, 0, 0 )

If myXmlTextReader
  
  ; xmlTextReaderRead() returns 1 for every node, 0 at the end of the document
  ; and -1 on a parse error. Testing for "= 1" ends the loop in both cases;
  ; a bare truth test would keep looping on -1.
  While xmlTextReaderRead(myXmlTextReader) = 1
    
    ElementCounter + 1
    
    ReturnValue = xmlTextReaderNodeType(myXmlTextReader)
    Debug "NodeType: " + XmlNodeType(ReturnValue)
    
    ; The string functions return 0 when nothing is available; PeekS() must
    ; not be called on address 0. libxml2 delivers all strings as UTF-8,
    ; hence the #PB_UTF8 format flag.
    ReturnValue = xmlTextReaderConstName(myXmlTextReader)
    If ReturnValue
      Debug "Name: " + PeekS(ReturnValue, -1, #PB_UTF8)
    EndIf
    
    ; The predicates return -1 on error, so test explicitly for 1
    If xmlTextReaderIsEmptyElement(myXmlTextReader) = 1
      
      Debug "Element is empty"
      
    ElseIf xmlTextReaderHasValue(myXmlTextReader) = 1
      
      ReturnValue = xmlTextReaderConstValue(myXmlTextReader)
      If ReturnValue
        Debug "Value: " + PeekS(ReturnValue, -1, #PB_UTF8)
      EndIf
      
    EndIf
    
    If xmlTextReaderHasAttributes(myXmlTextReader) = 1
      
      ; NOTE: libxml2 documents that the strings returned by
      ; xmlTextReaderGetAttribute() and xmlTextReaderGetAttributeNo() must be
      ; deallocated by the caller. No free function is imported in this
      ; listing, so they leak here - acceptable for a demo, not for big files.
      
      ; receive attributes by number
      AttributeCount = xmlTextReaderAttributeCount(myXmlTextReader)
      Debug "AttributeCount: " + Str(AttributeCount)
      For lCounter = 0 To AttributeCount - 1
        ReturnValue = xmlTextReaderGetAttributeNo(myXmlTextReader, lCounter)
        If ReturnValue
          Debug "Attribute by number: " + PeekS(ReturnValue, -1, #PB_UTF8)
        EndIf
      Next
      
      ; receive attributes by name
      ReturnValue = xmlTextReaderGetAttribute(myXmlTextReader, @"this")
      If ReturnValue
        Debug "Attribute by name: " + PeekS(ReturnValue, -1, #PB_UTF8)
      EndIf
      
    EndIf
    
    ReturnValue = xmlTextReaderDepth(myXmlTextReader)
    Debug "Depth: " + Str(ReturnValue)
    
    Debug "BytesConsumed: " + Str(xmlTextReaderByteConsumed(myXmlTextReader))
    
    Debug "-------------------------"
    
  Wend
  
  ; NOTE: xmlTextReaderClose() closes the input; libxml2 provides
  ; xmlFreeTextReader() to deallocate the reader itself (not imported here).
  xmlTextReaderClose(myXmlTextReader)
  
EndIf
 
Z2 = ElapsedMilliseconds()

Debug "Elapsed Milliseconds: " + Str(Z2-Z1)
Debug "Counted Elements: " + Str(ElementCounter)
Grüße ... Kiffi