It takes so long to read large xml files sometimes hours for large xml.
My data in xml file are predictable with similar data below.
Here's my code to convert xml to csv<element>
<more>more elements</more>
</element>
<element>
<other>other elements</other>
</element>
Code: Select all
Structure table
title.s
content.s
EndStructure
Global NewList table_title.table()
Global NewList table_title_record.table()
Global first_time = #True
Global win_main
Global Text_0, str_xmlpath, btn_xmlpath, Text_0_Copy1, str_exportpath, btn_savepath, btn_convert, prg_conversion, txt_bytes, txt_records
Global total_record = 0
Procedure get_id(table_title.s)
ForEach table_title()
If table_title()\title = table_title
ProcedureReturn ListIndex(table_title())
EndIf
Next
ProcedureReturn -1
EndProcedure
Procedure get_id_record(table_title.s)
ForEach table_title_record()
If table_title_record()\title = table_title
ProcedureReturn ListIndex(table_title_record())
EndIf
Next
ProcedureReturn -1
EndProcedure
Procedure AddNode(*Node,Position)
Static count,*lastnode,*lastchild
count+1
If position=1
*lastnode=*node
EndIf
text.s=GetXMLNodeText(*node)
text.s=Trim(ReplaceString(text.s,#CR$,"")) ; filter carriage return
text.s=Trim(ReplaceString(text.s,#LF$,"")) ; filter line feed
text.s=Trim(ReplaceString(text.s," ","")) ; filter tab
Select XMLNodeType(*node)
Case #PB_XML_Root
; do nothing
Case #PB_XML_Comment
; do nothing
Case #PB_XML_CData
; do nothing
Default
name.s=GetXMLNodeName(*Node)
If ExamineXMLAttributes(*Node)
While NextXMLAttribute(*Node)
name.s + " "+XMLAttributeName(*node)+"="+XMLAttributeValue(*node)+" "
Wend
EndIf
If get_id(name) = -1
AddElement(table_title())
table_title()\title = name
table_title()\content = text
Else
If first_time = #True
CopyList(table_title(), table_title_record())
ForEach table_title_record()
out.s + table_title_record()\title + ","
Next
Debug out
WriteStringN(0, out)
out = ""
first_time = #False
EndIf
first = #True
ForEach table_title()
If first = #True
length = get_id_record(table_title()\title)
out.s = RSet("", length, ",")
first = #False
EndIf
If table_title()\content <> ""
out.s + Chr(34) + table_title()\content + Chr(34) + ","
Else
out.s + table_title()\content + ","
EndIf
Next
Debug out
total_record + 1
WriteStringN(0, out)
SetGadgetText(txt_records, "Total Records: " + Str(total_record))
ClearList(table_title())
AddElement(table_title())
table_title()\title = name
table_title()\content = text
EndIf
EndSelect
If XMLChildCount(*Node)
For child=1 To XMLChildCount(*Node)
*child=ChildXMLNode(*Node,child)
AddNode(*child,position+1)
Next
EndIf
ProcedureReturn *lastnode
EndProcedure
Procedure LoadXMLStream(filename.s,blocksize)
If filename.s
If ReadFile(0,filename.s)
length=Lof(0) ; get the length of the opened file
SetGadgetAttribute(prg_conversion, #PB_ProgressBar_Maximum, length)
*buffer=AllocateMemory(blocksize)
If *buffer ; if the memory buffer is alloced
xml=CatchXML(#PB_Any,*buffer,0,#PB_XML_StreamStart) ; begin the xml stream
Repeat ; repeat until the depth read into the file = the file length
If length<(depth+blocksize) ; if the filesize is < depth+blocksize
buffer=length-depth ; set the buffer to the total file size
depth+buffer ; depth is the amount of data read into the file
Else
buffer=blocksize ; otherwise set it to the blocksize
depth+blocksize
EndIf
; Debug "buffer: "+Str(depth)+" "+Str(length)
ReadData(0,*buffer,buffer) ; read the buffer size of data from the xml file into a memory block
CatchXML(xml,*buffer,buffer,#PB_XML_StreamNext)
SetGadgetState(prg_conversion, depth)
SetGadgetText(txt_bytes, "Bytes Read: " + Str(depth) + "/" + Str(length) )
Until length=depth
CatchXML(xml,*buffer,0,#PB_XML_StreamEnd)
EndIf ; endif *memory
CloseFile(0)
EndIf
EndIf
ProcedureReturn xml
EndProcedure
Procedure Openwin_main(x = 0, y = 0, width = 550, height = 130)
win_main = OpenWindow(#PB_Any, x, y, width, height, "XML to CSV Converter", #PB_Window_SystemMenu | #PB_Window_ScreenCentered)
Text_0 = TextGadget(#PB_Any, 15, 10, 90, 15, "XML Path")
str_xmlpath = StringGadget(#PB_Any, 110, 5, 400, 20, "")
btn_xmlpath = ButtonGadget(#PB_Any, 515, 5, 30, 20, "...")
Text_0_Copy1 = TextGadget(#PB_Any, 15, 30, 90, 15, "CSV Export Path")
str_exportpath = StringGadget(#PB_Any, 110, 30, 400, 20, "")
btn_savepath = ButtonGadget(#PB_Any, 515, 30, 30, 20, "...")
btn_convert = ButtonGadget(#PB_Any, 425, 55, 120, 70, "Convert XML to CSV")
prg_conversion = ProgressBarGadget(#PB_Any, 5, 55, 415, 15, 0, 0)
txt_bytes = TextGadget(#PB_Any, 5, 75, 415, 15, "Bytes Read: 0")
txt_records = TextGadget(#PB_Any, 5, 90, 415, 15, "Total Records: 0")
EndProcedure
Procedure thread_run(dummy=0)
Protected filename.s
Protected save_path.s
DisableGadget(btn_convert, 1)
filename = GetGadgetText(str_xmlpath)
save_path = GetGadgetText(str_exportpath)
If FileSize(filename) > -1
xml=LoadXMLStream(filename,102400)
CreateFile(0, save_path + GetFilePart(filename, #PB_FileSystem_NoExtension) + ".csv")
If Not XMLStatus(xml)
AddNode(MainXMLNode(xml),0)
EndIf
CloseFile(0)
;FormatXML(#XML,#PB_XML_ReFormat)
;Debug SaveXML(#XML,"xml_test.xml")
Else
MessageRequester("Error", "XML file is missing.")
EndIf
DisableGadget(btn_convert, 0)
EndProcedure
Procedure win_main_Events(event)
Select event
Case #PB_Event_CloseWindow
ProcedureReturn #False
Case #PB_Event_Menu
Select EventMenu()
EndSelect
Case #PB_Event_Gadget
Select EventGadget()
Case btn_convert
SetGadgetState(prg_conversion, 0)
SetGadgetText(txt_bytes, "Bytes Read: 0")
SetGadgetText(txt_records, "Total Records: 0")
CreateThread(@thread_run(),0)
Case btn_xmlpath
path.s = OpenFileRequester("Select XML File", "", "XML File (*.xml)", 0)
If path <> ""
SetGadgetText(str_xmlpath, path)
SetGadgetText(str_exportpath, GetPathPart(path))
EndIf
Case btn_savepath
path.s = PathRequester("Select export folder", "")
If path <> ""
SetGadgetText(str_exportpath, path)
EndIf
EndSelect
EndSelect
ProcedureReturn #True
EndProcedure
Openwin_main()
Repeat
Event = WaitWindowEvent()
stay = win_main_Events(Event)
Until stay = #False