Page 1 of 1

Mork Parser

Posted: Tue Jan 03, 2017 4:59 pm
by xakep
The mork format is used by Mozilla Firefox/Thunderbird/SeaMonkey for store URL history, form history, bookmarks and address book.

My implementation use no regex(due to low speed and no real portability).

Code: Select all

;Mozilla Mork Basic Parser
;Version: 0.1.2
;Date: 04.01.2017
;Coded by: xakep

;Changelog:
;03.01.2017 - first cut 
;04.01.2017 - fixed an bug with reinitialization of dictionaryes
;04.01.2017 - latter tonight:
;+fixed an bug with parsing integer values
;+fixed an bug with parsing multiple rows 

EnableExplicit

Enumeration
  #MOZ_FILE
EndEnumeration

Structure Dictionary
  Key.s
  Value.s
EndStructure

Structure XML_TABLE
  Map Rows.s()
EndStructure

Global Dim cDict.Dictionary(0) ;Column Dictionary
Global Dim aDict.Dictionary(0) ;Atom Dictionary
Global Dim mTable.XML_TABLE(0) ;XML-style output

Global DictSize.l
Global iCountTables.l

Procedure.l GetList_Between(String.s, Left.s, Right.s, List inList.s())
  Define LenString.l, LeftMarker.l, RightMarker.l, NewString.s, LeftL.l, MaxLoops.l, cLoop.l
  
  MaxLoops = 999 ;Just to be safe
  
  Repeat
    cLoop + 1
    
    If NewString
      String = NewString
    EndIf
    
    LenString = Len(String)
    LeftL = Len(Left)
    
    If LenString And LeftL
    
      LeftMarker = FindString(String, Left)
    
      If LeftMarker>0
      
        RightMarker = FindString(String, Right, LeftMarker+1)
      
        If RightMarker>0

          If AddElement(inList()) <> 0
            inList() = Mid(String, LeftMarker+LeftL, (RightMarker-LeftMarker)-LeftL)
          Else
            NewString = ""
          EndIf
          
          NewString = Right(String, LenString - RightMarker)
          
          If Len(NewString) < (LeftL * 2) ;To be extra safe =)
            NewString = ""
          EndIf

          Else
            NewString = ""
        EndIf
      Else 
        NewString = ""
      EndIf
    Else
      NewString = ""
    EndIf

  Until NewString = "" Or cLoop => MaxLoops 
 
EndProcedure


Procedure explodeStringArray(Array a.s(1), xString.s, sDelimeter.s)
  Define count, i
  count = CountString(xString, sDelimeter)
  If count
    
    count + 1
    Dim a.s(count - 1)
    
    For i = 1 To count
      a(i - 1) = StringField(xString, i, sDelimeter)
    Next
  Else
    FreeArray(a())
  EndIf

EndProcedure

Procedure.s ArrayFindString(sString.s,  Array inArray.Dictionary(1))
  Define i.i, iFound.i
  
  For i = 0 To ArraySize(inArray())
    If inArray(i)\Key = sString 
      iFound = i
      Break
    EndIf
  Next
  
  If iFound > 0
    ProcedureReturn inArray(iFound)\Value
  Else
    ProcedureReturn sString
  EndIf
EndProcedure

Procedure Mork_Save_Cell(nTable.l, Key.s, Value.s, integerRecord.l)
  Define nKey.s, nValue.s, cSize.l
  
  If Key
    
    nKey = ArrayFindString(Key, cDict())
    
    If nKey
      
      If integerRecord = 0
        nValue = ArrayFindString(Value, aDict())
      Else
        nValue = Value
      EndIf
      
      If nValue
        If nTable > 0
            ReDim mTable(nTable)
        EndIf
        mTable(nTable)\Rows(nKey) = nValue
      EndIf
    
    
    EndIf
  EndIf
EndProcedure

Procedure Mork_ParseRows(WholeTable.s)
  Define Key.s, myCells.s, NewResults.s, pRow.s, RowsSize.l, CellsSize.l, Step1.l, Step2.l, Plus.l
  
  NewList pRow.s()
  
  GetList_Between(WholeTable, "[", "]", pRow()) ;Get Rows  regex: (-?)\s*\[(.+?)((\(.+?\)\s*)*)\]
  
  RowsSize = ListSize(pRow())
  
  If RowsSize > 0
    
    ResetList(pRow())
    While NextElement(pRow())
      
      NewList NewResults.s()
        
      GetList_Between(pRow(), "(", ")", NewResults()) ;Get Cells Text (A1=18) regex: (\(.+?\))
      CellsSize = ListSize(NewResults())
      
      If CellsSize > 0
        Dim explode.s(0)
        Dim Explode2.s(0)
        
        ResetList(NewResults())
        Plus = 0
        While NextElement(NewResults())

            explodeStringArray(explode(), NewResults(), "^")
            ;Parse string records
            Step1 = ArraySize(explode())
            
            If Step1 > -1
              
              If Step1 = 2
                myCells = explode(2)
                If myCells
                  Key = explode(1)
                  If Key
                    Plus = 1
                    
                    Mork_Save_Cell(iCountTables, Key, myCells, 0)
                  EndIf
                EndIf
              Else
                explodeStringArray(Explode2(), explode(1), "=")
                ;Parse integer records
                
                Step2 = ArraySize(Explode2())
                
                If Step2 > -1
                  If Step2 = 1
                    myCells = Explode2(1)
                  
                    If myCells
                      Key = Explode2(0)
                      If Key
                        ;Plus = 1

                        Mork_Save_Cell(iCountTables, Key, myCells, 1)
                      EndIf
                    EndIf
                  
                  EndIf
                EndIf
                Step2 = 0
              EndIf
              Step1 = 0
            EndIf
          Wend
          
          myCells = "" : Key = ""
          
          iCountTables + Plus
          Plus = 0 : CellsSize = 0
          FreeArray(explode())
          FreeArray(Explode2())
          FreeList(NewResults())
        EndIf
      Wend
    
    FreeList(pRow())
    RowsSize = 0
  EndIf

EndProcedure

Procedure Mork_Parse_Dictionary(WholeDictionary.s, Array OutArray.Dictionary(1))
  Define i.i, Key.s, Cells.s, Dict.s, dSizeNow.l, explode.s, Minus2.l, rI.l
  
  NewList Dict.s()
  GetList_Between(WholeDictionary, "(", ")", Dict())
  
  dSizeNow = ListSize(Dict())
  
  If dSizeNow > 0
  
    Dim explode.s(0)
    ReDim OutArray(DictSize + dSizeNow)
    
      rI = 0
      ResetList(Dict())
      While NextElement(Dict())
        
        explodeStringArray(explode(), Dict(), "=")
        
        If ArraySize(explode()) = 1

          Key = explode(0)
          Cells = explode(1)
          
          If Len(Key) = 2
            OutArray(DictSize + rI)\Key = Key
            OutArray(DictSize + rI)\Value = Cells
            
            rI + 1
          EndIf
            
        EndIf
      Wend
    
    DictSize = DictSize + rI
    
    FreeArray(explode())
    FreeList(Dict())
  EndIf
  
EndProcedure

Procedure.i Mork_Finish()
  Define i.i
  
  If ArraySize(cDict()) > -1
    FreeArray(cDict())
  EndIf
  
  If ArraySize(aDict()) > -1
    FreeArray(aDict())
  EndIf
  
  If ArraySize(mTable()) > -1
    For i = 0 To ArraySize(mTable())
      If MapSize(mTable(i)\Rows()) > 0
        FreeMap(mTable(i)\Rows())
      EndIf
    Next

    FreeArray(mTable())
  EndIf  
  
  DictSize = 0 : iCountTables = 0
EndProcedure


Procedure.l Mork_Parse(sString.s)
  Define EndParse.i, MaxPos.i, aResult.s, NbResults, i.i, SizeNow.i, Atom.s, Column.s, Table.s
  
  If sString
    Dim cDict.Dictionary(0)
    Dim aDict.Dictionary(0)
    Dim mTable.XML_TABLE(0)
    
    sString = ReplaceString(sString, "  ", "")
    
    ;Parse column dictionary
    NewList Column.s()
    
    GetList_Between(sString, "(f=iso-8859-1)", ">", Column()) ;regex: <\s*<\(a=c\)>\s*(?:\/\/)?\s*(\(.+?\))\s*>
    
    If ListSize(Column()) > 0
      DictSize = 0
      
      ResetList(Column())
      
      While NextElement(Column())
        Mork_Parse_Dictionary(Column(), cDict())
      Wend
      
      FreeList(Column())
    Else
      Mork_Finish()
      ProcedureReturn #False
    EndIf
    
    ;Parse atom dictionary
    NewList Atom.s()
    
    GetList_Between(sString, "<(", ")>", Atom())
    
    If ListSize(Atom()) > 0
      DictSize = 0
      ResetList(Atom())
      
      While NextElement(Atom())
        Mork_Parse_Dictionary("(" + Atom() + ")", aDict())
      Wend
      
      FreeList(Atom())
    Else
      Mork_Finish()
      ProcedureReturn #False
    EndIf
    
   
    DictSize = 0
    SortStructuredArray(cDict(), #PB_Sort_Ascending, OffsetOf(Dictionary\Key), TypeOf(Dictionary\Key))
    SortStructuredArray(aDict(), #PB_Sort_Ascending, OffsetOf(Dictionary\Key), TypeOf(Dictionary\Key))
    
    ;Parse a table. regex: \{-?(\d+):\^(..)\s*\{\(k\^(..):c\)\(s=9u?\)\s*(.*?)\}\s*(.+?)\}
    NewList Table.s()
    
    sString = ReplaceString(sString, "(f=iso-8859-1)", "")
    
    GetList_Between(sString, ">{", "]}", Table())
    
    If ListSize(Table()) > 0
      ResetList(Table())
      
      While NextElement(Table())
        Mork_ParseRows("{" + Table() + "]}")
      Wend
      
      FreeList(Table())
    Else
      Mork_Finish()
      ProcedureReturn #False
    EndIf
    
    NewList Table.s()
    GetList_Between(sString, "@{", "]}", Table()) ;Process extra tables

    If ListSize(Table()) > 0
      ResetList(Table())
     
      While NextElement(Table())
        Mork_ParseRows("{" + Table() + "]}")
      Wend
     
      FreeList(Table())
    EndIf
    
  Else
    Mork_Finish()
    ProcedureReturn #False
  EndIf
  
  ProcedureReturn #True
EndProcedure

Procedure.s Mork_Open(sFile.s)
  Define MorkHeader.s, HeaderL.l, *ReadMem, oString.s
    
  If OpenFile(#MOZ_FILE, sFile, #PB_File_SharedRead)
    
    FileSeek(#MOZ_FILE, 0)
    
    MorkHeader = "// <!-- <mdb:mork:z v=" + #DQUOTE$ + "1.4" + #DQUOTE$ + "/> -->"
    HeaderL = Len(MorkHeader)
    
    *ReadMem = AllocateMemory(HeaderL + SizeOf(Character))
    
    If *ReadMem
      ReadData(#MOZ_FILE, *ReadMem, HeaderL)
      
      If PeekS(*ReadMem, -1, #PB_Ascii) <> MorkHeader
        CloseFile(#MOZ_FILE)
        FreeMemory(*ReadMem)
        ProcedureReturn ""
      EndIf
      
      FreeMemory(*ReadMem)
    Else
      ProcedureReturn ""
    EndIf
    
    While Eof(#MOZ_FILE) = #False
        oString + ReadString(#MOZ_FILE, #PB_Ascii)
    Wend
      
    MorkHeader = "" : HeaderL = 0
    
    ProcedureReturn oString
  Else
    ProcedureReturn ""
  EndIf
  
EndProcedure

Procedure.l Mork_Close()
  CloseFile(#MOZ_FILE)
  DictSize = 0 : iCountTables = 0
EndProcedure


Example of usage:

Code: Select all

;Get thunderbird address book
Procedure.s GetSpecialFolder(csidl.l)
  Define ret1.l, Length.l, *Buffer, rString.s
  
  Length = #MAX_PATH
  *Buffer = AllocateMemory(Length + SizeOf(Character))
  
  If *Buffer
    
    If SHGetSpecialFolderPath_(0, *Buffer, csidl, 0)
      rString = PeekS(*Buffer)
    EndIf 
    
    FreeMemory(*Buffer)
  EndIf
  
    ProcedureReturn rString
  EndProcedure
  
Define i.i, myMork.s, Rows_Now.s, tSize.l, Mork_File.s
  
Mork_File = GetSpecialFolder(#CSIDL_APPDATA) + "\Thunderbird\Profiles\YOURPROFILE\abook.mab"
  
If FileSize(Mork_File) <> -1
  myMork = Mork_Open(Mork_File)
  If myMork

    If Mork_Parse(myMork)
      tSize = ArraySize(mTable())
     
      If tSize > -1
        For i = 0 To tSize
          If MapSize(mTable(i)\Rows()) > 0
            If FindMapElement(mTable(i)\Rows(), "PrimaryEmail")
              Rows_Now = Rows_Now + mTable(i)\Rows("PrimaryEmail") + "," + mTable(i)\Rows("DisplayName") + #CRLF$
            EndIf
          EndIf
        Next
      EndIf

      Debug Rows_Now
      
      Mork_Finish()
    EndIf
  
    Mork_Close()
    myMork = ""
  EndIf
EndIf
Keep in mind this is my first module, don't be too harsh on me =)

Please report bugs there.

Re: Mork Parser

Posted: Tue Jan 03, 2017 6:35 pm
by uweb
Greate!
Even so a small hint:

Code: Select all

Define i.i, myMork.s, Rows_Now.s, tSize.l

myMork = Mork_Open(OpenFileRequester(">>>>>>>>   i.e. \Thunderbird\Profiles\random-name.default  OR  ...random-name.profil-name  OR  \ThunderbirdPortable\Data\profile", "D:\ThunderbirdPortable\Data\profile\abook.mab", "thunderbird address (abook.mab)", 0))

If myMork
  
  If Mork_Parse(myMork)
    tSize = ArraySize(mTable())
    
    If tSize > -1
      For i = 0 To tSize
        If MapSize(mTable(i)\Rows()) > 0
          If FindMapElement(mTable(i)\Rows(), "PrimaryEmail")
            Rows_Now = Rows_Now + mTable(i)\Rows("PrimaryEmail") + "," + mTable(i)\Rows("DisplayName") + #CRLF$
          EndIf
        EndIf
      Next
    EndIf
    
    Debug Rows_Now
    
    Mork_Finish()
  EndIf
  
  Mork_Close()
  myMork = ""
EndIf

Re: Mork Parser

Posted: Tue Jan 03, 2017 8:41 pm
by Mistrel
xakep wrote:My implementation use no regex(due to low speed and no real portability).
PureBasic's regex library links against PCRE which is an open source implementation of the Perl regular expression. It's both widely compatible with other languages and is a standard for many languages.

I wouldn't worry about portability and the flexibility of expressions will make future maintenance much easier than trying to evaluate branching logic. Less is not always more.

Re: Mork Parser

Posted: Wed Jan 04, 2017 12:21 am
by xakep
Mistrel wrote:
xakep wrote:My implementation use no regex(due to low speed and no real portability).
PureBasic's regex library links against PCRE which is an open source implementation of the Perl regular expression. It's both widely compatible with other languages and is a standard for many languages.

I wouldn't worry about portability and the flexibility of expressions will make future maintenance much easier than trying to evaluate branching logic. Less is not always more.
In the case of mork format, the structure won't be changed, as the developing of this format is over.
So in mork case, getting strings between two keywords is alot more fast than regex: \{-?(\d+):\^(..)\s*\{\(k\^(..):c\)\(s=9u?\)\s*(.*?)\}\s*(.+?)\}
Also, regex will add +100kb to the size of binary, for who care.

Ofcourse you are perfectly right about other formats.

@uweb
Thanks for the tip.

LE: Fixed an bug with the reinitialization of dictionaryes.

LE2: latter tonight:
+fixed an bug with parsing integer values
+fixed an bug with parsing multiple rows