Convert a WhatsApp chat to an HTML file

Share your advanced PureBasic knowledge/code with the community.
Little John
Addict
Addict
Posts: 4791
Joined: Thu Jun 07, 2007 3:25 pm
Location: Berlin, Germany

Convert a WhatsApp chat to an HTML file

Post by Little John »

After I sent a WhatsApp chat by mail from my smartphone to my PC, I was surprised that it did not look nearly like the original one. This is mainly because text and pictures are located in different files, and the text is in a plain text file. Fortunately, the text file contains references to the pictures. So the following little program can convert the plain text file to an HTML file, which shows the chat in a way that is much closer to the original.

Have fun!

Code: Select all

; Version 0.75, 2014-03-12
; tested with PB 5.21 on Windows 7 and on Xubuntu 13.10
; Public domain -- use at your own risk.

; > After sending a WhatsApp chat by E-Mail to your PC, save
;   all files that are attached to the mail in one folder.
;   Then run this program, and process the text file in that folder.
;   The result will be an HTML file, which shows text and pictures
;   of the chat in the proper context and order.

; Note: Most smileys will be missing. This is because they are not
;       contained in the mail which is sent by WhatsApp. So it's
;       no fault of this program. :-)

EnableExplicit

#ProgTitle$ = "WhatsAppToHTML 0.75"


; Append a new element to the list _List_ and store _Item_ in it.
; The list has to be passed including its parentheses, e.g. NewElement(names$(), "x").
Macro NewElement (_List_, _Item_)
   AddElement(_List_)
   _List_ = _Item_
EndMacro

; Replace the characters that have a special meaning in HTML text
; ("&", "<" and ">") by their character entity references.
; _text_ must be a string variable; it is modified in place.
Macro EscapeHTML (_text_)
   _text_ = ReplaceString(_text_, "&", "&amp;")   ; replace this first! (otherwise the "&" of the entities inserted below would be escaped again)
   _text_ = ReplaceString(_text_, "<", "&lt;")
   _text_ = ReplaceString(_text_, ">", "&gt;")
EndMacro


; #LANG_GERMAN is referenced by GetUserLanguage() and LoadLanguage(),
; but this file defines it only when compiling for Linux.
; NOTE(review): on Windows the constant is presumably predefined among
; PureBasic's Windows API constants -- confirm. No other OS is handled here.
CompilerSelect #PB_Compiler_OS
   CompilerCase #PB_OS_Linux
      #LANG_GERMAN = 7        ; primary language identifier used on Windows
CompilerEndSelect

Procedure.i GetUserLanguage()
   ; -- determine the user interface language of the current user
   ; out: current primary language identifier (0 on error)
   ;
   ; Windows: the low 10 bits of the user's default language ID.
   ; Linux  : taken from the first two characters of the first non-empty
   ;          variable among LANGUAGE, LC_ALL, LC_MESSAGES and LANG.
   ;          Only "de" is recognized; anything else yields 0.
   
   CompilerSelect #PB_Compiler_OS
      CompilerCase #PB_OS_Windows
         ProcedureReturn GetUserDefaultLangID_() & $03FF
         
      CompilerCase #PB_OS_Linux
         Protected locale$
         
         ; LANGUAGE is optional and often not set at all,
         ; so fall back to the standard locale variables.
         locale$ = GetEnvironmentVariable("LANGUAGE")
         If locale$ = ""
            locale$ = GetEnvironmentVariable("LC_ALL")
         EndIf
         If locale$ = ""
            locale$ = GetEnvironmentVariable("LC_MESSAGES")
         EndIf
         If locale$ = ""
            locale$ = GetEnvironmentVariable("LANG")
         EndIf
         
         Select Left(locale$, 2)
            Case "de"
               ProcedureReturn #LANG_GERMAN
            Default
               ProcedureReturn 0             ; error
         EndSelect
   CompilerEndSelect
EndProcedure


; Indexes for the language array g_Lang$().
; Each constant selects one user interface text; the texts themselves
; are assigned in LoadLanguage().
Enumeration
   #Lng_Open_Text
   #Lng_Open_Pattern
   #Lng_Error
   #Lng_Err_TooMany
   #Lng_Err_CantOpen
   #Lng_Err_CantCreate
   #Lng_Terminated
EndEnumeration

; Global array that holds the user interface texts, indexed by the constants above.
; NOTE(review): the upper bound relies on #PB_Compiler_EnumerationValue holding the
; value following the last enumerated constant, so that "-1" is the highest index -- confirm.
Global Dim g_Lang$(#PB_Compiler_EnumerationValue-1)

Procedure LoadLanguage (lang.i)
   ; -- fill the global array g_Lang$() with the user interface texts
   ; in: lang: primary language identifier (e.g. as returned by GetUserLanguage());
   ;           #LANG_GERMAN selects the German texts,
   ;           any other value selects the English texts
   
   If lang = #LANG_GERMAN
      ; -- German:
      g_Lang$(#Lng_Terminated)     = "Program beendet."
      g_Lang$(#Lng_Err_CantCreate) = "Kann Datei nicht erzeugen: '"
      g_Lang$(#Lng_Err_CantOpen)   = "Kann Datei nicht öffnen: '"
      g_Lang$(#Lng_Err_TooMany)    = "Nur null oder ein Program-Parameter erlaubt."
      g_Lang$(#Lng_Error)          = "Fehler:"
      g_Lang$(#Lng_Open_Pattern)   = "Text (*.txt)|*.txt|Alle Dateien (*.*)|*.*"
      g_Lang$(#Lng_Open_Text)      = "Öffnen Sie eine Textdatei die einen exportierten WhatsApp-Chat enthält"
   Else
      ; -- English (fallback for all other languages):
      g_Lang$(#Lng_Terminated)     = "Program terminated."
      g_Lang$(#Lng_Err_CantCreate) = "Can't create file: '"
      g_Lang$(#Lng_Err_CantOpen)   = "Can't open file: '"
      g_Lang$(#Lng_Err_TooMany)    = "Only zero or one program parameter allowed."
      g_Lang$(#Lng_Error)          = "Error:"
      g_Lang$(#Lng_Open_Pattern)   = "Text (*.txt)|*.txt|All files (*.*)|*.*"
      g_Lang$(#Lng_Open_Text)      = "Open a text file that contains an exported WhatsApp chat"
   EndIf
EndProcedure


Procedure WriteHTMLhead (ofn.i, htmlTitle$, HTMLformat.i, HTMLcharset$, marginLeft.i=100, marginRight.i=100)
   ; -- write the document type declaration and the <head> section
   ;    (meta tag, title and style sheet) to the HTML file
   ; in: ofn         : number of output file
   ;     htmlTitle$  : title of created HTML file (plain text)
   ;     HTMLformat  : format of the output file (e.g. #PB_UTF8)
   ;     HTMLcharset$: character set of the created HTML file (e.g. "UTF-8")
   ;     marginLeft  : left  margin (pixels)
   ;     marginRight : right margin (pixels)
   Protected title$
   
   ; The title is plain text (the caller derives it from a file name),
   ; so pass it through EscapeHTML before putting it into the markup.
   title$ = htmlTitle$
   EscapeHTML(title$)
   
   WriteStringN(ofn, "<!DOCTYPE HTML PUBLIC " + #DQUOTE$ + 
                     "-//W3C//DTD HTML 4.01 Transitional//EN" + #DQUOTE$ + 
                     " " + #DQUOTE$ + "http://www.w3.org/TR/html4/loose.dtd" +
                     #DQUOTE$ + ">", HTMLformat)
   ; Every line has to be written in the same format, including this comment line.
   WriteStringN(ofn, "<!-- converted by " + #ProgTitle$ + " -->", HTMLformat)
   WriteStringN(ofn, "<html>", HTMLformat)
   WriteStringN(ofn, "<head>", HTMLformat)
   WriteStringN(ofn, "<meta http-equiv=" + #DQUOTE$ + "content-type" + 
                     #DQUOTE$ + " content=" + #DQUOTE$ + "text/html; charset=" +
                     HTMLcharset$ + #DQUOTE$ + ">", HTMLformat)
   WriteStringN(ofn, "<title>" + title$ + "</title>", HTMLformat)
   
   ; -- CSS
   WriteStringN(ofn, "<style type=" + #DQUOTE$ + "text/css" + #DQUOTE$ + ">", HTMLformat)
   WriteStringN(ofn, "body {font-family: Arial,Helvetica,sans-serif;", HTMLformat)
   WriteStringN(ofn, "      margin-left: " + Str(marginLeft) + "px; margin-right: " + 
                     Str(marginRight) + "px}", HTMLformat)
   WriteStringN(ofn, "p    {box-shadow: 0px 0px 5px #888}", HTMLformat)
   WriteStringN(ofn, "</style>", HTMLformat)
   
   ; The additional #CRLF$ produces an empty line after the head section.
   WriteStringN(ofn, "</head>" + #CRLF$, HTMLformat)
EndProcedure


Procedure.s WriteMessage (ofn.i, List line$(), HTMLformat.i)
   ; -- write current message to the HTML file as one <p> paragraph
   ; in : ofn       : number of output file
   ;      line$()   : message to write (one element per line of text);
   ;                  the first element is expected to begin with the
   ;                  message header, which ends at the first ":" after the first "-".
   ;                  The header is removed from that element by this procedure.
   ;      HTMLformat: format of the output file (e.g. #PB_UTF8)
   ; out: no meaningful return value (always an empty string)
   ;
   ; If the first line contains ".jpg", the message is written as an <img>
   ; tag referring to that file, and any further lines of the message are
   ; not written.
   Protected dash.i, colon.i, img.i, text$
   
   If FirstElement(line$()) = 0
      ProcedureReturn ""               ; empty list: there is no message to write
   EndIf
   
   ; first line: split off the header (everything up to and including the colon)
   dash = FindString(line$(), "-")
   colon = FindString(line$(), ":", dash)
   text$ = Left(line$(), colon)
   EscapeHTML(text$)                   ; the header contains the sender's name, i.e. arbitrary text
   WriteStringN(ofn, "<p>" + text$ + "<br>", HTMLformat)
   line$() = LTrim(Mid(line$(), colon+1))
   
   img = FindString(line$(), ".jpg")
   If img
      ; Left() ends at the "." found above, so "jpg" completes the file name
      text$ = Left(line$(), img) + "jpg"
      EscapeHTML(text$)
      text$ = ReplaceString(text$, #DQUOTE$, "&quot;")   ; the name is put into a quoted attribute value
      text$ = "<img src=" + #DQUOTE$ + text$ + #DQUOTE$ + " alt=" + #DQUOTE$ + #DQUOTE$ + ">"
      WriteStringN(ofn, text$ + "</p>", HTMLformat)
      
   Else   
      ; rest of the first line and all subsequent lines
      Repeat
         text$ = line$()
         EscapeHTML(text$)
         
         If NextElement(line$())
            WriteStringN(ofn, text$ + "<br>", HTMLformat)
         Else
            WriteStringN(ofn, text$ + "</p>", HTMLformat)   ; last line closes the paragraph
            Break
         EndIf  
      ForEver
   EndIf
EndProcedure


Procedure ConvertChat (infile$, chatDir$, HTMLformat, HTMLcharset$)
   ; -- convert one chat
   ; in: infile$     : file that contains the text of an exported WhatsApp chat (always UTF-8)
   ;     chatDir$    : directory that contains the exported WhatsApp chat
   ;     HTMLformat  : format of the output file (e.g. #PB_UTF8)
   ;     HTMLcharset$: character set of the created HTML file (e.g. "UTF-8")
   ;
   ; The output file is created in chatDir$, named like the input file but
   ; with the extension ".htm". If the input file cannot be opened or the
   ; output file cannot be created, a message is shown and the program ends.
   Protected.i ifn, ofn, dash, colon
   Protected outfile$, htmlTitle$, text$
   Protected NewList line$()
   
   htmlTitle$ = GetFilePart(infile$, #PB_FileSystem_NoExtension)
   outfile$ = chatDir$ + htmlTitle$ + ".htm"
   
   ifn = ReadFile(#PB_Any, infile$)
   If ifn = 0
      MessageRequester(#ProgTitle$,
                       g_Lang$(#Lng_Error) + #LF$ +
                       g_Lang$(#Lng_Err_CantOpen) + infile$ + "'." + #LF$ +
                       g_Lang$(#Lng_Terminated))
      End
   EndIf
   
   ; Skip a byte order mark, if there is one. Otherwise it would become
   ; part of the first line and thus show up in the first message.
   ReadStringFormat(ifn)
   
   ofn = CreateFile(#PB_Any, outfile$)
   If ofn = 0
      CloseFile(ifn)
      MessageRequester(#ProgTitle$,
                       g_Lang$(#Lng_Error) + #LF$ +
                       g_Lang$(#Lng_Err_CantCreate) + outfile$ + "'." + #LF$ +
                       g_Lang$(#Lng_Terminated))
      End
   EndIf
   
   WriteHTMLhead(ofn, htmlTitle$, HTMLformat, HTMLcharset$)
   WriteStringN(ofn, "<body>", HTMLformat)
   
   ; Collect the lines of one message in line$(). A line is taken as the
   ; begin of a new message if its first ":" is followed 4 characters later
   ; by a "-", and another ":" follows after that dash.
   While Not Eof(ifn)
      text$ = ReadString(ifn, #PB_UTF8)
      colon = FindString(text$, ":")
      If colon
         dash = FindString(text$, "-", colon)
         If dash = colon + 4
            colon = FindString(text$, ":", dash)
            If colon                              ; begin of a new message
               If ListSize(line$())
                  ; write the message collected so far
                  WriteMessage(ofn, line$(), HTMLformat)
                  WriteStringN(ofn, "", HTMLformat)
                  ClearList(line$())
               EndIf
            EndIf
         EndIf
      EndIf
      NewElement(line$(), text$)
   Wend
   
   ; write the last message (there is none if the input file is empty)
   If ListSize(line$())
      WriteMessage(ofn, line$(), HTMLformat)
   EndIf
   
   WriteStringN(ofn, "</body>", HTMLformat)
   WriteStringN(ofn, "</html>", HTMLformat)
   CloseFile(ifn)
   CloseFile(ofn)
EndProcedure


;===========================
;-- EXECUTION ENTRY POINT
;===========================

; Main program: determine the input file (from the single program parameter,
; or by asking the user), convert it, and then open the folder that
; contains the result.

Define.i HTMLformat
Define HTMLcharset$, infile$, chatDir$

HTMLformat   = #PB_UTF8
HTMLcharset$ = "UTF-8"

LoadLanguage(GetUserLanguage())

Select CountProgramParameters()
   Case 1
      infile$ = ProgramParameter()
   Case 0
      infile$ = OpenFileRequester(#ProgTitle$ + " - " + g_Lang$(#Lng_Open_Text),
                                  "", g_Lang$(#Lng_Open_Pattern), 0)
      If infile$ = ""
         End                  ; empty result of the requester: nothing to convert
      EndIf
   Default
      MessageRequester(#ProgTitle$,
                       g_Lang$(#Lng_Error) + #LF$ +
                       g_Lang$(#Lng_Err_TooMany) + #LF$ +
                       g_Lang$(#Lng_Terminated))
      End
EndSelect

chatDir$ = GetPathPart(infile$)
If chatDir$ = ""
   ; The file was given without a path, i.e. relative to the current
   ; directory. Use that directory explicitly, so that there is a folder
   ; name which can be opened below.
   chatDir$ = GetCurrentDirectory()
EndIf
ConvertChat(infile$, chatDir$, HTMLformat, HTMLcharset$)

; automatically open the desired folder
CompilerSelect #PB_Compiler_OS
   CompilerCase #PB_OS_Windows
      RunProgram(chatDir$)
   CompilerCase #PB_OS_Linux
      RunProgram("xdg-open", chatDir$, "")
CompilerEndSelect