Outlook MSG to Text decoder
Posted: Fri Aug 08, 2025 7:33 am
I tried it out just for fun and now I'm deeply shocked. ChatGPT 5 can program Purebasic and created an Outlook Msg to Text converter for me.
The whole thing worked out of the box. That's crazy.
The whole thing worked out of the box. That's crazy.


Code: Select all
; ================== MSG -> Header + Body ==================
; initiated-by: Dige
; implemented-by: ChatGPT
EnableExplicit
; --- STGM ---
#STGM_READ = $00000000
#STGM_SHARE_DENY_WRITE = $00000020
#STGM_SHARE_EXCLUSIVE = $00000010
; --- MSG-Streams (Body) ---
#STREAM_BODY_UNICODE = "__substg1.0_1000001F"
#STREAM_BODY_ANSI = "__substg1.0_1000001E"
#STREAM_HTML_UNICODE = "__substg1.0_1013001F"
#STREAM_HTML_ANSI = "__substg1.0_1013001E"
#STREAM_RTF_COMP = "__substg1.0_10090102"
; --- MSG-Streams (Header/Felder) ---
#STREAM_HEADERS_UNICODE = "__substg1.0_007D001F" ; PR_TRANSPORT_MESSAGE_HEADERS
#STREAM_HEADERS_ANSI = "__substg1.0_007D001E"
#STREAM_SUBJECT_UNICODE = "__substg1.0_0037001F" ; PR_SUBJECT
#STREAM_SUBJECT_ANSI = "__substg1.0_0037001E"
#STREAM_DISPLAY_TO_U = "__substg1.0_0E04001F" ; PR_DISPLAY_TO
#STREAM_DISPLAY_TO_A = "__substg1.0_0E04001E"
#STREAM_SENDER_NAME_U = "__substg1.0_0C1A001F" ; PR_SENDER_NAME
#STREAM_SENDER_NAME_A = "__substg1.0_0C1A001E"
#STREAM_SENDER_ADDR_U = "__substg1.0_0C1F001F" ; PR_SENDER_EMAIL_ADDRESS
#STREAM_SENDER_ADDR_A = "__substg1.0_0C1F001E"
#STREAM_SENTREP_ADDR_U = "__substg1.0_0065001F" ; PR_SENT_REPRESENTING_EMAIL_ADDRESS
#STREAM_SENTREP_ADDR_A = "__substg1.0_0065001E"
#STREAM_CLIENT_SUBMIT = "__substg1.0_00390040" ; PR_CLIENT_SUBMIT_TIME (FILETIME)
#STREAM_DELIVERY_TIME = "__substg1.0_0E060040" ; PR_MESSAGE_DELIVERY_TIME (FILETIME)
; --- COM-Interfaces (Minimal) ---
ImportC "ole32.lib"
CoInitializeEx(*pvReserved, dwCoInit.l)
CoUninitialize()
StgOpenStorage(*pwcsName, pStgPriority, grfMode.l, pSnB, reserved.l, *ppstg.IStorage)
EndImport
; ---------- Helpers ----------
Procedure.i WideAlloc(str.s)
Protected *p = AllocateMemory((Len(str)+1)*SizeOf(Character))
If *p : PokeS(*p, str, -1, #PB_Unicode) : EndIf
ProcedureReturn *p
EndProcedure
Procedure.s ReadAllFromIStream_Text(*stm.IStream, isUnicode.i, isAnsi.i)
Protected chunk = 8192, *buf = AllocateMemory(chunk)
If *buf = 0 : ProcedureReturn "" : EndIf
Protected *mem = #Null, total = 0, bytesRead.l, ok = #True
While ok
bytesRead = 0
If *stm\Read(*buf, chunk, @bytesRead) <> 0 : ok = #False : Break : EndIf
If bytesRead = 0 : Break : EndIf
If *mem = #Null
*mem = AllocateMemory(bytesRead) : If *mem = 0 : ok = #False : Break : EndIf
CopyMemory(*buf, *mem, bytesRead) : total = bytesRead
Else
Protected *new = ReAllocateMemory(*mem, total + bytesRead)
If *new = 0 : ok = #False : Break : EndIf
*mem = *new : CopyMemory(*buf, *mem + total, bytesRead) : total + bytesRead
EndIf
If bytesRead < chunk : Break : EndIf
Wend
FreeMemory(*buf)
If ok = #False Or total = 0
If *mem : FreeMemory(*mem) : EndIf
ProcedureReturn ""
EndIf
Protected out.s
If isUnicode
out = PeekS(*mem, total/SizeOf(Character), #PB_Unicode)
ElseIf isAnsi
out = PeekS(*mem, total, #PB_Ascii)
Else
out = ""
EndIf
FreeMemory(*mem)
ProcedureReturn out
EndProcedure
Procedure.i ReadAllFromIStream_Binary(*stm.IStream, *outSize.Integer)
Protected chunk = 4096, *buf = AllocateMemory(chunk)
If *buf = 0 : ProcedureReturn #Null : EndIf
Protected *mem = #Null, total = 0, bytesRead.l, ok = #True
While ok
bytesRead = 0
If *stm\Read(*buf, chunk, @bytesRead) <> 0 : ok = #False : Break : EndIf
If bytesRead = 0 : Break : EndIf
If *mem = #Null
*mem = AllocateMemory(bytesRead) : If *mem = 0 : ok = #False : Break : EndIf
CopyMemory(*buf, *mem, bytesRead) : total = bytesRead
Else
Protected *new = ReAllocateMemory(*mem, total + bytesRead)
If *new = 0 : ok = #False : Break : EndIf
*mem = *new : CopyMemory(*buf, *mem + total, bytesRead) : total + bytesRead
EndIf
If bytesRead < chunk : Break : EndIf
Wend
FreeMemory(*buf)
If ok = #False Or total = 0
If *mem : FreeMemory(*mem) : EndIf
ProcedureReturn #Null
EndIf
*outSize\i = total
ProcedureReturn *mem
EndProcedure
Procedure.s TryOpenStreamText(*stg.IStorage, name.s, isUnicode.i, isAnsi.i)
Protected *stm.IStream, txt.s, *w = WideAlloc(name)
If *w = 0 : ProcedureReturn "" : EndIf
If *stg\OpenStream(*w, 0, #STGM_READ|#STGM_SHARE_EXCLUSIVE, 0, @*stm) = 0 And *stm
txt = ReadAllFromIStream_Text(*stm, isUnicode, isAnsi)
*stm\Release()
EndIf
FreeMemory(*w)
ProcedureReturn txt
EndProcedure
Procedure.i TryOpenStreamBinary(*stg.IStorage, name.s, *outSize.Integer)
Protected *stm.IStream, *data = #Null, *w = WideAlloc(name)
If *w = 0 : ProcedureReturn #Null : EndIf
If *stg\OpenStream(*w, 0, #STGM_READ|#STGM_SHARE_EXCLUSIVE, 0, @*stm) = 0 And *stm
*data = ReadAllFromIStream_Binary(*stm, *outSize)
*stm\Release()
EndIf
FreeMemory(*w)
ProcedureReturn *data
EndProcedure
Procedure.s StripHtml(html.s)
Protected out.s = html, s, e, pos
out = ReplaceString(out, Chr(13), " ")
out = ReplaceString(out, Chr(10), " ")
While FindString(LCase(out), "<script", 1)
s = FindString(LCase(out), "<script", 1)
e = FindString(LCase(out), "</script>", s+1) : If e=0:Break:EndIf
out = Left(out, s-1) + Mid(out, e+9)
Wend
While FindString(LCase(out), "<style", 1)
s = FindString(LCase(out), "<style", 1)
e = FindString(LCase(out), "</style>", s+1) : If e=0:Break:EndIf
out = Left(out, s-1) + Mid(out, e+8)
Wend
pos = 1
While pos
pos = FindString(out, "<", 1)
If pos
e = FindString(out, ">", pos+1) : If e=0:Break:EndIf
out = Left(out, pos-1) + Mid(out, e+1)
EndIf
Wend
out = ReplaceString(out, " ", " ")
out = ReplaceString(out, "&", "&")
out = ReplaceString(out, "<", "<")
out = ReplaceString(out, ">", ">")
ProcedureReturn Trim(out)
EndProcedure
; ---- RFC822 Headerblock parsen (gefaltete Zeilen unterstützt) ----
Procedure.s GetHeaderValue(headers.s, key.s)
Protected lines.s = headers, line.s, i, n, k.s = LCase(key + ":")
Protected result.s
n = CountString(lines, #CRLF$) + 1
Dim arr.s(n-1)
For i=0 To n-1
arr(i) = StringField(lines, i+1, #CRLF$)
Next
For i=0 To n-1
line = arr(i)
If LCase(Left(Trim(line), Len(k))) = k
result = Mid(line, Len(key)+2)
; Fortsetzungzeilen aufsammeln (beginnen mit Space/Tab)
While i+1 <= n-1 And (Left(arr(i+1),1) = " " Or Left(arr(i+1),1) = #TAB$)
result + " " + Trim(arr(i+1))
i + 1
Wend
ProcedureReturn Trim(result)
EndIf
Next
ProcedureReturn ""
EndProcedure
; ---- FILETIME (8 Bytes) -> ISO-String ----
Procedure.s FileTimeToIso8601(*bin, size.i)
If size < 8 : ProcedureReturn "" : EndIf
Protected.q ft = PeekQ(*bin)
If ft = 0 : ProcedureReturn "" : EndIf
Protected.d secs = ft / 10000000.0
Protected.d offset = 11644473600.0 ; 1601->1970
Protected.i unix = Int(secs - offset)
If unix <= 0 : ProcedureReturn "" : EndIf
ProcedureReturn FormatDate("%yyyy-%mm-%dd %hh:%ii:%ss", unix)
EndProcedure
; ------------------------- API -------------------------
; ExtractMsgWithHeader(path$, stripHtmlIfNeeded=#True)
; Rückgabe:
; From: ...
; To: ...
; Subject: ...
; Date: ...
;
; <Body>
Procedure.s ExtractMsgWithHeader(path.s, stripHtmlIfNeeded.i = #True)
Protected body.s, html.s, headersRaw.s
Protected from.s, rcpt.s, subj.s, dateHdr.s, dateProp.s, hDate.s
Protected *stg.IStorage
If path = "" Or FileSize(path) <= 0 : ProcedureReturn "" : EndIf
CoInitializeEx(#Null, 0)
Protected *wPath = WideAlloc(path) : If *wPath = 0 : CoUninitialize() : ProcedureReturn "" : EndIf
If StgOpenStorage(*wPath, #Null, #STGM_READ|#STGM_SHARE_DENY_WRITE, #Null, 0, @*stg) <> 0 Or *stg = 0
FreeMemory(*wPath) : CoUninitialize() : ProcedureReturn ""
EndIf
FreeMemory(*wPath)
; ---- Body: Plain > HTML > RTF-Hinweis
body = TryOpenStreamText(*stg, #STREAM_BODY_UNICODE, #True, #False)
If body = "" : body = TryOpenStreamText(*stg, #STREAM_BODY_ANSI, #False, #True) : EndIf
If body = ""
html = TryOpenStreamText(*stg, #STREAM_HTML_UNICODE, #True, #False)
If html = "" : html = TryOpenStreamText(*stg, #STREAM_HTML_ANSI, #False, #True) : EndIf
If html <> ""
If stripHtmlIfNeeded
body = StripHtml(html)
Else
body = html
EndIf
EndIf
EndIf
If body = ""
If TryOpenStreamText(*stg, #STREAM_RTF_COMP, #False, #False) <> ""
body = "[Hinweis] Nur RTF-komprimierter Body gefunden (10090102). RTF-Dekompression nötig."
EndIf
EndIf
; ---- Headerblock
headersRaw = TryOpenStreamText(*stg, #STREAM_HEADERS_UNICODE, #True, #False)
If headersRaw = "" : headersRaw = TryOpenStreamText(*stg, #STREAM_HEADERS_ANSI, #False, #True) : EndIf
If headersRaw <> ""
from = GetHeaderValue(headersRaw, "From")
rcpt = GetHeaderValue(headersRaw, "To")
subj = GetHeaderValue(headersRaw, "Subject")
dateHdr = GetHeaderValue(headersRaw, "Date")
EndIf
; ---- Fallbacks
If subj = "" : subj = TryOpenStreamText(*stg, #STREAM_SUBJECT_UNICODE, #True, #False) : EndIf
If subj = "" : subj = TryOpenStreamText(*stg, #STREAM_SUBJECT_ANSI, #False, #True) : EndIf
If rcpt = "" : rcpt = TryOpenStreamText(*stg, #STREAM_DISPLAY_TO_U, #True, #False) : EndIf
If rcpt = "" : rcpt = TryOpenStreamText(*stg, #STREAM_DISPLAY_TO_A, #False, #True) : EndIf
If from = ""
from = TryOpenStreamText(*stg, #STREAM_SENDER_ADDR_U, #True, #False)
If from = "" : from = TryOpenStreamText(*stg, #STREAM_SENDER_ADDR_A, #False, #True) : EndIf
If from = ""
from = TryOpenStreamText(*stg, #STREAM_SENDER_NAME_U, #True, #False)
If from = "" : from = TryOpenStreamText(*stg, #STREAM_SENDER_NAME_A, #False, #True) : EndIf
EndIf
If from = ""
from = TryOpenStreamText(*stg, #STREAM_SENTREP_ADDR_U, #True, #False)
If from = "" : from = TryOpenStreamText(*stg, #STREAM_SENTREP_ADDR_A, #False, #True) : EndIf
EndIf
EndIf
If dateHdr = ""
Protected sz.Integer, *bin
*bin = TryOpenStreamBinary(*stg, #STREAM_CLIENT_SUBMIT, @sz)
If *bin
dateProp = FileTimeToIso8601(*bin, sz\i) : FreeMemory(*bin)
Else
*bin = TryOpenStreamBinary(*stg, #STREAM_DELIVERY_TIME, @sz)
If *bin
dateProp = FileTimeToIso8601(*bin, sz\i) : FreeMemory(*bin)
EndIf
EndIf
EndIf
*stg\Release()
CoUninitialize()
; ---- Date-Zeile ohne IIf
If dateHdr <> ""
hDate = "Date: " + dateHdr
Else
hDate = "Date: " + dateProp
EndIf
ProcedureReturn Trim("From: " + from + #CRLF$ +
"To: " + rcpt + #CRLF$ +
"Subject: " + subj + #CRLF$ +
hDate + #CRLF$ + #CRLF$ +
body)
EndProcedure
; -------------------- Beispiel --------------------
Debug ExtractMsgWithHeader("P:\outlook.msg")