Detect text file encoding (UTF-1, UTF-7, UTF-16, UTF-32, ANSI, UTF-EBCDIC, etc.)
Posted: Fri Sep 18, 2015 4:53 am
Code: Select all
EnableExplicit
; Custom encoding identifiers for byte order marks that have no built-in
; PureBasic constant. The exact numbers are arbitrary; the only requirement is
; that each one is unique and differs from the built-in encoding constants
; (#PB_Ascii, #PB_UTF8, #PB_UTF16, ...) that are used in the same Select
; statements below.
; NOTE(review): the #PB_ prefix is the compiler's own constant namespace; if a
; PureBasic release ever defines one of these names with a different value the
; file will presumably stop compiling - confirm against the target version.
#PB_UTF1 = 101
#PB_SCSU = 102
#PB_UTF_EBCDIC = 103
#PB_BOCU_1 = 104
#PB_UTF7 = 107
#PB_GB_18030 = 108
Procedure.i GetFileEncoding(sFilename.s)
  ; Identify a file's text encoding from its byte order mark (BOM).
  ; BOM source: https://en.wikipedia.org/wiki/Byte_order_mark
  ;
  ; sFilename - path of the file to inspect. It is opened read-only with
  ;             shared read access and closed again before returning.
  ;
  ; Returns:
  ;   - the encoding id matching the BOM found at the start of the file
  ;     (#PB_UTF8, #PB_UTF16, #PB_UTF16BE, #PB_UTF32, #PB_UTF32BE or one of
  ;     the custom ids such as #PB_UTF7 / #PB_GB_18030),
  ;   - #PB_Ascii when the file is shorter than 2 bytes or has no known BOM,
  ;   - #PB_Any when the file cannot be opened or nothing could be read.
  ;
  ; The hex signatures below are the BOM bytes as they appear when the first
  ; file byte lands in the lowest byte of the value (little-endian host
  ; assumed, as in the original code).
  Protected lRaw.l, qMagic.q, qLen.q, iBytesRead.i
  Protected hFile.i = ReadFile(#PB_Any, sFilename, #PB_File_SharedRead)

  If hFile = 0
    ProcedureReturn #PB_Any
  EndIf

  qLen = Lof(hFile)
  If qLen < 2
    ; Too short to hold even the smallest (2-byte) BOM.
    CloseFile(hFile)
    ProcedureReturn #PB_Ascii
  EndIf
  If qLen > 4
    qLen = 4 ; the longest BOM checked here is 4 bytes
  EndIf

  iBytesRead = ReadData(hFile, @lRaw, qLen)
  CloseFile(hFile)

  If iBytesRead <= 0
    ProcedureReturn #PB_Any
  EndIf

  ; Widen to an unsigned 32-bit value held in a quad, so that signatures with
  ; the top bit set (e.g. $FFFE0000) compare equal regardless of how a signed
  ; long would be sign-extended.
  qMagic = lRaw & $FFFFFFFF

  ; Longest signatures first: the UTF-32LE BOM starts with the UTF-16LE BOM.
  ; Each width is only tested when that many bytes were really read; otherwise
  ; the zero padding of a 2-byte file containing FF FE would be mistaken for
  ; the UTF-32LE BOM FF FE 00 00.
  If iBytesRead >= 4
    Select qMagic
      Case $0000FEFF : ProcedureReturn #PB_UTF32
      Case $FFFE0000 : ProcedureReturn #PB_UTF32BE
      Case $736673DD : ProcedureReturn #PB_UTF_EBCDIC
      Case $33953184 : ProcedureReturn #PB_GB_18030
    EndSelect
  EndIf

  If iBytesRead >= 3
    Select qMagic & $FFFFFF
      Case $BFBBEF : ProcedureReturn #PB_UTF8
      Case $762F2B : ProcedureReturn #PB_UTF7
      Case $4C64F7 : ProcedureReturn #PB_UTF1
      Case $FFFE0E : ProcedureReturn #PB_SCSU
      Case $28EEFB : ProcedureReturn #PB_BOCU_1
    EndSelect
  EndIf

  If iBytesRead >= 2
    Select qMagic & $FFFF
      Case $FEFF : ProcedureReturn #PB_UTF16
      Case $FFFE : ProcedureReturn #PB_UTF16BE
    EndSelect
  EndIf

  ; No recognised BOM: treat the file as plain single-byte text.
  ProcedureReturn #PB_Ascii
EndProcedure
Procedure.s EncodingName(encId.i)
  ; Translate an encoding id into a human-readable label.
  ;
  ; encId - one of the built-in or custom #PB_* encoding ids handled below.
  ;
  ; Returns the label for a recognised id, or "Error" for any other value
  ; (for example the #PB_Any that GetFileEncoding reports when the file is
  ; inaccessible).
  Protected sLabel.s = "Error" ; fallback for every id not listed below

  Select encId
    Case #PB_Ascii
      sLabel = "ASCII"
    Case #PB_UTF1
      sLabel = "UTF-1"
    Case #PB_UTF7
      sLabel = "UTF-7"
    Case #PB_UTF8
      sLabel = "UTF-8"
    Case #PB_UTF16
      sLabel = "UTF-16LE"
    Case #PB_UTF16BE
      sLabel = "UTF-16BE"
    Case #PB_UTF32
      sLabel = "UTF-32LE"
    Case #PB_UTF32BE
      sLabel = "UTF-32BE"
    Case #PB_UTF_EBCDIC
      sLabel = "UTF-EBCDIC"
    Case #PB_BOCU_1
      sLabel = "BOCU-1"
    Case #PB_GB_18030
      sLabel = "GB-18030"
    Case #PB_SCSU
      sLabel = "SCSU"
  EndSelect

  ProcedureReturn sLabel
EndProcedure
; Demo: detect the encoding of a sample file and show its name in a dialog.
Define sDemoPath.s = "c:\temp\encodings\utf32be.txt"
Define iDemoEncoding.i = GetFileEncoding(sDemoPath)
MessageRequester("Get Encoding", EncodingName(iDemoEncoding))