ReadChar(fH, Format) and ReadUTF8Character()
Posted: Thu Aug 14, 2014 7:38 pm
This uses Wilbert's UTF8Size() routine.
...and allows a ReadChar() with format-dependent length:
Code: Select all
; Returns the length in bytes of a UTF-8 sequence, given its first byte.
;
; The top five bits of FirstByte select an entry in a 32-entry lookup table:
;   0xxxx -> 1  (single-byte / ASCII)
;   10xxx -> 0  (continuation byte, cannot start a sequence)
;   110xx -> 2
;   1110x -> 3
;   11110 -> 4
;   11111 -> 0  (not a valid lead byte)
; A result of 0 therefore means "FirstByte is not a valid lead byte".
Procedure.i UTF8Size(FirstByte.a)
; Load the parameter zero-extended, so the upper bits of eax are clear.
!movzx eax, byte [p.v_FirstByte]
; Keep only the top five bits: eax becomes a table index in the range 0..31.
!shr al, 3
CompilerIf #PB_Compiler_Processor = #PB_Processor_x64
; x64 build: load the table address into rdx, then index it with rax.
!lea rdx, [utf8size0]
!mov al, [rdx + rax]
CompilerElse
; x86 build: index the table directly.
!mov al, [utf8size0 + eax]
CompilerEndIf
; No explicit return value: the looked-up size is left in al with the upper
; bits still zero. NOTE(review): this relies on a bare ProcedureReturn
; returning the accumulator unchanged - confirm against the compiler docs.
ProcedureReturn
; Lookup table, placed after ProcedureReturn; indexed by (FirstByte >> 3).
!utf8size0:
; Indices 0..15 (first byte $00..$7F): one byte.
!db 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
; Indices 16..23: 0, 24..27: 2, 28..29: 3, 30: 4, 31: 0.
!db 0,0,0,0,0,0,0,0,2,2,2,2,3,3,4,0
EndProcedure
; Reads one UTF-8 encoded character from the open file fPtr.
;
; Returns the raw bytes of the sequence packed into an integer, with the lead
; byte in the most significant position (e.g. $C3 $A9 -> $C3A9). This is the
; encoded form, NOT the decoded Unicode code point.
;
; Returns 0 when:
;   - the file is already at end of file,
;   - the first byte read is 0,
;   - the file ends in the middle of a multi-byte sequence.
; If the first byte is not a valid lead byte (UTF8Size() reports 0), that
; single byte value is returned as-is.
;
; NOTE: on 32-bit builds the .i type is 32 bits wide, so a 4-byte sequence
; fills the whole integer and the result has its sign bit set.
Procedure.i ReadUTF8Character(fPtr.i)
  Protected.i utf8Character
  Protected.i ByteCounter

  If Eof(fPtr) ; Make sure we have something to read
    ProcedureReturn 0
  EndIf

  ; Read the first byte as an UNSIGNED value (0..255). ReadByte() returns a
  ; signed byte, which turns every byte >= $80 into a negative number and
  ; corrupts the shift/add packing below for all non-ASCII characters.
  utf8Character = ReadAsciiCharacter(fPtr)
  If utf8Character = 0 ; a NUL byte or a failed read
    ProcedureReturn 0
  EndIf

  ; Total length of the sequence, derived from the lead byte.
  ByteCounter = UTF8Size(utf8Character)

  ; Read the remaining bytes of the sequence, appending each one below the
  ; bytes already collected.
  While ByteCounter > 1
    If Eof(fPtr) ; the sequence is truncated
      ProcedureReturn 0
    EndIf
    ByteCounter - 1
    utf8Character << 8
    utf8Character + ReadAsciiCharacter(fPtr) ; unsigned, see above
  Wend

  ProcedureReturn utf8Character
EndProcedure
Code: Select all
; Reads a single character from the open file fh, choosing the reader that
; matches the string format StrFrmt:
;   #PB_Ascii or 0 -> ReadAsciiCharacter()
;   #PB_Unicode    -> ReadUnicodeCharacter()
;   #PB_UTF8       -> ReadUTF8Character()
; Any other format value reads nothing and yields 0.
Procedure ReadChar(fh.i, StrFrmt.i)
  Protected.i Result = 0 ; value returned for an unrecognised format

  If StrFrmt = #PB_Ascii Or StrFrmt = 0
    Result = ReadAsciiCharacter(fh)
  ElseIf StrFrmt = #PB_Unicode
    Result = ReadUnicodeCharacter(fh)
  ElseIf StrFrmt = #PB_UTF8
    Result = ReadUTF8Character(fh)
  EndIf

  ProcedureReturn Result
EndProcedure