Module FMT [String formatter]
Posted: Sun Aug 05, 2018 6:29 pm
I know there are already multiple solutions for formatting strings on this forum.
I wanted to explore a different way of passing arguments (casting everything to quad) allowing the format string to be closer to the C specification of the printf / sprintf / snprintf functions.
This module below is my take on it.
The majority of the code is normal PB code with a few C functions and some small asm parts.
Strings have to be passed by address and floating point values by the Flt() procedure.
Compared to the C specification for sprintf, there are a few differences.
- Types a and A (hexadecimal floating point) have not been implemented.
- The default integer length when no length is specified is 64 bits while in C it is 32 bit.
- Length modifier l ("el") always means 32 bit while in C, it means 32 bit for x86 and 64 for x64.
- ' (single quote), b and B are extensions to the original specification.
It's quite a bit of code with many different options, so if you find a bug, please let me know.
The module:
I wanted to explore a different way of passing arguments (casting everything to quad) allowing the format string to be closer to the C specification of the printf / sprintf / snprintf functions.
This module below is my take on it.
The majority of the code is normal PB code with a few C functions and some small asm parts.
Strings have to be passed by address and floating point values by the Flt() procedure.
Compared to the C specification for sprintf, there are a few differences.
- Types a and A (hexadecimal floating point) have not been implemented.
- The default integer length when no length is specified is 64 bits while in C it is 32 bit.
- Length modifier l ("el") always means 32 bit while in C, it means 32 bit for x86 and 64 for x64.
- ' (single quote), b and B are extensions to the original specification.
It's quite a bit of code with many different options, so if you find a bug, please let me know.
The module:
Code: Select all
; =============================================================================
; Module name : FMT
; Author : Wilbert
; Last updated : Aug 5, 2018
; Forum link : https://www.purebasic.fr/english/viewtopic.php?f=12&t=71171
; =============================================================================
; Public procedures
; =============================================================================
; Flt(f.d)
; Format(*fmt, a0.q=0, a1.q=0, a2.q=0, a3.q=0, a4.q=0,
; a5.q=0, a6.q=0, a7.q=0, a8.q=0, a9.q=0)
; SetDecimalSeparator(SeparatorChar.a='.')
; Format (*fmt)
; =============================================================================
; %[parameter][flags][width][.precision][length]type
; Parameter field
; =============================================================================
; n$ n is the number of the parameter to use (starting with 1).
; Flags field
; =============================================================================
; - Left-align the output.
; + Prepend a plus for positive signed numeric types.
; space Prepend a space for positive signed numeric types.
; 0 Pad with '0' character (only when left-align flag is not used).
; # Alternate form:
; For b,B,o,x,X, prefix 0b,0B,0,0x,0X is used for non-zero numbers.
; For e,E,f,F,g,G, the output will always contain a decimal separator.
; For g,G, trailing zeros are not removed.
; 'c Character c is used for padding (numeric characters are not allowed).
; Width field
; =============================================================================
; Width specifies the minimum number of characters to output.
; If the output contains less characters, it will be padded.
; An asterisk (*) may be used to supply the width as an argument.
; Precision field
; =============================================================================
; For b,B,d,i,o,u,x,X, the minimum number of digits to output.
; For e,E,f,F, the number of digits to output after the decimal separator.
; For g,G, the maximum number of significant digits to output.
; For s, the maximum number of characters from the string to output.
; An asterisk (*) may be used to supply the precision as an argument.
; Length field (for integer types)
; =============================================================================
; hh The supplied argument should be treated as an 8 bit value.
; h The supplied argument should be treated as a 16 bit value.
; l The supplied argument should be treated as a 32 bit value.
; ll The supplied argument should be treated as a 64 bit value.
; Type field
; =============================================================================
; % Outputs a % character.
; d,i Signed decimal integer.
; u Unsigned decimal integer.
; b,B Unsigned binary integer.
; o Unsigned octal integer.
; x,X Unsigned hexadecimal integer.
; f,F Floating point in normal notation (use Flt).
; e,E Floating point in exponential notation (use Flt).
; g,G Most appropriate of normal or exponential notation (use Flt).
; c Character.
; s String (the string needs to be passed by address).
; p Pointer address.
; n No output. A pointer to a variable of type Long needs to be supplied.
; The number of characters outputted so far will be set.
; Example
; =============================================================================
; UseModule FMT
; SetDecimalSeparator(',')
; Debug Format(@"%1$s %2$s. %1$s isn't it ? %3$c", @"Small", @"example", $1F600)
; Debug Format(@"The square root of %i is %f.", 12, Flt(Sqr(12)) )
; Public interface of the FMT module.
DeclareModule FMT
; Returns the bit pattern of double f as a quad so it can be passed as a Format argument.
Declare.q Flt(f.d)
; Formats the string at *fmt using up to ten quad arguments (strings by address, floats via Flt).
Declare.s Format(*fmt, a0.q=0, a1.q=0, a2.q=0, a3.q=0, a4.q=0, a5.q=0, a6.q=0, a7.q=0, a8.q=0, a9.q=0)
; Sets the character Format writes in place of the decimal separator (0 is ignored).
Declare SetDecimalSeparator(SeparatorChar.a='.')
EndDeclareModule
Module FMT
EnableExplicit
DisableDebugger
EnableASM
;-- Import OS functions
; Pick the link name of the C runtime's snprintf for the current OS / CPU.
CompilerIf #PB_Compiler_OS = #PB_OS_Linux
Macro snprintf_name : "snprintf" : EndMacro
CompilerElse
CompilerIf #PB_Compiler_OS = #PB_OS_Windows And #PB_Compiler_Processor = #PB_Processor_x86
Macro snprintf_name : "__snprintf" : EndMacro
CompilerElse
Macro snprintf_name : "_snprintf" : EndMacro
CompilerEndIf
CompilerEndIf
; C runtime functions used by Format.
; snprintf_d and snprintf_q are two prototypes bound to the same snprintf symbol:
; one passes a double as the value argument, the other a quad. Both pass the
; precision as a separate argument (the generated format strings use ".*").
ImportC ""
free(*ptr)
malloc(size)
memcpy(*destination, *source, num)
realloc(*ptr, size)
snprintf_d(*s, n, *fmt, precision, d.d) As snprintf_name
snprintf_q(*s, n, *fmt, precision, q.q) As snprintf_name
EndImport
;-- Constants
; Size in bytes of one PB character in this build.
#_CS = SizeOf(Character)
; Copy modes set by each conversion in Format:
; #_NoCopy - nothing is appended to the output (used by %n)
; #_MemCopy - source bytes are already in PB character format, copied with memcpy
; #_AsciiCopy - source is one byte per character and is widened when #_CS = 2
#_NoCopy = -1
#_MemCopy = 1
#_AsciiCopy = 2
;-- Structures
; View over the argument block: d[] and q[] are both zero-length arrays declared
; at the start of the structure, so Format can read the same slot either as a
; double (d[i]) or as a quad (q[i]).
Structure _args
d.d[0] : q.q[0]
EndStructure
; View over the format string: the current character and the one following it.
Structure _in
c.c : nextc.c
EndStructure
; Output buffer bookkeeping: write position, start of allocation, end of allocation.
Structure _buffer
*cur.Character : *mem : *end
EndStructure
; Scratch space in which Format assembles a short snprintf format string:
; two longs of packed characters followed by a byte that is never written
; (so it keeps its initial zero value and terminates the string).
Structure _fmt
l0.l : l1.l : null.a
EndStructure
;-- Macros for x86 ASM
; On x86 the 64-bit register names map to their 32-bit counterparts.
; NOTE(review): this presumably only affects the inline asm lines in Format that
; are written without a leading '!', which are the ones using rax/rbx/rdx.
CompilerIf #PB_Compiler_Processor = #PB_Processor_x86
Macro rax : eax : EndMacro
Macro rbx : ebx : EndMacro
Macro rdx : edx : EndMacro
CompilerEndIf
;-- Global variables
Global _sep.a = '.'; default decimal separator character
;-- Procedures
Procedure SetDecimalSeparator(SeparatorChar.a='.')
  ; Stores the character that Format writes in place of the decimal separator
  ; of floating point output. A value of 0 is rejected and leaves the current
  ; separator untouched.
  If SeparatorChar = 0
    ProcedureReturn
  EndIf
  _sep = SeparatorChar
EndProcedure
Procedure.q Flt(f.d)
  ; Returns the 64 bits of the double f reinterpreted as a quad (no numeric
  ; conversion takes place), so a floating point value can travel through the
  ; quad parameters of Format.
  ProcedureReturn PeekQ(@f)
EndProcedure
Procedure.s Format(*fmt, a0.q=0, a1.q=0, a2.q=0, a3.q=0, a4.q=0, a5.q=0, a6.q=0, a7.q=0, a8.q=0, a9.q=0)
  ; Builds a string from the printf-style format string at *fmt.
  ;
  ; *fmt     : address of the format string (PB character format).
  ; a0 .. a9 : up to ten arguments, all passed as quads. Strings are passed by
  ;            address, floating point values as the bit pattern of a double.
  ; Returns  : the formatted string, or "" when the initial allocations fail.
  ;            When growing the output buffer fails, what was produced up to
  ;            that point is returned.
  ;
  ; Working memory:
  ;   b      - growable output buffer (starts at 8192 bytes).
  ;   *a     - 1024 byte block; the first 11 quads hold the arguments (slot 10
  ;            is always 0 and is used for out-of-range argument indexes).
  ;   *conv  - conversion scratch area at *a + 192 (832 bytes). The binary
  ;            conversion writes its digits backwards from *conv + 513.
  ;
  ; Inline asm lines without a leading '!' use rax/rbx/rdx and the p.p_/p.v_
  ; names of the locals; their order relative to push/pop is kept as is.
  Protected result.s, sep.a=_sep, b._buffer, required.l, *tmp.Long, *a._args, arg_idx.l
  Protected *in._in=*fmt, *out.Ascii, *conv.Long, _fmt._fmt, copymode.l, cnt.l, q.q, n.l
  Protected leftalign.l, alt.l, pad.c, sgn.a, width.l, prec.l, nbits.l
  ; Allocate space for output buffer and arguments
  b\mem = malloc(8192)
  If b\mem
    b\cur = b\mem : b\end = b\mem + 8192 : *a = malloc(1024)
    If *a
      *conv = *a + 192
    Else
      free(b\mem) : ProcedureReturn ""
    EndIf
  Else
    ProcedureReturn ""
  EndIf
  ; Copy arguments
  *a\q[0]=a0 : *a\q[1]=a1 : *a\q[2]=a2 : *a\q[3]=a3
  *a\q[4]=a4 : *a\q[5]=a5 : *a\q[6]=a6 : *a\q[7]=a7
  *a\q[8]=a8 : *a\q[9]=a9 : *a\q[10]=0
  ; Main loop
  While *in\c
    If *in\c = '%'
      *in + #_CS
      ; Reset the state of the conversion specification
      *out = *conv : cnt = 0 : copymode = 0
      leftalign = #False : alt = #False : pad = ' '
      sgn = 0 : width = -1 : prec = -1 : nbits = 64
      ; Every pass handles the character at *in and then steps over it.
      ; The loop ends once a type character has set copymode.
      Repeat
        Select *in\c
          ; scan number and check for $ at end
          Case '1' To '9'
            n = *in\c-'0'
            While *in\nextc <= '9' And *in\nextc >= '0'
              *in + #_CS : n = n*10 + *in\c-'0'
            Wend
            If prec = 0
              ; a '.' was seen before: the number is the precision
              prec = n
            ElseIf width < 0
              If *in\nextc = '$'
                ; n$ : select argument n (1 based), out of range maps to the zero slot
                *in + #_CS: arg_idx = n-1
                If arg_idx < 0 : arg_idx = 0
                ElseIf arg_idx > 9 : arg_idx = 10
                EndIf
              Else
                width = n
              EndIf
            EndIf
            ; (user defined) precision
          Case '.'
            If *in\nextc = '*'
              q = *a\q[arg_idx] : *in + #_CS
              If q > 0 : prec = q : Else : prec = 0 : EndIf
              If arg_idx < 10 : arg_idx + 1 : EndIf
            ElseIf prec < 0
              prec = 0
            EndIf
            ; user defined width
          Case '*'
            If width < 0
              ; *in is not advanced here: the step at the end of the loop already
              ; moves past the '*'. Advancing twice skipped the type character.
              width = *a\q[arg_idx]
              If width < 0 : width = -width : leftalign = #True : EndIf
              If arg_idx < 10 : arg_idx + 1 : EndIf
            EndIf
            ; alternative form
          Case '#'
            alt = #True
            ; left alignment within given field width
          Case '-'
            leftalign = #True
            ; sign
          Case ' '
            If sgn = 0 : sgn = ' ' : EndIf
          Case '+'
            sgn = '+'
            ; padding
          Case '0'
            ; prec = 0 means a '.' directly precedes this digit, so the '0' is
            ; (the start of) the precision and not the zero padding flag.
            If prec <> 0 : pad = '0' : EndIf
          Case 39; '
            If *in\nextc
              *in + #_CS
              If *in\c < '0' Or *in\c > '9' : pad = *in\c : EndIf
            EndIf
            ; length
          Case 'h'
            If nbits = 16 : nbits = 8 : Else : nbits = 16 : EndIf
          Case 'l'
            If nbits = 32 : nbits = 64 : Else : nbits = 32 : EndIf
            ; float
          Case 'f','F','e','E','g','G'
            copymode = #_AsciiCopy
            q = *a\q[arg_idx]
            If q & $7ff0000000000000 = $7ff0000000000000
              ; handle inf and nan
              If q & $fffffffffffff
                *conv\l = $6e616e: sgn = 0; nan
              Else
                If q<0 : sgn = '-' : EndIf
                *conv\l = $666e69; inf
              EndIf
              If *in\c <= 'G': *conv\l ! $202020 : EndIf; convert case
              If sgn
                *conv\l = *conv\l << 8 | sgn : cnt = 4 * #_CS; [sgn]inf
              Else
                cnt = 3 * #_CS
              EndIf
            Else
              ; Build the snprintf format "%[flag][flag].*<type>" in _fmt
              _fmt\l0 = $2e303025; %00.
              If alt : _fmt\l0 ! $130000 : EndIf
              If sgn : _fmt\l0 ! $3000 ! (sgn << 8) : EndIf
              _fmt\l1 = $2a | (*in\c << 8)
              If *in\c = 'F': _fmt\l1 | $2000 : EndIf
              If prec < 0 : prec = 6 : ElseIf prec > 512 : prec = 512 : EndIf
              cnt = snprintf_d(*conv, 831, @_fmt, prec, *a\d[arg_idx]) * #_CS
              ; [ASM Replace decimal separator]
              ; Finds the first ',' (0x2c) or '.' (0x2e) in the converted text
              ; and overwrites it with sep.
              mov rdx, [p.p_conv]
              !mov ecx, [p.v_cnt]
              !.float_l0:
              movzx eax, byte [rdx]
              !and eax, 0xfd
              !cmp eax, 0x2c
              !je .float_l1
              add rdx, 1
              !sub ecx, 1
              !jnz .float_l0
              !jmp .float_l2
              !.float_l1:
              !movzx eax, byte [p.v_sep]
              mov [rdx], al
              !.float_l2:
              ; [/ASM]
            EndIf
            ; (un)signed integer
          Case 'd','i','u','o','x','X'
            copymode = #_AsciiCopy
            q = *a\q[arg_idx]
            _fmt\l0 = $2a2e3025; %0.*
            If *in\c = 'd' Or *in\c = 'i'
              ; sign extend from nbits
              If nbits < 64: q = q << (64-nbits) >> (64-nbits) : EndIf
              If sgn : _fmt\l0 ! $3000 ! (sgn << 8) : EndIf
            Else
              ; keep the lowest nbits only
              If nbits < 64: q = q & (1 << nbits-1) : EndIf
              If alt : _fmt\l0 ! $1300 : EndIf
            EndIf
            If nbits = 64
              CompilerIf #PB_Compiler_OS = #PB_OS_Windows
                _fmt\l1 = $343649 | *in\c << 24; I64
              CompilerElse
                _fmt\l1 = $6c6c | *in\c << 16; ll
              CompilerEndIf
            Else
              _fmt\l1 = *in\c
            EndIf
            If prec < 0 : prec = 1 : Else
              If prec > 512 : prec = 512 : EndIf
              ; no '0' padding allowed if precision was set
              If pad = '0' : pad = ' ' : EndIf
            EndIf
            cnt = snprintf_q(*conv, 831, @_fmt, prec, q) * #_CS
            ; unsigned binary integer
          Case 'b','B'
            copymode = #_AsciiCopy
            q = *a\q[arg_idx]
            If nbits < 64 : q = q & (1 << nbits-1) : EndIf
            If prec < 0 : prec = 1 : Else
              If prec > 512 : prec = 512 : EndIf
              ; no '0' padding allowed if precision was set
              If pad = '0' : pad = ' ': EndIf
            EndIf
            If q
              ; Digits are written backwards, ending at *conv + 513.
              ; The lowest bit is emitted first so the following shift can be made
              ; unsigned by clearing the copied sign bit; all 64 bits are kept.
              *out = *conv + 513 : *out\a = (q & 1) | $30
              q = (q >> 1) & $7fffffffffffffff
              While q
                *out - 1 : *out\a = (q & 1) | $30 : q >> 1
              Wend
              cnt = *conv + 514 - *out
              If cnt < prec
                ; extend to the left with zeros up to the requested precision
                *out + cnt - prec
                FillMemory(*out, prec - cnt, $30) : cnt = prec
              EndIf
              If alt
                ; prefix 0b / 0B
                *out - 1 : *out\a = *in\c
                *out - 1 : *out\a = $30 : cnt + 2
              EndIf
            Else
              FillMemory(*conv, prec, $30) : cnt = prec
            EndIf
            cnt * #_CS
            ; string
          Case 's'
            copymode = #_MemCopy
            *out = *a\q[arg_idx]
            If *out And prec
              ; [ASM String length with prec as max length]
              ; cnt receives the length in bytes. An unspecified precision (-1)
              ; makes the counter effectively unlimited.
              !xor edx, edx
              !mov ecx, [p.v_prec]
              mov rax, [p.p_out]
              !.strnlen_l0:
              CompilerIf #_CS = 2
                cmp [rax], dx
                !je .strnlen_l1
                add rax, 2
              CompilerElse
                cmp [rax], dl
                !je .strnlen_l1
                add rax, 1
              CompilerEndIf
              !dec ecx
              !jnz .strnlen_l0
              !.strnlen_l1:
              sub rax, [p.p_out]
              !mov [p.v_cnt], eax
              ; [/ASM]
            EndIf
            ; character
          Case 'c'
            copymode = #_MemCopy
            n = *a\q[arg_idx]
            CompilerIf #_CS = 2
              n - $10000
              ; After the subtraction, 0 .. $FFFFF covers U+10000 .. U+10FFFF.
              ; (Testing only 16 bits limited this to U+10000 .. U+1FFFF.)
              If n & -$100000 = 0
                ; surrogate pair
                *conv\l = $dc00d800 | (n << 16 & $3ff0000) | (n >> 10 & $7ff)
                cnt = 4
              Else
                ; only the low 16 bits are copied; they are unaffected by the subtraction
                *conv\l = n : cnt = 2
              EndIf
              *out = *conv
            CompilerElse
              If n > 255 : *conv\l = '?'
              Else : *conv\l = n
              EndIf
              cnt = 1
            CompilerEndIf
            ; pointer
          Case 'p'
            copymode = #_AsciiCopy
            _fmt\l0 = $2a2e2325
            CompilerIf #PB_Compiler_Processor = #PB_Processor_x86
              _fmt\l1 = $78 : prec = 8
            CompilerElse
              CompilerIf #PB_Compiler_OS = #PB_OS_Windows
                _fmt\l1 = $78343649 : prec = 16
              CompilerElse
                _fmt\l1 = $786c6c : prec = 16
              CompilerEndIf
            CompilerEndIf
            cnt = snprintf_q(*conv, 831, @_fmt, prec, *a\q[arg_idx]) * #_CS
            ; %
          Case '%'
            copymode = #_AsciiCopy
            ; arg_idx is decreased to cancel the increment after the loop
            *conv\l = '%' : cnt = #_CS : arg_idx-1
            ; return number of characters written so far
          Case 'n'
            copymode = #_NoCopy
            *tmp = *a\q[arg_idx]
            If *tmp
              CompilerIf #_CS = 2
                *tmp\l = (b\cur - b\mem) >> 1
              CompilerElse
                *tmp\l = (b\cur - b\mem)
              CompilerEndIf
            EndIf
            ; unrecognized character
          Default
            ; Leave *in on this character (it is handled as normal text by the
            ; main loop) and cancel the argument increment after the loop.
            arg_idx - 1
            Break
        EndSelect
        *in + #_CS
      Until copymode
      If arg_idx < 10 : arg_idx + 1 : EndIf
    Else
      *out = *in
      ; [ASM Scan for % or end of string]
      mov rdx, [p.p_in]
      CompilerIf #_CS=2
        sub rdx, 2
        !.scan_l0:
        add rdx, 2
        movzx eax, word [rdx]
      CompilerElse
        sub rdx, 1
        !.scan_l0:
        add rdx, 1
        movzx eax, byte [rdx]
      CompilerEndIf
      !cmp eax, '%'
      !je .scan_l1
      !cmp eax, 0
      !jne .scan_l0
      !.scan_l1:
      mov [p.p_in], rdx
      ; [/ASM]
      ; literal text is copied unpadded
      width = 0 : cnt = *in - *out
      copymode = #_MemCopy
    EndIf
    If copymode > 0
      ; Verify free buffer space
      ; From here on width is the number of padding bytes (0 when not needed)
      width = width * #_CS - cnt
      If width < 0 : width = 0 : EndIf
      required = cnt + width + #_CS
      If b\end - b\cur < required
        ; double the buffer size until the pending output fits
        n = b\end - b\mem
        required + b\cur - b\mem
        While n < required : n << 1 : Wend
        *tmp = realloc(b\mem, n)
        If *tmp
          b\cur + *tmp - b\mem
          b\mem = *tmp
          b\end = *tmp + n
        Else
          Break
        EndIf
      EndIf
      ; Copy to buffer
      ; NOTE(review): padding is written in front of the complete converted text,
      ; so with '0' padding a sign or 0x prefix ends up after the zeros.
      If width And leftalign = #False
        CompilerIf #_CS = 2
          FillMemory(b\cur, width, pad, #PB_Word)
        CompilerElse
          FillMemory(b\cur, width, pad)
        CompilerEndIf
        b\cur + width
      EndIf
      If cnt
        If copymode = #_MemCopy Or #_CS = 1
          memcpy(b\cur, *out, cnt)
        Else
          *tmp = b\cur
          ; [ASM Copy ascii to unicode]
          ; All locals are loaded before rbx is pushed. The early exit label is
          ; placed after the pop so that path never pops without a matching push.
          !mov ecx, [p.v_cnt]
          !sub ecx, 2
          !js .atou_l1
          mov rdx, [p.p_tmp]
          mov rax, [p.p_out]
          push rbx
          !.atou_l0:
          movzx ebx, byte [rax]
          mov [rdx], bx
          add rax, 1
          add rdx, 2
          !sub ecx, 2
          !jnc .atou_l0
          pop rbx
          !.atou_l1:
          ; [/ASM]
        EndIf
        b\cur + cnt
      EndIf
      If width And leftalign
        ; no '0' padding allowed when left aligned
        If pad = '0' : pad = ' ' : EndIf
        CompilerIf #_CS = 2
          FillMemory(b\cur, width, pad, #PB_Word)
        CompilerElse
          FillMemory(b\cur, width, pad)
        CompilerEndIf
        b\cur + width
      EndIf
    EndIf
  Wend
  ; Copy buffer to result
  b\cur\c = 0
  result = PeekS(b\mem)
  free(b\mem) : free(*a)
  ProcedureReturn result
EndProcedure
EndModule