The smaller the number the smaller it's size.
This is commonly known as VLI (variable length integer)
Here i made a variation of that idea i call LDVLI (Length Described Variable Length Integer).
The difference to VLI is that it stores the length in the least significant bits.
This has advantages and disadvantages. I use it on my own archive file format to save a few bytes.
Advantages compared to VLI:
- In theory faster, because you know the size from the start of decoding. However my code is not well optimized, feel free to improve it.
- More secure because you can easily get the size and check for buffer/file sizes before decoding. This makes it well suited for untrusted or potentially corrupted data. The code does not do it for you automatically, so it's only more secure if you do the checks.
- Smaller size on big numbers. VLI needs a whole additional byte to store a 61bit number, so 9 byte. While LDVLI does it in 8 byte.
- Can only store up to 61bit large integers. Which is plenty for many cases. However VLI can theoretically store unlimited size integers.
- Bigger size on small numbers because LDVLI always uses 3 bit for the size descriptor, so can only store 5 bit integer in the first byte, while VLI can store 7 bit in the first byte.
Code: Select all
;/------------------------------------------\
;| Length Described Variable Length Integer |
;| |
;| Date: 30.08.2024 |
;| PureBasic 6.11 |
;\------------------------------------------/
;LdVli = Length Described Variable Length Integer
;The 3 least significant bit plus 1 describe the size of the number in bytes.
;Can store up to 61 bit unsigned numbers and takes 1 to 8 bytes to store.
EnableExplicit
Structure QuadBytes
Bytes.a[8]
EndStructure
Structure LdVli
Bytes.a[8]
EndStructure
Procedure.i WriteLdVli(QuadVar.q, *LdVli.LdVli)
Protected Size.i
Protected *QuadBytes.QuadBytes
Protected ShiftedQuad.q
!v_shiftedquad = (unsigned long long)v_quadvar << 3;
*QuadBytes = @ShiftedQuad
If (*QuadBytes\Bytes[7] <> 0)
!p_ldvli->f_bytes[0] = (((unsigned long long)v_quadvar << 3) & 0xFF) | 7;
!p_ldvli->f_bytes[1] = ((unsigned long long)v_quadvar >> 5) & 0xFF;
!p_ldvli->f_bytes[2] = ((unsigned long long)v_quadvar >> 13) & 0xFF;
!p_ldvli->f_bytes[3] = ((unsigned long long)v_quadvar >> 21) & 0xFF;
!p_ldvli->f_bytes[4] = ((unsigned long long)v_quadvar >> 29) & 0xFF;
!p_ldvli->f_bytes[5] = ((unsigned long long)v_quadvar >> 37) & 0xFF;
!p_ldvli->f_bytes[6] = ((unsigned long long)v_quadvar >> 45) & 0xFF;
!p_ldvli->f_bytes[7] = ((unsigned long long)v_quadvar >> 53) & 0xFF;
Size = 8
ElseIf (*QuadBytes\Bytes[6] <> 0)
!p_ldvli->f_bytes[0] = (((unsigned long long)v_quadvar << 3) & 0xFF) | 6;
!p_ldvli->f_bytes[1] = ((unsigned long long)v_quadvar >> 5) & 0xFF;
!p_ldvli->f_bytes[2] = ((unsigned long long)v_quadvar >> 13) & 0xFF;
!p_ldvli->f_bytes[3] = ((unsigned long long)v_quadvar >> 21) & 0xFF;
!p_ldvli->f_bytes[4] = ((unsigned long long)v_quadvar >> 29) & 0xFF;
!p_ldvli->f_bytes[5] = ((unsigned long long)v_quadvar >> 37) & 0xFF;
!p_ldvli->f_bytes[6] = ((unsigned long long)v_quadvar >> 45) & 0xFF;
Size = 7
ElseIf (*QuadBytes\Bytes[5] <> 0)
!p_ldvli->f_bytes[0] = (((unsigned long long)v_quadvar << 3) & 0xFF) | 5;
!p_ldvli->f_bytes[1] = ((unsigned long long)v_quadvar >> 5) & 0xFF;
!p_ldvli->f_bytes[2] = ((unsigned long long)v_quadvar >> 13) & 0xFF;
!p_ldvli->f_bytes[3] = ((unsigned long long)v_quadvar >> 21) & 0xFF;
!p_ldvli->f_bytes[4] = ((unsigned long long)v_quadvar >> 29) & 0xFF;
!p_ldvli->f_bytes[5] = ((unsigned long long)v_quadvar >> 37) & 0xFF;
Size = 6
ElseIf (*QuadBytes\Bytes[4] <> 0)
!p_ldvli->f_bytes[0] = (((unsigned long long)v_quadvar << 3) & 0xFF) | 4;
!p_ldvli->f_bytes[1] = ((unsigned long long)v_quadvar >> 5) & 0xFF;
!p_ldvli->f_bytes[2] = ((unsigned long long)v_quadvar >> 13) & 0xFF;
!p_ldvli->f_bytes[3] = ((unsigned long long)v_quadvar >> 21) & 0xFF;
!p_ldvli->f_bytes[4] = ((unsigned long long)v_quadvar >> 29) & 0xFF;
Size = 5
ElseIf (*QuadBytes\Bytes[3] <> 0)
!p_ldvli->f_bytes[0] = (((unsigned long long)v_quadvar << 3) & 0xFF) | 3;
!p_ldvli->f_bytes[1] = ((unsigned long long)v_quadvar >> 5) & 0xFF;
!p_ldvli->f_bytes[2] = ((unsigned long long)v_quadvar >> 13) & 0xFF;
!p_ldvli->f_bytes[3] = ((unsigned long long)v_quadvar >> 21) & 0xFF;
Size = 4
ElseIf (*QuadBytes\Bytes[2] <> 0)
!p_ldvli->f_bytes[0] = (((unsigned long long)v_quadvar << 3) & 0xFF) | 2;
!p_ldvli->f_bytes[1] = ((unsigned long long)v_quadvar >> 5) & 0xFF;
!p_ldvli->f_bytes[2] = ((unsigned long long)v_quadvar >> 13) & 0xFF;
Size = 3
ElseIf (*QuadBytes\Bytes[1] <> 0)
!p_ldvli->f_bytes[0] = (((unsigned long long)v_quadvar << 3) & 0xFF) | 1;
!p_ldvli->f_bytes[1] = ((unsigned long long)v_quadvar >> 5) & 0xFF;
Size = 2
Else
!p_ldvli->f_bytes[0] = ((unsigned long long)v_quadvar << 3) & 0xFF;
Size = 1
EndIf
ProcedureReturn Size
EndProcedure
Procedure.i WriteLdVliToFile(File.i, QuadVar.q)
Protected LdVli.LdVli
Protected Size.i
Size = WriteLdVli(QuadVar, LdVli)
If (WriteData(File, LdVli, Size) <> Size)
ProcedureReturn #False
EndIf
ProcedureReturn Size
EndProcedure
Procedure.i CalcLdVliSize(QuadVar.q)
Protected *QuadBytes.QuadBytes
!v_quadvar = (unsigned long long)v_quadvar << 3;
*QuadBytes = @QuadVar
If (*QuadBytes\Bytes[7] <> 0)
ProcedureReturn 8
ElseIf (*QuadBytes\Bytes[6] <> 0)
ProcedureReturn 7
ElseIf (*QuadBytes\Bytes[5] <> 0)
ProcedureReturn 6
ElseIf (*QuadBytes\Bytes[4] <> 0)
ProcedureReturn 5
ElseIf (*QuadBytes\Bytes[3] <> 0)
ProcedureReturn 4
ElseIf (*QuadBytes\Bytes[2] <> 0)
ProcedureReturn 3
ElseIf (*QuadBytes\Bytes[1] <> 0)
ProcedureReturn 2
Else
ProcedureReturn 1
EndIf
EndProcedure
Procedure.q ReadLdVli(*LdVli.LdVli, *Size.Integer)
Protected QuadVar.q
Protected Size.i
*Size\i = (*LdVli\Bytes[0] & %00000111) + 1
Select *Size\i
Case 1
!v_quadvar = p_ldvli->f_bytes[0] >> 3;
Case 2
!v_quadvar = (p_ldvli->f_bytes[0] >> 3) | ((unsigned long long)p_ldvli->f_bytes[1] << 5);
Case 3
!v_quadvar = (p_ldvli->f_bytes[0] >> 3) | ((unsigned long long)p_ldvli->f_bytes[1] << 5) | ((unsigned long long)p_ldvli->f_bytes[2] << 13);
Case 4
!v_quadvar = (p_ldvli->f_bytes[0] >> 3) | ((unsigned long long)p_ldvli->f_bytes[1] << 5) | ((unsigned long long)p_ldvli->f_bytes[2] << 13) | ((unsigned long long)p_ldvli->f_bytes[3] << 21);
Case 5
!v_quadvar = (p_ldvli->f_bytes[0] >> 3) | ((unsigned long long)p_ldvli->f_bytes[1] << 5) | ((unsigned long long)p_ldvli->f_bytes[2] << 13) | ((unsigned long long)p_ldvli->f_bytes[3] << 21) | ((unsigned long long)p_ldvli->f_bytes[4] << 29);
Case 6
!v_quadvar = (p_ldvli->f_bytes[0] >> 3) | ((unsigned long long)p_ldvli->f_bytes[1] << 5) | ((unsigned long long)p_ldvli->f_bytes[2] << 13) | ((unsigned long long)p_ldvli->f_bytes[3] << 21) | ((unsigned long long)p_ldvli->f_bytes[4] << 29) | ((unsigned long long)p_ldvli->f_bytes[5] << 37);
Case 7
!v_quadvar = (p_ldvli->f_bytes[0] >> 3) | ((unsigned long long)p_ldvli->f_bytes[1] << 5) | ((unsigned long long)p_ldvli->f_bytes[2] << 13) | ((unsigned long long)p_ldvli->f_bytes[3] << 21) | ((unsigned long long)p_ldvli->f_bytes[4] << 29) | ((unsigned long long)p_ldvli->f_bytes[5] << 37) | ((unsigned long long)p_ldvli->f_bytes[6] << 45);
Case 8
!v_quadvar = (p_ldvli->f_bytes[0] >> 3) | ((unsigned long long)p_ldvli->f_bytes[1] << 5) | ((unsigned long long)p_ldvli->f_bytes[2] << 13) | ((unsigned long long)p_ldvli->f_bytes[3] << 21) | ((unsigned long long)p_ldvli->f_bytes[4] << 29) | ((unsigned long long)p_ldvli->f_bytes[5] << 37) | ((unsigned long long)p_ldvli->f_bytes[6] << 45) | ((unsigned long long)p_ldvli->f_bytes[7] << 53);
EndSelect
ProcedureReturn QuadVar
EndProcedure
Procedure.i GetLdVliSizeFromFile(File.i, *Size.Integer)
Protected FirstByte.a
If (ReadData(File, @FirstByte, 1) <> 1)
ProcedureReturn #False
EndIf
FileSeek(File, -1, #PB_Relative)
*Size\i = (FirstByte & %00000111) + 1
ProcedureReturn #True
EndProcedure
Procedure.i GetLdVliSize(*LdVli.LdVli)
ProcedureReturn (*LdVli\Bytes[0] & %00000111) + 1
EndProcedure
Procedure.i ReadLdVliFromFile(File.i, *QuadVar.Quad, *Size.Integer)
Protected LdVli.LdVli
If (GetLdVliSizeFromFile(File, *Size) = #False)
ProcedureReturn #False
EndIf
If (ReadData(File, LdVli, *Size\i) < *Size\i)
ProcedureReturn #False
EndIf
*QuadVar\q = ReadLdVli(LdVli, *Size)
ProcedureReturn #True
EndProcedure
Procedure TestVli()
Protected LdVli.LdVli
Protected TestNum.q
Protected Size.i
TestNum = 55951651515
TestNum = TestNum & $1FFFFFFFFFFFFFFF ;cut to 61 bit
Size = WriteLdVli(TestNum, LdVli)
Debug "Size: " + Str(Size)
Debug "TestNum: " + Str(TestNum)
Debug "ReadBack: " + Str(ReadLdVli(LdVli, @Size))
EndProcedure
TestVli()