Length Described Variable Length Integer

Share your advanced PureBasic knowledge/code with the community.
Thorium
Addict
Addict
Posts: 1305
Joined: Sat Aug 15, 2009 6:59 pm

Length Described Variable Length Integer

Post by Thorium »

This code stores a positive integer number in a variable size of memory.
The smaller the number the smaller it's size.
This is commonly known as VLI (variable length integer)

Here i made a variation of that idea i call LDVLI (Length Described Variable Length Integer).
The difference to VLI is that it stores the length in the least significant bits.
This has advantages and disadvantages. I use it on my own archive file format to save a few bytes.

Advantages compared to VLI:
  • In theory faster, because you know the size from the start of decoding. However my code is not well optimized, feel free to improve it.
  • More secure because you can easily get the size and check for buffer/file sizes before decoding. This makes it well suited for untrusted or potentially corrupted data. The code does not do it for you automatically, so it's only more secure if you do the checks.
  • Smaller size on big numbers. VLI needs a whole additional byte to store a 61bit number, so 9 byte. While LDVLI does it in 8 byte.
Disadvantages compared to VLI:
  • Can only store up to 61bit large integers. Which is plenty for many cases. However VLI can theoretically store unlimited size integers.
  • Bigger size on small numbers because LDVLI always uses 3 bit for the size descriptor, so can only store 5 bit integer in the first byte, while VLI can store 7 bit in the first byte.
Following code requires C-Backend because it uses inline C for the logical shift. Unfortunately PureBasic only offers arithmetic shift.

Code: Select all

;/------------------------------------------\
;| Length Described Variable Length Integer |
;|                                          |
;| Date: 30.08.2024                         |
;| PureBasic 6.11                           |
;\------------------------------------------/


;LdVli = Length Described Variable Length Integer
;The 3 least significant bit plus 1 describe the size of the number in bytes.
;Can store up to 61 bit unsigned numbers and takes 1 to 8 bytes to store.

EnableExplicit

Structure QuadBytes
  Bytes.a[8]
EndStructure

Structure LdVli
  Bytes.a[8]
EndStructure

Procedure.i WriteLdVli(QuadVar.q, *LdVli.LdVli)
  
  Protected Size.i
  Protected *QuadBytes.QuadBytes
  Protected ShiftedQuad.q
  
  !v_shiftedquad = (unsigned long long)v_quadvar << 3;
  *QuadBytes = @ShiftedQuad
  
  If (*QuadBytes\Bytes[7] <> 0)
    !p_ldvli->f_bytes[0] = (((unsigned long long)v_quadvar << 3) & 0xFF) | 7;
    !p_ldvli->f_bytes[1] = ((unsigned long long)v_quadvar >> 5) & 0xFF;
    !p_ldvli->f_bytes[2] = ((unsigned long long)v_quadvar >> 13) & 0xFF;
    !p_ldvli->f_bytes[3] = ((unsigned long long)v_quadvar >> 21) & 0xFF;
    !p_ldvli->f_bytes[4] = ((unsigned long long)v_quadvar >> 29) & 0xFF;
    !p_ldvli->f_bytes[5] = ((unsigned long long)v_quadvar >> 37) & 0xFF;
    !p_ldvli->f_bytes[6] = ((unsigned long long)v_quadvar >> 45) & 0xFF;
    !p_ldvli->f_bytes[7] = ((unsigned long long)v_quadvar >> 53) & 0xFF;
    Size = 8
    
  ElseIf (*QuadBytes\Bytes[6] <> 0)
    !p_ldvli->f_bytes[0] = (((unsigned long long)v_quadvar << 3) & 0xFF) | 6;
    !p_ldvli->f_bytes[1] = ((unsigned long long)v_quadvar >> 5) & 0xFF;
    !p_ldvli->f_bytes[2] = ((unsigned long long)v_quadvar >> 13) & 0xFF;
    !p_ldvli->f_bytes[3] = ((unsigned long long)v_quadvar >> 21) & 0xFF;
    !p_ldvli->f_bytes[4] = ((unsigned long long)v_quadvar >> 29) & 0xFF;
    !p_ldvli->f_bytes[5] = ((unsigned long long)v_quadvar >> 37) & 0xFF;
    !p_ldvli->f_bytes[6] = ((unsigned long long)v_quadvar >> 45) & 0xFF;
    Size = 7
    
  ElseIf (*QuadBytes\Bytes[5] <> 0)
    !p_ldvli->f_bytes[0] = (((unsigned long long)v_quadvar << 3) & 0xFF) | 5;
    !p_ldvli->f_bytes[1] = ((unsigned long long)v_quadvar >> 5) & 0xFF;
    !p_ldvli->f_bytes[2] = ((unsigned long long)v_quadvar >> 13) & 0xFF;
    !p_ldvli->f_bytes[3] = ((unsigned long long)v_quadvar >> 21) & 0xFF;
    !p_ldvli->f_bytes[4] = ((unsigned long long)v_quadvar >> 29) & 0xFF;
    !p_ldvli->f_bytes[5] = ((unsigned long long)v_quadvar >> 37) & 0xFF;
    Size = 6
    
  ElseIf (*QuadBytes\Bytes[4] <> 0)
    !p_ldvli->f_bytes[0] = (((unsigned long long)v_quadvar << 3) & 0xFF) | 4;
    !p_ldvli->f_bytes[1] = ((unsigned long long)v_quadvar >> 5) & 0xFF;
    !p_ldvli->f_bytes[2] = ((unsigned long long)v_quadvar >> 13) & 0xFF;
    !p_ldvli->f_bytes[3] = ((unsigned long long)v_quadvar >> 21) & 0xFF;
    !p_ldvli->f_bytes[4] = ((unsigned long long)v_quadvar >> 29) & 0xFF;
    Size = 5
    
  ElseIf (*QuadBytes\Bytes[3] <> 0)
    !p_ldvli->f_bytes[0] = (((unsigned long long)v_quadvar << 3) & 0xFF) | 3;
    !p_ldvli->f_bytes[1] = ((unsigned long long)v_quadvar >> 5) & 0xFF;
    !p_ldvli->f_bytes[2] = ((unsigned long long)v_quadvar >> 13) & 0xFF;
    !p_ldvli->f_bytes[3] = ((unsigned long long)v_quadvar >> 21) & 0xFF;
    Size = 4
    
  ElseIf (*QuadBytes\Bytes[2] <> 0)
    !p_ldvli->f_bytes[0] = (((unsigned long long)v_quadvar << 3) & 0xFF) | 2;
    !p_ldvli->f_bytes[1] = ((unsigned long long)v_quadvar >> 5) & 0xFF;
    !p_ldvli->f_bytes[2] = ((unsigned long long)v_quadvar >> 13) & 0xFF;
    Size = 3
    
  ElseIf (*QuadBytes\Bytes[1] <> 0)
    !p_ldvli->f_bytes[0] = (((unsigned long long)v_quadvar << 3) & 0xFF) | 1;
    !p_ldvli->f_bytes[1] = ((unsigned long long)v_quadvar >> 5) & 0xFF;
    Size = 2
    
  Else
    !p_ldvli->f_bytes[0] = ((unsigned long long)v_quadvar << 3) & 0xFF;
    Size = 1
    
  EndIf
  
  ProcedureReturn Size
  
EndProcedure

Procedure.i WriteLdVliToFile(File.i, QuadVar.q)
  
  Protected LdVli.LdVli
  Protected Size.i
  
  Size = WriteLdVli(QuadVar, LdVli)
  If (WriteData(File, LdVli, Size) <> Size)
    ProcedureReturn #False
  EndIf
  
  ProcedureReturn Size
    
EndProcedure

Procedure.i CalcLdVliSize(QuadVar.q)
  
  Protected *QuadBytes.QuadBytes
  
  !v_quadvar = (unsigned long long)v_quadvar << 3;
  *QuadBytes = @QuadVar
  
  If (*QuadBytes\Bytes[7] <> 0)
    ProcedureReturn 8
  ElseIf (*QuadBytes\Bytes[6] <> 0)
    ProcedureReturn 7
  ElseIf (*QuadBytes\Bytes[5] <> 0)
    ProcedureReturn 6
  ElseIf (*QuadBytes\Bytes[4] <> 0)
    ProcedureReturn 5
  ElseIf (*QuadBytes\Bytes[3] <> 0)
    ProcedureReturn 4
  ElseIf (*QuadBytes\Bytes[2] <> 0)
    ProcedureReturn 3
  ElseIf (*QuadBytes\Bytes[1] <> 0)
    ProcedureReturn 2
  Else
    ProcedureReturn 1
  EndIf
  
EndProcedure

Procedure.q ReadLdVli(*LdVli.LdVli, *Size.Integer)
  
  Protected QuadVar.q
  Protected Size.i
  
  *Size\i = (*LdVli\Bytes[0] & %00000111) + 1
  Select *Size\i
    
    Case 1
      !v_quadvar = p_ldvli->f_bytes[0] >> 3;
      
    Case 2
      !v_quadvar = (p_ldvli->f_bytes[0] >> 3) | ((unsigned long long)p_ldvli->f_bytes[1] << 5);
      
    Case 3
      !v_quadvar = (p_ldvli->f_bytes[0] >> 3) | ((unsigned long long)p_ldvli->f_bytes[1] << 5) | ((unsigned long long)p_ldvli->f_bytes[2] << 13);
      
    Case 4
      !v_quadvar = (p_ldvli->f_bytes[0] >> 3) | ((unsigned long long)p_ldvli->f_bytes[1] << 5) | ((unsigned long long)p_ldvli->f_bytes[2] << 13) | ((unsigned long long)p_ldvli->f_bytes[3] << 21);
      
    Case 5
      !v_quadvar = (p_ldvli->f_bytes[0] >> 3) | ((unsigned long long)p_ldvli->f_bytes[1] << 5) | ((unsigned long long)p_ldvli->f_bytes[2] << 13) | ((unsigned long long)p_ldvli->f_bytes[3] << 21) | ((unsigned long long)p_ldvli->f_bytes[4] << 29);
      
    Case 6
      !v_quadvar = (p_ldvli->f_bytes[0] >> 3) | ((unsigned long long)p_ldvli->f_bytes[1] << 5) | ((unsigned long long)p_ldvli->f_bytes[2] << 13) | ((unsigned long long)p_ldvli->f_bytes[3] << 21) | ((unsigned long long)p_ldvli->f_bytes[4] << 29) | ((unsigned long long)p_ldvli->f_bytes[5] << 37);
      
    Case 7
      !v_quadvar = (p_ldvli->f_bytes[0] >> 3) | ((unsigned long long)p_ldvli->f_bytes[1] << 5) | ((unsigned long long)p_ldvli->f_bytes[2] << 13) | ((unsigned long long)p_ldvli->f_bytes[3] << 21) | ((unsigned long long)p_ldvli->f_bytes[4] << 29) | ((unsigned long long)p_ldvli->f_bytes[5] << 37) | ((unsigned long long)p_ldvli->f_bytes[6] << 45);
      
    Case 8
      !v_quadvar = (p_ldvli->f_bytes[0] >> 3) | ((unsigned long long)p_ldvli->f_bytes[1] << 5) | ((unsigned long long)p_ldvli->f_bytes[2] << 13) | ((unsigned long long)p_ldvli->f_bytes[3] << 21) | ((unsigned long long)p_ldvli->f_bytes[4] << 29) | ((unsigned long long)p_ldvli->f_bytes[5] << 37) | ((unsigned long long)p_ldvli->f_bytes[6] << 45) | ((unsigned long long)p_ldvli->f_bytes[7] << 53);
      
  EndSelect
  
  ProcedureReturn QuadVar
    
EndProcedure

Procedure.i GetLdVliSizeFromFile(File.i, *Size.Integer)
  
  Protected FirstByte.a
  
  If (ReadData(File, @FirstByte, 1) <> 1)
    ProcedureReturn #False
  EndIf
  FileSeek(File, -1, #PB_Relative)
  
  *Size\i = (FirstByte & %00000111) + 1
  
  ProcedureReturn #True
  
EndProcedure

Procedure.i GetLdVliSize(*LdVli.LdVli)
  
  ProcedureReturn (*LdVli\Bytes[0] & %00000111) + 1
  
EndProcedure

Procedure.i ReadLdVliFromFile(File.i, *QuadVar.Quad, *Size.Integer)
  
  Protected LdVli.LdVli
  
  If (GetLdVliSizeFromFile(File, *Size) = #False)
    ProcedureReturn #False
  EndIf
  
  If (ReadData(File, LdVli, *Size\i) < *Size\i)
    ProcedureReturn #False
  EndIf
  
  *QuadVar\q = ReadLdVli(LdVli, *Size)
  
  ProcedureReturn #True
  
EndProcedure

Procedure TestVli()

  Protected LdVli.LdVli
  Protected TestNum.q
  Protected Size.i
  
  TestNum = 55951651515
  TestNum = TestNum & $1FFFFFFFFFFFFFFF ;cut to 61 bit
  
  Size = WriteLdVli(TestNum, LdVli)
  Debug "Size: " + Str(Size)
  Debug "TestNum: " + Str(TestNum)
  Debug "ReadBack: " + Str(ReadLdVli(LdVli, @Size))
  
EndProcedure

TestVli()