bitwise trie

Share your advanced PureBasic knowledge/code with the community.
User avatar
idle
Always Here
Always Here
Posts: 5840
Joined: Fri Sep 21, 2007 5:52 am
Location: New Zealand

bitwise trie

Post by idle »

bitwise trie a fast memory efficient replacement to maps using integer keys

A bitwise Trie (reTRIEeval) is a basically a form of tree which maps keys by their bit values
and stores the item at the last node along the traversal path.
In terms of complexity a bitwise trie is O(1) and a map is O(1) but degrades towards O(n) when fully loaded

Code: Select all

; bitwise Trie wilbert, idle
v1.3
; Walk walks over all allocated keys
; keys are always allocated 4 at a time

Structure TrieNode   ; 4 * 4 bytes
  n.l[4]
EndStructure

Structure Trie
  *vt.l 
  nodes.TrieNode[256] ; offset 0
  firstMem.l          ; offset 4096
  currentMem.l        ; offset 4100
  memCount.l          ; offset 4104
  freeNodes.l         ; offset 4108
  nextNode.l          ; offset 4112
EndStructure

Structure TrieMem
  nextMem.l           ; offset 0
  m.Long[4092]        ; offset 4
EndStructure

Declare Trie_clear(*this.Trie)
Declare Trie_Free(*this.Trie)
Declare Trie_New()
Declare Trie_SetValue(*this.Trie,key.l, value.l)
Declare.l Trie_GetValue(*this.Trie, key.l)
Declare.s Trie_GetMemSize(*this.Trie)
Declare Trie_Walk(*this.Trie, *callback.l, userInfo.l = 0)

Interface Trie_Class
  Clear()
  Free()
  Set(key.l,value.l)
  Get(key.l)
  GetSize.s()
  Walk(*callback.l, userInfo.l = 0)
EndInterface

DataSection: vt_Trie:
  Data.l @Trie_Clear()
  Data.l @Trie_Free()
  Data.l @Trie_SetValue()
  Data.l @Trie_GetValue()
  Data.l @Trie_GetMemSize()
  Data.l @Trie_Walk()
EndDataSection

Procedure Trie_Clear(*this.Trie)
  Protected *mem.TrieMem, *nextMem.TrieMem, idx.l
  If *this
    *mem = *this\firstMem
    While *mem
      *nextMem = *mem\nextMem
      FreeMemory(*mem)
      *mem = *nextMem
    Wend
    FillMemory(*this\nodes[0], 4096)
    *this\firstMem = AllocateMemory(SizeOf(TrieMem))
    *this\currentMem = *this\firstMem
    *this\memCount = 1
    *this\freeNodes = 1022
    *this\nextNode = (*this\firstMem + 20) & $fffffff0
  EndIf
EndProcedure

Procedure Trie_Free(*this.Trie)
  Protected *mem.TrieMem, *nextMem.TrieMem, idx.l
  If *this
    *mem = *this\firstMem
    While *mem
      *nextMem = *mem\nextMem
      FreeMemory(*mem)
      *mem = *nextMem
    Wend
    FreeMemory(*this)
  EndIf
  ProcedureReturn 0
EndProcedure

Procedure Trie_New()
  Protected *this.Trie
  *this = AllocateMemory(SizeOf(Trie))
  If *this
    *this\vt = ?vt_Trie
    Trie_Clear(*this)
  EndIf 
  ProcedureReturn *this
EndProcedure

Procedure.s Trie_GetMemSize(*this.Trie)
  Protected sz.s,size.i
  If *this
    size = SizeOf(Trie) + *this\memCount * SizeOf(TrieMem)
    If size < 1048576
      sz = Str(size/1024) + " kb"
    Else
      sz = Str(size/1048576) + " mb"
    EndIf
  EndIf
  ProcedureReturn sz
EndProcedure

Procedure Trie_SetValue(*this.Trie, key.l, value.l)
  EnableASM
  MOV eax, *this
  MOV ecx, key
  !and eax, eax
  !jnz trie_set_ok
  ProcedureReturn             ; return if *this is zero
  !trie_set_ok:
  !lea edx, [eax + 4]
  !push ebx
  !push esi
  !push ebp
  !rol ecx, 12
  !mov esi, ecx
  !and esi, 0xffc
  !add esi, edx
  !mov bl, 11
  !trie_set_loop:
  !mov eax, [esi]
  !and eax, eax                 ; check for zero pointer
  !jnz trie_set_cont
  ; ** allocate **
  !mov eax, [edx + 4112]        ; eax = *this\nextNode
  !mov [esi], eax
  !add dword [edx + 4112], 16   ; *this\nextNode + 16
  !dec dword [edx + 4108]       ; *this\freeNodes - 1
  !jnz trie_set_cont
  !push ecx
  !push edx
  CompilerSelect #PB_Compiler_OS
    CompilerCase #PB_OS_Windows
      !push dword 16372
      !call _PB_AllocateMemory@4
    CompilerCase #PB_OS_Linux
      !push dword 16372
      !call PB_AllocateMemory
      !add esp, 4
    CompilerCase #PB_OS_MacOS
      !mov ebp, esp
      !and esp, 0xfffffff0
      !sub esp, 12
      !push dword 16372
      !call _PB_AllocateMemory
      !mov esp, ebp
  CompilerEndSelect
  !pop edx
  !mov ecx, [edx + 4100]        ; ecx = *this\currentMem
  !mov [ecx], eax               ; set nextMem
  !mov [edx + 4100], eax        ; set currentMem
  !add eax, 20
  !and eax, 0xfffffff0
  !mov [edx + 4112], eax        ; set nextNode (16 bytes aligned)
  !inc dword [edx + 4104]       ; memCount + 1
  !mov dword [edx + 4108], 1022 ; freeNodes = 1022
  !pop ecx
  !mov eax, [esi]
  ; ** continue **
  !trie_set_cont:
  !rol ecx, 2
  !mov ebp, esi
  !mov esi, ecx
  !and esi, 0xc
  !add esi, eax
  !dec bl
  !jnz trie_set_loop
  !and ecx, 0xc
  !and eax, 0xfffffff0
  !add eax, ecx
  !shr ecx, 2
  !bts dword [ebp], ecx
  !pop ebp
  !pop esi
  !pop ebx
  MOV edx, value
  MOV [eax], edx
  DisableASM
EndProcedure

Procedure.l Trie_GetValue(*this.Trie, key.l)
  EnableASM
  MOV eax, *this
  MOV ecx, key
  !and eax, eax
  !jnz trie_get_ok
  ProcedureReturn       ; return if *this is zero
  !trie_get_ok:
  !lea edx, [eax + 4]
  !push esi
  !rol ecx, 12
  !mov esi, ecx
  !and esi, 0xffc
  !add esi, edx
  !mov dl, 11
  !trie_get_loop:
  !mov eax, [esi]
  !and eax, 0xfffffff0    ; check for zero pointer
  !jz trie_get_exit
  !rol ecx, 2             ; next 2 bits
  !mov esi, ecx
  !and esi, 0xc
  !add esi, eax
  !dec dl
  !jnz trie_get_loop
  !mov eax, [esi]
  !trie_get_exit:
  !pop esi
  DisableASM
  ProcedureReturn
EndProcedure

Macro Trie_WalkRecurseMacro(n)
  !push esi
  !mov eax, ebx
  !and eax, 3
  !mov esi, [esi + eax*4]
  !and esi, esi
  !jz trie_walk_recurse_skip#n
  !call trie_walk_recurse
  !trie_walk_recurse_skip#n#:
  !pop esi
EndMacro

Macro Trie_WalkValueMacro(n)
  !bt esi, n
  !jnc trie_walk_novalue#n
  !call trie_walk_handle_cb
  !trie_walk_novalue#n#:
EndMacro

Procedure Trie_Walk(*this.Trie, *callback.l, userInfo.l = 0); *callback(key.l, value.l [,userInfo.l])
  EnableASM
  MOV eax, *this
  MOV ecx, *callback      ; ecx = callback
  MOV edx, userInfo
  !and eax, eax
  !jnz trie_walk_ok
  ProcedureReturn       ; return if *this is zero
  !trie_walk_ok:
  !push ebx
  !push esi
  !push edi
  !push ebp
  !mov edi, edx           ; edi = userInfo
  !lea edx, [eax + 4]     ; edx = root table
  !xor ebx, ebx           ; ebx = key
  !trie_walk_loop:
  !mov esi, [edx + ebx*4]
  !and esi, esi
  !jz trie_walk_skip_root
  !or ebx, 0x400
  !call trie_walk_recurse
  !and ebx, 0x3ff
  !trie_walk_skip_root:
  !inc ebx
  !cmp ebx, 0x400
  !jb trie_walk_loop
  !pop ebp
  !pop edi
  !pop esi
  !pop ebx
  ProcedureReturn
  !trie_walk_recurse:
  !shl ebx, 2
  !jc trie_walk_value
  Trie_WalkRecurseMacro(0)
  !inc ebx
  Trie_WalkRecurseMacro(1)
  !inc ebx
  Trie_WalkRecurseMacro(2)
  !inc ebx
  Trie_WalkRecurseMacro(3)
  !shr ebx, 2
  !ret 
  !trie_walk_value:
  Trie_WalkValueMacro(0)
  !inc ebx
  Trie_WalkValueMacro(1)
  !inc ebx
  Trie_WalkValueMacro(2)
  !inc ebx
  Trie_WalkValueMacro(3)
  !shr ebx, 2
  !bts ebx, 30
  !ret 
  ; universal callback
  !trie_walk_handle_cb:
  !mov eax, ebx
  !and eax, 3
  !mov ebp, esi
  !and ebp, 0xfffffff0
  !mov eax, [ebp + eax*4]
  !push ecx
  !push edx
  !mov ebp, esp
  !and esp, 0xfffffff0
  !sub esp, 4
  !push edi     ; edi = userInfo
  !push eax     ; eax = value
  !push ebx     ; ebx = key
  !call ecx     ; ecx = callback
  !mov esp, ebp
  !pop edx
  !pop ecx
  !ret
EndProcedure

Global size = 1024*1024
Global mt.Trie_Class = Trie_New()
Global NewMap mp(size)

Procedure cb(key.l, value.l)
EndProcedure

start = ElapsedMilliseconds()
For a = 0 To size
  mt\Set(a,size-a)
Next

stop = ElapsedMilliseconds()
Trie_Add = stop-start

start = ElapsedMilliseconds()
For a = 0 To size
  mp(Str(a))=size-a; this line results in a crash on OS X when compiling without debug
Next
stop = ElapsedMilliseconds()
Map_add = Stop-start

start = ElapsedMilliseconds()
For a = 0 To size
  x = mt\Get(a)
Next

stop = ElapsedMilliseconds()
Trie_Look = stop-start

start = ElapsedMilliseconds()
For a = 0 To size
  x = mp(Str(a))
Next

stop = ElapsedMilliseconds()
map_Look = stop-start

start = ElapsedMilliseconds()
mt\Walk(@cb())
stop = ElapsedMilliseconds()
Trie_Walk = stop-start

start = ElapsedMilliseconds()
ForEach mp()
  cb(0, mp())
Next
stop = ElapsedMilliseconds()
map_Walk = stop-start

times.s = "Trie Add = " + Str(Trie_Add) + " Map Add = "+ Str(Map_Add) + #CRLF$
times   + "Trie Look = " + Str(Trie_Look) + " Map Look = "+ Str(Map_Look) + #CRLF$
times   + "Trie Walk = " + Str(Trie_Walk) + " Map Walk = "+ Str(Map_Walk)
MessageRequester("Times", times)

mem.s = mt\GetSize()
MessageRequester("", mem)

mt\Free()
Last edited by idle on Sat Sep 10, 2011 10:17 pm, edited 11 times in total.
Windows 11, Manjaro, Raspberry Pi OS
Image
wilbert
PureBasic Expert
PureBasic Expert
Posts: 3942
Joined: Sun Aug 08, 2004 5:21 am
Location: Netherlands

Re: bitwise trie

Post by wilbert »

I believe you could replace the get function like this

Code: Select all

Procedure Get_Trie(*this.Trie,key.l)
  EnableASM
  MOV edx, *this
  MOV ecx, key
  DisableASM
  !mov edx, [edx + 4]; point edx to root node
  !push ebx
  !get_trie_loop:
  !mov ebx, edx
  !shr ecx, 1
  !cmovnc edx, [ebx + 4]; 0
  !cmovc edx, [ebx + 8]; 1
  !and edx, edx
  !jnz get_trie_loop
  !mov eax, [ebx]
  !pop ebx
  ProcedureReturn
EndProcedure
But the get function already was very fast.

You could make the Trie faster by starting with a table.
You could first take for example bit 0 - 8 (value 0 - 511) as a table index with a pointer to a node and start searching from bit 9.
User avatar
idle
Always Here
Always Here
Posts: 5840
Joined: Fri Sep 21, 2007 5:52 am
Location: New Zealand

Re: bitwise trie

Post by idle »

Thanks, I will add it, I don't think I'd have ever thought of doing it like that, nice!
I had tried using a table instead of shr 1 but it appeared to run slower, I like what you've done there.
took me a while to grok it.
Last edited by idle on Mon Sep 05, 2011 8:12 pm, edited 1 time in total.
Windows 11, Manjaro, Raspberry Pi OS
Image
wilbert
PureBasic Expert
PureBasic Expert
Posts: 3942
Joined: Sun Aug 08, 2004 5:21 am
Location: Netherlands

Re: bitwise trie

Post by wilbert »

In a lot of situations the key values are probably close to eachother.
I don't know if I'm thinking correctly but wouldn't it require less memory allocation in this case to work the other way around and shift left instead of right so the most significant bits are close to the root node and the least significant ones close to the end nodes ?
User avatar
idle
Always Here
Always Here
Posts: 5840
Joined: Fri Sep 21, 2007 5:52 am
Location: New Zealand

Re: bitwise trie

Post by idle »

I don't know, yes that would probably make quite a bit of difference
otherwise if the cost of a reverse bit scan to find the first set bit isn't to much, then it would only do what required

Yes that makes its heaps faster!
Windows 11, Manjaro, Raspberry Pi OS
Image
User avatar
idle
Always Here
Always Here
Posts: 5840
Joined: Fri Sep 21, 2007 5:52 am
Location: New Zealand

Re: bitwise trie

Post by idle »

updated, nearly as fast as an allocated map now!
Windows 11, Manjaro, Raspberry Pi OS
Image
wilbert
PureBasic Expert
PureBasic Expert
Posts: 3942
Joined: Sun Aug 08, 2004 5:21 am
Location: Netherlands

Re: bitwise trie

Post by wilbert »

There seems to be a little problem with the return value when the max key is used.

Here's my attempt on it.
It uses a table, allocates more memory at once and handles 2 bits at a time instead of 1.
In theory that should improve performance but I don't know if it really does :shock:
I try to keep my code OS X compatible and for some reason the object like structure you are using (interface) crashes when compiled when debug is turned off.
For that reason I haven't done a speed comparison.

Code: Select all

Structure TrieNode; 4 * 4 bytes
  n.l[4]
EndStructure

Structure Trie
  nodes.TrieNode[256]
  firstMem.l
  currentMem.l
  memCount.l
  freeNodes.l
  nextNode.l
EndStructure

Structure TrieMem
  m.TrieNode[1023]
  nextMem.l
EndStructure

Procedure Trie_Clear(*trie.Trie)
  Protected *mem.TrieMem, *nextMem.TrieMem, idx.l
  *mem = *trie\firstMem
  While *mem
    *nextMem = *mem\nextMem
    FreeMemory(*mem)
    *mem = *nextMem
  Wend
  FillMemory(*trie, 4096)
  *trie\firstMem = AllocateMemory(SizeOf(TrieMem))
  *trie\currentMem = *trie\firstMem
  *trie\memCount = 1
  *trie\freeNodes = 1023
  *trie\nextNode = *trie\currentMem
EndProcedure

Procedure.l Trie_New()
  Protected *trie.Trie
  *trie = AllocateMemory(SizeOf(Trie))
  Trie_Clear(*trie)
  ProcedureReturn *trie
EndProcedure

Procedure.l Trie_GetMemSize(*trie.Trie)
  ProcedureReturn SizeOf(Trie) + *trie\memCount * SizeOf(TrieMem)
EndProcedure

Procedure Trie_SetValue(*trie.Trie, key.l, value.l)
  Protected *node.TrieNode, *mem.TrieMem, *ptr, idx.l, b.l
  *node = *trie\nodes[key >> 24 & $ff]
  b = 24
  While b
    b - 2
    idx = key >> b & 3
    *ptr = *node\n[idx]
    If *ptr
      *node = *ptr
    Else
      Break
    EndIf
  Wend
  While b
    *ptr = *trie\nextNode
    *trie\freeNodes - 1
    If *trie\freeNodes
      *trie\nextNode + 16
    Else
      *mem = *trie\currentMem
      *mem\nextMem = AllocateMemory(SizeOf(TrieMem))
      *trie\memCount + 1
      *trie\currentMem = *mem\nextMem
      *trie\freeNodes = 1023
      *trie\nextNode = *trie\currentMem
    EndIf
    *node\n[idx] = *ptr
    *node = *ptr
    b - 2
    idx = key >> b & 3
  Wend
  *node\n[idx] = value
EndProcedure

Procedure.l Trie_GetValue(*trie.Trie, key.l)
  EnableASM
  MOV edx, *trie
  MOV ecx, key
  DisableASM
  !push esi
  !rol ecx, 12
  !mov esi, ecx
  !and esi, 0xffc
  !add esi, edx
  !mov dl, 12
  !trie_get_loop:
  !mov eax, [esi]
  !and eax, eax; check for zero pointer
  !jz trie_get_exit
  !rol ecx, 2; next 2 bits
  !mov esi, ecx
  !and esi, 0xc
  !add esi, eax
  !dec dl
  !jnz trie_get_loop
  !trie_get_exit:
  !pop esi
  ProcedureReturn  
EndProcedure


; test the code

*myTrie.Trie = Trie_New()

For i = 0 To 65000
  Trie_SetValue(*myTrie, i, i << 16)
Next

For i = 0 To 65000
  v = Trie_GetValue(*myTrie, i)
  If i << 16 <> v
    Debug "error" 
  EndIf
Next

Debug Trie_GetMemSize(*myTrie)

Trie_Clear(*myTrie)
Edit:
I'm simply using a table for the most significant bits so the amount of steps to get to the result decreases.
Wikipedia suggests using something like the ASM instruction BSR to scan for the most significant bit that is set and built a small index table based on that.
Especially when dealing with small key values this would increase speed since there would be very little steps to get to the result in those cases with this approach.
Last edited by wilbert on Tue Sep 06, 2011 4:26 pm, edited 3 times in total.
Foz
Addict
Addict
Posts: 1359
Joined: Tue Nov 13, 2007 12:42 pm
Location: Manchester, UK

Re: bitwise trie

Post by Foz »

What can I say?

This was stemmed off from me asking if Maps were really the best way of handling integer lookups and all I can do is bask in the footsteps these giants!

You guys are insanely awesomely much more cleverer than me! :mrgreen:
User avatar
idle
Always Here
Always Here
Posts: 5840
Joined: Fri Sep 21, 2007 5:52 am
Location: New Zealand

Re: bitwise trie

Post by idle »

Here's my attempt on it.
It uses a table, allocates more memory at once and handles 2 bits at a time instead of 1.
In theory that should improve performance but I don't know if it really does :shock:
I try to keep my code OS X compatible and for some reason the object like structure you are using (interface) crashes when compiled when debug is turned off.
For that reason I haven't done a speed comparison.
That's pretty quick!
I don't know why the interface version would crashed out on osx, weird! (it's just by habit I do it that way)
I'll replace the code at the top if thats ok
Windows 11, Manjaro, Raspberry Pi OS
Image
User avatar
idle
Always Here
Always Here
Posts: 5840
Joined: Fri Sep 21, 2007 5:52 am
Location: New Zealand

Re: bitwise trie

Post by idle »

added remove and free

Code: Select all

;bitwise Trie wilbert  

Structure TrieNode
  n.l[4]
EndStructure

Structure Trie
  nodes.TrieNode[256]
  firstMem.l
  currentMem.l
  memCount.l
  freeNodes.l
  nextNode.l
EndStructure

Structure TrieMem
  m.TrieNode[1023]
  nextMem.l
EndStructure

Procedure Trie_Clear(*Trie.Trie)
  Protected *mem.TrieMem, *nextMem.TrieMem, idx.l 
  *mem = *trie\firstMem
  While *mem
    *nextMem = *mem\nextMem
    FreeMemory(*mem)
    *mem = *nextMem
  Wend
  FillMemory(*trie, 4096)
  *trie\firstMem = AllocateMemory(SizeOf(TrieMem))
  *trie\currentMem = *trie\firstMem
  *trie\memCount = 1
  *trie\freeNodes = 1023
  *trie\nextNode = *trie\currentMem
EndProcedure

Procedure Trie_Free(*Trie.Trie)
  Protected *mem.TrieMem, *nextMem.TrieMem, idx.l 
 *mem = *trie\firstMem
  While *mem
    *nextMem = *mem\nextMem
     FreeMemory(*mem)
    *mem = *nextMem
  Wend
  FreeMemory(*trie) 
  *trie=0
EndProcedure

Procedure.l Trie_New()
  Protected *trie.Trie
  *trie = AllocateMemory(SizeOf(Trie))
  Trie_Clear(*trie)
  ProcedureReturn *trie
EndProcedure

Procedure.s Trie_GetMemSize(*trie.Trie) 
  Protected sz.s,size.i 
  size = SizeOf(Trie) + *trie\memCount * SizeOf(TrieMem)
  If size < 1024 
    sz = Str(size) + " bytes" 
  ElseIf size < 1048576
    sz = Str(size/1024) + " kb" 
  Else 
    sz = Str(size/1048576) + " mb" 
  EndIf 
  ProcedureReturn sz
EndProcedure 

Procedure Trie_SetValue(*trie.Trie, key.l, value.l)
  Protected *node.TrieNode, *mem.TrieMem, *ptr, idx.l, b.l 
   
  *node = *trie\nodes[key >> 24 & $ff]
  b = 24
  While b
    b - 2
    idx = key >> b & 3
    *ptr = *node\n[idx]
    If *ptr
      *node = *ptr
    Else
      Break
    EndIf
  Wend
  While b
    *ptr = *trie\nextNode
    *trie\freeNodes - 1
    If *trie\freeNodes
      *trie\nextNode + 16
    Else
      *mem = *trie\currentMem
      *mem\nextMem = AllocateMemory(SizeOf(TrieMem))
      *trie\memCount + 1
      *trie\currentMem = *mem\nextMem
      *trie\freeNodes = 1023
      *trie\nextNode = *trie\currentMem
    EndIf
    *node\n[idx] = *ptr
    *node = *ptr
    b - 2
    idx = key >> b & 3
  Wend
  *node\n[idx] = value
EndProcedure

Procedure.l Trie_GetValue(*trie.Trie, key.l)
   
  EnableASM
  MOV edx, *trie
  MOV ecx, key
  DisableASM
  !push esi
  !rol ecx, 12
  !mov esi, ecx
  !and esi, 0xffc
  !add esi, edx
  !mov dl, 12
  !trie_get_loop:
  !mov eax, [esi]
  !and eax, eax; check for zero pointer
  !jz trie_get_exit
  !rol ecx, 2; next 2 bits
  !mov esi, ecx
  !and esi, 0xc
  !add esi, eax
  !dec dl
  !jnz trie_get_loop
  !trie_get_exit:
  !pop esi
  ProcedureReturn 
EndProcedure

Procedure.l Trie_RemoveValue(*trie.Trie, key.l)
  Protected *node.TrieNode, *mem.TrieMem, *ptr, idx.l, b.l
  *node = *trie\nodes[key >> 24 & $ff]
  b = 24
  While b > 2
    b - 2
    idx = key >> b & 3
    *ptr = *node\n[idx]
    If *ptr
      *node = *ptr
      
    Else
      Break
    EndIf
  Wend
  If *node\n[idx] 
     *node\n[idx] = 0
  EndIf  
     
EndProcedure

Global size = 65000 
Global NewMap mp(size)  

Global *mt.Trie = Trie_New()

start = ElapsedMilliseconds()
For a = 0 To size
 Trie_SetValue(*mt,a,size-a) 
Next 
stop = ElapsedMilliseconds() 
Trie_Add = stop-start 

start = ElapsedMilliseconds()
For a = 0 To size
  mp(Str(a))=size-a
Next 
stop = ElapsedMilliseconds() 
Map_add = Stop-start  

start = ElapsedMilliseconds()
For a = 0 To size
  x =Trie_GetValue(*mt,a)
Next 
 
stop = ElapsedMilliseconds()
Trie_time = stop-start 

start = ElapsedMilliseconds()
For a = 0 To size
  x = mp(Str(a))
Next 

stop = ElapsedMilliseconds()
map_time = stop-start 

MessageRequester("Times","Trie Add = " + Str(Trie_Add) + " Trie Look = "+ Str(Trie_time)+#CRLF$+" Map Add= "+ Str(Map_add) + " Map look= " + Str(map_time))


mem.s = Trie_GetMemSize(*mt) 
MessageRequester("",mem) 

Trie_Free(*mt)
Windows 11, Manjaro, Raspberry Pi OS
Image
wilbert
PureBasic Expert
PureBasic Expert
Posts: 3942
Joined: Sun Aug 08, 2004 5:21 am
Location: Netherlands

Re: bitwise trie

Post by wilbert »

It might have been not your interface at all. It seems now the crash on OS X has to do with the map.
What makes things a bit complicated sometimes on OS X is that the value of the stack pointer has to be aligned at 16 bytes at the moment a call is made.

Is there any advantage of the remove function over setting a value to 0 ?
It seems to it would only take up extra memory in case the nodes have to be recreated again when a value is set after it resulting in more memory use.
I can however see an advantage of it if there would be a function to iterate over all values the trie has with a callback procedure.
User avatar
idle
Always Here
Always Here
Posts: 5840
Joined: Fri Sep 21, 2007 5:52 am
Location: New Zealand

Re: bitwise trie

Post by idle »

No real reason but when I tried to set an exiting key to 0 it was resulting in invalid memory.
In the remove it's while b > 2 instead of while b

yes it gets IMA's the same on linux sometimes, it can be irritating!
There's no need for the interface, it's just habit.
Windows 11, Manjaro, Raspberry Pi OS
Image
wilbert
PureBasic Expert
PureBasic Expert
Posts: 3942
Joined: Sun Aug 08, 2004 5:21 am
Location: Netherlands

Re: bitwise trie

Post by wilbert »

I tried to convert the set procedure to ASM as well

Code: Select all

;bitwise Trie wilbert  

Structure TrieNode   ; 4 * 4 bytes
  n.l[4]
EndStructure

Structure Trie
  nodes.TrieNode[256] ; offset 0
  firstMem.l          ; offset 4096
  currentMem.l        ; offset 4100
  memCount.l          ; offset 4104
  freeNodes.l         ; offset 4108
  nextNode.l          ; offset 4112
EndStructure

Structure TrieMem
  m.TrieNode[1023]    ; offset 0
  nextMem.l           ; offset 16368
EndStructure

Procedure Trie_Clear(*trie.Trie)
  Protected *mem.TrieMem, *nextMem.TrieMem, idx.l 
  *mem = *trie\firstMem
  While *mem
    *nextMem = *mem\nextMem
    FreeMemory(*mem)
    *mem = *nextMem
  Wend
  FillMemory(*trie, 4096)
  *trie\firstMem = AllocateMemory(SizeOf(TrieMem))
  *trie\currentMem = *trie\firstMem
  *trie\memCount = 1
  *trie\freeNodes = 1023
  *trie\nextNode = *trie\currentMem
EndProcedure

Procedure Trie_Free(*trie.Trie)
  Protected *mem.TrieMem, *nextMem.TrieMem, idx.l 
  *mem = *trie\firstMem
  While *mem
    *nextMem = *mem\nextMem
     FreeMemory(*mem)
    *mem = *nextMem
  Wend
  FreeMemory(*trie) 
  *trie = 0
EndProcedure

Procedure.l Trie_New()
  Protected *trie.Trie
  *trie = AllocateMemory(SizeOf(Trie))
  Trie_Clear(*trie)
  ProcedureReturn *trie
EndProcedure

Procedure.s Trie_GetMemSize(*trie.Trie) 
  Protected sz.s,size.i 
  size = SizeOf(Trie) + *trie\memCount * SizeOf(TrieMem)
  If size < 1024 
    sz = Str(size) + " bytes" 
  ElseIf size < 1048576
    sz = Str(size/1024) + " kb" 
  Else 
    sz = Str(size/1048576) + " mb" 
  EndIf 
  ProcedureReturn sz
EndProcedure 

Procedure Trie_SetValue(*trie.Trie, key.l, value.l)
  EnableASM
  MOV edx, *trie
  MOV ecx, key
  !push ebx
  !push esi
  !rol ecx, 12
  !mov esi, ecx
  !and esi, 0xffc
  !add esi, edx
  !mov bl, 11
  !trie_set_loop:
  !mov eax, [esi]
  !and eax, eax                 ; check for zero pointer
  !jnz trie_set_cont
  ; ** allocate **
  !mov eax, [edx + 4112]        ; eax = *trie\nextNode
  !mov [esi], eax
  !add dword [edx + 4112], 16   ; *trie\nextNode + 16
  !dec dword [edx + 4108]       ; *trie\freeNodes - 1
  !jnz trie_set_cont
  !push ecx
  !push edx
CompilerIf #PB_Compiler_OS = #PB_OS_Windows
  !push dword 16372
  !call _PB_AllocateMemory@4
CompilerElse
  !sub esp, 4
  !push dword 16372
  !call _PB_AllocateMemory
  !add esp, 8
CompilerEndIf
  !pop edx
  !mov ecx, [edx + 4100]        ; ecx = *trie\currentMem
  !mov [ecx + 16368], eax       ; set nextMem
  !mov [edx + 4100], eax        ; set currentMem
  !mov [edx + 4112], eax        ; set nextNode
  !inc dword [edx + 4104]       ; memCount + 1
  !mov dword [edx + 4108], 1023 ; freeNodes = 1023
  !pop ecx
  !mov eax, [esi]
  ; ** continue **
  !trie_set_cont:
  !rol ecx, 2
  !mov esi, ecx
  !and esi, 0xc
  !add esi, eax
  !dec bl
  !jnz trie_set_loop
  !mov eax, esi
  !pop esi
  !pop ebx
  MOV edx, value
  MOV [eax], edx
  DisableASM
EndProcedure

Procedure.l Trie_GetValue(*trie.Trie, key.l)
  EnableASM
  MOV edx, *trie
  MOV ecx, key
  !push esi
  !rol ecx, 12
  !mov esi, ecx
  !and esi, 0xffc
  !add esi, edx
  !mov dl, 12
  !trie_get_loop:
  !mov eax, [esi]
  !and eax, eax       ; check for zero pointer
  !jz trie_get_exit
  !rol ecx, 2         ; next 2 bits
  !mov esi, ecx
  !and esi, 0xc
  !add esi, eax
  !dec dl
  !jnz trie_get_loop
  !trie_get_exit:
  !pop esi
  DisableASM
  ProcedureReturn
EndProcedure

Procedure.l Trie_Remove(*trie.Trie, key.l)
  Protected *node.TrieNode, *mem.TrieMem, *ptr, idx.l, b.l 
  *node = *trie\nodes[key >> 24 & $ff]
  b = 24
  While b > 2
    b - 2
    idx = key >> b & 3
    *ptr = *node\n[idx]
    If *ptr
      *node = *ptr
      *lnode = *ptr  
    Else
      Break
    EndIf
  Wend
  If *node\n[idx] 
     *node\n[idx] = 0
  EndIf  
EndProcedure

Global *mt.Trie = Trie_New()

Global size = 1024*1024 
Global NewMap mp(size)  

Global *mt.Trie = Trie_New()

start = ElapsedMilliseconds()
For a = 0 To size
Trie_SetValue(*mt,a,size-a) 
Next 

stop = ElapsedMilliseconds() 
Trie_Add = stop-start 

start = ElapsedMilliseconds()
For a = 0 To size
  mp(Str(a))=size-a; this line results in a crash on OS X when compiling without debug
Next 
stop = ElapsedMilliseconds() 
Map_add = Stop-start  

start = ElapsedMilliseconds()
For a = 0 To size
  x =Trie_GetValue(*mt,a)
Next 

stop = ElapsedMilliseconds()
Trie_time = stop-start 

start = ElapsedMilliseconds()
For a = 0 To size
  x = mp(Str(a))
Next 

stop = ElapsedMilliseconds()
map_time = stop-start 

MessageRequester("Times","Trie Add = " + Str(Trie_Add) + " Trie Look = "+ Str(Trie_time)+#CRLF$+" Map Add= "+ Str(Map_add) + " Map look= " + Str(map_time))

mem.s = Trie_GetMemSize(*mt) 
MessageRequester("",mem) 

Trie_Free(*mt)
The memory allocation uses a CompilerIf .
I don't know how it should look for Linux.

It's hard to do a good speed comparison with a Map.
The Str function to produce the key already takes up a lot of time. On the other hand, the map doesn't have to allocate a lot of memory.
A NewMap(1024 * 1024) allocates about 4M in advance if I saw it correctly.
User avatar
blueb
Addict
Addict
Posts: 1111
Joined: Sat Apr 26, 2003 2:15 pm
Location: Cuernavaca, Mexico

Re: bitwise trie

Post by blueb »

I was checking out the value of memory after Trie_Free(*mt)
with:

Code: Select all

mem2.s = Trie_GetMemSize(*mt)
MessageRequester("",mem2)
It still displays 5megs of space used. :?:
- It was too lonely at the top.

System : PB 6.21(x64) and Win 11 Pro (x64)
Hardware: AMD Ryzen 9 5900X w/64 gigs Ram, AMD RX 6950 XT Graphics w/16gigs Mem
wilbert
PureBasic Expert
PureBasic Expert
Posts: 3942
Joined: Sun Aug 08, 2004 5:21 am
Location: Netherlands

Re: bitwise trie

Post by wilbert »

You're not supposed to use the trie after it has been freed.
I was assuming the last line *trie = 0 in the Trie_Free procedure Idle created would clear the pointer so it couldn't be used anymore but it doesn't seem to do that.

If you want to keep using *mt, use Trie_Clear(*mt) instead of Trie_Free(*mt).
That keeps the trie itself and one block of allocated memory but clears everything else. After a Trie_Clear(*mt) the size should be down to about 20 kb.
Post Reply