Inspired by this thread: http://www.purebasic.fr/english/viewtop ... 13&t=67525
Here is an implementation of a simple key-value pairs structure, similar maps, where keys are string and values are always integer.
Compared to PB native map, this simple structure Dict offers readily sorted Keys and the implemetation seems faster that PB maps in most cases, keys don't need to have uniform casing
I'd like to hear your feedback about possible bugs and speed test ... thanks
Code: Select all
; a Dict simulates a simple map() where key = string and value is integer
; a Dict is readily sorted
DeclareModule Dict
Declare.i New(Flag.i = #PB_String_NoCase, ReturnWhenNotFound.i = 0)
Declare.i AddKeyValue(Dict.i, Key.s, Value.i = 0) ; used to add/update a (key, value)
Declare.i ValueOfKey(Dict.i, Key.s)
Declare.i GetKeys(Dict.i, Array Ret.s(1))
Declare.i GetValues(Dict.i, Array Ret.i(1))
Declare.i DictSize(Dict.i) ; works like ArraySize(): 0 .. last indexed element
Declare.i ValueAtIndex(Dict.i, Index.i)
Declare.s KeyAtIndex(Dict.i, Index.i)
Declare.i Clear(Dict.i)
EndDeclareModule
Module Dict
EnableExplicit
#ChunkSize = 512*2
Structure TKeyValue
S.s
I.i
R.i ; addition rank, we keep the last one
EndStructure
Structure TDict
Array Items.TKeyValue(0)
Flag.i
Size.i ; actual size <= ArraySize(Items())
IsSorted.i
Missing.i
EndStructure
Global NewList Dicts.TDict()
Procedure.i New(Flag.i = #PB_String_NoCase, ReturnWhenNotFound.i = 0)
If ListSize(Dicts()) > 0 : LastElement(Dicts()) : EndIf
If AddElement(Dicts())
If Flag <> #PB_String_NoCase : Flag = #PB_String_CaseSensitive : EndIf
Dicts()\Flag = Flag
Dicts()\Missing = ReturnWhenNotFound
Dicts()\Size = -1
ProcedureReturn ListIndex(Dicts())+1
EndIf
ProcedureReturn 0
EndProcedure
Procedure.i AddKeyValue(Dict.i, Key.s, Value.i = 0)
Protected *D.TDict, n
Static add_rank.i
If Dict > 0 And SelectElement(Dicts(), Dict-1)
*D = @Dicts()
n = ArraySize(*D\Items())
If (*D\Size+1) > n
ReDim *D\Items(n+#ChunkSize)
If ArraySize(*D\Items()) < 0 : ProcedureReturn 0 : EndIf
EndIf
add_rank + 1
*D\Size = *D\Size + 1
*D\Items(*D\Size)\S = Key
*D\Items(*D\Size)\I = Value
*D\Items(*D\Size)\R = add_rank
*D\IsSorted = #False
EndIf
EndProcedure
Procedure.i sort_dict(*D.TDict)
Protected i,n, j, flg, cur.TKeyValue, Dim tmp.TKeyValue(0)
If Not *D\IsSorted
flg = #PB_Sort_Ascending
If *D\Flag = #PB_String_NoCase : flg | #PB_Sort_NoCase : EndIf
SortStructuredArray(*D\Items(), flg, OffsetOf(TKeyValue\S), TypeOf(TKeyValue\S), 0, *D\Size)
; reduce
cur = *D\Items(0)
n = *D\Size
Dim tmp(n):j=-1
For i=1 To n
If CompareMemoryString(@*D\Items(i)\S, @cur\S, *D\Flag) = 0
If *D\Items(i)\R > cur\R : cur = *D\Items(i) : EndIf
Else
j+1 : tmp(j) = cur
cur = *D\Items(i)
EndIf
Next
j+1 : tmp(j) = cur
ReDim tmp(j)
CopyArray(tmp(), *D\Items())
*D\Size = ArraySize(*D\Items())
*D\IsSorted = #True
EndIf
EndProcedure
Procedure.i ValueOfKey(Dict.i, Key.s)
Protected *D.TDict, i,n,r, j,s,e, flg, cur.TKeyValue, Dim tmp.TKeyValue(0)
If Dict > 0 And SelectElement(Dicts(), Dict-1)
*D = @Dicts()
sort_dict(*D)
s = 0
e = *D\Size
While 1
If s > e : Break : EndIf
j = s + (e-s)/2
r = CompareMemoryString(@*D\Items(j)\S, @Key, *D\Flag)
If r = 0
ProcedureReturn *D\Items(j)\I
ElseIf r < 0
s = j+1
Else
e = j-1
EndIf
;Debug j
Wend
ProcedureReturn *D\Missing
EndIf
EndProcedure
Procedure.i GetKeys(Dict.i, Array Ret.s(1))
Protected *D.TDict,i,n
If Dict > 0 And SelectElement(Dicts(), Dict-1)
*D = @Dicts()
sort_dict(*D)
Dim Ret(*D\Size)
For i=0 To *D\Size
Ret(i) = *D\Items(i)\S
Next
EndIf
EndProcedure
Procedure.i GetValues(Dict.i, Array Ret.i(1))
Protected *D.TDict,i,n
If Dict > 0 And SelectElement(Dicts(), Dict-1)
*D = @Dicts()
sort_dict(*D)
Dim Ret(*D\Size)
For i=0 To *D\Size
Ret(i) = *D\Items(i)\I
Next
EndIf
EndProcedure
Procedure.i DictSize(Dict.i)
Protected *D.TDict
If Dict > 0 And SelectElement(Dicts(), Dict-1)
*D = @Dicts()
ProcedureReturn *D\Size
EndIf
EndProcedure
Procedure.i ValueAtIndex(Dict.i, Index.i)
Protected *D.TDict
If Dict > 0 And SelectElement(Dicts(), Dict-1)
*D = @Dicts()
If Index >= 0 And Index <= *D\Size
ProcedureReturn *D\Items(Index)\I
EndIf
ProcedureReturn *D\Missing
EndIf
EndProcedure
Procedure.s KeyAtIndex(Dict.i, Index.i)
Protected *D.TDict
If Dict > 0 And SelectElement(Dicts(), Dict-1)
*D = @Dicts()
If Index >= 0 And Index <= *D\Size
ProcedureReturn *D\Items(Index)\S
EndIf
ProcedureReturn ""
EndIf
EndProcedure
Procedure.i Clear(Dict.i)
Protected *D.TDict
If Dict > 0 And SelectElement(Dicts(), Dict-1)
*D = @Dicts()
Dim *D\Items(0)
Dicts()\Size = -1
EndIf
EndProcedure
EndModule
;---- TEST -----
CompilerIf #PB_Compiler_IsMainFile
Global dic, i, n
dic = Dict::New()
Dict::AddKeyValue(dic, "Lebanon", 14)
Dict::AddKeyValue(dic, "France", 26)
Dict::AddKeyValue(dic, "UAE", 37)
Dict::AddKeyValue(dic, "Italy", 48)
Dict::AddKeyValue(dic, "Germany", 53)
Dict::AddKeyValue(dic, "USA", 67)
i = Dict::ValueOfKey(dic, "GERMANY")
If i
Debug "GERMANY is presnet and its value is " + Str(i)
EndIf
; browsing thru
For i=0 To Dict::DictSize(dic)
Debug "key at index " + Str(i) + " = " + Dict::KeyAtIndex(dic, i)
Next
; getting all values
Global Dim k.s(0), Dim v.i(0)
Dict::GetKeys(dic, k())
For i=0 To ArraySize(k())
Debug "key at index " + Str(i) + " = " + k(i)
Next
Dict::GetValues(dic, v())
For i=0 To ArraySize(v())
Debug "item at index " + Str(i) + " has a value " + v(i)
Next
; clearing content
Dict::Clear(dic)
Debug "Dict size = " + Str( Dict::DictSize(dic) )
For i=1 To 100000
Dict::AddKeyValue(dic, Str(i), i)
Next
Debug "Dict size = " + Str( Dict::DictSize(dic) )
CompilerEndIf
CompilerIf #PB_Compiler_IsMainFile And #False
;;DisableDebugger
; if you think about increasing the nbr of slots for the map
; then think about increasing #ChunkSize in the module (though not sure if they are comparable!)
Global NewMap mp(512*2), dic, t, t1, t2, w, n = 1000000, m=1000
Global Dim a.s(n), Dim b.s(m), Dim check(m)
For i=0 To n
a(i) = Chr( Random(255, 1) ) + Str( i )
Next
RandomizeArray(a())
For i=0 To m
If i%2
b(i) = a(Random(n))
Else
b(i) = "xxxx" + Random(55555)
EndIf
Next
t=ElapsedMilliseconds()
For i=0 To n
mp( a(i) ) = i
Next
For i=0 To m
If FindMapElement(mp(), b(i))
check(i) + 1
Else
check(i) - 1
EndIf
Next
t1 = ElapsedMilliseconds() - t
t=ElapsedMilliseconds()
dic = Dict::New()
For i=0 To n
Dict::AddKeyValue(dic, a(i), i)
Next
For i=0 To m
If Dict::ValueOfKey( dic, b(i) )
check(i) + 1
Else
check(i) - 1
EndIf
Next
t2 = ElapsedMilliseconds() - t
; checking if Dict is working fine!
; a value of -2 (not found in either) ... good
; a value of +2 (found in both) ... good
; otherwise => non-matching, something is wrong in Dict!!!
For i=0 To m
If -2 < check(i) And check(i) < 2 : w+1 : EndIf
Next
MessageRequester("timing", "PB map = " + Str(t1) + " ... dic = " + Str(t2) + " ... non-matching cases " + Str(w))
CompilerEndIf