More Hashtables
Posted: Tue Dec 18, 2007 11:51 am
Here is my adaptation of hash tables
thanks for the members for the help, suggestions and ideas
collisions are handled...but no provision for reading data is given; I normally make sure that my key is unique, if this is not your case....then you will have to adapt the code for you....sorry!
thanks for the members for the help, suggestions and ideas
collisions are handled...but no provision for reading data is given; I normally make sure that my key is unique, if this is not your case....then you will have to adapt the code for you....sorry!
Code: Select all
;
; Amalgamated Hash with included garbage collector
; Original code ported from C
; Adapted locking mechanism from srod's hash example sources
; Adapted using CRC32 for hashing
; also added linkedlist capability for storing info (I suspect I stole that idea from srod too..thanks/sorry srod)
;
; Distributed freely
;
; T Agius
; Nov/2007
;
; #PROGTYPE control server/normal usage. Server implies multi-threading + more ram, so locks are performed
;
;Usage:
;
; InitGC() to init the hash & garbage collector
; HashAdd() -> Add key + data
; Hashget() -> get data using key
; HashUpdate() -> Update data in a given hash (works only for DIRECT
;
; General Notes:
; There are 3 methods for storing data depends of your situation
; 1) using flag #HF_DIRECT
; data will be stored in hash, and you are responsible for data management (eg a ptr to data)
; 2) using flag #HF_LINKEDLIST
; data will be added as needed in a single linkedlist (ei hash will be an index for your linkedlist)
; structure for LL needs to be fixed in size...no pointers
; 3) If no flag is used, data will be allocated using the internal garbage collector. Structure has to be
; fixed in size (ie no pointers!) but no need to save in 4 byte boundaries
;
; From my testing with 3) I save about 6% of memory on really large datasets (300k+ records) and works out to be the fastest
; The advantage of 2) is that you do not need to traverse the full data to find your records blindly. But then again, hashes are
; an indexing method, so in many cases traversing the records is not a usual thing.
; As seen from example provided, you can use the same hash for different datasets, just modify/add bit flags as needed. In example I
; used #HF_DIR and #HF_FILE you can change them to what you need
;
; If you need help, just let me know and I will see how to explain better (yes I suck at DOCS too)
;
#PROGTYPE = 0
; =====================================================================================
;- Defintions
#HASH_FILEBUFFER = 32766
#HASH_MINIMUM = 262128
#FFX_CLIENTMEMORY = 18000000
#FFX_SERVERMEMORY = 32000000
#FFX_MEMADD_CHUNK = 2000000
#HASH_SERVER_SIZE = 8820000
#HASH_NORMAL_SIZE = 1280000
#HASH_MIN_SIZE = 400000
#HF_BASIC = %00000000
#HF_NEW = %00000001
#HF_DIR = %00000010 ;<- ADAPT as needed
#HF_FILE = %00000100 ;<- ADAPT as needed
#HF_USED = %00001000
#HF_CHANGED = %00010000
#HF_LINKEDLIST = %00100000
#HF_FOUND = %01000000
#HF_DIRECT = %10000000
CompilerIf #PROGTYPE = 0
#FFX_MEMORY = #FFX_CLIENTMEMORY
#HASH_SIZE = #HASH_NORMAL_SIZE
CompilerElse
#FFX_MEMORY = #FFX_SERVERMEMORY
#HASH_SIZE = #HASH_SERVER_SIZE
CompilerEndIf
; =====================================================================================
;- Macros
Macro ReserveMemory( Size )
gMemPtr = *gMemBuffer + gMemPos
gMemOldPos = gMemPos
gMemPos + Size
While gMemPos > gMemMax
gMemMax + #FFX_MEMADD_CHUNK
*gMemBuffer = ReAllocateMemory( *gMemBuffer, gMemMax )
If Not *gMemBuffer
ProcedureReturn -1
EndIf
Wend
gMemPtr = *gMemBuffer + gMemOldPos
EndMacro
Macro CopyToBuffer( mydata, Size )
ReserveMemory( Size )
CopyMemory( mydata, gMemPtr, Size )
EndMacro
Macro LockHash()
CompilerIf #PROGTYPE = 1
LockMutex ( _HTmutex )
CompilerEndIf
EndMacro
Macro UnlockHash()
CompilerIf #PROGTYPE = 1
UnlockMutex ( _HTmutex )
CompilerEndIf
EndMacro
Macro ResetMemoryPtr( ptr )
gMemPos = ptr - *gMemBuffer
gMemPtr = *gMemBuffer + gMemPos
EndMacro
; =====================================================================================
;- Structures
Structure stHash
key.s
bFlag.w
ptr.l
EndStructure
Structure stUser
FileName.s{255}
Size.q
EndStructure
Structure stHashData
hash.l
user.stUser
EndStructure
; =====================================================================================
;- Global Variables
Global NewList llUser.stHashData()
Global _LLMutex = CreateMutex()
Global gszBuild.s = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
; Hashes
Global Dim gHash.stHash( #HASH_SIZE + 2 )
Global gFileBuffer.l
Global gMaxHashes.l
Global gLastHash.l
Global _HTmutex = CreateMutex()
; Garbage Collector
Global *gMemBuffer.l
Global gMemPos.l
Global gMemOldPos.l
Global gMemMax.l
Global gMemPtr.l
; =====================================================================================
;- Main Code
Procedure HashMake( InKey.l )
Protected hash.l
hash = CRC32Fingerprint( InKey, MemoryStringLength( InKey ) ) % gMaxHashes
If hash > 0
ProcedureReturn hash
EndIf
ProcedureReturn -hash
EndProcedure
Procedure HashAdd( key.l, Value.l, type.l = 0, Size.l = 0 )
Protected hash.l
Protected temp.s
hash = HashMake( key )
LockHash()
Repeat
If ( gHash ( hash )\bFlag & #HF_USED ) <> #HF_USED
gHash ( hash )\bFlag = type | #HF_USED
gHash ( hash )\key = PeekS ( key )
If ( type & #HF_DIRECT ) = #HF_DIRECT
gHash ( hash )\ptr = Value
ElseIf ( type & #HF_LINKEDLIST ) = #HF_LINKEDLIST
AddElement ( llUser() )
llUser()\hash = hash
gHash ( hash )\ptr = @llUser()
CopyMemory ( Value, @llUser()\user, Size )
Else
CopyToBuffer ( Value, Size )
gHash ( hash )\ptr = gMemPtr
EndIf
UnlockHash()
ProcedureReturn hash
Else
hash = ( hash + 1 ) % gMaxHashes
EndIf
ForEver
EndProcedure
Procedure HashUpdate( hash.l, Value.l, type.l = 0, Size.l = 0 )
gHash ( hash )\bFlag | type
CopyMemory ( Value, gHash ( hash )\ptr, Size )
EndProcedure
Procedure HashGet( key.l, type.l )
Protected hash.l
hash = HashMake( key )
Repeat
If ( gHash ( hash )\bFlag & #HF_USED ) <> #HF_USED
ProcedureReturn -1
ElseIf ( gHash ( hash )\bFlag & type ) = type
If CompareMemoryString ( key, @gHash ( hash )\key ) = 0
gLastHash = hash
ProcedureReturn gHash ( hash )\ptr
EndIf
EndIf
hash = ( hash + 1 ) % gMaxHashes
ForEver
EndProcedure
Procedure InitGC()
gMaxHashes = #HASH_SIZE
gMemMax = #FFX_MEMORY
*gMemBuffer = AllocateMemory( #FFX_CLIENTMEMORY )
If *gMemBuffer = 0
ProcedureReturn -1
EndIf
EndProcedure
; =====================================================================================
;- Test Code
Declare TestHash( dir.s )
Declare ListFiles( root.s )
Define.s dir.s
OpenConsole()
dir = ProgramParameter()
If dir = ""
dir = "C:\Program Files\PureBasic"
EndIf
TestHash( dir )
End
Procedure TestHash( dir.s )
Protected i.l
Protected tst.w
Protected vt.stUser
InitGC()
ListFiles( dir )
;First we list the dirs and their dates
tst = #HF_NEW | #HF_DIR | #HF_DIRECT
For i = 0 To gMaxHashes
If gHash( i )\bFlag & tst = tst
PrintN ( gHash( i )\key + FormatDate(" - %yyyy-%mm-%dd", gHash( i )\ptr) )
EndIf
Next
; Now we list the files
ForEach llUser()
;PrintN ( gHash( llUser()\hash )\key )
PrintN ( llUser()\user\FileName + " - [ " + StrQ(llUser()\user\Size) + " ]" )
Next
EndProcedure
Procedure ListFiles( root.s )
Protected temp.s
Protected FileName.s
Protected df.l
Protected ddate.l
Protected skip.l
Protected Flag.l
Protected vt.stUser
df = ExamineDirectory ( #PB_Any, root, "" )
If df > 0
While NextDirectoryEntry ( df )
If DirectoryEntryType ( df ) = #PB_DirectoryEntry_File
temp = DirectoryEntryName ( df )
FileName = GetFilePart( temp )
CopyMemory( @FileName, @vt\FileName, Len(FileName) + 1 )
temp = root + "\" + temp
vt\Size = FileSize( temp )
HashAdd ( @temp, @vt, #HF_FILE | #HF_LINKEDLIST | #HF_NEW, SizeOf ( stUser ) )
Else
temp = DirectoryEntryName ( df )
If temp <> "." And temp <> ".."
ddate = DirectoryEntryDate ( df, #PB_Date_Created )
temp = root + "\" + temp
HashAdd ( @temp, ddate, #HF_DIR | #HF_NEW | #HF_DIRECT )
ListFiles( temp )
EndIf
EndIf
Wend
EndIf
EndProcedure