thanks for the members for the help, suggestions and ideas
collisions are handled...but no provision for reading data is given; I normally make sure that my key is unique, if this is not your case....then you will have to adapt the code for you....sorry!
Code: Select all
;
; Amalgamated Hash with included garbage collector
; Original code ported from C
; Adapted locking mechanism from srod's hash example sources
; Adapted using CRC32 for hashing
; also added linkedlist capability for storing info (I suspect I stole that idea from srod too..thanks/sorry srod)
;
; Distributed freely
;
; T Agius
; Nov/2007
;
; #PROGTYPE control server/normal usage. Server implies multi-threading + more ram, so locks are performed
;
;Usage:
;
; InitGC() to init the hash & garbage collector
; HashAdd() -> Add key + data
; Hashget() -> get data using key
; HashUpdate() -> Update data in a given hash (works only for DIRECT
;
; General Notes:
; There are 3 methods for storing data depends of your situation
; 1) using flag #HF_DIRECT
; data will be stored in hash, and you are responsible for data management (eg a ptr to data)
; 2) using flag #HF_LINKEDLIST
; data will be added as needed in a single linkedlist (ei hash will be an index for your linkedlist)
; structure for LL needs to be fixed in size...no pointers
; 3) If no flag is used, data will be allocated using the internal garbage collector. Structure has to be
; fixed in size (ie no pointers!) but no need to save in 4 byte boundaries
;
; From my testing with 3) I save about 6% of memory on really large datasets (300k+ records) and works out to be the fastest
; The advantage of 2) is that you do not need to traverse the full data to find your records blindly. But then again, hashes are
; an indexing method, so in many cases traversing the records is not a usual thing.
; As seen from example provided, you can use the same hash for different datasets, just modify/add bit flags as needed. In example I
; used #HF_DIR and #HF_FILE you can change them to what you need
;
; If you need help, just let me know and I will see how to explain better (yes I suck at DOCS too)
;
#PROGTYPE = 0
; =====================================================================================
;- Defintions
#HASH_FILEBUFFER = 32766
#HASH_MINIMUM = 262128
#FFX_CLIENTMEMORY = 18000000
#FFX_SERVERMEMORY = 32000000
#FFX_MEMADD_CHUNK = 2000000
#HASH_SERVER_SIZE = 8820000
#HASH_NORMAL_SIZE = 1280000
#HASH_MIN_SIZE = 400000
#HF_BASIC = %00000000
#HF_NEW = %00000001
#HF_DIR = %00000010 ;<- ADAPT as needed
#HF_FILE = %00000100 ;<- ADAPT as needed
#HF_USED = %00001000
#HF_CHANGED = %00010000
#HF_LINKEDLIST = %00100000
#HF_FOUND = %01000000
#HF_DIRECT = %10000000
CompilerIf #PROGTYPE = 0
#FFX_MEMORY = #FFX_CLIENTMEMORY
#HASH_SIZE = #HASH_NORMAL_SIZE
CompilerElse
#FFX_MEMORY = #FFX_SERVERMEMORY
#HASH_SIZE = #HASH_SERVER_SIZE
CompilerEndIf
; =====================================================================================
;- Macros
Macro ReserveMemory( Size )
gMemPtr = *gMemBuffer + gMemPos
gMemOldPos = gMemPos
gMemPos + Size
While gMemPos > gMemMax
gMemMax + #FFX_MEMADD_CHUNK
*gMemBuffer = ReAllocateMemory( *gMemBuffer, gMemMax )
If Not *gMemBuffer
ProcedureReturn -1
EndIf
Wend
gMemPtr = *gMemBuffer + gMemOldPos
EndMacro
Macro CopyToBuffer( mydata, Size )
ReserveMemory( Size )
CopyMemory( mydata, gMemPtr, Size )
EndMacro
Macro LockHash()
CompilerIf #PROGTYPE = 1
LockMutex ( _HTmutex )
CompilerEndIf
EndMacro
Macro UnlockHash()
CompilerIf #PROGTYPE = 1
UnlockMutex ( _HTmutex )
CompilerEndIf
EndMacro
Macro ResetMemoryPtr( ptr )
gMemPos = ptr - *gMemBuffer
gMemPtr = *gMemBuffer + gMemPos
EndMacro
; =====================================================================================
;- Structures
Structure stHash
key.s
bFlag.w
ptr.l
EndStructure
Structure stUser
FileName.s{255}
Size.q
EndStructure
Structure stHashData
hash.l
user.stUser
EndStructure
; =====================================================================================
;- Global Variables
Global NewList llUser.stHashData()
Global _LLMutex = CreateMutex()
Global gszBuild.s = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
; Hashes
Global Dim gHash.stHash( #HASH_SIZE + 2 )
Global gFileBuffer.l
Global gMaxHashes.l
Global gLastHash.l
Global _HTmutex = CreateMutex()
; Garbage Collector
Global *gMemBuffer.l
Global gMemPos.l
Global gMemOldPos.l
Global gMemMax.l
Global gMemPtr.l
; =====================================================================================
;- Main Code
Procedure HashMake( InKey.l )
Protected hash.l
hash = CRC32Fingerprint( InKey, MemoryStringLength( InKey ) ) % gMaxHashes
If hash > 0
ProcedureReturn hash
EndIf
ProcedureReturn -hash
EndProcedure
Procedure HashAdd( key.l, Value.l, type.l = 0, Size.l = 0 )
Protected hash.l
Protected temp.s
hash = HashMake( key )
LockHash()
Repeat
If ( gHash ( hash )\bFlag & #HF_USED ) <> #HF_USED
gHash ( hash )\bFlag = type | #HF_USED
gHash ( hash )\key = PeekS ( key )
If ( type & #HF_DIRECT ) = #HF_DIRECT
gHash ( hash )\ptr = Value
ElseIf ( type & #HF_LINKEDLIST ) = #HF_LINKEDLIST
AddElement ( llUser() )
llUser()\hash = hash
gHash ( hash )\ptr = @llUser()
CopyMemory ( Value, @llUser()\user, Size )
Else
CopyToBuffer ( Value, Size )
gHash ( hash )\ptr = gMemPtr
EndIf
UnlockHash()
ProcedureReturn hash
Else
hash = ( hash + 1 ) % gMaxHashes
EndIf
ForEver
EndProcedure
Procedure HashUpdate( hash.l, Value.l, type.l = 0, Size.l = 0 )
gHash ( hash )\bFlag | type
CopyMemory ( Value, gHash ( hash )\ptr, Size )
EndProcedure
Procedure HashGet( key.l, type.l )
Protected hash.l
hash = HashMake( key )
Repeat
If ( gHash ( hash )\bFlag & #HF_USED ) <> #HF_USED
ProcedureReturn -1
ElseIf ( gHash ( hash )\bFlag & type ) = type
If CompareMemoryString ( key, @gHash ( hash )\key ) = 0
gLastHash = hash
ProcedureReturn gHash ( hash )\ptr
EndIf
EndIf
hash = ( hash + 1 ) % gMaxHashes
ForEver
EndProcedure
Procedure InitGC()
gMaxHashes = #HASH_SIZE
gMemMax = #FFX_MEMORY
*gMemBuffer = AllocateMemory( #FFX_CLIENTMEMORY )
If *gMemBuffer = 0
ProcedureReturn -1
EndIf
EndProcedure
; =====================================================================================
;- Test Code
Declare TestHash( dir.s )
Declare ListFiles( root.s )
Define.s dir.s
OpenConsole()
dir = ProgramParameter()
If dir = ""
dir = "C:\Program Files\PureBasic"
EndIf
TestHash( dir )
End
Procedure TestHash( dir.s )
Protected i.l
Protected tst.w
Protected vt.stUser
InitGC()
ListFiles( dir )
;First we list the dirs and their dates
tst = #HF_NEW | #HF_DIR | #HF_DIRECT
For i = 0 To gMaxHashes
If gHash( i )\bFlag & tst = tst
PrintN ( gHash( i )\key + FormatDate(" - %yyyy-%mm-%dd", gHash( i )\ptr) )
EndIf
Next
; Now we list the files
ForEach llUser()
;PrintN ( gHash( llUser()\hash )\key )
PrintN ( llUser()\user\FileName + " - [ " + StrQ(llUser()\user\Size) + " ]" )
Next
EndProcedure
Procedure ListFiles( root.s )
Protected temp.s
Protected FileName.s
Protected df.l
Protected ddate.l
Protected skip.l
Protected Flag.l
Protected vt.stUser
df = ExamineDirectory ( #PB_Any, root, "" )
If df > 0
While NextDirectoryEntry ( df )
If DirectoryEntryType ( df ) = #PB_DirectoryEntry_File
temp = DirectoryEntryName ( df )
FileName = GetFilePart( temp )
CopyMemory( @FileName, @vt\FileName, Len(FileName) + 1 )
temp = root + "\" + temp
vt\Size = FileSize( temp )
HashAdd ( @temp, @vt, #HF_FILE | #HF_LINKEDLIST | #HF_NEW, SizeOf ( stUser ) )
Else
temp = DirectoryEntryName ( df )
If temp <> "." And temp <> ".."
ddate = DirectoryEntryDate ( df, #PB_Date_Created )
temp = root + "\" + temp
HashAdd ( @temp, ddate, #HF_DIR | #HF_NEW | #HF_DIRECT )
ListFiles( temp )
EndIf
EndIf
Wend
EndIf
EndProcedure