PureBasic Forums - English

Posted: **Tue Dec 18, 2007 11:51 am**

Here is my adaptation of hash tables
thanks for the members for the help, suggestions and ideas
collisions are handled...but no provision for reading data is given; I normally make sure that my key is unique, if this is not your case....then you will have to adapt the code for you....sorry!

Code: Select all

;
; Amalgamated Hash with included garbage collector
; Original code ported from C
; Adapted locking mechanism from srod's hash example sources
; Adapted using CRC32 for hashing
; also added linkedlist capability for storing info (I suspect I stole that idea from srod too..thanks/sorry srod)
;
; Distributed freely
;
; T Agius
; Nov/2007

;
; #PROGTYPE control server/normal usage. Server implies multi-threading + more ram, so locks are performed
;
;Usage:
;
; InitGC() to init the hash & garbage collector
; HashAdd() -> Add key + data
; Hashget() -> get data using key
; HashUpdate() -> Update data in a given hash (works only for DIRECT
;
; General Notes:
; There are 3 methods for storing data depends of your situation
; 1) using flag #HF_DIRECT
;  data will be stored in hash, and you are responsible for data management (eg a ptr to data)
; 2) using flag #HF_LINKEDLIST
;  data will be added as needed in a single linkedlist (ei hash will be an index for your linkedlist)
;  structure for LL needs to be fixed in size...no pointers
; 3) If no flag is used, data will be allocated using the internal garbage collector. Structure has to be
;  fixed in size (ie no pointers!) but no need to save in 4 byte boundaries
; 
; From my testing with 3) I save about 6% of memory on really large datasets (300k+ records) and works out to be the fastest
; The advantage of 2) is that you do not need to traverse the full data to find your records blindly. But then again, hashes are
; an indexing method, so in many cases traversing the records is not a usual thing.
; As seen from example provided, you can use the same hash for different datasets, just modify/add bit flags as needed. In example I 
; used #HF_DIR and #HF_FILE you can change them to what you need
;
; If you need help, just let me know and I will see how to explain better (yes I suck at DOCS too)
;

#PROGTYPE = 0


; =====================================================================================
;- Defintions

#HASH_FILEBUFFER  = 32766
#HASH_MINIMUM     = 262128

#FFX_CLIENTMEMORY = 18000000
#FFX_SERVERMEMORY = 32000000
#FFX_MEMADD_CHUNK =  2000000

#HASH_SERVER_SIZE = 8820000
#HASH_NORMAL_SIZE = 1280000
#HASH_MIN_SIZE    = 400000

#HF_BASIC         = %00000000
#HF_NEW           = %00000001
#HF_DIR           = %00000010 ;<- ADAPT as needed
#HF_FILE          = %00000100 ;<- ADAPT as needed
#HF_USED          = %00001000
#HF_CHANGED       = %00010000
#HF_LINKEDLIST    = %00100000
#HF_FOUND         = %01000000
#HF_DIRECT        = %10000000

CompilerIf #PROGTYPE = 0
   #FFX_MEMORY = #FFX_CLIENTMEMORY
   #HASH_SIZE  = #HASH_NORMAL_SIZE
CompilerElse
   #FFX_MEMORY = #FFX_SERVERMEMORY
   #HASH_SIZE  = #HASH_SERVER_SIZE
CompilerEndIf

; =====================================================================================
;- Macros

Macro          ReserveMemory( Size )

   gMemPtr = *gMemBuffer + gMemPos
   gMemOldPos = gMemPos
   gMemPos + Size
   
   While gMemPos > gMemMax
      gMemMax + #FFX_MEMADD_CHUNK
      *gMemBuffer = ReAllocateMemory( *gMemBuffer, gMemMax )
       
      If Not *gMemBuffer
         ProcedureReturn -1
      EndIf
   Wend

   gMemPtr = *gMemBuffer + gMemOldPos

EndMacro

Macro          CopyToBuffer( mydata, Size )

   ReserveMemory( Size )
   CopyMemory( mydata, gMemPtr, Size )
   
EndMacro

Macro          LockHash()
   CompilerIf #PROGTYPE = 1
      LockMutex ( _HTmutex )
   CompilerEndIf
EndMacro

Macro          UnlockHash()
   CompilerIf #PROGTYPE = 1
      UnlockMutex ( _HTmutex )
   CompilerEndIf
EndMacro

Macro          ResetMemoryPtr( ptr )
   gMemPos = ptr - *gMemBuffer
   gMemPtr = *gMemBuffer + gMemPos
EndMacro


; =====================================================================================
;- Structures

Structure stHash
   key.s
   bFlag.w
   ptr.l
EndStructure

Structure      stUser
   FileName.s{255}
   Size.q
EndStructure

Structure      stHashData
   hash.l
   user.stUser
EndStructure


; =====================================================================================
;- Global Variables

Global NewList          llUser.stHashData()
Global                  _LLMutex = CreateMutex()
Global                  gszBuild.s   = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"

; Hashes
Global Dim              gHash.stHash( #HASH_SIZE + 2 )

Global                  gFileBuffer.l
Global                  gMaxHashes.l
Global                  gLastHash.l
Global                  _HTmutex = CreateMutex()

; Garbage Collector
Global                  *gMemBuffer.l
Global                  gMemPos.l
Global                  gMemOldPos.l
Global                  gMemMax.l
Global                  gMemPtr.l

; =====================================================================================
;- Main Code


Procedure            HashMake( InKey.l )

   Protected         hash.l
   
   hash = CRC32Fingerprint( InKey, MemoryStringLength( InKey ) ) % gMaxHashes
   
   If hash > 0
      ProcedureReturn hash
   EndIf
   
   ProcedureReturn -hash

EndProcedure

Procedure            HashAdd( key.l, Value.l, type.l = 0, Size.l = 0 )

   Protected         hash.l
   Protected         temp.s
   
   hash = HashMake( key )
   LockHash()

   Repeat
      If ( gHash ( hash )\bFlag & #HF_USED ) <> #HF_USED
         gHash ( hash )\bFlag = type | #HF_USED
         gHash ( hash )\key = PeekS ( key )
         
         If ( type & #HF_DIRECT ) = #HF_DIRECT
            gHash ( hash )\ptr = Value
         ElseIf ( type & #HF_LINKEDLIST ) = #HF_LINKEDLIST
            AddElement ( llUser() )
            llUser()\hash = hash
            gHash ( hash )\ptr = @llUser()
            CopyMemory ( Value, @llUser()\user, Size )
         Else
            CopyToBuffer ( Value, Size )
            gHash ( hash )\ptr = gMemPtr
         EndIf
         
         UnlockHash()
         ProcedureReturn hash
      Else
         hash = ( hash + 1 ) % gMaxHashes
      EndIf
   ForEver

EndProcedure

Procedure            HashUpdate( hash.l, Value.l, type.l = 0, Size.l = 0 )

   gHash ( hash )\bFlag | type
   CopyMemory ( Value, gHash ( hash )\ptr, Size )

EndProcedure

Procedure            HashGet( key.l, type.l )

   Protected         hash.l
   
   hash = HashMake( key )
   
   Repeat

      If ( gHash ( hash )\bFlag & #HF_USED ) <> #HF_USED
         ProcedureReturn -1
      ElseIf ( gHash ( hash )\bFlag & type ) = type
         If CompareMemoryString ( key, @gHash ( hash )\key ) = 0
            gLastHash = hash
            ProcedureReturn gHash ( hash )\ptr
         EndIf
      EndIf

      hash = ( hash + 1 ) % gMaxHashes
      
   ForEver
   
EndProcedure

Procedure            InitGC()
   
   gMaxHashes = #HASH_SIZE
   gMemMax = #FFX_MEMORY
   *gMemBuffer = AllocateMemory( #FFX_CLIENTMEMORY )
   
   If *gMemBuffer = 0
      ProcedureReturn -1
   EndIf

EndProcedure

; =====================================================================================
;- Test Code

Declare           TestHash( dir.s )
Declare           ListFiles( root.s )

Define.s          dir.s
   
   OpenConsole()
   dir = ProgramParameter()
   If dir = ""
      dir = "C:\Program Files\PureBasic"
   EndIf
   
   TestHash( dir )
   End

Procedure         TestHash( dir.s )

   Protected      i.l
   Protected      tst.w
   Protected      vt.stUser

   InitGC()
   ListFiles( dir )
   ;First we list the dirs and their dates

   tst = #HF_NEW | #HF_DIR | #HF_DIRECT
   For i = 0 To gMaxHashes
      If gHash( i )\bFlag & tst = tst
         PrintN ( gHash( i )\key + FormatDate(" - %yyyy-%mm-%dd", gHash( i )\ptr) )
      EndIf
   Next
   
   ; Now we list the files
   ForEach llUser()
      ;PrintN ( gHash( llUser()\hash )\key )
      PrintN ( llUser()\user\FileName + " - [ " + StrQ(llUser()\user\Size) + " ]" )
   Next

EndProcedure

Procedure            ListFiles( root.s )

   Protected         temp.s
   Protected         FileName.s
   Protected         df.l
   Protected         ddate.l
   Protected         skip.l
   Protected         Flag.l
   Protected         vt.stUser
   
   df = ExamineDirectory ( #PB_Any, root, "" )

   If df > 0
      While NextDirectoryEntry ( df )
         If DirectoryEntryType ( df ) = #PB_DirectoryEntry_File
            temp = DirectoryEntryName ( df )
            FileName = GetFilePart( temp )
            CopyMemory( @FileName, @vt\FileName, Len(FileName) + 1 )
            temp = root + "\" + temp
            vt\Size = FileSize( temp )
            HashAdd ( @temp, @vt, #HF_FILE | #HF_LINKEDLIST | #HF_NEW, SizeOf ( stUser ) )
         Else
            temp = DirectoryEntryName ( df )
            If temp <> "." And temp <> ".."
               ddate = DirectoryEntryDate ( df, #PB_Date_Created )
               temp = root + "\" + temp
               HashAdd ( @temp, ddate, #HF_DIR | #HF_NEW | #HF_DIRECT )
               ListFiles( temp )
            EndIf
         EndIf
      Wend
   EndIf

EndProcedure

Posted: **Tue Dec 18, 2007 1:26 pm**

Thanks for your code!

It would be very nice, if can add a working example code showing the different (?) methods of using your hash table.
Thanks in advance!

Posted: **Tue Dec 18, 2007 4:25 pm**

there is already an example showing 2 methods.....
But OK, sicne you insist, I will add another example later

Posted: **Wed Dec 19, 2007 12:57 am**

Thanks for sharing.

PureBasic Forums - English

More Hashtables

More Hashtables