diff options
-rw-r--r-- | common/hashmap.cpp | 7 | ||||
-rw-r--r-- | common/hashmap.h | 150 |
2 files changed, 96 insertions, 61 deletions
diff --git a/common/hashmap.cpp b/common/hashmap.cpp index 0fb03ec3f8..d8b84f61a5 100644 --- a/common/hashmap.cpp +++ b/common/hashmap.cpp @@ -58,6 +58,7 @@ uint hashit_lower(const char *p) { #ifdef DEBUG_HASH_COLLISIONS static double g_collisions = 0, + g_dummyHits = 0, g_lookups = 0, g_collPerLook = 0, g_capacity = 0, @@ -66,9 +67,10 @@ static int g_max_capacity = 0, g_max_size = 0; static int g_totalHashmaps = 0; static int g_stats[4] = {0,0,0,0}; -void updateHashCollisionStats(int collisions, int lookups, int arrsize, int nele) { +void updateHashCollisionStats(int collisions, int dummyHits, int lookups, int arrsize, int nele) { g_collisions += collisions; g_lookups += lookups; + g_dummyHits += dummyHits; if (lookups) g_collPerLook += (double)collisions / (double)lookups; g_capacity += arrsize; @@ -87,9 +89,10 @@ void updateHashCollisionStats(int collisions, int lookups, int arrsize, int nele g_max_capacity = MAX(g_max_capacity, arrsize); g_max_size = MAX(g_max_size, nele); - fprintf(stdout, "%d hashmaps: colls %.1f; lookups %.1f; ratio %.3f%%; size %f (max: %d); capacity %f (max: %d)\n", + fprintf(stdout, "%d hashmaps: colls %.1f; dummies hit %.1f, lookups %.1f; ratio %.3f%%; size %f (max: %d); capacity %f (max: %d)\n", g_totalHashmaps, g_collisions / g_totalHashmaps, + g_dummyHits / g_totalHashmaps, g_lookups / g_totalHashmaps, 100 * g_collPerLook / g_totalHashmaps, g_size / g_totalHashmaps, g_max_size, diff --git a/common/hashmap.h b/common/hashmap.h index 16de7cdf54..fa613782cf 100644 --- a/common/hashmap.h +++ b/common/hashmap.h @@ -24,8 +24,7 @@ */ // The hash map (associative array) implementation in this file is -// based on the PyDict implementation of CPython. The erase() method -// is based on example code in the Wikipedia article on Hash tables. +// based on the PyDict implementation of CPython. #ifndef COMMON_HASHMAP_H #define COMMON_HASHMAP_H @@ -72,7 +71,8 @@ public: struct Node { const Key _key; Val _value; - Node(const Key &key) : _key(key), _value() {} + explicit Node(const Key &key) : _key(key), _value() {} + Node() : _key(), _value() {} }; enum { @@ -98,12 +98,14 @@ public: } void freeNode(Node *node) { - _nodePool.deleteChunk(node); + if (node && node != &_dummyNode) + _nodePool.deleteChunk(node); } Node **_storage; // hashtable of size arrsize. uint _mask; /**< Capacity of the HashMap minus one; must be a power of two of minus one */ uint _size; + uint _deleted; ///< Number of deleted elements (_dummyNodes) HashFunc _hash; EqualFunc _equal; @@ -111,8 +113,11 @@ public: // Default value, returned by the const getVal. const Val _defaultVal; + // Dummy node, used as marker for erased objects. + Node _dummyNode; + #ifdef DEBUG_HASH_COLLISIONS - mutable int _collisions, _lookups; + mutable int _collisions, _lookups, _dummyHits; #endif void assign(const HM_t &map); @@ -269,7 +274,7 @@ HashMap<Key, Val, HashFunc, EqualFunc>::HashMap() #ifdef __PLAYSTATION2__ { #else - : _defaultVal() { + : _defaultVal(), _dummyNode() { #endif _mask = HASHMAP_MIN_CAPACITY - 1; _storage = new Node *[HASHMAP_MIN_CAPACITY]; @@ -277,10 +282,12 @@ HashMap<Key, Val, HashFunc, EqualFunc>::HashMap() memset(_storage, 0, HASHMAP_MIN_CAPACITY * sizeof(Node *)); _size = 0; + _deleted = 0; #ifdef DEBUG_HASH_COLLISIONS _collisions = 0; _lookups = 0; + _dummyHits = 0; #endif } @@ -291,7 +298,12 @@ HashMap<Key, Val, HashFunc, EqualFunc>::HashMap() */ template<class Key, class Val, class HashFunc, class EqualFunc> HashMap<Key, Val, HashFunc, EqualFunc>::HashMap(const HM_t &map) : - _defaultVal() { + _defaultVal(), _dummyNode() { +#ifdef DEBUG_HASH_COLLISIONS + _collisions = 0; + _lookups = 0; + _dummyHits = 0; +#endif assign(map); } @@ -301,13 +313,12 @@ HashMap<Key, Val, HashFunc, EqualFunc>::HashMap(const HM_t &map) : template<class Key, class Val, class HashFunc, class EqualFunc> HashMap<Key, Val, HashFunc, EqualFunc>::~HashMap() { for (uint ctr = 0; ctr <= _mask; ++ctr) - if (_storage[ctr] != NULL) - freeNode(_storage[ctr]); + freeNode(_storage[ctr]); delete[] _storage; #ifdef DEBUG_HASH_COLLISIONS - extern void updateHashCollisionStats(int, int, int, int); - updateHashCollisionStats(_collisions, _lookups, _mask+1, _size); + extern void updateHashCollisionStats(int, int, int, int, int); + updateHashCollisionStats(_collisions, _dummyHits, _lookups, _mask+1, _size); #endif } @@ -327,8 +338,12 @@ void HashMap<Key, Val, HashFunc, EqualFunc>::assign(const HM_t &map) { // Simply clone the map given to us, one by one. _size = 0; + _deleted = 0; for (uint ctr = 0; ctr <= _mask; ++ctr) { - if (map._storage[ctr] != NULL) { + if (map._storage[ctr] == &map._dummyNode) { + _storage[ctr] = &_dummyNode; + _deleted++; + } else if (map._storage[ctr] != NULL) { _storage[ctr] = allocNode(map._storage[ctr]->_key); _storage[ctr]->_value = map._storage[ctr]->_value; _size++; @@ -336,16 +351,15 @@ void HashMap<Key, Val, HashFunc, EqualFunc>::assign(const HM_t &map) { } // Perform a sanity check (to help track down hashmap corruption) assert(_size == map._size); + assert(_deleted == map._deleted); } template<class Key, class Val, class HashFunc, class EqualFunc> void HashMap<Key, Val, HashFunc, EqualFunc>::clear(bool shrinkArray) { for (uint ctr = 0; ctr <= _mask; ++ctr) { - if (_storage[ctr] != NULL) { - freeNode(_storage[ctr]); - _storage[ctr] = NULL; - } + freeNode(_storage[ctr]); + _storage[ctr] = NULL; } #ifdef USE_HASHMAP_MEMORY_POOL @@ -362,6 +376,7 @@ void HashMap<Key, Val, HashFunc, EqualFunc>::clear(bool shrinkArray) { } _size = 0; + _deleted = 0; } template<class Key, class Val, class HashFunc, class EqualFunc> @@ -374,6 +389,7 @@ void HashMap<Key, Val, HashFunc, EqualFunc>::expandStorage(uint newCapacity) { // allocate a new array _size = 0; + _deleted = 0; _mask = newCapacity - 1; _storage = new Node *[newCapacity]; assert(_storage != NULL); @@ -381,7 +397,7 @@ void HashMap<Key, Val, HashFunc, EqualFunc>::expandStorage(uint newCapacity) { // rehash all the old elements for (uint ctr = 0; ctr <= old_mask; ++ctr) { - if (old_storage[ctr] == NULL) + if (old_storage[ctr] == NULL || old_storage[ctr] == &_dummyNode) continue; // Insert the element from the old table into the new table. @@ -390,7 +406,7 @@ void HashMap<Key, Val, HashFunc, EqualFunc>::expandStorage(uint newCapacity) { // don't have to call _equal(). const uint hash = _hash(old_storage[ctr]->_key); uint idx = hash & _mask; - for (uint perturb = hash; _storage[idx] != NULL; perturb >>= HASHMAP_PERTURB_SHIFT) { + for (uint perturb = hash; _storage[idx] != NULL && _storage[idx] != &_dummyNode; perturb >>= HASHMAP_PERTURB_SHIFT) { idx = (5 * idx + perturb + 1) & _mask; } @@ -412,7 +428,13 @@ int HashMap<Key, Val, HashFunc, EqualFunc>::lookup(const Key &key) const { const uint hash = _hash(key); uint ctr = hash & _mask; for (uint perturb = hash; ; perturb >>= HASHMAP_PERTURB_SHIFT) { - if (_storage[ctr] == NULL || _equal(_storage[ctr]->_key, key)) + if (_storage[ctr] == NULL) + break; + if (_storage[ctr] == &_dummyNode) { +#ifdef DEBUG_HASH_COLLISIONS + _dummyHits++; +#endif + } else if (_equal(_storage[ctr]->_key, key)) break; ctr = (5 * ctr + perturb + 1) & _mask; @@ -424,8 +446,8 @@ int HashMap<Key, Val, HashFunc, EqualFunc>::lookup(const Key &key) const { #ifdef DEBUG_HASH_COLLISIONS _lookups++; - fprintf(stderr, "collisions %d, lookups %d, ratio %f in HashMap %p; size %d num elements %d\n", - _collisions, _lookups, ((double) _collisions / (double)_lookups), + fprintf(stderr, "collisions %d, dummies hit %d, lookups %d, ratio %f in HashMap %p; size %d num elements %d\n", + _collisions, _dummyHits, _lookups, ((double) _collisions / (double)_lookups), (const void *)this, _mask+1, _size); #endif @@ -434,16 +456,54 @@ int HashMap<Key, Val, HashFunc, EqualFunc>::lookup(const Key &key) const { template<class Key, class Val, class HashFunc, class EqualFunc> int HashMap<Key, Val, HashFunc, EqualFunc>::lookupAndCreateIfMissing(const Key &key) { - uint ctr = lookup(key); + const uint hash = _hash(key); + uint ctr = hash & _mask; + const uint NONE_FOUND = _mask + 1; + uint first_free = NONE_FOUND; + bool found = false; + for (uint perturb = hash; ; perturb >>= HASHMAP_PERTURB_SHIFT) { + if (_storage[ctr] == NULL) + break; + if (_storage[ctr] == &_dummyNode) { +#ifdef DEBUG_HASH_COLLISIONS + _dummyHits++; +#endif + if (first_free != _mask + 1) + first_free = ctr; + } else if (_equal(_storage[ctr]->_key, key)) { + found = true; + break; + } - if (_storage[ctr] == NULL) { + ctr = (5 * ctr + perturb + 1) & _mask; + +#ifdef DEBUG_HASH_COLLISIONS + _collisions++; +#endif + } + +#ifdef DEBUG_HASH_COLLISIONS + _lookups++; + fprintf(stderr, "collisions %d, dummies hit %d, lookups %d, ratio %f in HashMap %p; size %d num elements %d\n", + _collisions, _dummyHits, _lookups, ((double) _collisions / (double)_lookups), + (const void *)this, _mask+1, _size); +#endif + + if (!found && first_free != _mask + 1) + ctr = first_free; + + if (!found) { + if (_storage[ctr]) + _deleted--; _storage[ctr] = allocNode(key); assert(_storage[ctr] != NULL); _size++; // Keep the load factor below a certain threshold. + // Deleted nodes are also counted uint capacity = _mask + 1; - if (_size * HASHMAP_LOADFACTOR_DENOMINATOR > capacity * HASHMAP_LOADFACTOR_NUMERATOR) { + if ((_size + _deleted) * HASHMAP_LOADFACTOR_DENOMINATOR > + capacity * HASHMAP_LOADFACTOR_NUMERATOR) { capacity = capacity < 500 ? (capacity * 4) : (capacity * 2); expandStorage(capacity); ctr = lookup(key); @@ -496,44 +556,16 @@ void HashMap<Key, Val, HashFunc, EqualFunc>::setVal(const Key &key, const Val &v template<class Key, class Val, class HashFunc, class EqualFunc> void HashMap<Key, Val, HashFunc, EqualFunc>::erase(const Key &key) { - // This is based on code in the Wikipedia article on Hash tables. - - const uint hash = _hash(key); - uint i = hash & _mask; - uint perturb; - - for (perturb = hash; ; perturb >>= HASHMAP_PERTURB_SHIFT) { - if (_storage[i] == NULL || _equal(_storage[i]->_key, key)) - break; - i = (5 * i + perturb + 1) & _mask; - } + uint ctr = lookup(key); + if (_storage[ctr] == NULL) + return; - if (_storage[i] == NULL) - return; // key wasn't present, so no work has to be done - - // If we remove a key, we must check all subsequent keys and possibly - // reinsert them. - uint j = i; - freeNode(_storage[i]); - _storage[i] = NULL; - for (perturb = hash; ; perturb >>= HASHMAP_PERTURB_SHIFT) { - // Look at the next table slot - j = (5 * j + perturb + 1) & _mask; - // If the next slot is empty, we are done - if (_storage[j] == NULL) - break; - // Compute the slot where the content of the next slot should normally be, - // assuming an empty table, and check whether we have to move it. - uint k = _hash(_storage[j]->_key) & _mask; - if ((j > i && (k <= i || k > j)) || - (j < i && (k <= i && k > j)) ) { - _storage[i] = _storage[j]; - i = j; - } - } - _storage[i] = NULL; + // If we remove a key, we replace it with a dummy node. + freeNode(_storage[ctr]); + _storage[ctr] = &_dummyNode; _size--; + _deleted++; return; } |