diff options
Diffstat (limited to 'common/hashmap.h')
-rw-r--r-- | common/hashmap.h | 165 |
1 files changed, 83 insertions, 82 deletions
diff --git a/common/hashmap.h b/common/hashmap.h index 980e03aa6e..81f5ee84b4 100644 --- a/common/hashmap.h +++ b/common/hashmap.h @@ -24,32 +24,8 @@ */ // The hash map (associative array) implementation in this file is -// based on code by Andrew Y. Ng, 1996: - -/* - * Copyright (c) 1998-2003 Massachusetts Institute of Technology. - * This code was developed as part of the Haystack research project - * (http://haystack.lcs.mit.edu/). Permission is hereby granted, - * free of charge, to any person obtaining a copy of this software - * and associated documentation files (the "Software"), to deal in - * the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, - * sublicense, and/or sell copies of the Software, and to permit - * persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ +// based on the PyDict implementation of CPython. The erase() method +// is based on example code in the Wikipedia article on Hash tables. #ifndef COMMON_HASHMAP_H #define COMMON_HASHMAP_H @@ -74,11 +50,6 @@ namespace Common { -// The table sizes ideally are primes. We use a helper function to find -// suitable table sizes. -uint nextTableSize(uint x); - - // Enable the following #define if you want to check how many collisions the // code produces (many collisions indicate either a bad hash function, or a // hash table that is too small). @@ -113,9 +84,24 @@ public: Node(const Key &key) : _key(key), _value() {} }; + enum { + HASHMAP_PERTURB_SHIFT = 5, + HASHMAP_MIN_CAPACITY = 16, + + // The quotient of the next two constants controls how much the + // internal storage of the hashmap may fill up before being + // increased automatically. + // Note: the quotient of these two must be between and different + // from 0 and 1. + HASHMAP_LOADFACTOR_NUMERATOR = 2, + HASHMAP_LOADFACTOR_DENOMINATOR = 3, + + HASHMAP_MEMORYPOOL_SIZE = HASHMAP_MIN_CAPACITY * HASHMAP_LOADFACTOR_NUMERATOR / HASHMAP_LOADFACTOR_DENOMINATOR + }; + #ifdef USE_HASHMAP_MEMORY_POOL - MemoryPool _nodePool; + FixedSizeMemoryPool<sizeof(Node), HASHMAP_MEMORYPOOL_SIZE> _nodePool; Node *allocNode(const Key &key) { void* mem = _nodePool.malloc(); @@ -137,7 +123,7 @@ public: #endif Node **_storage; // hashtable of size arrsize. - uint _capacity; + uint _mask; /**< Capacity of the HashMap minus one; must be a power of two of minus one */ uint _size; HashFunc _hash; @@ -153,7 +139,7 @@ public: void assign(const HM_t &map); int lookup(const Key &key) const; int lookupAndCreateIfMissing(const Key &key); - void expand_array(uint newsize); + void expandStorage(uint newCapacity); template<class T> friend class IteratorImpl; @@ -175,7 +161,7 @@ public: NodeType *deref() const { assert(_hashmap != 0); - assert(_idx < _hashmap->_capacity); + assert(_idx <= _hashmap->_mask); Node *node = _hashmap->_storage[_idx]; assert(node != 0); return node; @@ -196,8 +182,8 @@ public: assert(_hashmap); do { _idx++; - } while (_idx < _hashmap->_capacity && _hashmap->_storage[_idx] == 0); - if (_idx >= _hashmap->_capacity) + } while (_idx <= _hashmap->_mask && _hashmap->_storage[_idx] == 0); + if (_idx > _hashmap->_mask) _idx = (uint)-1; return *this; @@ -247,7 +233,7 @@ public: iterator begin() { // Find and return the _key non-empty entry - for (uint ctr = 0; ctr < _capacity; ++ctr) { + for (uint ctr = 0; ctr <= _mask; ++ctr) { if (_storage[ctr]) return iterator(ctr, this); } @@ -259,7 +245,7 @@ public: const_iterator begin() const { // Find and return the first non-empty entry - for (uint ctr = 0; ctr < _capacity; ++ctr) { + for (uint ctr = 0; ctr <= _mask; ++ctr) { if (_storage[ctr]) return const_iterator(ctr, this); } @@ -298,14 +284,11 @@ public: */ template<class Key, class Val, class HashFunc, class EqualFunc> HashMap<Key, Val, HashFunc, EqualFunc>::HashMap() : -#ifdef USE_HASHMAP_MEMORY_POOL - _nodePool(sizeof(Node)), -#endif _defaultVal() { - _capacity = nextTableSize(0); - _storage = new Node *[_capacity]; + _mask = HASHMAP_MIN_CAPACITY - 1; + _storage = new Node *[HASHMAP_MIN_CAPACITY]; assert(_storage != NULL); - memset(_storage, 0, _capacity * sizeof(Node *)); + memset(_storage, 0, HASHMAP_MIN_CAPACITY * sizeof(Node *)); _size = 0; @@ -322,9 +305,6 @@ HashMap<Key, Val, HashFunc, EqualFunc>::HashMap() : */ template<class Key, class Val, class HashFunc, class EqualFunc> HashMap<Key, Val, HashFunc, EqualFunc>::HashMap(const HM_t &map) : -#ifdef USE_HASHMAP_MEMORY_POOL - _nodePool(sizeof(Node)), -#endif _defaultVal() { assign(map); } @@ -334,14 +314,14 @@ HashMap<Key, Val, HashFunc, EqualFunc>::HashMap(const HM_t &map) : */ template<class Key, class Val, class HashFunc, class EqualFunc> HashMap<Key, Val, HashFunc, EqualFunc>::~HashMap() { - for (uint ctr = 0; ctr < _capacity; ++ctr) + for (uint ctr = 0; ctr <= _mask; ++ctr) if (_storage[ctr] != NULL) freeNode(_storage[ctr]); delete[] _storage; #ifdef DEBUG_HASH_COLLISIONS extern void updateHashCollisionStats(int, int, int, int); - updateHashCollisionStats(_collisions, _lookups, _capacity, _size); + updateHashCollisionStats(_collisions, _lookups, _mask+1, _size); #endif } @@ -354,14 +334,14 @@ HashMap<Key, Val, HashFunc, EqualFunc>::~HashMap() { */ template<class Key, class Val, class HashFunc, class EqualFunc> void HashMap<Key, Val, HashFunc, EqualFunc>::assign(const HM_t &map) { - _capacity = map._capacity; - _storage = new Node *[_capacity]; + _mask = map._mask; + _storage = new Node *[_mask+1]; assert(_storage != NULL); - memset(_storage, 0, _capacity * sizeof(Node *)); + memset(_storage, 0, (_mask+1) * sizeof(Node *)); // Simply clone the map given to us, one by one. _size = 0; - for (uint ctr = 0; ctr < _capacity; ++ctr) { + for (uint ctr = 0; ctr <= _mask; ++ctr) { if (map._storage[ctr] != NULL) { _storage[ctr] = allocNode(map._storage[ctr]->_key); _storage[ctr]->_value = map._storage[ctr]->_value; @@ -375,43 +355,46 @@ void HashMap<Key, Val, HashFunc, EqualFunc>::assign(const HM_t &map) { template<class Key, class Val, class HashFunc, class EqualFunc> void HashMap<Key, Val, HashFunc, EqualFunc>::clear(bool shrinkArray) { - for (uint ctr = 0; ctr < _capacity; ++ctr) { + for (uint ctr = 0; ctr <= _mask; ++ctr) { if (_storage[ctr] != NULL) { freeNode(_storage[ctr]); _storage[ctr] = NULL; } } - if (shrinkArray && _capacity > nextTableSize(0)) { +#ifdef USE_HASHMAP_MEMORY_POOL + _nodePool.freeUnusedPages(); +#endif + + if (shrinkArray && _mask >= HASHMAP_MIN_CAPACITY) { delete[] _storage; - _capacity = nextTableSize(0); - _storage = new Node *[_capacity]; + _mask = HASHMAP_MIN_CAPACITY; + _storage = new Node *[HASHMAP_MIN_CAPACITY]; assert(_storage != NULL); - memset(_storage, 0, _capacity * sizeof(Node *)); + memset(_storage, 0, HASHMAP_MIN_CAPACITY * sizeof(Node *)); } _size = 0; } template<class Key, class Val, class HashFunc, class EqualFunc> -void HashMap<Key, Val, HashFunc, EqualFunc>::expand_array(uint newsize) { - assert(newsize > _capacity); - uint ctr, dex; +void HashMap<Key, Val, HashFunc, EqualFunc>::expandStorage(uint newCapacity) { + assert(newCapacity > _mask+1); const uint old_size = _size; - const uint old_capacity = _capacity; + const uint old_mask = _mask; Node **old_storage = _storage; // allocate a new array _size = 0; - _capacity = newsize; - _storage = new Node *[_capacity]; + _mask = newCapacity - 1; + _storage = new Node *[newCapacity]; assert(_storage != NULL); - memset(_storage, 0, _capacity * sizeof(Node *)); + memset(_storage, 0, newCapacity * sizeof(Node *)); // rehash all the old elements - for (ctr = 0; ctr < old_capacity; ++ctr) { + for (uint ctr = 0; ctr <= old_mask; ++ctr) { if (old_storage[ctr] == NULL) continue; @@ -419,12 +402,13 @@ void HashMap<Key, Val, HashFunc, EqualFunc>::expand_array(uint newsize) { // Since we know that no key exists twice in the old table, we // can do this slightly better than by calling lookup, since we // don't have to call _equal(). - dex = _hash(old_storage[ctr]->_key) % _capacity; - while (_storage[dex] != NULL) { - dex = (dex + 1) % _capacity; + const uint hash = _hash(old_storage[ctr]->_key); + uint idx = hash & _mask; + for (uint perturb = hash; _storage[idx] != NULL; perturb >>= HASHMAP_PERTURB_SHIFT) { + idx = (5 * idx + perturb + 1) & _mask; } - _storage[dex] = old_storage[ctr]; + _storage[idx] = old_storage[ctr]; _size++; } @@ -439,10 +423,13 @@ void HashMap<Key, Val, HashFunc, EqualFunc>::expand_array(uint newsize) { template<class Key, class Val, class HashFunc, class EqualFunc> int HashMap<Key, Val, HashFunc, EqualFunc>::lookup(const Key &key) const { - uint ctr = _hash(key) % _capacity; + const uint hash = _hash(key); + uint ctr = hash & _mask; + for (uint perturb = hash; ; perturb >>= HASHMAP_PERTURB_SHIFT) { + if (_storage[ctr] == NULL || _equal(_storage[ctr]->_key, key)) + break; - while (_storage[ctr] != NULL && !_equal(_storage[ctr]->_key, key)) { - ctr = (ctr + 1) % _capacity; + ctr = (5 * ctr + perturb + 1) & _mask; #ifdef DEBUG_HASH_COLLISIONS _collisions++; @@ -453,7 +440,7 @@ int HashMap<Key, Val, HashFunc, EqualFunc>::lookup(const Key &key) const { _lookups++; fprintf(stderr, "collisions %d, lookups %d, ratio %f in HashMap %p; size %d num elements %d\n", _collisions, _lookups, ((double) _collisions / (double)_lookups), - (const void *)this, _capacity, _size); + (const void *)this, _mask+1, _size); #endif return ctr; @@ -467,9 +454,11 @@ int HashMap<Key, Val, HashFunc, EqualFunc>::lookupAndCreateIfMissing(const Key & _storage[ctr] = allocNode(key); _size++; - // Keep the load factor below 75%. - if (_size > _capacity * 75 / 100) { - expand_array(nextTableSize(_capacity)); + // Keep the load factor below a certain threshold. + uint capacity = _mask + 1; + if (_size * HASHMAP_LOADFACTOR_DENOMINATOR > capacity * HASHMAP_LOADFACTOR_NUMERATOR) { + capacity = capacity < 500 ? (capacity * 4) : (capacity * 2); + expandStorage(capacity); ctr = lookup(key); } } @@ -520,23 +509,35 @@ void HashMap<Key, Val, HashFunc, EqualFunc>::setVal(const Key &key, const Val &v template<class Key, class Val, class HashFunc, class EqualFunc> void HashMap<Key, Val, HashFunc, EqualFunc>::erase(const Key &key) { // This is based on code in the Wikipedia article on Hash tables. - uint i = lookup(key); + + const uint hash = _hash(key); + uint i = hash & _mask; + uint perturb; + + for (perturb = hash; ; perturb >>= HASHMAP_PERTURB_SHIFT) { + if (_storage[i] == NULL || _equal(_storage[i]->_key, key)) + break; + + i = (5 * i + perturb + 1) & _mask; + } + if (_storage[i] == NULL) return; // key wasn't present, so no work has to be done + // If we remove a key, we must check all subsequent keys and possibly // reinsert them. uint j = i; freeNode(_storage[i]); _storage[i] = NULL; - while (true) { + for (perturb = hash; ; perturb >>= HASHMAP_PERTURB_SHIFT) { // Look at the next table slot - j = (j + 1) % _capacity; + j = (5 * j + perturb + 1) & _mask; // If the next slot is empty, we are done if (_storage[j] == NULL) break; // Compute the slot where the content of the next slot should normally be, // assuming an empty table, and check whether we have to move it. - uint k = _hash(_storage[j]->_key) % _capacity; + uint k = _hash(_storage[j]->_key) & _mask; if ((j > i && (k <= i || k > j)) || (j < i && (k <= i && k > j)) ) { _storage[i] = _storage[j]; |