From 47429f219750033c011dadfd5e9106cd5bfcb5b9 Mon Sep 17 00:00:00 2001 From: Max Horn Date: Wed, 20 Aug 2008 10:18:59 +0000 Subject: Extended HashMap debug output svn-id: r34051 --- common/hashmap.cpp | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'common/hashmap.cpp') diff --git a/common/hashmap.cpp b/common/hashmap.cpp index 4749234740..2bd1d48b92 100644 --- a/common/hashmap.cpp +++ b/common/hashmap.cpp @@ -89,5 +89,36 @@ uint nextTableSize(uint x) { return primes[i]; } +#ifdef DEBUG_HASH_COLLISIONS +static double + g_collisions = 0, + g_lookups = 0, + g_collPerLook = 0, + g_arrsize = 0, + g_nele = 0; +static int g_max_arrsize = 0, g_max_nele = 0; +static int g_totalHashmaps = 0; + +void updateHashCollisionStats(int collisions, int lookups, int arrsize, int nele) { + g_collisions += collisions; + g_lookups += lookups; + if (lookups) + g_collPerLook += (double)collisions / (double)lookups; + g_arrsize += arrsize; + g_nele += nele; + g_totalHashmaps++; + + g_max_arrsize = MAX(g_max_arrsize, arrsize); + g_max_nele = MAX(g_max_nele, nele); + + fprintf(stdout, "%d hashmaps: colls %.1f; lookups %.1f; ratio %.3f%%; size %f (max: %d); capacity %f (max: %d)\n", + g_totalHashmaps, + g_collisions / g_totalHashmaps, + g_lookups / g_totalHashmaps, + 100 * g_collPerLook / g_totalHashmaps, + g_nele / g_totalHashmaps, g_max_nele, + g_arrsize / g_totalHashmaps, g_max_arrsize); +} +#endif } // End of namespace Common -- cgit v1.2.3 From a79e9385a19530698e5c1cc1da39cb6b80cb9f74 Mon Sep 17 00:00:00 2001 From: Max Horn Date: Wed, 20 Aug 2008 11:07:16 +0000 Subject: Unified member names in container/storage classes Array, HashMap and String: _storage, _size, _capacity svn-id: r34052 --- common/hashmap.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'common/hashmap.cpp') diff --git a/common/hashmap.cpp b/common/hashmap.cpp index 2bd1d48b92..99ca0713ee 100644 --- a/common/hashmap.cpp +++ b/common/hashmap.cpp @@ -94,9 +94,9 @@ static double g_collisions = 0, g_lookups = 0, g_collPerLook = 0, - g_arrsize = 0, - g_nele = 0; -static int g_max_arrsize = 0, g_max_nele = 0; + g_capacity = 0, + g_size = 0; +static int g_max_capacity = 0, g_max_size = 0; static int g_totalHashmaps = 0; void updateHashCollisionStats(int collisions, int lookups, int arrsize, int nele) { @@ -104,20 +104,20 @@ void updateHashCollisionStats(int collisions, int lookups, int arrsize, int nele g_lookups += lookups; if (lookups) g_collPerLook += (double)collisions / (double)lookups; - g_arrsize += arrsize; - g_nele += nele; + g_capacity += arrsize; + g_size += nele; g_totalHashmaps++; - g_max_arrsize = MAX(g_max_arrsize, arrsize); - g_max_nele = MAX(g_max_nele, nele); + g_max_capacity = MAX(g_max_capacity, arrsize); + g_max_size = MAX(g_max_size, nele); fprintf(stdout, "%d hashmaps: colls %.1f; lookups %.1f; ratio %.3f%%; size %f (max: %d); capacity %f (max: %d)\n", g_totalHashmaps, g_collisions / g_totalHashmaps, g_lookups / g_totalHashmaps, 100 * g_collPerLook / g_totalHashmaps, - g_nele / g_totalHashmaps, g_max_nele, - g_arrsize / g_totalHashmaps, g_max_arrsize); + g_size / g_totalHashmaps, g_max_size, + g_capacity / g_totalHashmaps, g_max_capacity); } #endif -- cgit v1.2.3 From 31ce5eb4961bec52c6d30d8a1566406b04da7b90 Mon Sep 17 00:00:00 2001 From: Max Horn Date: Tue, 2 Sep 2008 11:34:12 +0000 Subject: Revised HashMap implementation svn-id: r34273 --- common/hashmap.cpp | 89 +++++++++++++++++++++++------------------------------- 1 file changed, 37 insertions(+), 52 deletions(-) (limited to 'common/hashmap.cpp') diff --git a/common/hashmap.cpp b/common/hashmap.cpp index 99ca0713ee..b8f2608901 100644 --- a/common/hashmap.cpp +++ b/common/hashmap.cpp @@ -24,69 +24,35 @@ */ // The hash map (associative array) implementation in this file is -// based on code by Andrew Y. Ng, 1996: - -/* - * Copyright (c) 1998-2003 Massachusetts Institute of Technology. - * This code was developed as part of the Haystack research project - * (http://haystack.lcs.mit.edu/). Permission is hereby granted, - * free of charge, to any person obtaining a copy of this software - * and associated documentation files (the "Software"), to deal in - * the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, - * sublicense, and/or sell copies of the Software, and to permit - * persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ +// based on the PyDict implementation of CPython. The erase() method +// is based on example code in the Wikipedia article on Hash tables. #include "common/hashmap.h" namespace Common { -// const char *: +// Hash function for strings, taken from CPython. uint hashit(const char *p) { - uint hash = 0; + uint hash = *p << 7; byte c; - while ((c = *p++)) - hash = (hash * 31 + c); - return hash; + int size = 0; + while ((c = *p++)) { + hash = (1000003 * hash) ^ c; + size++; + } + return hash ^ size; } +// Like hashit, but converts every char to lowercase before hashing. uint hashit_lower(const char *p) { - uint hash = 0; + uint hash = tolower(*p) << 7; byte c; - while ((c = *p++)) - hash = (hash * 31 + tolower(c)); - return hash; -} - -// The following table is taken from the GNU ISO C++ Library's hashtable.h file. -static const uint primes[] = { - 53ul, 97ul, 193ul, 389ul, 769ul, - 1543ul, 3079ul, 6151ul, 12289ul, 24593ul, - 49157ul, 98317ul, 196613ul, 393241ul, 786433ul, - 1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul, - 50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul, - 1610612741ul, 3221225473ul, 4294967291ul -}; - -uint nextTableSize(uint x) { - int i = 0; - while (x >= primes[i]) - i++; - return primes[i]; + int size = 0; + while ((c = *p++)) { + hash = (1000003 * hash) ^ tolower(c); + size++; + } + return hash ^ size; } #ifdef DEBUG_HASH_COLLISIONS @@ -98,6 +64,7 @@ static double g_size = 0; static int g_max_capacity = 0, g_max_size = 0; static int g_totalHashmaps = 0; +static int g_stats[4] = {0,0,0,0}; void updateHashCollisionStats(int collisions, int lookups, int arrsize, int nele) { g_collisions += collisions; @@ -108,6 +75,15 @@ void updateHashCollisionStats(int collisions, int lookups, int arrsize, int nele g_size += nele; g_totalHashmaps++; + if (3*nele <= 2*8) + g_stats[0]++; + if (3*nele <= 2*16) + g_stats[1]++; + if (3*nele <= 2*32) + g_stats[2]++; + if (3*nele <= 2*64) + g_stats[3]++; + g_max_capacity = MAX(g_max_capacity, arrsize); g_max_size = MAX(g_max_size, nele); @@ -118,6 +94,15 @@ void updateHashCollisionStats(int collisions, int lookups, int arrsize, int nele 100 * g_collPerLook / g_totalHashmaps, g_size / g_totalHashmaps, g_max_size, g_capacity / g_totalHashmaps, g_max_capacity); + fprintf(stdout, " %d less than %d; %d less than %d; %d less than %d; %d less than %d\n", + g_stats[0], 2*8/3, + g_stats[1],2*16/3, + g_stats[2],2*32/3, + g_stats[3],2*64/3); + + // TODO: + // * Should record the maximal size of the map during its lifetime, not that at its death + // * Should do some statistics: how many maps are less than 2/3*8, 2/3*16, 2/3*32, ... } #endif -- cgit v1.2.3