1 ////////////////////////////////////////////////////////////////////////////////
3 // Author: Andy Rushton
4 // Copyright: (c) Southampton University 1999-2004
5 // (c) Andy Rushton 2004-2009
6 // License: BSD License, see ../docs/license.html
8 ////////////////////////////////////////////////////////////////////////////////
14 ////////////////////////////////////////////////////////////////////////////////
15 // the element stored in the hash
17 template<typename K, typename T, typename H, typename E>
21 master_iterator<hash<K,T,H,E>, hash_element<K,T,H,E> > m_master;
22 std::pair<const K, T> m_value;
23 hash_element<K,T,H,E>* m_next;
26 hash_element(const hash<K,T,H,E>* owner, const K& key, const T& data, unsigned hash) :
27 m_master(owner,this), m_value(key,data), m_next(0), m_hash(hash)
31 hash_element(const hash<K,T,H,E>* owner, const std::pair<const K,T>& value, unsigned hash) :
32 m_master(owner,this), m_value(value), m_next(0), m_hash(hash)
42 const hash<K,T,H,E>* owner(void) const
44 return m_master.owner();
47 // generate the bin number from the hash value and the owner's number of bins
48 unsigned bin(void) const
50 return m_hash % (owner()->m_bins);
54 ////////////////////////////////////////////////////////////////////////////////
58 template<typename K, typename T, class H, class E, typename V>
59 hash_iterator<K,T,H,E,V>::hash_iterator(void)
63 // non-null constructor used from within the hash to construct a valid iterator
64 template<typename K, typename T, class H, class E, typename V>
65 hash_iterator<K,T,H,E,V>::hash_iterator(hash_element<K,T,H,E>* element) :
66 safe_iterator<hash<K,T,H,E>,hash_element<K,T,H,E> >(element->m_master)
70 // constructor used to create an end iterator
71 template<typename K, typename T, class H, class E, typename V>
72 hash_iterator<K,T,H,E,V>::hash_iterator(const hash<K,T,H,E>* owner) :
73 safe_iterator<hash<K,T,H,E>,hash_element<K,T,H,E> >(owner)
77 template<typename K, typename T, class H, class E, typename V>
78 hash_iterator<K,T,H,E,V>::hash_iterator(const safe_iterator<hash<K,T,H,E>, hash_element<K,T,H,E> >& iterator) :
79 safe_iterator<hash<K,T,H,E>,hash_element<K,T,H,E> >(iterator)
85 template<typename K, typename T, class H, class E, typename V>
86 hash_iterator<K,T,H,E,V>::~hash_iterator(void)
92 template<typename K, typename T, class H, class E, typename V>
93 TYPENAME hash_iterator<K,T,H,E,V>::const_iterator hash_iterator<K,T,H,E,V>::constify(void) const
95 return hash_iterator<K,T,H,E,const std::pair<const K,T> >(*this);
98 template<typename K, typename T, class H, class E, typename V>
99 TYPENAME hash_iterator<K,T,H,E,V>::iterator hash_iterator<K,T,H,E,V>::deconstify(void) const
101 return hash_iterator<K,T,H,E,std::pair<const K,T> >(*this);
104 // increment operator looks for the next element in the table
105 // if there isn't one, then this becomes an end() iterator with m_bin = m_bins
106 template<typename K, typename T, class H, class E, typename V>
107 TYPENAME hash_iterator<K,T,H,E,V>::this_iterator& hash_iterator<K,T,H,E,V>::operator ++ (void)
108 throw(null_dereference,end_dereference)
110 this->assert_valid();
111 if (this->node()->m_next)
112 set(this->node()->m_next->m_master);
115 // failing that, subsequent hash values are tried until either an element is found or there are no more bins
116 // in which case it becomes an end() iterator
117 hash_element<K,T,H,E>* element = 0;
118 unsigned current_bin = this->node()->bin();
119 for(current_bin++; !element && (current_bin < this->owner()->m_bins); current_bin++)
120 element = this->owner()->m_values[current_bin];
122 set(element->m_master);
129 // post-increment is defined in terms of pre-increment
130 template<typename K, typename T, class H, class E, typename V>
131 TYPENAME hash_iterator<K,T,H,E,V>::this_iterator hash_iterator<K,T,H,E,V>::operator ++ (int)
132 throw(null_dereference,end_dereference)
134 hash_iterator<K,T,H,E,V> old(*this);
139 // two iterators are equal if they point to the same element
140 // both iterators must be non-null and belong to the same table
141 template<typename K, typename T, class H, class E, typename V>
142 bool hash_iterator<K,T,H,E,V>::operator == (const hash_iterator<K,T,H,E,V>& r) const
147 template<typename K, typename T, class H, class E, typename V>
148 bool hash_iterator<K,T,H,E,V>::operator != (const hash_iterator<K,T,H,E,V>& r) const
150 return !operator==(r);
153 template<typename K, typename T, class H, class E, typename V>
154 bool hash_iterator<K,T,H,E,V>::operator < (const hash_iterator<K,T,H,E,V>& r) const
156 return compare(r) < 0;
159 // iterator dereferencing is only legal on a non-null iterator
160 template<typename K, typename T, class H, class E, typename V>
161 V& hash_iterator<K,T,H,E,V>::operator*(void) const
162 throw(null_dereference,end_dereference)
164 this->assert_valid();
165 return this->node()->m_value;
168 template<typename K, typename T, class H, class E, typename V>
169 V* hash_iterator<K,T,H,E,V>::operator->(void) const
170 throw(null_dereference,end_dereference)
172 return &(operator*());
175 ////////////////////////////////////////////////////////////////////////////////
178 // totally arbitrary initial size used for auto-rehashed tables
179 static unsigned hash_default_bins = 127;
182 // tests whether the user wants auto-rehash
183 // sets the rehash point to be a loading of 1.0 by setting it to the number of bins
184 // uses the user's size unless this is zero, in which case implement the default
186 template<typename K, typename T, class H, class E>
187 hash<K,T,H,E>::hash(unsigned bins) :
188 m_rehash(bins), m_bins(bins > 0 ? bins : hash_default_bins), m_size(0), m_values(0)
190 m_values = new hash_element<K,T,H,E>*[m_bins];
191 for (unsigned i = 0; i < m_bins; i++)
195 template<typename K, typename T, class H, class E>
196 hash<K,T,H,E>::~hash(void)
198 // delete all the elements
200 // and delete the data structure
205 // as usual, implement the copy constructor i.t.o. the assignment operator
207 template<typename K, typename T, class H, class E>
208 hash<K,T,H,E>::hash(const hash<K,T,H,E>& right) :
209 m_rehash(right.m_rehash), m_bins(right.m_bins), m_size(0), m_values(0)
211 m_values = new hash_element<K,T,H,E>*[right.m_bins];
212 // copy the rehash behaviour as well as the size
213 for (unsigned i = 0; i < m_bins; i++)
218 // assignment operator
219 // this is done by copying the elements
220 // the source and target hashes can be different sizes
221 // the hash is self-copy safe, i.e. it is legal to say x = x;
223 template<typename K, typename T, class H, class E>
224 hash<K,T,H,E>& hash<K,T,H,E>::operator = (const hash<K,T,H,E>& r)
226 // make self-copy safe
227 if (&r == this) return *this;
228 // remove all the existing elements
230 // copy the elements across - remember that this is rehashing because the two
231 // tables can be different sizes so there is no quick way of doing this by
233 for (hash_iterator<K,T,H,E,const std::pair<const K,T> > i = r.begin(); i != r.end(); ++i)
234 insert(i->first, i->second);
238 // number of values in the hash
239 template<typename K, typename T, class H, class E>
240 bool hash<K,T,H,E>::empty(void) const
245 template<typename K, typename T, class H, class E>
246 unsigned hash<K,T,H,E>::size(void) const
252 template<typename K, typename T, class H, class E>
253 bool hash<K,T,H,E>::operator == (const hash<K,T,H,E>& right) const
255 // this table is the same as the right table if they are the same table!
256 if (&right == this) return true;
257 // they must be the same size to be equal
258 if (m_size != right.m_size) return false;
259 // now every key in this must be in right and have the same data
260 for (hash_iterator<K,T,H,E,const std::pair<const K,T> > i = begin(); i != end(); i++)
262 hash_iterator<K,T,H,E,const std::pair<const K,T> > found = right.find(i->first);
263 if (found == right.end()) return false;
264 if (!(i->second == found->second)) return false;
269 // set up the hash to auto-rehash at a specific size
270 // setting the rehash size to 0 forces manual rehashing
271 template<typename K, typename T, class H, class E>
272 void hash<K,T,H,E>::auto_rehash(void)
277 template<typename K, typename T, class H, class E>
278 void hash<K,T,H,E>::manual_rehash(void)
283 // the rehash function
284 // builds a new hash table and moves the elements (without copying) from the old to the new
285 // I store the un-modulused hash value in the element for more efficient rehashing
286 // passing 0 to the bins parameter does auto-rehashing
287 // passing any other value forces the number of bins
289 template<typename K, typename T, class H, class E>
290 void hash<K,T,H,E>::rehash(unsigned bins)
292 // user specified size: just take the user's value
293 // auto calculate: if the load is high, increase the size; else do nothing
294 unsigned new_bins = bins ? bins : m_bins;
295 if (bins == 0 && m_size > 0)
297 // these numbers are pretty arbitrary
298 // TODO - make them user-customisable?
299 float load = loading();
301 new_bins = (unsigned)(m_bins * load);
303 new_bins = m_bins * 2;
305 if (new_bins == m_bins) return;
306 // set the new rehashing point if auto-rehashing is on
307 if (m_rehash) m_rehash = new_bins;
308 // move aside the old structure
309 hash_element<K,T,H,E>** old_values = m_values;
310 unsigned old_bins = m_bins;
311 // create a replacement structure
312 m_values = new hash_element<K,T,H,E>*[new_bins];
313 for (unsigned i = 0; i < new_bins; i++)
316 // move all the old elements across, rehashing each one
317 for (unsigned j = 0; j < old_bins; j++)
321 // unhook from the old structure
322 hash_element<K,T,H,E>* current = old_values[j];
323 old_values[j] = current->m_next;
324 // rehash using the stored hash value
325 unsigned bin = current->bin();
326 // hook it into the new structure
327 current->m_next = m_values[bin];
328 m_values[bin] = current;
331 // now delete the old structure
335 // the loading is the average number of elements per bin
336 // this simplifies to the total elements divided by the number of bins
338 template<typename K, typename T, class H, class E>
339 float hash<K,T,H,E>::loading(void) const
341 return (float)m_size / (float)m_bins;
344 // remove all elements from the table
346 template<typename K, typename T, class H, class E>
347 void hash<K,T,H,E>::erase(void)
349 // unhook the list elements and destroy them
350 for (unsigned i = 0; i < m_bins; i++)
352 hash_element<K,T,H,E>* current = m_values[i];
355 hash_element<K,T,H,E>* next = current->m_next;
364 // test for whether a key is present in the table
366 template<typename K, typename T, class H, class E>
367 bool hash<K,T,H,E>::present(const K& key) const
369 return find(key) != end();
372 template<typename K, typename T, class H, class E>
373 TYPENAME hash<K,T,H,E>::size_type hash<K,T,H,E>::count(const K& key) const
375 return present() ? 1 : 0;
378 // add a key and data element to the table - defined in terms of the general-purpose pair insert function
380 template<typename K, typename T, class H, class E>
381 TYPENAME hash<K,T,H,E>::iterator hash<K,T,H,E>::insert(const K& key, const T& data)
383 return insert(std::pair<const K,T>(key,data)).first;
386 // insert a key/data pair into the table
387 // this removes any old value with the same key since there is no multihash functionality
389 template<typename K, typename T, class H, class E>
390 std::pair<TYPENAME hash<K,T,H,E>::iterator, bool> hash<K,T,H,E>::insert(const std::pair<const K,T>& value)
392 // if auto-rehash is enabled, implement the auto-rehash before inserting the new value
393 // the table is rehashed if this insertion makes the loading exceed 1.0
394 if (m_rehash && (m_size >= m_rehash)) rehash();
395 // calculate the new hash value
396 unsigned hash_value_full = H()(value.first);
397 unsigned bin = hash_value_full % m_bins;
398 bool inserted = true;
399 // unhook any previous value with this key
400 // this has been inlined from erase(key) so that the hash value is not calculated twice
401 hash_element<K,T,H,E>* previous = 0;
402 for (hash_element<K,T,H,E>* current = m_values[bin]; current; previous = current, current = current->m_next)
404 // first check the full stored hash value
405 if (current->m_hash != hash_value_full) continue;
407 // next try the equality operator
408 if (!E()(current->m_value.first, value.first)) continue;
410 // unhook this value and destroy it
412 previous->m_next = current->m_next;
414 m_values[bin] = current->m_next;
418 // we've overwritten a previous value
421 // assume there can only be one match so we can give up now
424 // now hook in a new list element at the start of the list for this hash value
425 hash_element<K,T,H,E>* new_item = new hash_element<K,T,H,E>(this, value, hash_value_full);
426 new_item->m_next = m_values[bin];
427 m_values[bin] = new_item;
428 // increment the size count
430 // construct an iterator from the list node, and return whether inserted
431 return std::make_pair(hash_iterator<K,T,H,E,std::pair<const K,T> >(new_item), inserted);
434 // insert a key with an empty data field ready to be filled in later
436 template<typename K, typename T, class H, class E>
437 TYPENAME hash<K,T,H,E>::iterator hash<K,T,H,E>::insert(const K& key)
439 return insert(key,T());
442 // remove a key from the table - return true if the key was found and removed, false if it wasn't present
444 template<typename K, typename T, class H, class E>
445 unsigned hash<K,T,H,E>::erase(const K& key)
447 unsigned hash_value_full = H()(key);
448 unsigned bin = hash_value_full % m_bins;
449 // scan the list for an element with this key
450 // need to keep a previous pointer because the lists are single-linked
451 hash_element<K,T,H,E>* previous = 0;
452 for (hash_element<K,T,H,E>* current = m_values[bin]; current; previous = current, current = current->m_next)
454 // first check the full stored hash value
455 if (current->m_hash != hash_value_full) continue;
457 // next try the equality operator
458 if (!E()(current->m_value.first, key)) continue;
460 // found this key, so unhook the element from the list
462 previous->m_next = current->m_next;
464 m_values[bin] = current->m_next;
467 // remember to maintain the size count
474 // remove an element from the hash table using an iterator (std::map equivalent)
475 template<typename K, typename T, class H, class E>
476 TYPENAME hash<K,T,H,E>::iterator hash<K,T,H,E>::erase(TYPENAME hash<K,T,H,E>::iterator it)
478 // work out what the next iterator is in order to return it later
479 TYPENAME hash<K,T,H,E>::iterator next(it);
481 // we now need to find where this item is - made difficult by the use of
482 // single-linked lists which means I have to search through the bin from
483 // the top in order to unlink from the list.
484 unsigned hash_value_full = it.node()->m_hash;
485 unsigned bin = hash_value_full % m_bins;
486 // scan the list for this element
487 // need to keep a previous pointer because the lists are single-linked
488 hash_element<K,T,H,E>* previous = 0;
489 for (hash_element<K,T,H,E>* current = m_values[bin]; current; previous = current, current = current->m_next)
491 // direct test on the address of the element
492 if (current != it.node()) continue;
493 // found this iterator, so unhook the element from the list
495 previous->m_next = current->m_next;
497 m_values[bin] = current->m_next;
501 // remember to maintain the size count
508 template<typename K, typename T, class H, class E>
509 void hash<K,T,H,E>::clear(void)
514 // search for a key in the table and return an iterator to it
515 // if the search fails, returns an end() iterator
517 template<typename K, typename T, class H, class E>
518 TYPENAME hash<K,T,H,E>::const_iterator hash<K,T,H,E>::find(const K& key) const
520 // scan the list for this key's hash value for the element with a matching key
521 unsigned hash_value_full = H()(key);
522 unsigned bin = hash_value_full % m_bins;
523 for (hash_element<K,T,H,E>* current = m_values[bin]; current; current = current->m_next)
525 if (current->m_hash == hash_value_full && E()(current->m_value.first, key))
526 return hash_iterator<K,T,H,E,const std::pair<const K,T> >(current);
531 template<typename K, typename T, class H, class E>
532 TYPENAME hash<K,T,H,E>::iterator hash<K,T,H,E>::find(const K& key)
534 // scan the list for this key's hash value for the element with a matching key
535 unsigned hash_value_full = H()(key);
536 unsigned bin = hash_value_full % m_bins;
537 for (hash_element<K,T,H,E>* current = m_values[bin]; current; current = current->m_next)
539 if (current->m_hash == hash_value_full && E()(current->m_value.first, key))
540 return hash_iterator<K,T,H,E,std::pair<const K,T> >(current);
545 // table lookup by key using the index operator[], returning a reference to the data field, not an iterator
546 // this is rather like the std::map's [] operator
547 // the difference is that I have a const and non-const version
548 // the const version will not create the element if not present already, but the non-const version will
549 // the non-const version is compatible with the behaviour of the map
551 template<typename K, typename T, class H, class E>
552 const T& hash<K,T,H,E>::operator[] (const K& key) const throw(std::out_of_range)
554 // this const version cannot change the hash, so has to raise an exception if the key is missing
555 hash_iterator<K,T,H,E,const std::pair<const K,T> > found = find(key);
557 throw std::out_of_range("key not found in stlplus::hash::operator[]");
558 return found->second;
561 template<typename K, typename T, class H, class E>
562 T& hash<K,T,H,E>::operator[] (const K& key)
564 // this non-const version can change the hash, so creates a new element if the key is missing
565 hash_iterator<K,T,H,E,std::pair<const K,T> > found = find(key);
568 return found->second;
573 template<typename K, typename T, class H, class E>
574 TYPENAME hash<K,T,H,E>::const_iterator hash<K,T,H,E>::begin(void) const
576 // find the first element
577 for (unsigned bin = 0; bin < m_bins; bin++)
579 return hash_iterator<K,T,H,E,const std::pair<const K,T> >(m_values[bin]);
580 // if the hash is empty, return the end iterator
584 template<typename K, typename T, class H, class E>
585 TYPENAME hash<K,T,H,E>::iterator hash<K,T,H,E>::begin(void)
587 // find the first element
588 for (unsigned bin = 0; bin < m_bins; bin++)
590 return hash_iterator<K,T,H,E,std::pair<const K,T> >(m_values[bin]);
591 // if the hash is empty, return the end iterator
595 template<typename K, typename T, class H, class E>
596 TYPENAME hash<K,T,H,E>::const_iterator hash<K,T,H,E>::end(void) const
598 return hash_iterator<K,T,H,E,const std::pair<const K,T> >(this);
601 template<typename K, typename T, class H, class E>
602 TYPENAME hash<K,T,H,E>::iterator hash<K,T,H,E>::end(void)
604 return hash_iterator<K,T,H,E,std::pair<const K,T> >(this);
607 template<typename K, typename T, class H, class E>
608 void hash<K,T,H,E>::debug_report(std::ostream& str) const
610 // calculate some stats first
611 unsigned occupied = 0;
612 unsigned min_in_bin = m_size;
613 unsigned max_in_bin = 0;
614 for (unsigned i = 0; i < m_bins; i++)
616 if (m_values[i]) occupied++;
618 for (hash_element<K,T,H,E>* item = m_values[i]; item; item = item->m_next) count++;
619 if (count > max_in_bin) max_in_bin = count;
620 if (count < min_in_bin) min_in_bin = count;
622 // now print the table
623 str << "------------------------------------------------------------------------" << std::endl;
624 str << "| size: " << m_size << std::endl;
625 str << "| bins: " << m_bins << std::endl;
626 str << "| loading: " << loading() << " ";
628 str << "auto-rehash at " << m_rehash << std::endl;
630 str << "manual rehash" << std::endl;
631 str << "| occupied: " << occupied
632 << std::fixed << " (" << (100.0*(float)occupied/(float)m_bins) << "%)" << std::scientific
633 << ", min = " << min_in_bin << ", max = " << max_in_bin << std::endl;
634 str << "|-----------------------------------------------------------------------" << std::endl;
635 str << "| bin 0 1 2 3 4 5 6 7 8 9" << std::endl;
636 str << "| ---------------------------------------------------------------";
637 for (unsigned j = 0; j < m_bins; j++)
642 str << "| " << std::setw(6) << std::right << (j/10*10) << std::left << " |";
645 for (hash_element<K,T,H,E>* item = m_values[j]; item; item = item->m_next) count++;
649 str << std::setw(6) << std::right << count << std::left;
652 str << "------------------------------------------------------------------------" << std::endl;
655 ////////////////////////////////////////////////////////////////////////////////
657 } // end namespace stlplus