Commit 1620656c authored by Matthias Braun's avatar Matthias Braun
Browse files

new, faster hashset

[r8731]
parent 366480b2
#include <config.h>
#include "cpset.h"
/*
 * Instantiate the generic hashset template (hashset.c) as cpset: a set of
 * void* elements that are hashed and compared through per-set callbacks.
 */
#define HashSet cpset_t
#define HashSetIterator cpset_iterator_t
#define HashSetEntry cpset_hashset_entry_t
#define ValueType void*
/* NULL marks an empty slot and (void*)-1 marks a deleted slot, so neither
 * value can be stored as a set element. */
#define NullValue NULL
#define DeletedValue ((void*)-1)
/* Hashing and comparison are delegated to the function pointers that
 * cpset_init()/cpset_init_size() store in the set. */
#define Hash(this,value) this->hash_function(value)
#define KeysEqual(this,key1,key2) this->cmp_function(key1, key2)
/* Clears a range of entries by zero-filling it.
 * NOTE(review): this assumes an all-zero entry compares equal to NullValue
 * (NULL) - confirm for the targeted platforms. */
#define SetRangeEmpty(ptr,size) memset(ptr, 0, (size) * sizeof(cpset_hashset_entry_t))
/* The template's init functions get private names; the public cpset_init()
 * and cpset_init_size() wrappers in this file store the callbacks first and
 * then call them. */
#define hashset_init _cpset_init
#define hashset_init_size _cpset_init_size
/* The remaining template functions are exported directly under cpset names. */
#define hashset_destroy cpset_destroy
#define hashset_insert cpset_insert
#define hashset_remove cpset_remove
#define hashset_find cpset_find
#define hashset_size cpset_size
#define hashset_iterator_init cpset_iterator_init
#define hashset_iterator_next cpset_iterator_next
#define hashset_remove_iterator cpset_remove_iterator
/* Pull in the generic implementation, specialized by the macros above. */
#include "hashset.c"
/**
 * Initializes a cpset.
 *
 * Records the two callbacks in the set and then hands over to the generic
 * initializer _cpset_init() (the template's hashset_init).
 *
 * @param this           Pointer to allocated space for the cpset
 * @param hash_function  The hash function to use
 * @param cmp_function   The compare function to use
 */
void cpset_init(cpset_t *this, cpset_hash_function hash_function,
                cpset_cmp_function cmp_function)
{
    /* the callbacks are stored before the generic initializer runs */
    this->cmp_function  = cmp_function;
    this->hash_function = hash_function;

    _cpset_init(this);
}
/**
 * Initializes a cpset with a size hint.
 *
 * Records the two callbacks in the set and then hands over to the generic
 * initializer _cpset_init_size() (the template's hashset_init_size).
 *
 * @param this            Pointer to allocated space for the cpset
 * @param hash_function   The hash function to use
 * @param cmp_function    The compare function to use
 * @param expected_elems  Number of elements expected in the cpset (roughly)
 */
void cpset_init_size(cpset_t *this, cpset_hash_function hash_function,
                     cpset_cmp_function cmp_function, size_t expected_elems)
{
    /* the callbacks are stored before the generic initializer runs */
    this->cmp_function  = cmp_function;
    this->hash_function = hash_function;

    _cpset_init_size(this, expected_elems);
}
/**
* @file
* @date 16.03.2007
* @brief a set of pointers with a custom compare function
* @author Matthias Braun
* @version $Id$
*/
#ifndef _FIRM_CPSET_H_
#define _FIRM_CPSET_H_
/**
* The type of a cpset compare function.
*
* @param p1 pointer to an element
* @param p2 pointer to another element
*
* @return 1 if the elements are identical, zero otherwise
*/
typedef int (*cpset_cmp_function) (const void *p1, const void *p2);
/**
* The type of a cpset hash function.
*
* @param obj pointer to the element to hash
*
* @return the hash value of the element; elements that the compare function
* treats as identical must yield the same hash value
*/
typedef unsigned (*cpset_hash_function) (const void *obj);
/* Template parameters for hashset.h; they are #undef'd again right after the
 * include so they do not leak into files that include this header. */
#define HashSet cpset_t
#define HashSetIterator cpset_iterator_t
#define HashSetEntry cpset_hashset_entry_t
#define ValueType void*
/* Extra struct members: the per-set compare and hash callbacks. */
#define ADDITIONAL_DATA cpset_cmp_function cmp_function; cpset_hash_function hash_function;
#include "hashset.h"
#undef ADDITIONAL_DATA
#undef ValueType
#undef HashSetEntry
#undef HashSetIterator
#undef HashSet
/**
* Initializes a cpset
*
* @param cpset Pointer to allocated space for the cpset
* @param hash_function The hash function to use
* @param cmp_function The compare function to use
*/
void cpset_init(cpset_t *cpset, cpset_hash_function hash_function,
cpset_cmp_function cmp_function);
/**
* Initializes a cpset
*
* @param cpset Pointer to allocated space for the cpset
* @param hash_function The hash function to use
* @param cmp_function The compare function to use
* @param expected_elements Number of elements expected in the cpset (roughly)
*/
void cpset_init_size(cpset_t *cpset, cpset_hash_function hash_function,
cpset_cmp_function cmp_function,
size_t expected_elements);
/**
* Destroys a cpset and frees the memory allocated for hashtable. The memory of
* the cpset itself is not freed.
*
* @param cpset Pointer to the cpset
*/
void cpset_destroy(cpset_t *cpset);
/**
* Inserts an element into a cpset.
*
* @param cpset Pointer to the cpset
* @param obj Element to insert into the cpset
* @returns The element itself or a pointer to an existing element
*/
void* cpset_insert(cpset_t *cpset, void *obj);
/**
* Removes an element from a cpset. Does nothing if the cpset doesn't contain the
* element.
*
* @param cpset Pointer to the cpset
* @param obj Pointer to remove from the cpset
*/
void cpset_remove(cpset_t *cpset, const void *obj);
/**
* Tests whether a cpset contains a pointer
*
* @param cpset Pointer to the cpset
* @param obj The pointer to find
* @returns An equivalent object to @p obj or NULL
*/
void *cpset_find(const cpset_t *cpset, const void *obj);
/**
* Returns the number of pointers contained in the cpset
*
* @param cpset Pointer to the cpset
* @returns Number of pointers contained in the cpset
*/
size_t cpset_size(const cpset_t *cpset);
/**
* Initializes a cpset iterator. Sets the iterator before the first element in
* the cpset.
*
* @param iterator Pointer to already allocated iterator memory
* @param cpset Pointer to the cpset
*/
void cpset_iterator_init(cpset_iterator_t *iterator, const cpset_t *cpset);
/**
* Advances the iterator and returns the current element or NULL if all elements
* in the cpset have been processed.
* @attention It is not allowed to use cpset_insert or cpset_remove while
* iterating over a cpset.
*
* @param iterator Pointer to the cpset iterator.
* @returns Next element in the cpset or NULL
*/
void *cpset_iterator_next(cpset_iterator_t *iterator);
/**
* Removes the element the iterator currently points to
*
* @param cpset Pointer to the cpset
* @param iterator Pointer to the cpset iterator.
*/
void cpset_remove_iterator(cpset_t *cpset, const cpset_iterator_t *iterator);
#endif
/**
* @file
* @date 17.03.2007
* @brief Generic hashset implementation
* @author Matthias Braun, inspiration from densehash from google sparsehash
* package
* @version $Id$
*
*
* You have to specialize this file by defining:
*
* <ul>
* <li><b>HashSet</b> The name of the hashset type</li>
* <li><b>HashSetIterator</b> The name of the hashset iterator type</li>
* <li><b>ValueType</b> The type of the stored data values</li>
* <li><b>NullValue</b> A special value representing no values</li>
* <li><b>DeletedValue</b> A special value representing deleted entries</li>
* <li><b>Hash(hashset,key)</b> calculates the hash value for a given key</li>
* </ul>
*
* Note that by default it is assumed that the data values themselves are used
* as keys. However you can change that with additional defines:
*
* <ul>
* <li><b>KeyType</b> The type of the keys identifying data values.
* Defining this implies, that a data value contains
* more than just the key.</li>
* <li><b>GetKey(value)</b> Extracts the key from a data value</li>
* <li><b>KeysEqual(hashset,key1,key2)</b> Tests whether 2 keys are equal</li>
* <li><b>DO_REHASH</b> Instead of storing the hash-values, recalculate
* them on demand from the datavalues. (useful if
* calculating the hash-values takes less time than
* a memory access)</li>
* </ul>
*
* You can further fine tune your hashset by defining the following:
*
* <ul>
* <li><b>JUMP(num_probes)</b> The probing method</li>
* <li><b>Alloc(count)</b> Allocates count hashset entries (NOT bytes)</li>
* <li><b>Free(ptr)</b> Frees a block of memory allocated by Alloc</li>
* <li><b>SetRangeEmpty(ptr,count)</b> Efficiently sets a range of elements to
* the Null value</li>
* <li><b>ADDITIONAL_DATA</b> Additional fields appended to the hashset struct</li>
* </ul>
*/
#ifdef HashSet
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "bitfiddle.h"
#include "util.h"
/* Probing step: the probe loops advance by JUMP(num_probes) on the
 * num_probes-th probe, so this default yields offsets 1, 3, 6, ... from the
 * home bucket (quadratic probing). */
#ifndef JUMP
#define JUMP(num_probes) (num_probes)
#endif
/* Without a user-supplied Hash, the value itself converted to unsigned is
 * the hash; ID_HASH records that this identity hash is in use. */
#ifndef Hash
#define ID_HASH
#define Hash(this,value) ((unsigned)(value))
#endif
/* Entry layout: with DO_REHASH an entry is just the value, storing a hash is
 * a no-op and the hash is recomputed on demand; otherwise the accessors use
 * the .hash and .data members of an entry. */
#ifdef DO_REHASH
#define HashSetEntry ValueType
#define EntrySetHash(entry,new_hash)
#define EntryGetHash(this,entry) Hash(this,entry)
#define EntryGetValue(entry) (entry)
#else
#define EntryGetHash(this,entry) (entry).hash
#define EntrySetHash(entry,new_hash) (entry).hash = (new_hash)
#define EntryGetValue(entry) (entry).data
#endif
/* Default allocator: xmalloc() for `size` entries (a count, not bytes),
 * released again with free(). */
#ifndef Alloc
#include "xmalloc.h"
#define Alloc(size) (HashSetEntry*) xmalloc((size) * sizeof(HashSetEntry))
#define Free(ptr) free(ptr)
#endif
/* Return type of hashset_insert: with the identity hash it is the int flag
 * passed as `new` (insert_nogrow passes 1 for a fresh insertion and 0 when the
 * key was already present); otherwise it is the value stored in the entry. */
#ifdef ID_HASH
#define InsertReturnValue int
#define GetInsertReturnValue(entry,new) (new)
#else
#define InsertReturnValue ValueType
#define GetInsertReturnValue(entry,new) EntryGetValue(entry)
#endif
/* Default: the stored value is its own key, and a new entry is initialized by
 * assigning the key to it. */
#ifndef KeyType
#define KeyType ValueType
#define GetKey(value) (value)
#define InitData(this,value,key) (value) = (key)
#endif
/* Key type used by the read-only operations (find/remove). */
#ifndef ConstKeyType
#define ConstKeyType const KeyType
#endif
/* Default slot states: a slot is empty or deleted when its value equals the
 * NullValue or DeletedValue sentinel respectively. */
#ifndef EntrySetEmpty
#define EntrySetEmpty(entry) EntryGetValue(entry) = NullValue
#endif
#ifndef EntrySetDeleted
#define EntrySetDeleted(entry) EntryGetValue(entry) = DeletedValue
#endif
#ifndef EntryIsEmpty
#define EntryIsEmpty(entry) (EntryGetValue(entry) == NullValue)
#endif
#ifndef EntryIsDeleted
#define EntryIsDeleted(entry) (EntryGetValue(entry) == DeletedValue)
#endif
/* Default SetRangeEmpty: marks `size` entries starting at `ptr` as empty, one
 * EntrySetEmpty() at a time. `ptr` and `size` are each evaluated once.
 * NOTE(review): the expansion is a bare { ... } block with locals named _i,
 * _size, entries and entry, so an argument expression using one of those
 * names would be shadowed, and a trailing ';' after the macro forms an extra
 * empty statement. */
#ifndef SetRangeEmpty
#define SetRangeEmpty(ptr,size) \
{ \
size_t _i; \
size_t _size = (size); \
HashSetEntry *entries = (ptr); \
for(_i = 0; _i < _size; ++_i) { \
HashSetEntry *entry = & entries[_i]; \
EntrySetEmpty(*entry); \
} \
}
#endif
#ifndef HT_OCCUPANCY_FLT
/** how full before we double size */
#define HT_OCCUPANCY_FLT 0.5f
#endif
#ifndef HT_EMPTY_FLT
/** how empty before we half size */
#define HT_EMPTY_FLT (0.4f * (HT_OCCUPANCY_FLT))
#endif
#ifndef HT_MIN_BUCKETS
/** default smallest bucket size */
#define HT_MIN_BUCKETS 32
#endif
#define ILLEGAL_POS ((size_t)-1)
/* The including file has to map every public template function to a concrete
 * name (cpset.c, for example, defines hashset_insert as cpset_insert); the
 * build is stopped if one of the names is missing. */
#ifndef hashset_init
#error You have to redefine hashset_init
#endif
#ifndef hashset_init_size
#error You have to redefine hashset_init_size
#endif
#ifndef hashset_destroy
#error You have to redefine hashset_destroy
#endif
#ifndef hashset_insert
#error You have to redefine hashset_insert
#endif
#ifndef hashset_remove
#error You have to redefine hashset_remove
#endif
#ifndef hashset_find
#error You have to redefine hashset_find
#endif
#ifndef hashset_size
#error You have to redefine hashset_size
#endif
#ifndef hashset_iterator_init
#error You have to redefine hashset_iterator_init
#endif
#ifndef hashset_iterator_next
#error You have to redefine hashset_iterator_next
#endif
#ifndef hashset_remove_iterator
#error You have to redefine hashset_remove_iterator
#endif
/**
 * Reports how many elements the hashset currently holds.
 *
 * Insertions bump num_elements, while hashset_remove() only marks the slot
 * as deleted and bumps num_deleted, so the live count is the difference of
 * the two counters.
 *
 * @param this  the hashset
 * @returns     number of elements that are stored and not deleted
 */
size_t hashset_size(const HashSet *this)
{
    const size_t live = this->num_elements - this->num_deleted;

    return live;
}
/**
 * Inserts a key into the hashset without ever resizing the table; the caller
 * has to guarantee that there is room for one more element.
 *
 * The probe sequence is followed until either an entry with an equal key is
 * found (nothing is changed) or an empty slot proves that the key is absent.
 * In the latter case the new entry goes into the first deleted slot met on
 * the way, or into the empty slot itself if there was none.
 *
 * @note num_elements is incremented for every new entry, also when a deleted
 *       slot is reused; num_deleted is not touched here.
 * @note also see the comments for hashset_insert()
 *
 * @param this  the hashset
 * @param key   the key that identifies the data
 * @returns     GetInsertReturnValue() of the affected entry, with the flag 1
 *              for a new entry and 0 for an existing one
 * @internal
 */
static inline
InsertReturnValue insert_nogrow(HashSet *this, KeyType key)
{
    const size_t   bucket_count  = this->num_buckets;
    const size_t   mask          = bucket_count - 1;
    const unsigned hash          = Hash(this, key);
    size_t         first_deleted = ILLEGAL_POS;
    size_t         pos           = hash & mask;
    size_t         step;

    /* masking with (bucket_count - 1) only works for powers of two */
    assert((bucket_count & (bucket_count - 1)) == 0);

    for(step = 1; ; ++step) {
        HashSetEntry *entry = &this->entries[pos];

        if(EntryIsEmpty(*entry)) {
            /* key is not in the set: prefer recycling a deleted slot */
            HashSetEntry *target = (first_deleted != ILLEGAL_POS)
                                 ? &this->entries[first_deleted]
                                 : entry;

            InitData(this, EntryGetValue(*target), key);
            EntrySetHash(*target, hash);
            this->num_elements++;
            return GetInsertReturnValue(*target, 1);
        }

        if(EntryIsDeleted(*entry)) {
            /* remember only the first deleted slot of the probe path */
            if(first_deleted == ILLEGAL_POS) {
                first_deleted = pos;
            }
        } else if((EntryGetHash(this, *entry) == hash)
                  && (KeysEqual(this, GetKey(EntryGetValue(*entry)), key))) {
            /* an equal key is already stored, hand back the existing entry */
            return GetInsertReturnValue(*entry, 0);
        }

        pos = (pos + JUMP(step)) & mask;
        assert(step < bucket_count);
    }
}
/**
 * Places a value in the first empty slot of its probe sequence.
 *
 * Only valid when the table contains no deleted entries and the value is not
 * in the table yet: no key comparison takes place, and meeting a deleted
 * entry trips an assertion. resize() uses this to refill a fresh table.
 *
 * @param this   the hashset
 * @param hash   hash value of @p value (stored alongside it unless DO_REHASH
 *               turns storing into a no-op)
 * @param value  the value to store, must not be NullValue
 * @internal
 */
static
void insert_new(HashSet *this, unsigned hash, ValueType value)
{
    const size_t  bucket_count = this->num_buckets;
    const size_t  mask         = bucket_count - 1;
    size_t        pos          = hash & mask;
    size_t        step         = 0;
    HashSetEntry *entry;

    assert(value != NullValue);

    /* walk the probe sequence until a free slot shows up */
    for(;;) {
        entry = &this->entries[pos];
        if(EntryIsEmpty(*entry)) {
            break;
        }

        assert(!EntryIsDeleted(*entry));
        ++step;
        pos = (pos + JUMP(step)) & mask;
        assert(step < bucket_count);
    }

    EntryGetValue(*entry) = value;
    EntrySetHash(*entry, hash);
    this->num_elements++;
}
/**
 * Recomputes the grow and shrink limits from the current number of buckets
 * and clears the pending "consider shrinking" flag.
 *
 * The limits are the bucket count scaled by HT_OCCUPANCY_FLT and
 * HT_EMPTY_FLT, truncated to size_t.
 *
 * @param this  the hashset
 * @internal
 */
static inline
void reset_thresholds(HashSet *this)
{
    const size_t buckets = this->num_buckets;

    this->consider_shrink   = 0;
    this->shrink_threshold  = (size_t) (buckets * HT_EMPTY_FLT);
    this->enlarge_threshold = (size_t) (buckets * HT_OCCUPANCY_FLT);
}
/**
 * Moves the hashset into a newly allocated table with @p new_size buckets.
 * Used both for growing and for shrinking.
 *
 * All live entries of the old table are reinserted with their stored hash
 * values; deleted entries are dropped, so afterwards num_deleted is 0 and
 * num_elements counts exactly the live entries. The old table is freed.
 *
 * @param this      the hashset
 * @param new_size  number of buckets of the new table; the probe code masks
 *                  with (num_buckets - 1), so this is expected to be a power
 *                  of two, and it has to offer a slot for every live entry
 * @internal
 */
static inline
void resize(HashSet *this, size_t new_size)
{
    HashSetEntry *stale       = this->entries;
    size_t        stale_count = this->num_buckets;
    HashSetEntry *fresh;
    size_t        idx;

    /* set up an all-empty table of the requested size */
    fresh = Alloc(new_size);
    SetRangeEmpty(fresh, new_size);

    /* switch over to the new table and start counting from scratch */
    this->num_deleted  = 0;
    this->num_elements = 0;
    this->num_buckets  = new_size;
    this->entries      = fresh;
#ifndef NDEBUG
    this->entries_version++;
#endif
    reset_thresholds(this);

    /* carry over every live entry; the fresh table has no deleted slots,
     * which is what insert_new() requires */
    for(idx = 0; idx < stale_count; ++idx) {
        HashSetEntry *entry = &stale[idx];

        if(!(EntryIsEmpty(*entry)) && !(EntryIsDeleted(*entry))) {
            insert_new(this, EntryGetHash(this, *entry), EntryGetValue(*entry));
        }
    }

    /* nothing refers to the old table any more */
    Free(stale);
}
/**
 * Doubles the number of buckets if one more element would push the table
 * past its enlarge threshold; otherwise does nothing.
 *
 * @param this  the hashset
 * @internal
 */
static inline
void maybe_grow(HashSet *this)
{
    /* common case: there is still room for another element */
    if(LIKELY(this->num_elements + 1 <= this->enlarge_threshold))
        return;

    resize(this, this->num_buckets * 2);
}
/**
 * Shrinks the table if a removal has flagged it for consideration and the
 * number of live elements is at or below the shrink threshold.
 *
 * The new bucket count is ceil_po2() of the live element count, but never
 * less than 4. The consider_shrink flag is cleared in any case.
 *
 * @note The only caller in view, hashset_insert(), runs maybe_grow() right
 *       afterwards.
 *
 * @param this  the hashset
 * @internal
 */
static inline
void maybe_shrink(HashSet *this)
{
    size_t live;
    size_t target;

    if(!this->consider_shrink)
        return;
    this->consider_shrink = 0;

    live = hashset_size(this);
    if(LIKELY(live > this->shrink_threshold))
        return;

    target = ceil_po2(live);
    resize(this, target < 4 ? 4 : target);
}
/**
 * Inserts an element into the hashset. If no element with key @p key exists
 * yet, a new one is created and initialized through InitData; otherwise the
 * set is left unchanged.
 *
 * Before inserting, the table is given the chance to shrink (if a removal
 * flagged it) and then to grow, so that insert_nogrow() finds enough room.
 *
 * @param this  the hashset
 * @param key   the key that identifies the data
 * @returns     when the identity hash is in use (ID_HASH): 1 if the key was
 *              newly inserted, 0 if it was already present; otherwise the
 *              existing or newly created data element
 */
InsertReturnValue hashset_insert(HashSet *this, KeyType key)
{
    InsertReturnValue result;

#ifndef NDEBUG
    /* debug-only modification counter
     * NOTE(review): presumably checked by the iterators - confirm */
    this->entries_version++;
#endif

    maybe_shrink(this);
    maybe_grow(this);

    result = insert_nogrow(this, key);
    return result;
}
/**
 * Looks up the element identified by @p key.
 *
 * The probe sequence of the key's hash is followed; deleted slots are
 * skipped, and an empty slot ends the search. Keys are only compared with
 * KeysEqual when the entry's hash matches.
 *
 * @param this  the hashset
 * @param key   the key to search for
 * @returns     the stored value, or NullValue if the key is not in the set
 */
ValueType hashset_find(const HashSet *this, ConstKeyType key)
{
    const size_t   bucket_count = this->num_buckets;
    const size_t   mask         = bucket_count - 1;
    const unsigned hash         = Hash(this, key);
    size_t         pos          = hash & mask;
    size_t         step;

    for(step = 1; ; ++step) {
        HashSetEntry *entry = &this->entries[pos];

        /* an empty slot terminates every probe sequence */
        if(EntryIsEmpty(*entry)) {
            return NullValue;
        }

        /* compare hashes first, only then the keys */
        if(!(EntryIsDeleted(*entry))
           && (EntryGetHash(this, *entry) == hash)
           && (KeysEqual(this, GetKey(EntryGetValue(*entry)), key))) {
            return EntryGetValue(*entry);
        }

        pos = (pos + JUMP(step)) & mask;
        assert(step < bucket_count);
    }
}
/**
* Removes an element from a hashset. Does nothing if the set doesn't contain
* the element.
*
* @param this the hashset
* @param key key that identifies the data to remove
*/
void hashset_remove(HashSet *this, ConstKeyType key)
{
size_t num_probes = 0;
size_t num_buckets = this->num_buckets;
size_t hashmask = num_buckets - 1;
unsigned hash = Hash(this, key);
size_t bucknum = hash & hashmask;
#ifndef NDEBUG
this->entries_version++;
#endif
while(1) {
HashSetEntry *entry = & this->entries[bucknum];
if(EntryIsEmpty(*entry)) {
return;
}
if(EntryIsDeleted(*entry)) {
// entry is deleted
} else if(EntryGetHash(this, *entry) == hash) {
if(KeysEqual(this, GetKey(EntryGetValue(*entry)), key)) {
EntrySetDeleted(*entry);
this->num_deleted++;
this->consider_shrink = 1;
return;
}
}