Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
Zwinkau
libfirm
Commits
1620656c
Commit
1620656c
authored
Mar 26, 2007
by
Matthias Braun
Browse files
new, faster hashset
[r8731]
parent
366480b2
Changes
4
Hide whitespace changes
Inline
Side-by-side
ir/adt/cpset.c
0 → 100644
View file @
1620656c
#include <config.h>
#include "cpset.h"
#define HashSet cpset_t
#define HashSetIterator cpset_iterator_t
#define HashSetEntry cpset_hashset_entry_t
#define ValueType void*
#define NullValue NULL
#define DeletedValue ((void*)-1)
#define Hash(this,value) this->hash_function(value)
#define KeysEqual(this,key1,key2) this->cmp_function(key1, key2)
#define SetRangeEmpty(ptr,size) memset(ptr, 0, (size) * sizeof(cpset_hashset_entry_t))
#define hashset_init _cpset_init
#define hashset_init_size _cpset_init_size
#define hashset_destroy cpset_destroy
#define hashset_insert cpset_insert
#define hashset_remove cpset_remove
#define hashset_find cpset_find
#define hashset_size cpset_size
#define hashset_iterator_init cpset_iterator_init
#define hashset_iterator_next cpset_iterator_next
#define hashset_remove_iterator cpset_remove_iterator
#include "hashset.c"
/**
 * Stores the hash and compare callbacks in the set and then hands the set to
 * the generic initializer _cpset_init().
 *
 * @param this           storage for the cpset
 * @param hash_function  callback saved in the set's hash_function field
 * @param cmp_function   callback saved in the set's cmp_function field
 */
void cpset_init(cpset_t *this, cpset_hash_function hash_function,
                cpset_cmp_function cmp_function)
{
	/* callbacks are in place before the generic initializer runs */
	this->cmp_function  = cmp_function;
	this->hash_function = hash_function;

	_cpset_init(this);
}
/**
 * Same as cpset_init(), but forwards an expected element count to the generic
 * initializer _cpset_init_size().
 *
 * @param this            storage for the cpset
 * @param hash_function   callback saved in the set's hash_function field
 * @param cmp_function    callback saved in the set's cmp_function field
 * @param expected_elems  element count passed on to _cpset_init_size()
 */
void cpset_init_size(cpset_t *this, cpset_hash_function hash_function,
                     cpset_cmp_function cmp_function, size_t expected_elems)
{
	/* callbacks are in place before the generic initializer runs */
	this->cmp_function  = cmp_function;
	this->hash_function = hash_function;

	_cpset_init_size(this, expected_elems);
}
ir/adt/cpset.h
0 → 100644
View file @
1620656c
/**
* @file
* @date 16.03.2007
* @brief a set of pointers with a custom compare function
* @author Matthias Braun
* @version $Id$
*/
#ifndef _FIRM_CPSET_H_
#define _FIRM_CPSET_H_
/**
* The type of a cpset compare function.
*
* @param p1 pointer to an element
* @param p2 pointer to another element
*
* @return 1 if the elements are identical, zero otherwise
*/
typedef
int
(
*
cpset_cmp_function
)
(
const
void
*
p1
,
const
void
*
p2
);
/**
* The type of a cpset hash function.
*
* @param obj pointer to an element
*
* @return the hash value of the element
*/
typedef
unsigned
(
*
cpset_hash_function
)
(
const
void
*
obj
);
#define HashSet cpset_t
#define HashSetIterator cpset_iterator_t
#define HashSetEntry cpset_hashset_entry_t
#define ValueType void*
#define ADDITIONAL_DATA cpset_cmp_function cmp_function; cpset_hash_function hash_function;
#include "hashset.h"
#undef ADDITIONAL_DATA
#undef ValueType
#undef HashSetEntry
#undef HashSetIterator
#undef HashSet
/**
* Initializes a cpset
*
* @param cpset Pointer to allocated space for the cpset
* @param hash_function The hash function to use
* @param cmp_function The compare function to use
*/
void
cpset_init
(
cpset_t
*
cpset
,
cpset_hash_function
hash_function
,
cpset_cmp_function
cmp_function
);
/**
* Initializes a cpset
*
* @param cpset Pointer to allocated space for the cpset
* @param hash_function The hash function to use
* @param cmp_function The compare function to use
* @param expected_elements Number of elements expected in the cpset (roughly)
*/
void
cpset_init_size
(
cpset_t
*
cpset
,
cpset_hash_function
hash_function
,
cpset_cmp_function
cmp_function
,
size_t
expected_elements
);
/**
* Destroys a cpset and frees the memory allocated for hashtable. The memory of
* the cpset itself is not freed.
*
* @param cpset Pointer to the cpset
*/
void
cpset_destroy
(
cpset_t
*
cpset
);
/**
* Inserts an element into a cpset.
*
* @param cpset Pointer to the cpset
* @param obj Element to insert into the cpset
* @returns The element itself or a pointer to an existing element
*/
void
*
cpset_insert
(
cpset_t
*
cpset
,
void
*
obj
);
/**
* Removes an element from a cpset. Does nothing if the cpset doesn't contain the
* element.
*
* @param cpset Pointer to the cpset
* @param obj Pointer to remove from the cpset
*/
void
cpset_remove
(
cpset_t
*
cpset
,
const
void
*
obj
);
/**
* Tests whether a cpset contains a pointer
*
* @param cpset Pointer to the cpset
* @param obj The pointer to find
* @returns An equivalent object to @p obj or NULL
*/
void
*
cpset_find
(
const
cpset_t
*
cpset
,
const
void
*
obj
);
/**
* Returns the number of pointers contained in the cpset
*
* @param cpset Pointer to the cpset
* @returns Number of pointers contained in the cpset
*/
size_t
cpset_size
(
const
cpset_t
*
cpset
);
/**
* Initializes a cpset iterator. Sets the iterator before the first element in
* the cpset.
*
* @param iterator Pointer to already allocated iterator memory
* @param cpset Pointer to the cpset
*/
void
cpset_iterator_init
(
cpset_iterator_t
*
iterator
,
const
cpset_t
*
cpset
);
/**
* Advances the iterator and returns the current element or NULL if all elements
* in the cpset have been processed.
* @attention It is not allowed to use cpset_insert or cpset_remove while
* iterating over a cpset.
*
* @param iterator Pointer to the cpset iterator.
* @returns Next element in the cpset or NULL
*/
void
*
cpset_iterator_next
(
cpset_iterator_t
*
iterator
);
/**
* Removes the element the iterator currently points to
*
* @param cpset Pointer to the cpset
* @param iterator Pointer to the cpset iterator.
*/
void
cpset_remove_iterator
(
cpset_t
*
cpset
,
const
cpset_iterator_t
*
iterator
);
#endif
ir/adt/hashset.c
0 → 100644
View file @
1620656c
/**
* @file
* @date 17.03.2007
* @brief Generic hashset implementation
* @author Matthias Braun, inspiration from densehash from google sparsehash
* package
* @version $Id$
*
*
* You have to specialize this file by defining:
*
* <ul>
* <li><b>HashSet</b> The name of the hashset type</li>
* <li><b>HashSetIterator</b> The name of the hashset iterator type</li>
* <li><b>ValueType</b> The type of the stored data values</li>
* <li><b>NullValue</b> A special value representing no values</li>
* <li><b>DeletedValue</b> A special value representing deleted entries</li>
* <li><b>Hash(hashset,key)</b> calculates the hash value for a given key</li>
* </ul>
*
* Note that by default it is assumed that the data values themselves are used
* as keys. However you can change that with additional defines:
*
* <ul>
* <li><b>KeyType</b> The type of the keys identifying data values.
* Defining this implies, that a data value contains
* more than just the key.</li>
* <li><b>GetKey(value)</b> Extracts the key from a data value</li>
* <li><b>KeysEqual(hashset,key1,key2)</b> Tests whether 2 keys are equal</li>
* <li><b>DO_REHASH</b> Instead of storing the hash-values, recalculate
* them on demand from the datavalues. (useful if
* calculating the hash-values takes less time than
* a memory access)</li>
* </ul>
*
* You can further fine tune your hashset by defining the following:
*
* <ul>
* <li><b>JUMP(num_probes)</b> The probing method</li>
* <li><b>Alloc(count)</b> Allocates count hashset entries (NOT bytes)</li>
* <li><b>Free(ptr)</b> Frees a block of memory allocated by Alloc</li>
* <li><b>SetRangeEmpty(ptr,count)</b> Efficiently sets a range of elements to
* the Null value</li>
* <li><b>ADDITIONAL_DATA</b> Additional fields appended to the hashset struct</li>
* </ul>
*/
#ifdef HashSet
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "bitfiddle.h"
#include "util.h"
/*
 * Probing step: JUMP(num_probes) is added to the current bucket index after
 * the num_probes-th collision. With the default below the offsets from the
 * home bucket are 1, 3, 6, 10, ... (quadratic probing).
 */
#ifndef JUMP
#define JUMP(num_probes) (num_probes)
#endif
/*
 * No Hash() supplied: use the value itself, converted to unsigned, as its
 * hash and record that choice in ID_HASH.
 */
#ifndef Hash
#define ID_HASH
#define Hash(this,value) ((unsigned)(value))
#endif
/*
 * Entry accessors. With DO_REHASH an entry is just the value and its hash is
 * recomputed through Hash(); otherwise an entry has a .hash and a .data field.
 */
#ifdef DO_REHASH
#define HashSetEntry ValueType
#define EntrySetHash(entry,new_hash)
#define EntryGetHash(this,entry) Hash(this,entry)
#define EntryGetValue(entry) (entry)
#else
#define EntryGetHash(this,entry) (entry).hash
#define EntrySetHash(entry,new_hash) (entry).hash = (new_hash)
#define EntryGetValue(entry) (entry).data
#endif
/* Default allocator: Alloc() takes a number of entries, not bytes. */
#ifndef Alloc
#include "xmalloc.h"
#define Alloc(size) (HashSetEntry*) xmalloc((size) * sizeof(HashSetEntry))
#define Free(ptr) free(ptr)
#endif
/*
 * Result of an insert: for ID_HASH sets an int flag (the "new" argument, which
 * the insert code passes as 1 for a fresh entry and 0 for an existing one),
 * otherwise the value stored in the entry.
 */
#ifdef ID_HASH
#define InsertReturnValue int
#define GetInsertReturnValue(entry,new) (new)
#else
#define InsertReturnValue ValueType
#define GetInsertReturnValue(entry,new) EntryGetValue(entry)
#endif
/* Without a separate KeyType the stored values are their own keys. */
#ifndef KeyType
#define KeyType ValueType
#define GetKey(value) (value)
#define InitData(this,value,key) (value) = (key)
#endif
#ifndef ConstKeyType
#define ConstKeyType const KeyType
#endif
/* Empty and deleted buckets are marked by storing NullValue / DeletedValue. */
#ifndef EntrySetEmpty
#define EntrySetEmpty(entry) EntryGetValue(entry) = NullValue
#endif
#ifndef EntrySetDeleted
#define EntrySetDeleted(entry) EntryGetValue(entry) = DeletedValue
#endif
#ifndef EntryIsEmpty
#define EntryIsEmpty(entry) (EntryGetValue(entry) == NullValue)
#endif
#ifndef EntryIsDeleted
#define EntryIsDeleted(entry) (EntryGetValue(entry) == DeletedValue)
#endif
/*
 * Default SetRangeEmpty: marks "size" entries starting at "ptr" as empty, one
 * by one. The expansion declares locals named _i, _size, entries and entry.
 */
#ifndef SetRangeEmpty
#define SetRangeEmpty(ptr,size) \
{ \
size_t _i; \
size_t _size = (size); \
HashSetEntry *entries = (ptr); \
for(_i = 0; _i < _size; ++_i) { \
HashSetEntry *entry = & entries[_i]; \
EntrySetEmpty(*entry); \
} \
}
#endif
#ifndef HT_OCCUPANCY_FLT
/** fraction of the buckets that may be in use before the table is doubled */
#define HT_OCCUPANCY_FLT 0.5f
#endif
#ifndef HT_EMPTY_FLT
/** fraction of the buckets below which the table is considered for shrinking */
#define HT_EMPTY_FLT (0.4f * (HT_OCCUPANCY_FLT))
#endif
#ifndef HT_MIN_BUCKETS
/** default smallest number of buckets */
#define HT_MIN_BUCKETS 32
#endif
/** sentinel bucket index meaning "no position remembered" */
#define ILLEGAL_POS ((size_t)-1)
/*
 * The including file has to map every generic hashset_* name onto the name of
 * the concrete function it wants generated.
 */
#ifndef hashset_init
#error You have to redefine hashset_init
#endif
#ifndef hashset_init_size
#error You have to redefine hashset_init_size
#endif
#ifndef hashset_destroy
#error You have to redefine hashset_destroy
#endif
#ifndef hashset_insert
#error You have to redefine hashset_insert
#endif
#ifndef hashset_remove
#error You have to redefine hashset_remove
#endif
#ifndef hashset_find
#error You have to redefine hashset_find
#endif
#ifndef hashset_size
#error You have to redefine hashset_size
#endif
#ifndef hashset_iterator_init
#error You have to redefine hashset_iterator_init
#endif
#ifndef hashset_iterator_next
#error You have to redefine hashset_iterator_next
#endif
#ifndef hashset_remove_iterator
#error You have to redefine hashset_remove_iterator
#endif
/**
 * Reports how many elements the hashset currently holds.
 *
 * @param this  the hashset
 * @returns     the num_elements counter minus the num_deleted counter
 */
size_t hashset_size(const HashSet *this)
{
	const size_t live = this->num_elements - this->num_deleted;

	return live;
}
/**
 * Puts @p key into the table without ever resizing it, so the caller has to
 * guarantee that a free bucket exists.
 *
 * The probe sequence is followed until either an entry with an equal key is
 * found (which is then returned) or an empty bucket is reached. In the latter
 * case the new entry goes into the first deleted bucket seen on the way, or
 * into the empty bucket if there was none. num_deleted is not touched here.
 *
 * @note also see comments for hashset_insert()
 * @internal
 */
static inline InsertReturnValue insert_nogrow(HashSet *this, KeyType key)
{
	size_t   probe_count   = 0;
	size_t   num_buckets   = this->num_buckets;
	size_t   bucket_mask   = num_buckets - 1;
	unsigned hash          = Hash(this, key);
	size_t   bucknum       = hash & bucket_mask;
	size_t   first_deleted = ILLEGAL_POS;

	/* masking with (num_buckets - 1) only works for powers of two */
	assert((num_buckets & (num_buckets - 1)) == 0);

	for (;;) {
		HashSetEntry *entry = &this->entries[bucknum];

		if (EntryIsEmpty(*entry)) {
			/* end of the probe chain: the key is not in the set yet */
			size_t        target = first_deleted != ILLEGAL_POS
			                     ? first_deleted : bucknum;
			HashSetEntry *nentry = &this->entries[target];

			InitData(this, EntryGetValue(*nentry), key);
			EntrySetHash(*nentry, hash);
			this->num_elements++;
			return GetInsertReturnValue(*nentry, 1);
		}

		if (EntryIsDeleted(*entry)) {
			/* remember only the first reusable bucket */
			if (first_deleted == ILLEGAL_POS)
				first_deleted = bucknum;
		} else if (EntryGetHash(this, *entry) == hash
		           && (KeysEqual(this, GetKey(EntryGetValue(*entry)), key))) {
			/* value already in the set, return it */
			return GetInsertReturnValue(*entry, 0);
		}

		++probe_count;
		bucknum = (bucknum + JUMP(probe_count)) & bucket_mask;
		assert(probe_count < num_buckets);
	}
}
/**
 * Inserts a value into a hashset under the assumption that the hashset
 * contains no deleted entries and the value doesn't exist in the hashset yet.
 * No key comparison is done: the value is stored in the first empty bucket of
 * its probe sequence. The caller must make sure an empty bucket exists.
 *
 * @param this   the hashset
 * @param hash   hash of @p value; selects the start bucket and is stored with
 *               the entry via EntrySetHash()
 * @param value  the value to store, must not be NullValue
 * @internal
 */
static void insert_new(HashSet *this, unsigned hash, ValueType value)
{
	size_t num_probes  = 0;
	size_t num_buckets = this->num_buckets;
	size_t hashmask    = num_buckets - 1;
	size_t bucknum     = hash & hashmask;

	assert(value != NullValue);

	while (1) {
		HashSetEntry *entry = &this->entries[bucknum];

		if (EntryIsEmpty(*entry)) {
			/* deleted buckets are ruled out by the precondition, so the
			 * first empty bucket is always the insert position */
			EntryGetValue(*entry) = value;
			EntrySetHash(*entry, hash);
			this->num_elements++;
			return;
		}
		assert(!EntryIsDeleted(*entry));

		++num_probes;
		bucknum = (bucknum + JUMP(num_probes)) & hashmask;
		assert(num_probes < num_buckets);
	}
}
/**
 * Recomputes the grow and shrink limits from the current bucket count and
 * clears the consider_shrink flag.
 * @internal
 */
static inline void reset_thresholds(HashSet *this)
{
	this->consider_shrink   = 0;
	this->shrink_threshold  = (size_t) (this->num_buckets * HT_EMPTY_FLT);
	this->enlarge_threshold = (size_t) (this->num_buckets * HT_OCCUPANCY_FLT);
}
/**
 * Moves all live entries into a freshly allocated table of @p new_size
 * buckets and frees the old table. Empty and deleted buckets are dropped, so
 * num_deleted is zero afterwards.
 *
 * @param this      the hashset
 * @param new_size  number of buckets of the new table
 * @internal
 */
static inline void resize(HashSet *this, size_t new_size)
{
	size_t        old_count   = this->num_buckets;
	HashSetEntry *old_entries = this->entries;
	HashSetEntry *new_entries;
	size_t        i;

	/* set up an all-empty table with the requested number of buckets */
	new_entries = Alloc(new_size);
	SetRangeEmpty(new_entries, new_size);

	/* switch over to it; the counters restart from zero and are rebuilt by
	 * the reinsertion below */
	this->entries      = new_entries;
	this->num_buckets  = new_size;
	this->num_elements = 0;
	this->num_deleted  = 0;
#ifndef NDEBUG
	this->entries_version++;
#endif
	reset_thresholds(this);

	/* carry over every bucket that holds a real value */
	for (i = 0; i < old_count; ++i) {
		HashSetEntry *entry = &old_entries[i];

		if (EntryIsEmpty(*entry) || EntryIsDeleted(*entry))
			continue;

		insert_new(this, EntryGetHash(this, *entry), EntryGetValue(*entry));
	}

	/* the old table is no longer referenced */
	Free(old_entries);
}
/**
 * Doubles the number of buckets if one more element would push num_elements
 * above enlarge_threshold; otherwise does nothing.
 * @internal
 */
static inline void maybe_grow(HashSet *this)
{
	if (LIKELY(this->num_elements + 1 <= this->enlarge_threshold))
		return;

	/* too crowded: double table size */
	resize(this, this->num_buckets * 2);
}
/**
 * Shrinks the table if the consider_shrink flag is set and the set holds no
 * more than shrink_threshold elements. The flag is cleared whenever it was
 * set. The new bucket count is ceil_po2() of the element count, but at
 * least 4.
 * @internal
 */
static inline void maybe_shrink(HashSet *this)
{
	size_t live;
	size_t target;

	if (!this->consider_shrink)
		return;
	this->consider_shrink = 0;

	live = hashset_size(this);
	if (LIKELY(live > this->shrink_threshold))
		return;

	target = ceil_po2(live);
	resize(this, target < 4 ? 4 : target);
}
/**
 * Inserts an element into the hashset. If no element with key @p key exists
 * yet, a new one is created and initialized with InitData(). Otherwise the
 * existing element is kept. The table is shrunk or grown beforehand if needed.
 *
 * @param this  the hashset
 * @param key   the key that identifies the data
 * @returns     the existing or newly created data element; for sets built
 *              with the default identity hash (ID_HASH) an int that is 1 if
 *              the key was newly inserted and 0 if it was already present
 */
InsertReturnValue hashset_insert(HashSet *this, KeyType key)
{
	InsertReturnValue result;

#ifndef NDEBUG
	this->entries_version++;
#endif

	maybe_shrink(this);
	maybe_grow(this);

	result = insert_nogrow(this, key);
	return result;
}
/**
 * Looks up the element with key @p key by following the key's probe sequence
 * until a matching entry or an empty bucket is reached.
 *
 * @param this  the hashset
 * @param key   the key to search for
 * @returns     the found value or NullValue if nothing was found
 */
ValueType hashset_find(const HashSet *this, ConstKeyType key)
{
	size_t   probe_count = 0;
	size_t   num_buckets = this->num_buckets;
	size_t   bucket_mask = num_buckets - 1;
	unsigned hash        = Hash(this, key);
	size_t   bucknum     = hash & bucket_mask;

	for (;;) {
		HashSetEntry *entry = &this->entries[bucknum];

		/* an empty bucket ends the probe chain */
		if (EntryIsEmpty(*entry))
			return NullValue;

		if (EntryIsDeleted(*entry)) {
			/* deleted bucket: keep probing */
		} else if (EntryGetHash(this, *entry) == hash
		           && (KeysEqual(this, GetKey(EntryGetValue(*entry)), key))) {
			/* found the value */
			return EntryGetValue(*entry);
		}

		++probe_count;
		bucknum = (bucknum + JUMP(probe_count)) & bucket_mask;
		assert(probe_count < num_buckets);
	}
}
/**
* Removes an element from a hashset. Does nothing if the set doesn't contain
* the element.
*
* @param this the hashset
* @param key key that identifies the data to remove
*/
void
hashset_remove
(
HashSet
*
this
,
ConstKeyType
key
)
{
size_t
num_probes
=
0
;
size_t
num_buckets
=
this
->
num_buckets
;
size_t
hashmask
=
num_buckets
-
1
;
unsigned
hash
=
Hash
(
this
,
key
);
size_t
bucknum
=
hash
&
hashmask
;
#ifndef NDEBUG
this
->
entries_version
++
;
#endif
while
(
1
)
{
HashSetEntry
*
entry
=
&
this
->
entries
[
bucknum
];
if
(
EntryIsEmpty
(
*
entry
))
{
return
;
}
if
(
EntryIsDeleted
(
*
entry
))
{
// entry is deleted
}
else
if
(
EntryGetHash
(
this
,
*
entry
)
==
hash
)
{
if
(
KeysEqual
(
this
,
GetKey
(
EntryGetValue
(
*
entry
)),
key
))
{
EntrySetDeleted
(
*
entry
);
this
->
num_deleted
++
;
this
->
consider_shrink
=
1
;
return
;
}
}