Commit 66752b5a authored by Matthias Braun
Browse files

rename popcnt to popcount; avoid inline assembly in favor of gcc builtin functions

[r27291]
parent 1287307f
......@@ -770,7 +770,7 @@ int tarval_is_single_bit(tarval *tv);
*
* @return number of set bits or -1 on error
*/
int get_tarval_popcnt(tarval *tv);
int get_tarval_popcount(tarval *tv);
/**
* Return the number of the lowest set bit in a given (integer) tarval.
......
......@@ -73,14 +73,18 @@ static inline int add_saturated(int x, int y)
* @param x A 32-bit word.
* @return The number of bits set in x.
*/
static inline unsigned popcnt(unsigned x)
static inline unsigned popcount(unsigned x)
{
#if defined(__GNUC__) && __GNUC__ >= 4
	/* GCC-compatible compilers (version 4 and up) offer the count as a builtin. */
	return (unsigned)__builtin_popcount(x);
#else
	/*
	 * Portable fallback for a 32-bit word: add up the bits in ever wider
	 * groups (pairs, nibbles, bytes, half-words, whole word).
	 */
	unsigned pairs   = x - ((x >> 1) & 0x55555555);
	unsigned nibbles = (pairs & 0x33333333) + ((pairs >> 2) & 0x33333333);
	unsigned bytes   = (nibbles + (nibbles >> 4)) & 0x0f0f0f0f;
	unsigned halves  = bytes + (bytes >> 8);
	unsigned total   = halves + (halves >> 16);

	/* Only the low six bits carry the result (0..32); drop the leftovers. */
	return total & 0x3f;
#endif
}
/**
......@@ -90,15 +94,10 @@ static inline unsigned popcnt(unsigned x)
*/
static inline unsigned nlz(unsigned x)
{
#ifdef USE_X86_ASSEMBLY
unsigned res;
#if defined(__GNUC__) && __GNUC__ >= 4
if(x == 0)
return 32;
__asm__("bsrl %1,%0"
: "=r" (res)
: "r" (x));
return 31 - res;
return __builtin_clz(x);
#else
unsigned y;
int n = 32;
......@@ -119,15 +118,10 @@ static inline unsigned nlz(unsigned x)
*/
static inline unsigned ntz(unsigned x)
{
#ifdef USE_X86_ASSEMBLY
unsigned res;
#if defined(__GNUC__) && __GNUC__ >= 4
if(x == 0)
return 32;
__asm__("bsfl %1,%0"
: "=r" (res)
: "r" (x));
return res;
return __builtin_ctz(x);
#else
return HACKDEL_WORDSIZE - nlz(~x & (x - 1));
#endif
......@@ -163,7 +157,7 @@ static inline unsigned ntz(unsigned x)
*/
static inline unsigned floor_po2(unsigned x)
{
#ifdef USE_X86_ASSEMBLY // in this case nlz is fast
#if defined(__GNUC__) && __GNUC__ >= 4 // in this case nlz is fast
if(x == 0)
return 0;
// note that x != 0 here, so nlz(x) < 32!
......@@ -189,7 +183,7 @@ static inline unsigned ceil_po2(unsigned x)
return 0;
assert(x < (1U << 31));
#ifdef USE_X86_ASSEMBLY // in this case nlz is fast
#if defined(__GNUC__) && __GNUC__ >= 4 // in this case nlz is fast
// note that x != 0 here!
return 0x80000000U >> (nlz(x-1) - 1);
#else
......
......@@ -313,7 +313,7 @@ static inline bitset_pos_t _bitset_next(const bitset_t *bs,
* @param bs The bitset.
* @return The number of bits set in the bitset.
*/
static inline unsigned bitset_popcnt(const bitset_t *bs)
static inline unsigned bitset_popcount(const bitset_t *bs)
{
bitset_pos_t i;
bitset_unit_t *unit;
......
......@@ -110,7 +110,7 @@ static inline unsigned _bitset_std_inside_ntz(bitset_unit_t *unit_ptr)
* @param unit_ptr The pointer to a unit.
* @return The number of bits set in the unit.
*/
#define _bitset_inside_pop(unit_ptr) (popcnt(*unit_ptr))
#define _bitset_inside_pop(unit_ptr) (popcount(*unit_ptr))
#define _BITSET_BINOP_UNITS_INC 1
......
......@@ -42,6 +42,7 @@
#include <assert.h>
#include "bitset.h"
#include "bitfiddle.h"
#include "obst.h"
/** The base type for raw bitsets. */
......@@ -218,16 +219,14 @@ static inline int rbitset_is_set(const unsigned *bitset, unsigned pos)
* @param bitset the bitset
* @param size size of the bitset
*/
static inline unsigned rbitset_popcnt(const unsigned *bitset, unsigned size)
static inline unsigned rbitset_popcount(const unsigned *bitset, unsigned size)
{
unsigned pos;
unsigned n = BITSET_SIZE_ELEMS(size);
unsigned res = 0;
const unsigned *elem = bitset;
for (pos = 0; pos < n; ++pos) {
res += _bitset_inside_pop(elem);
elem++;
res += popcount(bitset[pos]);
}
return res;
......@@ -264,7 +263,7 @@ static inline unsigned rbitset_next(const unsigned *bitset, unsigned pos, int se
if (!set)
mask = ~mask;
elem ^= mask;
p = _bitset_inside_ntz_value(elem & ~in_elem_mask);
p = ntz(elem & ~in_elem_mask);
/* If there is a bit set in the current elem, exit. */
if (p < BITS_PER_ELEM) {
......@@ -276,7 +275,7 @@ static inline unsigned rbitset_next(const unsigned *bitset, unsigned pos, int se
elem_pos++;
elem = bitset[elem_pos] ^ mask;
p = _bitset_inside_ntz_value(elem);
p = ntz(elem);
if (p < BITS_PER_ELEM) {
return elem_pos * BITS_PER_ELEM + p;
}
......
......@@ -167,7 +167,7 @@ static void pair_up_operands(const be_chordal_alloc_env_t *alloc_env, be_insn_t
bitset_clear_all(bs);
bitset_copy(bs, op->regs);
bitset_and(bs, out_op->regs);
n_total = bitset_popcnt(op->regs) + bitset_popcnt(out_op->regs);
n_total = bitset_popcount(op->regs) + bitset_popcount(out_op->regs);
if (!bitset_is_empty(bs) && n_total < smallest_n_regs) {
smallest = i;
......
......@@ -477,7 +477,7 @@ no_stable_set:
}
/* transfer the best set into the qn */
qn->mis_size = 1+safe_count+bitset_popcnt(best);
qn->mis_size = 1+safe_count+bitset_popcount(best);
qn->mis_costs = safe_costs+best_weight;
qn->mis[0] = ou->nodes[0]; /* the root is always in a max stable set */
next = 1;
......
......@@ -300,7 +300,7 @@ static void incur_constraint_costs(co2_t *env, const ir_node *irn, col_cost_pair
unsigned n_constr = 0;
unsigned i;
n_constr = rbitset_popcnt(req->limited, n_regs);
n_constr = rbitset_popcount(req->limited, n_regs);
for (i = 0; i < n_regs; ++i) {
if (rbitset_is_set(req->limited, i)) {
col_costs[i].costs = add_saturated(col_costs[i].costs, costs / n_constr);
......@@ -629,7 +629,7 @@ static void node_color_badness(co2_cloud_irn_t *ci, int *badness)
co2_irn_t *ni = get_co2_irn(env, irn);
admissible_colors(env, ni, bs);
if (bitset_popcnt(bs) == 1) {
if (bitset_popcount(bs) == 1) {
bitset_pos_t c = bitset_next_set(bs, 0);
badness[c] += ci->costs;
}
......@@ -796,7 +796,7 @@ static void populate_cloud(co2_t *env, co2_cloud_t *cloud, affinity_node_t *a, i
ci->costs = costs;
cloud->costs += costs;
cloud->n_constr += is_constrained(env, &ci->inh);
cloud->freedom += bitset_popcnt(get_adm(env, &ci->inh));
cloud->freedom += bitset_popcount(get_adm(env, &ci->inh));
cloud->max_degree = MAX(cloud->max_degree, ci->inh.aff->degree);
cloud->n_memb++;
......
......@@ -174,7 +174,7 @@ static void dbg_admissible_colors(const co_mst_env_t *env, const co_mst_irn_t *n
bitset_pos_t idx;
(void) env;
if (bitset_popcnt(node->adm_colors) < 1)
if (bitset_popcount(node->adm_colors) < 1)
fprintf(stderr, "no admissible colors?!?");
else {
bitset_foreach(node->adm_colors, idx) {
......@@ -405,7 +405,7 @@ static void *co_mst_irn_init(ir_phase *ph, const ir_node *irn, void *old)
bitset_andnot(res->adm_colors, env->ignore_regs);
/* compute the constraint factor */
res->constr_factor = (real_t) (1 + env->n_regs - bitset_popcnt(res->adm_colors)) / env->n_regs;
res->constr_factor = (real_t) (1 + env->n_regs - bitset_popcount(res->adm_colors)) / env->n_regs;
/* set the number of interfering affinity neighbours to -1, they are calculated later */
res->int_aff_neigh = -1;
......@@ -588,7 +588,7 @@ static void aff_chunk_assure_weight(co_mst_env_t *env, aff_chunk_t *c)
c->color_affinity[i].cost *= (REAL(1.0) / ARR_LEN(c->n));
c->weight = w;
// c->weight = bitset_popcnt(c->nodes);
// c->weight = bitset_popcount(c->nodes);
c->weight_consistent = 1;
}
}
......
......@@ -579,7 +579,7 @@ int co_solve_ilp2(copy_opt_t *co)
my.normal_colors = bitset_alloca(arch_register_class_n_regs(co->cls));
bitset_clear_all(my.normal_colors);
arch_put_non_ignore_regs(co->cls, my.normal_colors);
my.n_colors = bitset_popcnt(my.normal_colors);
my.n_colors = bitset_popcount(my.normal_colors);
ienv = new_ilp_env(co, ilp2_build, ilp2_apply, &my);
......
......@@ -363,7 +363,7 @@ static int ou_max_ind_set_costs(unit_t *ou)
/* Exact Algorithm: Brute force */
curr = bitset_alloca(unsafe_count);
bitset_set_all(curr);
while ((max = bitset_popcnt(curr)) != 0) {
while ((max = bitset_popcount(curr)) != 0) {
/* check if curr is a stable set */
for (i=bitset_next_set(curr, 0); i!=-1; i=bitset_next_set(curr, i+1))
for (o=bitset_next_set(curr, i+1); o!=-1; o=bitset_next_set(curr, o+1)) /* !!!!! difference to qnode_max_ind_set(): NOT (curr, i) */
......
......@@ -256,7 +256,7 @@ static void find_neighbour_walker(ir_node *bl, void *data)
last_element.element = get_last_sub_clique(ifg, live, my_live, irn);
/* check and add still living nodes */
if (bitset_popcnt(my_live) > 1)
if (bitset_popcount(my_live) > 1)
{
if (last_element.element)
{
......@@ -309,7 +309,7 @@ static void find_neighbour_walker(ir_node *bl, void *data)
}
else
{
if (bitset_popcnt(my_live) == 1) /* there is only one node left */
if (bitset_popcount(my_live) == 1) /* there is only one node left */
{
if (last_element.element)
{
......
......@@ -930,5 +930,5 @@ unsigned be_put_ignore_regs(const be_irg_t *birg, const arch_register_class_t *c
bitset_flip_all(bs);
be_abi_put_ignore_regs(birg->abi, cls, bs);
return bitset_popcnt(bs);
return bitset_popcount(bs);
}
......@@ -252,7 +252,7 @@ static void give_penalties_for_limits(const ir_nodeset_t *live_nodes,
return;
penalty *= NEIGHBOR_FACTOR;
n_allowed = rbitset_popcnt(limited, n_regs);
n_allowed = rbitset_popcount(limited, n_regs);
if (n_allowed > 1) {
/* only create a very weak penalty if multiple regs are allowed */
penalty = (penalty * 0.8f) / n_allowed;
......@@ -314,7 +314,7 @@ static void check_defs(const ir_nodeset_t *live_nodes, float weight,
int arity = get_irn_arity(insn);
int i;
float factor = 1.0f / rbitset_popcnt(&req->other_same, arity);
float factor = 1.0f / rbitset_popcount(&req->other_same, arity);
for (i = 0; i < arity; ++i) {
ir_node *op;
unsigned r;
......
......@@ -1894,14 +1894,14 @@ static serialization_t *compute_best_admissible_serialization(rss_t *rss, ir_nod
be simultaneously alive with u
*/
bitset_copy(bs_tmp, bs_vdesc);
mu1 = bitset_popcnt(bitset_and(bs_tmp, bs_sv));
mu1 = bitset_popcount(bitset_and(bs_tmp, bs_sv));
/*
mu2 = | accum_desc_all_pkiller(u) without descendants(v) |
*/
if (is_pkiller) {
bitset_copy(bs_tmp, bs_ukilldesc);
mu2 = bitset_popcnt(bitset_andnot(bs_tmp, bs_vdesc));
mu2 = bitset_popcount(bitset_andnot(bs_tmp, bs_vdesc));
}
else {
mu2 = 0;
......@@ -2000,7 +2000,7 @@ static void perform_value_serialization_heuristic(rss_t *rss)
arch_put_non_ignore_regs(rss->cls, arch_nonign_bs);
be_abi_put_ignore_regs(rss->abi, rss->cls, abi_ign_bs);
bitset_andnot(arch_nonign_bs, abi_ign_bs);
available_regs = bitset_popcnt(arch_nonign_bs);
available_regs = bitset_popcount(arch_nonign_bs);
//num_live = pset_count(rss->live_block);
//available_regs -= num_live < available_regs ? num_live : 0;
......
......@@ -1743,7 +1743,7 @@ int tarval_is_single_bit(tarval *tv)
/*
* Return the number of set bits in a given (integer) tarval.
*/
int get_tarval_popcnt(tarval *tv)
int get_tarval_popcount(tarval *tv)
{
int i, l;
int bits;
......@@ -1755,7 +1755,7 @@ int get_tarval_popcnt(tarval *tv)
for (bits = 0, i = l - 1; i >= 0; --i) {
unsigned char v = get_tarval_sub_bits(tv, (unsigned)i);
bits += popcnt(v);
bits += popcount(v);
}
return bits;
}
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment