Commit ad82bcf8 authored by Matthias Braun

add compare and swap builtin, implement it for ia32, sparc(leon)

parent 7081df40
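Note: the new ir_bk_compare_swap builtin takes three parameters (pointer, expected old value, replacement value) plus a memory operand, and always yields the value that was previously stored at the pointer. A minimal, deliberately non-atomic C model of these semantics (an illustrative sketch only; the backends below map the builtin onto a single atomic instruction):

/* Non-atomic model of ir_bk_compare_swap; in hardware the
 * load-compare-store below happens as one indivisible step. */
static unsigned compare_swap_model(unsigned *ptr, unsigned old, unsigned new_val)
{
	unsigned prev = *ptr; /* load the current value           */
	if (prev == old)      /* matches the expected value?      */
		*ptr = new_val;   /* then store the replacement       */
	return prev;          /* always yields the previous value */
}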
@@ -348,7 +348,8 @@ typedef enum ir_builtin_kind {
 	ir_bk_outport,              /**< out port */
 	ir_bk_inner_trampoline,     /**< address of a trampoline for GCC inner functions */
 	ir_bk_saturating_increment, /**< saturating increment */
-	ir_bk_last = ir_bk_saturating_increment,
+	ir_bk_compare_swap,         /**< compare exchange (aka. compare and swap) */
+	ir_bk_last = ir_bk_compare_swap,
 } ir_builtin_kind;
 
 /**
...
@@ -1222,6 +1222,7 @@ static ir_node *gen_Builtin(ir_node *node)
 	case ir_bk_inport:
 	case ir_bk_inner_trampoline:
 	case ir_bk_saturating_increment:
+	case ir_bk_compare_swap:
 		break;
 	}
 	panic("Builtin %s not implemented", get_builtin_kind_name(kind));
@@ -1256,6 +1257,7 @@ static ir_node *gen_Proj_Builtin(ir_node *proj)
 	case ir_bk_inport:
 	case ir_bk_inner_trampoline:
 	case ir_bk_saturating_increment:
+	case ir_bk_compare_swap:
 		break;
 	}
 	panic("Builtin %s not implemented", get_builtin_kind_name(kind));
...
@@ -1822,6 +1822,8 @@ static void ia32_lower_for_target(void)
 		supported[s++] = ir_bk_saturating_increment;
 		if (ia32_cg_config.use_popcnt)
 			supported[s++] = ir_bk_popcount;
+		if (ia32_cg_config.use_cmpxchg)
+			supported[s++] = ir_bk_compare_swap;
 		assert(s < ARRAY_SIZE(supported));
 		lower_builtins(s, supported);
...
@@ -899,6 +899,7 @@ void ia32_setup_cg_config(void)
 	c->use_3dnow_prefetch   = flags(arch, arch_feature_3DNow);
 	c->use_popcnt           = flags(arch, arch_feature_popcnt);
 	c->use_bswap            = (arch & arch_mask) >= arch_i486;
+	c->use_cmpxchg          = (arch & arch_mask) != arch_i386;
 	c->optimize_cc          = opt_cc;
 	c->use_unsafe_floatconv = opt_unsafe_floatconv;
 	c->emit_machcode        = emit_machcode;
...
@@ -67,6 +67,8 @@ typedef struct {
 	unsigned use_popcnt:1;
 	/** use i486 instructions */
 	unsigned use_bswap:1;
+	/** use cmpxchg */
+	unsigned use_cmpxchg:1;
 	/** optimize calling convention where possible */
 	unsigned optimize_cc:1;
 	/**
...
@@ -1251,6 +1251,18 @@ Bswap16 => {
 	mode      => $mode_gp,
 },
 
+CmpXChgMem => {
+	irn_flags => [ "rematerializable" ],
+	state     => "exc_pinned",
+	reg_req   => { in  => [ "gp", "gp", "none", "eax", "gp" ],
+	               out => [ "eax", "flags", "none" ] },
+	ins       => [ "base", "index", "mem", "old", "new" ],
+	outs      => [ "res", "flags", "M" ],
+	emit      => "lock cmpxchg%M %#S4, %AM",
+	latency   => 2,
+	modified_flags => $status_flags
+},
+
 #
 # BreakPoint
 #
...
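Note: the reg_req above pins the "old" input and the "res" output to eax because cmpxchg implicitly uses the accumulator: the CPU compares eax with the memory operand, stores the source register ("new", emitted as %#S4) on equality, and otherwise loads the memory value into eax, so eax always ends up holding the previous memory contents. ZF records whether the exchange happened, hence the extra "flags" output. A GCC extended-asm sketch of the emitted instruction (for illustration only, not part of this commit):

static unsigned lock_cmpxchg(unsigned *ptr, unsigned old, unsigned new_val)
{
	unsigned prev;
	__asm__ __volatile__("lock cmpxchgl %2, %1"
	                     : "=a"(prev), "+m"(*ptr) /* previous value comes back in eax */
	                     : "r"(new_val), "0"(old) /* expected value goes in via eax   */
	                     : "memory", "cc");
	return prev;
}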
@@ -4975,6 +4975,34 @@ static ir_node *gen_saturating_increment(ir_node *node)
 	return sbb;
 }
 
+static ir_node *gen_compare_swap(ir_node *node)
+{
+	dbg_info *dbgi    = get_irn_dbg_info(node);
+	ir_node  *block   = be_transform_node(get_nodes_block(node));
+	ir_node  *ptr     = get_Builtin_param(node, 0);
+	ir_node  *old     = get_Builtin_param(node, 1);
+	ir_node  *new     = get_Builtin_param(node, 2);
+	ir_node  *mem     = get_Builtin_mem(node);
+	ir_node  *new_old = be_transform_node(old);
+	ir_node  *new_new = be_transform_node(new);
+	ir_node  *new_mem = be_transform_node(mem);
+	ir_mode  *mode    = get_irn_mode(new);
+	assert(get_irn_mode(old) == mode);
+	ia32_address_t addr;
+	create_transformed_address_mode(&addr, ptr, ia32_create_am_normal);
+	ir_node *base     = addr.base;
+	ir_node *idx      = addr.index;
+	ir_node *new_node = new_bd_ia32_CmpXChgMem(dbgi, block, base, idx, new_mem,
+	                                           new_old, new_new);
+	set_irn_pinned(new_node, get_irn_pinned(node));
+	set_ia32_op_type(new_node, ia32_AddrModeD);
+	set_ia32_ls_mode(new_node, mode);
+	set_address(new_node, &addr);
+	SET_IA32_ORIG_NODE(new_node, node);
+	return new_node;
+}
+
 /**
  * Transform a builtin inner trampoline
  */
@@ -5085,6 +5113,8 @@ static ir_node *gen_Builtin(ir_node *node)
 		return gen_inner_trampoline(node);
 	case ir_bk_saturating_increment:
 		return gen_saturating_increment(node);
+	case ir_bk_compare_swap:
+		return gen_compare_swap(node);
 	}
 	panic("Builtin %s not implemented", get_builtin_kind_name(kind));
 }
@@ -5130,6 +5160,14 @@ static ir_node *gen_Proj_Builtin(ir_node *proj)
 		assert(get_Proj_proj(proj) == pn_Builtin_M);
 		return get_Tuple_pred(new_node, 0);
 	}
+	case ir_bk_compare_swap:
+		assert(is_ia32_CmpXChgMem(new_node));
+		if (get_Proj_proj(proj) == pn_Builtin_M) {
+			return new_r_Proj(new_node, mode_M, pn_ia32_CmpXChgMem_M);
+		} else {
+			assert(get_Proj_proj(proj) == pn_Builtin_max+1);
+			return new_r_Proj(new_node, mode_Iu, pn_ia32_CmpXChgMem_res);
+		}
 	}
 	panic("Builtin %s not implemented", get_builtin_kind_name(kind));
 }
...
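Note: the Proj convention used here is that pn_Builtin_M selects the memory result while pn_Builtin_max+1 selects the (single) value result; the transform maps them onto the CmpXChgMem node's M and res outputs. A hypothetical sketch of how a frontend might construct the builtin (the exact new_r_Builtin signature and the method_type variable are assumptions, not taken from this commit):

/* hypothetical frontend construction of the builtin */
ir_node *in[]    = { ptr, expected, update };
ir_node *builtin = new_r_Builtin(block, mem, 3, in,
                                 ir_bk_compare_swap, method_type);
ir_node *new_mem = new_r_Proj(builtin, mode_M,  pn_Builtin_M);
ir_node *prev    = new_r_Proj(builtin, mode_Iu, pn_Builtin_max + 1);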
@@ -425,19 +425,24 @@ static void sparc_handle_intrinsics(void)
 static void sparc_setup_cg_config(void)
 {
-	bool has_fpu;
+	memset(&sparc_cg_config, 0, sizeof(sparc_cg_config));
+	bool has_fpu = false;
 	switch ((sparc_cpu_t)cpu) {
+	case cpu_v8plus:
+		sparc_cg_config.use_cas = true;
+		has_fpu = false;
+		break;
 	case cpu_leon:
+		sparc_cg_config.use_cas = true;
+		has_fpu = true;
+		break;
 	case cpu_hypersparc:
 		has_fpu = true;
 		break;
-	case cpu_v8plus:
 	case cpu_supersparc:
 	case cpu_generic:
 		has_fpu = false;
 		break;
+	default:
+		panic("sparc: invalid architecture selected");
 	}
 
 	if (use_softfloat)
@@ -503,6 +508,8 @@ static void sparc_lower_for_target(void)
 	ir_builtin_kind supported[8];
 	size_t s = 0;
 	supported[s++] = ir_bk_saturating_increment;
+	if (sparc_cg_config.use_cas)
+		supported[s++] = ir_bk_compare_swap;
 	assert(s < ARRAY_SIZE(supported));
 	lower_builtins(s, supported);
...
@@ -20,6 +20,7 @@ typedef struct calling_convention_t calling_convention_t;
 typedef struct sparc_codegen_config_t {
 	bool use_fpu;
+	bool use_cas;
 } sparc_codegen_config_t;
 
 extern sparc_codegen_config_t sparc_cg_config;
...
@@ -686,6 +686,12 @@ void sparc_emitf(ir_node const *const node, char const *fmt, ...)
 			break;
 		}
 
+		case 'X': {
+			unsigned const num = va_arg(ap, unsigned);
+			be_emit_irprintf("%X", num);
+			break;
+		}
+
 		default:
 unknown:
 			panic("unknown format conversion in sparc_emitf()");
@@ -855,6 +861,31 @@ static void emit_sparc_Call(const ir_node *node)
 	}
 }
 
+static void emit_sparc_Cas(const ir_node *node)
+{
+	/* the custom emitter is only here to check the should_be_same
+	 * constraint, which the current firm backend does not guarantee
+	 * to fulfill */
+	if (arch_get_irn_register_out(node, pn_sparc_Cas_res) !=
+	    arch_get_irn_register_in(node, n_sparc_Cas_new)) {
+		panic("sparc: should_be_same in Cas not fulfilled");
+	}
+	/* except for some patched gaisler binutils, nobody understands cas
+	 * in v8/leon mode, so we encode the cas in binary form */
+#if 0
+	sparc_emitf(node, "cas [%S0], %S1, %S2");
+#else
+	const arch_register_t *reg_new
+		= arch_get_irn_register_in(node, n_sparc_Cas_new);
+	const arch_register_t *reg_ptr
+		= arch_get_irn_register_in(node, n_sparc_Cas_ptr);
+	const arch_register_t *reg_old
+		= arch_get_irn_register_in(node, n_sparc_Cas_old);
+	uint32_t encoding = 3u<<30 | (reg_new->encoding<<25) | (0x3C << 19)
+	                  | (reg_ptr->encoding<<14) | (0x80<<5) | (reg_old->encoding);
+	sparc_emitf(node, ".long 0x%X /* cas [%S0], %S1, %S2 */", encoding);
+#endif
+}
+
 static void emit_be_Perm(const ir_node *irn)
 {
 	ir_mode *mode = get_irn_mode(get_irn_n(irn, 0));
@@ -1350,6 +1381,7 @@ static void sparc_register_emitters(void)
 	be_set_emitter(op_sparc_Ba,        emit_sparc_Ba);
 	be_set_emitter(op_sparc_Bicc,      emit_sparc_Bicc);
 	be_set_emitter(op_sparc_Call,      emit_sparc_Call);
+	be_set_emitter(op_sparc_Cas,       emit_sparc_Cas);
 	be_set_emitter(op_sparc_FrameAddr, emit_sparc_FrameAddr);
 	be_set_emitter(op_sparc_Restore,   emit_sparc_Restore);
 	be_set_emitter(op_sparc_Return,    emit_sparc_Return);
...
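Note: the hand-assembled word follows the SPARC format-3 instruction layout (field positions below are taken as an assumption from the V9 manual): op=3 in bits 31..30, rd in 29..25, op3=0x3C (CASA) in 24..19, rs1 in 18..14, i=0 in bit 13, the ASI 0x80 (primary address space) in bits 12..5, rs2 in bits 4..0. A small self-check of the packing, mirroring the expression in emit_sparc_Cas:

#include <assert.h>
#include <stdint.h>

/* Same packing as emit_sparc_Cas, with the register encodings
 * passed as plain parameters. */
static uint32_t encode_casa(unsigned rd, unsigned rs1, unsigned rs2)
{
	return 3u << 30 | rd << 25 | 0x3Cu << 19 | rs1 << 14
	     | 0x80u << 5 | rs2;
}

int main(void)
{
	/* cas [%g1], %g2, %g3: rs1=%g1(1), rs2=%g2(2), rd=%g3(3) */
	assert(encode_casa(3, 1, 2) == 0xC7E05002u);
	return 0;
}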
@@ -40,6 +40,87 @@
 #include "bespillslots.h"
 #include "bestack.h"
 #include "beirgmod.h"
+#include "error.h"
+
+static int get_first_same(const arch_register_req_t *req)
+{
+	const unsigned other = req->other_same;
+	for (int i = 0; i < 32; ++i) {
+		if (other & (1U << i))
+			return i;
+	}
+	panic("same position not found");
+}
+
+/**
+ * Insert copies for all sparc nodes where the should_be_same requirement
+ * is not fulfilled.
+ */
+static void assure_should_be_same_requirements(ir_node *node)
+{
+	ir_node *block = get_nodes_block(node);
+
+	/* check all OUT requirements, if there is a should_be_same */
+	be_foreach_out(node, i) {
+		const arch_register_req_t *req = arch_get_irn_register_req_out(node, i);
+		if (!arch_register_req_is(req, should_be_same))
+			continue;
+		int same_pos = get_first_same(req);
+
+		/* get in and out register */
+		const arch_register_t *out_reg = arch_get_irn_register_out(node, i);
+		ir_node               *in_node = get_irn_n(node, same_pos);
+		const arch_register_t *in_reg  = arch_get_irn_register(in_node);
+
+		/* requirement already fulfilled? */
+		if (in_reg == out_reg)
+			continue;
+		assert(in_reg->reg_class == out_reg->reg_class);
+
+		/* check if any other input operand uses the out register */
+		ir_node *uses_out_reg     = NULL;
+		int      uses_out_reg_pos = -1;
+		for (int i2 = 0, arity = get_irn_arity(node); i2 < arity; ++i2) {
+			ir_node *in = get_irn_n(node, i2);
+			if (!mode_is_data(get_irn_mode(in)))
+				continue;
+			const arch_register_t *other_in_reg = arch_get_irn_register(in);
+			if (other_in_reg != out_reg)
+				continue;
+			if (uses_out_reg != NULL && in != uses_out_reg) {
+				panic("invalid register allocation");
+			}
+			uses_out_reg = in;
+			if (uses_out_reg_pos >= 0)
+				uses_out_reg_pos = -1; /* multiple inputs... */
+			else
+				uses_out_reg_pos = i2;
+		}
+
+		/* no-one else is using the out reg, we can simply copy it
+		 * (the register can't be live since the operation will override it
+		 * anyway) */
+		if (uses_out_reg == NULL) {
+			ir_node *copy = be_new_Copy(block, in_node);
+			/* destination is the out register */
+			arch_set_irn_register(copy, out_reg);
+			/* insert copy before the node into the schedule */
+			sched_add_before(node, copy);
+			/* set copy as in */
+			set_irn_n(node, same_pos, copy);
+			continue;
+		}
+
+		panic("Unresolved should_be_same constraint");
+	}
+}
+
 static ir_heights_t *heights;
@@ -642,6 +723,23 @@ static void sparc_set_frame_entity(ir_node *node, ir_entity *entity)
 	}
 }
 
+/** returns true if the should_be_same constraints of a node must be
+ * fulfilled */
+static bool has_must_be_same(const ir_node *node)
+{
+	return is_sparc_Cas(node);
+}
+
+static void fix_constraints_walker(ir_node *block, void *env)
+{
+	(void)env;
+	sched_foreach_safe(block, irn) {
+		if (!has_must_be_same(irn))
+			continue;
+		assure_should_be_same_requirements(irn);
+	}
+}
+
 void sparc_finish_graph(ir_graph *irg)
 {
 	be_stack_layout_t *stack_layout = be_get_irg_stack_layout(irg);
@@ -686,5 +784,7 @@ void sparc_finish_graph(ir_graph *irg)
 	heights_free(heights);
 
+	irg_block_walk_graph(irg, NULL, fix_constraints_walker, NULL);
+
 	be_remove_dead_nodes_from_schedule(irg);
 }
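Note: get_first_same() is simply a find-lowest-set-bit over the should_be_same input mask, and the copy insertion is safe precisely because no other live value occupies the out register (the node overwrites it anyway). With compiler support the bit scan collapses to a count-trailing-zeros; an equivalent sketch, assuming a GCC-style builtin:

#include <assert.h>

/* equivalent to get_first_same()'s loop for a non-zero mask */
static int get_first_same_ctz(unsigned other_same)
{
	assert(other_same != 0);
	return __builtin_ctz(other_same); /* index of the lowest set bit */
}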
...
@@ -726,6 +726,28 @@ UDiv => {
 	constructors => \%div_operand_constructors,
 },
 
+Stbar => {
+	op_flags => [ "uses_memory" ],
+	state    => "exc_pinned",
+	ins      => [ "mem" ],
+	outs     => [ "M" ],
+	reg_req  => { in => [ "none" ], out => [ "none" ] },
+	emit     => "stbar",
+	mode     => "mode_M",
+},
+
+Cas => {
+	op_flags => [ "uses_memory" ],
+	state    => "exc_pinned",
+	ins      => [ "ptr", "old", "new", "mem" ],
+	outs     => [ "res", "M" ],
+	reg_req  => { in  => [ "gp", "gp", "gp", "none" ],
+	              out => [ "in_r3", "none" ] },
+	# TODO: we need a must-be-same constraint for the CAS;
+	# for now we use a custom emitter which at least panics if the
+	# constraint is not fulfilled
+},
+
 fcmp => {
 	irn_flags => [ "rematerializable" ],
 	emit      => "fcmp%FM %S0, %S1",
...
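Note: the backend issues the Stbar (store barrier) on the incoming memory chain before the Cas (see gen_compare_swap below), so stores preceding the builtin are ordered before the atomic operation. At the source level the pair corresponds roughly to a strong C11 compare-exchange; a sketch of that correspondence (illustrative, with the caveat that the builtin returns the previous value rather than a success flag):

#include <stdatomic.h>

static unsigned cas_like(_Atomic unsigned *ptr, unsigned expected,
                         unsigned desired)
{
	/* on failure, expected is overwritten with the current value;
	 * on success it already equals the previous value */
	atomic_compare_exchange_strong(ptr, &expected, desired);
	return expected; /* the previous memory value in both cases */
}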
@@ -2388,6 +2388,35 @@ static ir_node *gen_saturating_increment(ir_node *node)
 	return sbb;
 }
 
+static ir_node *gen_compare_swap(ir_node *node)
+{
+	dbg_info *dbgi    = get_irn_dbg_info(node);
+	ir_node  *block   = be_transform_node(get_nodes_block(node));
+	ir_node  *ptr     = get_Builtin_param(node, 0);
+	ir_node  *new_ptr = be_transform_node(ptr);
+	ir_node  *old     = get_Builtin_param(node, 1);
+	ir_node  *new_old = be_transform_node(old);
+	ir_node  *new     = get_Builtin_param(node, 2);
+	ir_node  *new_new = be_transform_node(new);
+	ir_node  *mem     = get_Builtin_mem(node);
+	ir_node  *new_mem = be_transform_node(mem);
+	ir_node  *stbar   = new_bd_sparc_Stbar(dbgi, block, new_mem);
+	ir_node  *cas     = new_bd_sparc_Cas(dbgi, block, new_ptr, new_old,
+	                                     new_new, stbar);
+	op_pin_state pinned = get_irn_pinned(node);
+	set_irn_pinned(stbar, pinned);
+	set_irn_pinned(cas, pinned);
+
+	ir_mode *mode = get_irn_mode(old);
+	assert(get_irn_mode(new) == mode);
+	if ((!mode_is_int(mode) && !mode_is_reference(mode))
+	    || get_mode_size_bits(mode) != 32) {
+		panic("sparc: compare and swap only allowed for 32bit values");
+	}
+	return cas;
+}
+
 /**
  * Transform Builtin node.
  */
@@ -2396,22 +2425,26 @@ static ir_node *gen_Builtin(ir_node *node)
 	ir_builtin_kind kind = get_Builtin_kind(node);
 
 	switch (kind) {
-	case ir_bk_trap:
-	case ir_bk_debugbreak:
-	case ir_bk_return_address:
-	case ir_bk_frame_address:
-	case ir_bk_prefetch:
-	case ir_bk_ffs:
+	case ir_bk_bswap:
 	case ir_bk_clz:
 	case ir_bk_ctz:
+	case ir_bk_ffs:
 	case ir_bk_parity:
 	case ir_bk_popcount:
-	case ir_bk_bswap:
+	case ir_bk_prefetch:
+		panic("builtin not lowered(%+F)", node);
+
+	case ir_bk_trap:
+	case ir_bk_debugbreak:
+	case ir_bk_return_address:
+	case ir_bk_frame_address:
 	case ir_bk_outport:
 	case ir_bk_inport:
 	case ir_bk_inner_trampoline:
-		/* Should not occur in backend. */
+		/* not supported */
 		break;
+	case ir_bk_compare_swap:
+		return gen_compare_swap(node);
 	case ir_bk_saturating_increment:
 		return gen_saturating_increment(node);
 	}
@@ -2423,9 +2456,10 @@ static ir_node *gen_Builtin(ir_node *node)
  */
 static ir_node *gen_Proj_Builtin(ir_node *proj)
 {
-	ir_node         *node     = get_Proj_pred(proj);
-	ir_node         *new_node = be_transform_node(node);
-	ir_builtin_kind  kind     = get_Builtin_kind(node);
+	ir_node         *pred     = get_Proj_pred(proj);
+	ir_node         *new_pred = be_transform_node(pred);
+	ir_builtin_kind  kind     = get_Builtin_kind(pred);
+	long             pn       = get_Proj_proj(proj);
 
 	switch (kind) {
 	case ir_bk_return_address:
@@ -2442,11 +2476,18 @@ static ir_node *gen_Proj_Builtin(ir_node *proj)
 	case ir_bk_outport:
 	case ir_bk_inport:
 	case ir_bk_inner_trampoline:
-		/* Should not occur in backend. */
+		/* not supported / should be lowered */
 		break;
 	case ir_bk_saturating_increment:
-		assert(get_Proj_proj(proj) == pn_Builtin_max+1);
-		return new_node;
+		assert(pn == pn_Builtin_max+1);
+		return new_pred;
+	case ir_bk_compare_swap:
+		if (pn == pn_Builtin_M) {
+			return new_r_Proj(new_pred, mode_M, pn_sparc_Cas_M);
+		} else {
+			assert(pn == pn_Builtin_max+1);
+			return new_r_Proj(new_pred, mode_gp, pn_sparc_Cas_res);
+		}
 	}
 	panic("Builtin %s not implemented", get_builtin_kind_name(kind));