Commit bed91301 authored by Christoph Mallon

be: Wire stack nodes after code selection.

This resolves problems with hidden dependencies during code selection, which caused dependency cycles, and thereby fixes backend/scheduled.c.
It also grants code selection slightly more freedom by not arbitrarily restricting the order with stack dependencies.
parent f32dbcf9
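The core of the change: instead of pre-computing a stack order (be_stackorder_t plus the node_to_stack map) before code selection, each backend now records one stack change chain per stack-modifying construct and wires the chains afterwards. A minimal sketch of the new lifecycle, using only helpers visible in this diff (the function name example_transform_graph is illustrative):

static be_stack_env_t stack_env;

static void example_transform_graph(ir_graph *irg)
{
	be_stack_init(&stack_env);     /* start with an empty list of chains */
	be_transform_graph(irg, NULL); /* transformers call be_stack_record_chain()
	                                * for every call/alloc/return they build */
	be_stack_finish(&stack_env);   /* sort the chains per block and wire the
	                                * provisional stack inputs together */
}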
@@ -245,6 +245,7 @@ sub_sp => {
state => "pinned",
in_reqs => "...",
out_reqs => [ "rsp:I", "gp", "mem" ],
ins => [ "stack" ],
outs => [ "stack", "addr", "M" ],
attr_type => "amd64_binop_addr_attr_t",
attr => "const amd64_binop_addr_attr_t *attr_init",
@@ -37,10 +37,9 @@
DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
static ir_mode *mode_gp;
static x86_cconv_t *current_cconv = NULL;
static pmap *node_to_stack;
static be_stackorder_t *stackorder;
static be_stack_env_t stack_env;
/** we don't have a concept of aliasing registers, so enumerate them
* manually for the asm nodes. */
@@ -1448,26 +1447,6 @@ static ir_node *gen_Proj_Start(ir_node *node)
panic("unexpected Start Proj: %u", pn);
}
static ir_node *get_stack_pointer_for(ir_node *node)
{
/* get predecessor in stack_order list */
ir_node *stack_pred = be_get_stack_pred(stackorder, node);
if (stack_pred == NULL) {
/* first stack user in the current block. We can simply use the
* initial sp_proj for it */
ir_graph *irg = get_irn_irg(node);
return get_initial_sp(irg);
}
be_transform_node(stack_pred);
ir_node *stack = pmap_get(ir_node, node_to_stack, stack_pred);
if (stack == NULL) {
return get_stack_pointer_for(stack_pred);
}
return stack;
}
static ir_node *gen_Return(ir_node *node)
{
ir_graph *irg = get_irn_irg(node);
@@ -1475,7 +1454,6 @@ static ir_node *gen_Return(ir_node *node)
dbg_info *dbgi = get_irn_dbg_info(node);
ir_node *mem = get_Return_mem(node);
ir_node *new_mem = be_transform_node(mem);
ir_node *sp = get_stack_pointer_for(node);
size_t n_res = get_Return_n_ress(node);
x86_cconv_t *cconv = current_cconv;
@@ -1490,7 +1468,7 @@ static ir_node *gen_Return(ir_node *node)
in[n_amd64_ret_mem] = new_mem;
reqs[n_amd64_ret_mem] = arch_memory_req;
in[n_amd64_ret_stack] = sp;
in[n_amd64_ret_stack] = get_initial_sp(irg);
reqs[n_amd64_ret_stack] = amd64_registers[REG_RSP].single_req;
/* result values */
@@ -1514,6 +1492,7 @@ static ir_node *gen_Return(ir_node *node)
assert(p == n_ins);
ir_node *const ret = new_bd_amd64_ret(dbgi, new_block, n_ins, in, reqs);
be_stack_record_chain(&stack_env, ret, n_amd64_ret_stack, NULL);
return ret;
}
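A return consumes the stack without producing a new stack value, so its chain ends with after == NULL; the comparator in betranshlp.c below sorts such chains to the end of their block, and the provisional get_initial_sp() input is later rewired to the block's last stack producer. The two-step pattern, condensed:

in[n_amd64_ret_stack] = get_initial_sp(irg);                     /* provisional */
/* ... build the ret node from in[] ... */
be_stack_record_chain(&stack_env, ret, n_amd64_ret_stack, NULL); /* chain end */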
@@ -1538,7 +1517,6 @@ static ir_node *gen_Call(ir_node *node)
ir_node **in = ALLOCAN(ir_node*, max_inputs);
int in_arity = 0;
int sync_arity = 0;
ir_node *new_frame = get_stack_pointer_for(node);
assert(n_params == cconv->n_parameters);
@@ -1547,10 +1525,8 @@ static ir_node *gen_Call(ir_node *node)
/* stack pointer input */
/* construct an IncSP -> we have to always be sure that the stack is
* aligned even if we don't push arguments on it */
const arch_register_t *sp_reg = &amd64_registers[REG_RSP];
ir_node *incsp = amd64_new_IncSP(new_block, new_frame,
cconv->callframe_size,
AMD64_PO2_STACK_ALIGNMENT);
ir_node *const stack = get_initial_sp(irg);
ir_node *const callframe = amd64_new_IncSP(new_block, stack, cconv->callframe_size, AMD64_PO2_STACK_ALIGNMENT);
/* match callee */
amd64_addr_t addr;
@@ -1620,8 +1596,8 @@ static ir_node *gen_Call(ir_node *node)
sync_ins[sync_arity++] = be_transform_node(mem);
no_call_mem:
in_req[in_arity] = sp_reg->single_req;
in[in_arity] = incsp;
in_req[in_arity] = amd64_registers[REG_RSP].single_req;
in[in_arity] = callframe;
++in_arity;
/* vararg calls need the number of SSE registers used */
@@ -1662,7 +1638,7 @@ no_call_mem:
attr.base.addr.index_input = NO_INPUT;
attr.base.insn_mode = INSN_MODE_64;
ir_node *const nomem = get_irg_no_mem(irg);
ir_node *const in[] = { new_value, incsp, nomem };
ir_node *const in[] = { new_value, callframe, nomem };
ir_node *const store = mode_is_float(mode) ?
new_bd_amd64_movs_store_xmm(dbgi, new_block, ARRAY_SIZE(in), in, xmm_reg_mem_reqs, &attr) :
new_bd_amd64_mov_store( dbgi, new_block, ARRAY_SIZE(in), in, reg_reg_mem_reqs, &attr);
@@ -1708,7 +1684,7 @@ no_call_mem:
/* create output register reqs */
arch_set_irn_register_req_out(call, pn_amd64_call_M, arch_memory_req);
arch_copy_irn_out_info(call, pn_amd64_call_stack, incsp);
arch_copy_irn_out_info(call, pn_amd64_call_stack, callframe);
arch_register_class_t const *const flags = &amd64_reg_classes[CLASS_amd64_flags];
arch_set_irn_register_req_out(call, pn_amd64_call_flags, flags->class_req);
@@ -1737,13 +1713,8 @@ no_call_mem:
/* IncSP to destroy the call stackframe */
ir_node *const call_stack = be_new_Proj(call, pn_amd64_call_stack);
incsp = amd64_new_IncSP(new_block, call_stack, -cconv->callframe_size, 0);
/* if we are the last IncSP producer in a block then we have to keep
* the stack value.
* Note: This here keeps all producers which is more than necessary */
keep_alive(incsp);
pmap_insert(node_to_stack, node, incsp);
ir_node *const incsp = amd64_new_IncSP(new_block, call_stack, -cconv->callframe_size, 0);
be_stack_record_chain(&stack_env, callframe, n_be_IncSP_pred, incsp);
x86_free_calling_convention(cconv);
return call;
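The call-side pattern deserves spelling out: every transformed call starts provisionally from the initial stack pointer and records one chain whose endpoints be_stack_finish() later splices together with the neighbouring chains of the block. Condensed from gen_Call() above:

ir_node *const stack     = get_initial_sp(irg);  /* provisional chain input */
ir_node *const callframe = amd64_new_IncSP(new_block, stack,
                                           cconv->callframe_size,
                                           AMD64_PO2_STACK_ALIGNMENT);
/* ... the call consumes "callframe" and produces pn_amd64_call_stack ... */
ir_node *const call_stack = be_new_Proj(call, pn_amd64_call_stack);
ir_node *const incsp      = amd64_new_IncSP(new_block, call_stack,
                                            -cconv->callframe_size, 0);
/* "callframe" heads the chain and its stack input (n_be_IncSP_pred) stays
 * provisional; "incsp" is the stack value the next chain starts from. */
be_stack_record_chain(&stack_env, callframe, n_be_IncSP_pred, incsp);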
@@ -2335,7 +2306,6 @@ static ir_node *gen_Alloc(ir_node *node)
ir_node *size = get_Alloc_size(node);
ir_node *mem = get_Alloc_mem(node);
ir_node *new_mem = be_transform_node(mem);
ir_node *sp = get_stack_pointer_for(node);
const arch_register_req_t **reqs;
amd64_binop_addr_attr_t attr;
@@ -2345,7 +2315,9 @@ static ir_node *gen_Alloc(ir_node *node)
ir_node *subsp;
ir_node *in[3];
unsigned arity = 0;
in[arity++] = sp;
ir_graph *const irg = get_irn_irg(node);
in[arity++] = get_initial_sp(irg);
if (is_Const(size)) {
ir_tarval *tv = get_Const_tarval(size);
@@ -2363,8 +2335,7 @@ static ir_node *gen_Alloc(ir_node *node)
subsp = new_bd_amd64_sub_sp(dbgi, new_block, arity, in, reqs, &attr);
ir_node *const stack_proj = be_new_Proj_reg(subsp, pn_amd64_sub_sp_stack, &amd64_registers[REG_RSP]);
keep_alive(stack_proj);
pmap_insert(node_to_stack, node, stack_proj);
be_stack_record_chain(&stack_env, subsp, n_amd64_sub_sp_stack, stack_proj);
return subsp;
}
@@ -2836,10 +2807,9 @@ void amd64_transform_graph(ir_graph *irg)
| IR_GRAPH_PROPERTY_CONSISTENT_OUT_EDGES);
amd64_register_transformers();
mode_gp = mode_Lu;
node_to_stack = pmap_create();
stackorder = be_collect_stacknodes(irg);
be_stack_init(&stack_env);
ir_entity *entity = get_irg_entity(irg);
ir_type *mtp = get_entity_type(entity);
current_cconv = amd64_decide_calling_convention(mtp, irg);
@@ -2857,9 +2827,7 @@ void amd64_transform_graph(ir_graph *irg)
heights_free(heights);
heights = NULL;
be_free_stackorder(stackorder);
pmap_destroy(node_to_stack);
node_to_stack = NULL;
be_stack_finish(&stack_env);
ir_type *frame_type = get_irg_frame_type(irg);
if (get_type_state(frame_type) == layout_undefined)
@@ -40,9 +40,8 @@
DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
static const arch_register_t *sp_reg = &arm_registers[REG_SP];
static be_stackorder_t *stackorder;
static be_stack_env_t stack_env;
static calling_convention_t *cconv = NULL;
static pmap *node_to_stack;
static const arch_register_t *const callee_saves[] = {
&arm_registers[REG_R4],
@@ -73,6 +72,11 @@ static const arch_register_t *const caller_saves[] = {
&arm_registers[REG_F7],
};
static ir_node *get_initial_sp(ir_graph *const irg)
{
return be_get_Start_proj(irg, &arm_registers[REG_SP]);
}
void arm_gen_vals_from_word(uint32_t value, arm_vals *result)
{
/* TODO: not optimal yet, as we only "shift" the value and don't take
@@ -1481,7 +1485,7 @@ static ir_node *gen_Proj_Start(ir_node *node)
return new_r_Bad(irg, mode_T);
case pn_Start_P_frame_base:
return be_get_Start_proj(irg, &arm_registers[REG_SP]);
return get_initial_sp(irg);
}
panic("unexpected start proj: %u", pn);
}
@@ -1721,26 +1725,6 @@ static ir_node *gen_Start(ir_node *node)
return be_new_Start(irg, outs);
}
static ir_node *get_stack_pointer_for(ir_node *node)
{
/* get predecessor in stack_order list */
ir_node *stack_pred = be_get_stack_pred(stackorder, node);
if (stack_pred == NULL) {
/* first stack user in the current block. We can simply use the
* initial sp_proj for it */
ir_graph *irg = get_irn_irg(node);
return be_get_Start_proj(irg, &arm_registers[REG_SP]);
}
be_transform_node(stack_pred);
ir_node *stack = pmap_get(ir_node, node_to_stack, stack_pred);
if (stack == NULL) {
return get_stack_pointer_for(stack_pred);
}
return stack;
}
/**
* transform a Return node into epilogue code + return statement
*/
@@ -1751,7 +1735,6 @@ static ir_node *gen_Return(ir_node *node)
ir_node *mem = get_Return_mem(node);
ir_node *new_mem = be_transform_node(mem);
unsigned n_callee_saves = ARRAY_SIZE(callee_saves);
ir_node *sp = get_stack_pointer_for(node);
unsigned n_res = get_Return_n_ress(node);
ir_graph *irg = get_irn_irg(node);
@@ -1764,7 +1747,7 @@ static ir_node *gen_Return(ir_node *node)
in[n_arm_Return_mem] = new_mem;
reqs[n_arm_Return_mem] = arch_memory_req;
in[n_arm_Return_sp] = sp;
in[n_arm_Return_sp] = get_initial_sp(irg);
reqs[n_arm_Return_sp] = sp_reg->single_req;
/* result values */
@@ -1788,6 +1771,7 @@ static ir_node *gen_Return(ir_node *node)
assert(p == n_ins);
ir_node *const ret = new_bd_arm_Return(dbgi, new_block, n_ins, in, reqs);
be_stack_record_chain(&stack_env, ret, n_arm_Return_sp, NULL);
return ret;
}
@@ -1820,13 +1804,11 @@ static ir_node *gen_Call(ir_node *node)
in_req[mem_pos] = arch_memory_req;
/* stack pointer (create parameter stackframe + align stack)
* Note that we always need an IncSP to ensure stack alignment */
ir_node *new_frame = get_stack_pointer_for(node);
ir_node *incsp = be_new_IncSP(sp_reg, new_block, new_frame,
cconv->param_stack_size,
ARM_PO2_STACK_ALIGNMENT);
ir_node *const new_frame = get_initial_sp(irg);
ir_node *const callframe = be_new_IncSP(sp_reg, new_block, new_frame, cconv->param_stack_size, ARM_PO2_STACK_ALIGNMENT);
int sp_pos = in_arity++;
in_req[sp_pos] = sp_reg->single_req;
in[sp_pos] = incsp;
in[sp_pos] = callframe;
/* parameters */
for (size_t p = 0; p < n_params; ++p) {
@@ -1871,14 +1853,9 @@ static ir_node *gen_Call(ir_node *node)
}
/* create a parameter frame if necessary */
ir_node *str;
if (mode_is_float(mode)) {
str = new_bd_arm_Stf(dbgi, new_block, incsp, new_value, new_mem,
mode, NULL, 0, param->offset, true);
} else {
str = new_bd_arm_Str(dbgi, new_block, incsp, new_value, new_mem,
mode, NULL, 0, param->offset, true);
}
ir_node *const str = mode_is_float(mode) ?
new_bd_arm_Stf(dbgi, new_block, callframe, new_value, new_mem, mode, NULL, 0, param->offset, true) :
new_bd_arm_Str(dbgi, new_block, callframe, new_value, new_mem, mode, NULL, 0, param->offset, true);
sync_ins[sync_arity++] = str;
}
@@ -1923,7 +1900,7 @@ static ir_node *gen_Call(ir_node *node)
/* create output register reqs */
arch_set_irn_register_req_out(res, pn_arm_Bl_M, arch_memory_req);
arch_copy_irn_out_info(res, pn_arm_Bl_stack, incsp);
arch_copy_irn_out_info(res, pn_arm_Bl_stack, callframe);
for (size_t o = 0; o < n_caller_saves; ++o) {
const arch_register_t *reg = caller_saves[o];
@@ -1935,13 +1912,8 @@ static ir_node *gen_Call(ir_node *node)
/* IncSP to destroy the call stackframe */
ir_node *const call_stack = be_new_Proj(res, pn_arm_Bl_stack);
incsp = be_new_IncSP(sp_reg, new_block, call_stack, -cconv->param_stack_size, 0);
/* if we are the last IncSP producer in a block then we have to keep
* the stack value.
* Note: This here keeps all producers which is more than necessary */
keep_alive(incsp);
pmap_insert(node_to_stack, node, incsp);
ir_node *const incsp = be_new_IncSP(sp_reg, new_block, call_stack, -cconv->param_stack_size, 0);
be_stack_record_chain(&stack_env, callframe, n_be_IncSP_pred, incsp);
arm_free_calling_convention(cconv);
return res;
@@ -2074,10 +2046,8 @@ void arm_transform_graph(ir_graph *irg)
}
arm_register_transformers();
node_to_stack = pmap_create();
assert(cconv == NULL);
stackorder = be_collect_stacknodes(irg);
be_stack_init(&stack_env);
ir_entity *entity = get_irg_entity(irg);
cconv = arm_decide_calling_convention(irg, get_entity_type(entity));
create_stacklayout(irg);
@@ -2085,8 +2055,7 @@ void arm_transform_graph(ir_graph *irg)
be_transform_graph(irg, NULL);
be_free_stackorder(stackorder);
stackorder = NULL;
be_stack_finish(&stack_env);
arm_free_calling_convention(cconv);
cconv = NULL;
@@ -2095,9 +2064,6 @@ void arm_transform_graph(ir_graph *irg)
if (get_type_state(frame_type) == layout_undefined) {
default_layout_compound_type(frame_type);
}
pmap_destroy(node_to_stack);
node_to_stack = NULL;
}
void arm_init_transform(void)
@@ -552,53 +552,6 @@ void be_map_exc_node_to_runtime_call(ir_node *node, ir_mode *res_mode,
turn_into_tuple(node, n_operands, tuple_in);
}
/**
* Link the node into its block list as a new head.
*/
static void collect_node(ir_node *node)
{
ir_node *block = get_nodes_block(node);
ir_node *old = (ir_node*)get_irn_link(block);
set_irn_link(node, old);
set_irn_link(block, node);
}
/**
* Post-walker: link all nodes that probably access the stack into lists of their block.
*/
static void link_ops_in_block_walker(ir_node *node, void *data)
{
(void) data;
switch (get_irn_opcode(node)) {
case iro_Return:
case iro_Call:
collect_node(node);
break;
case iro_Alloc:
/* all non-stack Alloc nodes should be lowered before the backend */
collect_node(node);
break;
case iro_Free:
collect_node(node);
break;
case iro_Builtin:
if (get_Builtin_kind(node) == ir_bk_return_address) {
ir_node *const param = get_Builtin_param(node, 0);
long const value = get_Const_long(param); /* must be Const */
if (value > 0) {
/* not the return address of the current function:
* we need the stack pointer for the frame climbing */
collect_node(node);
}
}
break;
default:
break;
}
}
static ir_heights_t *heights;
/**
@@ -614,20 +567,44 @@ static int dependent_on(const ir_node *n1, const ir_node *n2)
return heights_reachable_in_block(heights, n1, n2);
}
struct be_stack_change_t {
ir_node *before;
unsigned pos;
ir_node *after;
};
/**
* Classical qsort() comparison function behavior:
*
* 0 if both elements are equal, neither node depends on the other
* +1 if first depends on second (first is greater)
* -1 if second depends on first (second is greater)
*/
static int cmp_call_dependency(const void *c1, const void *c2)
static int cmp_stack_dependency(const void *c1, const void *c2)
{
const ir_node *n1 = *(const ir_node **) c1;
const ir_node *n2 = *(const ir_node **) c2;
if (dependent_on(n1, n2))
be_stack_change_t const *const s1 = (be_stack_change_t const*)c1;
be_stack_change_t const *const s2 = (be_stack_change_t const*)c2;
/* Sort blockwise. */
ir_node *const b1 = get_nodes_block(s1->before);
ir_node *const b2 = get_nodes_block(s2->before);
if (b1 != b2)
return get_irn_idx(b2) - get_irn_idx(b1);
/* If one change chain does not produce a new value, it must be the last. */
ir_node *const n1 = s1->after;
if (!n1)
return 1;
if (dependent_on(n2, n1))
ir_node *const n2 = s2->after;
if (!n2)
return -1;
/* If one change chain is data dependent on the other, it must come later.
* The after nodes cannot be dependent on each other, because they are unused.
* So compare after of one with before of the other. */
if (dependent_on(n1, s2->before))
return 1;
if (dependent_on(n2, s1->before))
return -1;
/* The nodes have no depth order, but we need a total order because qsort()
@@ -646,69 +623,51 @@ static int cmp_call_dependency(const void *c1, const void *c2)
return get_irn_idx(n2) - get_irn_idx(n1);
}
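A worked example of the resulting order: assume one block with two calls A and B, where an argument of B uses a result of A, plus the block's return C. The comparator yields A < B < C:

/* Chains (before, after) recorded for the block:
 *
 *   A = (IncSP_a, IncSP_a')   first call
 *   B = (IncSP_b, IncSP_b')   second call, an argument uses A's result
 *   C = (ret,     NULL)       the return
 *
 * cmp(C, X) returns +1 for any X because C->after is NULL: C sorts last.
 * cmp(B, A) returns +1 because B's after (IncSP_b') is data dependent on
 * A's before (IncSP_a) via A's call result: B sorts after A.
 * The wiring pass then sets IncSP_b's stack input to IncSP_a' and the
 * ret's stack input to IncSP_b'. */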
/**
* Block-walker: sorts dependencies and remember them into a phase
*/
static void process_ops_in_block(ir_node *block, void *data)
void be_stack_init(be_stack_env_t *const env)
{
ir_nodemap *const map = (ir_nodemap*)data;
ir_node **nodes = NEW_ARR_F(ir_node*, 0);
for (ir_node *node = block; (node = (ir_node*)get_irn_link(node));) {
ARR_APP1(ir_node*, nodes, node);
}
unsigned const n_nodes = ARR_LEN(nodes);
if (n_nodes != 0) {
/* order nodes according to their data dependencies */
QSORT(nodes, n_nodes, cmp_call_dependency);
/* remember the calculated dependency into a phase */
for (unsigned n = n_nodes - 1; n > 0; --n) {
ir_node *const node = nodes[n];
ir_node *const pred = nodes[n - 1];
ir_nodemap_insert(map, node, pred);
}
}
DEL_ARR_F(nodes);
env->changes = NEW_ARR_F(be_stack_change_t, 0);
}
struct be_stackorder_t {
ir_nodemap stack_order; /**< a phase to handle stack dependencies. */
};
be_stackorder_t *be_collect_stacknodes(ir_graph *irg)
void be_stack_record_chain(be_stack_env_t *const env, ir_node *const before, unsigned const pos, ir_node *const after)
{
be_stackorder_t *env = XMALLOCZ(be_stackorder_t);
assert(!after || get_nodes_block(after) == get_nodes_block(before));
ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK);
/* collect all potential stack accessing nodes */
irg_walk_graph(irg, firm_clear_link, link_ops_in_block_walker, NULL);
ir_nodemap_init(&env->stack_order, irg);
/* use heights to create a total order for those nodes: this order is stored
* in the created phase */
heights = heights_new(irg);
irg_block_walk_graph(irg, NULL, process_ops_in_block, &env->stack_order);
heights_free(heights);
ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
return env;
be_stack_change_t const change = { before, pos, after };
ARR_APP1(be_stack_change_t, env->changes, change);
/* FIXME: This should not be necessary, but not keeping the until now unused
* stack nodes triggers problems with out edges, because they get deactivated
* before be_stack_finish() is called. It should suffice to keep the last
* stack producer per block in be_stack_finish(). */
if (after)
keep_alive(after);
}
ir_node *be_get_stack_pred(const be_stackorder_t *env, const ir_node *node)
void be_stack_finish(be_stack_env_t *const env)
{
return ir_nodemap_get(ir_node, &env->stack_order, node);
}
be_stack_change_t *const changes = env->changes;
env->changes = NULL;
void be_free_stackorder(be_stackorder_t *env)
{
ir_nodemap_destroy(&env->stack_order);
free(env);
unsigned const n_changes = ARR_LEN(changes);
if (n_changes != 0) {
/* Order the stack changes according to their data dependencies. */
ir_graph *const irg = get_irn_irg(changes[0].before);
heights = heights_new(irg);
QSORT(changes, n_changes, cmp_stack_dependency);
heights_free(heights);
/* Wire the stack change chains within each block, i.e. connect before of
* each change to after of its predecessor. */
ir_node *prev_block = NULL;
for (unsigned n = n_changes; n-- != 0;) {
be_stack_change_t const *const c = &changes[n];
ir_node *const block = get_nodes_block(c->before);
if (block == prev_block)
set_irn_n(c[1].before, c[1].pos, c[0].after);
prev_block = block;
}
}
DEL_ARR_F(changes);
}
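The backwards loop is subtle: c points at changes[n], so c[1] is the already visited successor in the sorted order, and the first iteration never touches c[1] because prev_block is still NULL. An equivalent forward formulation, for illustration only:

for (unsigned n = 1; n < n_changes; ++n) {
	be_stack_change_t const *const prev = &changes[n - 1];
	be_stack_change_t const *const curr = &changes[n];
	/* Within one block, replace curr's provisional stack input by the
	 * stack value produced by its predecessor chain. */
	if (get_nodes_block(curr->before) == get_nodes_block(prev->before))
		set_irn_n(curr->before, curr->pos, prev->after);
}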
static void create_stores_for_type(ir_graph *irg, ir_type *type)
@@ -15,8 +15,6 @@
#include "be_types.h"
#include "firm_types.h"
typedef struct be_stackorder_t be_stackorder_t;
/**
* A callback to pre-transform some nodes before the transformation starts.
*/
@@ -109,26 +107,44 @@ void be_map_exc_node_to_runtime_call(ir_node *node, ir_mode *res_mode,
long pn_M, long pn_X_regular,
long pn_X_except, long pn_res);
typedef struct be_stack_change_t be_stack_change_t;
typedef struct be_stack_env_t {
be_stack_change_t *changes;
} be_stack_env_t;
/**
* In the normal firm representation some nodes like pure calls, builtins
* have no memory inputs+outputs. However in the backend these sometimes have to
* access the stack to work and therefore suddenly need to be enqueued into the
* memory edge again.
* This API creates a possible order to enqueue them so we can be sure to create
* a legal dependency graph when transforming them.
* Initialize a stack change environment.
*
* Usually architectures use a machine stack to store local information, e.g.
* arguments of function calls. This concept is not present in the middleend,
* appears during code selection and causes chains of stack changes, which may
* not be interleaved. To prevent interleaving, the instruction scheduler has
* to be aware of these chains or a total order has to be established for them
* beforehand. This interface performs the latter.
* The change chains are recorded during code selection and wired afterwards.
*
* @param env The stack environment to initialize.
*/
be_stackorder_t *be_collect_stacknodes(ir_graph *irg);
void be_stack_init(be_stack_env_t *env);
/**
* return node that should produce the predecessor stack node in a block.
* returns NULL if there's no predecessor in the current block.
* Record one stack change chain.
*
* @param before The first node of the stack change chain.
* @param pos The operand number of the stack of @p before.
* @param after The stack value produced by this change, or NULL for the last
* change, e.g. return.
*/
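Condensed from the amd64 changes above, the three recording calls of this commit show the intended range of chains (head node, its stack operand position, produced stack value or NULL):

be_stack_record_chain(&stack_env, callframe, n_be_IncSP_pred,      incsp);      /* Call   */
be_stack_record_chain(&stack_env, subsp,     n_amd64_sub_sp_stack, stack_proj); /* Alloc  */
be_stack_record_chain(&stack_env, ret,       n_amd64_ret_stack,    NULL);       /* Return */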