Commit 7c4e33eb authored by Michael Beck

- moved the imul mem,imm32 splitting into peephole optimizations

- instead of issuing a rep ret, issue a ret 0 as recommended in the K10 optimization manual

[r19344]
parent e9e2900d
@@ -87,14 +87,15 @@ typedef struct {
be_node_attr_t node_attr; /**< base attributes of every be node. */
int num_ret_vals; /**< number of return values */
unsigned pop; /**< number of bytes that should be popped */
int emit_pop; /**< if set, emit pop bytes, even if pop = 0 */
} be_return_attr_t;
/** The be_IncSP attribute type. */
typedef struct {
be_node_attr_t node_attr; /**< base attributes of every be node. */
int offset; /**< The offset by which the stack shall be expanded/shrunk. */
int align; /**< wether stack should be aligned after the
IncSP */
int align; /**< whether stack should be aligned after the
IncSP */
} be_incsp_attr_t;
/** The be_Frame attribute type. */
@@ -107,9 +108,9 @@ typedef struct {
/** The be_Call attribute type. */
typedef struct {
be_node_attr_t node_attr; /**< base attributes of every be node. */
ir_entity *ent; /**< The called entity if this is a static call. */
ir_entity *ent; /**< The called entity if this is a static call. */
unsigned pop;
ir_type *call_tp; /**< The call type, copied from the original Call node. */
ir_type *call_tp; /**< The call type, copied from the original Call node. */
} be_call_attr_t;
typedef struct {
@@ -214,6 +215,8 @@ static int Return_cmp_attr(ir_node *a, ir_node *b) {
return 1;
if (a_attr->pop != b_attr->pop)
return 1;
if (a_attr->emit_pop != b_attr->emit_pop)
return 1;
return _node_cmp_attr(&a_attr->node_attr, &b_attr->node_attr);
}
@@ -697,25 +700,36 @@ ir_node *be_new_Return(dbg_info *dbg, ir_graph *irg, ir_node *block, int n_res,
a = get_irn_attr(res);
a->num_ret_vals = n_res;
a->pop = pop;
a->emit_pop = 0;
return res;
}
/* Returns the number of real return values */
int be_Return_get_n_rets(const ir_node *ret)
{
int be_Return_get_n_rets(const ir_node *ret) {
const be_return_attr_t *a = get_irn_generic_attr_const(ret);
return a->num_ret_vals;
}
unsigned be_Return_get_pop(const ir_node *ret)
{
/* return the number of bytes that should be popped from the stack when executing the Return. */
unsigned be_Return_get_pop(const ir_node *ret) {
const be_return_attr_t *a = get_irn_generic_attr_const(ret);
return a->pop;
}
int be_Return_append_node(ir_node *ret, ir_node *node)
{
/* return non-zero if the number of popped bytes must always be emitted */
int be_Return_get_emit_pop(const ir_node *ret) {
const be_return_attr_t *a = get_irn_generic_attr_const(ret);
return a->emit_pop;
}
/* set the emit_pop flag: if non-zero, the number of popped bytes is always emitted */
void be_Return_set_emit_pop(ir_node *ret, int emit_pop) {
be_return_attr_t *a = get_irn_generic_attr(ret);
a->emit_pop = emit_pop;
}
int be_Return_append_node(ir_node *ret, ir_node *node) {
int pos;
pos = add_irn_n(ret, node);
......
@@ -361,6 +361,20 @@ int be_Return_get_n_rets(const ir_node *ret);
*/
unsigned be_Return_get_pop(const ir_node *ret);
/**
* Return non-zero if the number of popped bytes must always be emitted.
*
* @param ret the be_Return node
*/
int be_Return_get_emit_pop(const ir_node *ret);
/**
* Set the emit_pop flag.
*
* @param ret the be_Return node
* @param emit_pop non-zero if the number of popped bytes must always be emitted
*/
void be_Return_set_emit_pop(ir_node *ret, int emit_pop);
/** appends a node to the return node, returns the position of the node */
int be_Return_append_node(ir_node *ret, ir_node *node);
......
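A minimal usage sketch of the new accessors (illustrative only; force_long_ret is a hypothetical helper, not part of this commit, and assumes the declarations above are in scope):

/* Force the three-byte "ret $0" encoding for a return that pops no bytes;
 * emit_be_Return (see below) prints the immediate whenever this flag is set. */
static void force_long_ret(ir_node *ret)
{
	if (be_Return_get_pop(ret) == 0 && !be_Return_get_emit_pop(ret))
		be_Return_set_emit_pop(ret, 1);
}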
@@ -1809,7 +1809,7 @@ static void emit_be_Return(const ir_node *node)
be_emit_cstring("\tret");
pop = be_Return_get_pop(node);
if(pop > 0) {
if (pop > 0 || be_Return_get_emit_pop(node)) {
be_emit_irprintf(" $%d", pop);
}
be_emit_finish_line_gas(node);
......
@@ -345,9 +345,15 @@ static void peephole_ia32_Return(ir_node *node) {
}
}
/* yep, return is the first real instruction in this block */
#if 0
/* add a rep prefix to the return */
rep = new_rd_ia32_RepPrefix(get_irn_dbg_info(node), current_ir_graph, block);
keep_alive(rep);
sched_add_before(node, rep);
#else
/* ensure that the 3-byte return is generated */
be_Return_set_emit_pop(node, 1);
#endif
}
/* only optimize up to 48 stores behind IncSPs */
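For reference, a sketch of the return encodings this peephole chooses between (sizes follow from the x86 opcode map; the recommendation to prefer ret 0 is the one the commit message attributes to the K10 optimization manual):

/* Return encodings (illustrative only):
 *   ret       1 byte   C3         single-byte return; problematic as a branch
 *                                 target on K8/K10, which is exactly the
 *                                 "return is the first instruction in this block" case above
 *   rep ret   2 bytes  F3 C3      old workaround, kept above under #if 0
 *   ret $0    3 bytes  C2 00 00   new form; with a zero immediate it behaves
 *                                 like a plain ret, it is merely a longer encoding
 */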
@@ -876,12 +882,68 @@ exchange:
be_peephole_after_exchange(res);
}
/**
* Split an IMul mem, imm into a Load mem and an IMul reg, imm if possible.
*/
static void peephole_ia32_Imul_split(ir_node *imul) {
const ir_node *right = get_irn_n(imul, n_ia32_IMul_right);
const arch_register_t *reg;
ir_node *load, *block, *base, *index, *mem, *res, *noreg;
dbg_info *dbgi;
ir_graph *irg;
if (! is_ia32_Immediate(right) || get_ia32_op_type(imul) != ia32_AddrModeS) {
/* not a memory, immediate form; ignore */
return;
}
/* we need a free register */
reg = get_free_gp_reg();
if (reg == NULL)
return;
/* fine, we can rebuild it */
dbgi = get_irn_dbg_info(imul);
block = get_nodes_block(imul);
irg = current_ir_graph;
base = get_irn_n(imul, n_ia32_IMul_base);
index = get_irn_n(imul, n_ia32_IMul_index);
mem = get_irn_n(imul, n_ia32_IMul_mem);
load = new_rd_ia32_Load(dbgi, irg, block, base, index, mem);
/* copy all attributes */
set_irn_pinned(load, get_irn_pinned(imul));
set_ia32_op_type(load, ia32_AddrModeS);
set_ia32_ls_mode(load, get_ia32_ls_mode(imul));
set_ia32_am_scale(load, get_ia32_am_scale(imul));
set_ia32_am_sc(load, get_ia32_am_sc(imul));
set_ia32_am_offs_int(load, get_ia32_am_offs_int(imul));
if (is_ia32_am_sc_sign(imul))
set_ia32_am_sc_sign(load);
if (is_ia32_use_frame(imul))
set_ia32_use_frame(load);
set_ia32_frame_ent(load, get_ia32_frame_ent(imul));
sched_add_before(imul, load);
mem = new_rd_Proj(dbgi, irg, block, load, mode_M, pn_ia32_Load_M);
res = new_rd_Proj(dbgi, irg, block, load, mode_Iu, pn_ia32_Load_res);
arch_set_irn_register(arch_env, res, reg);
be_peephole_after_exchange(res);
set_irn_n(imul, n_ia32_IMul_mem, mem);
noreg = get_irn_n(imul, n_ia32_IMul_left);
set_irn_n(imul, n_ia32_IMul_left, res);
set_ia32_op_type(imul, ia32_Normal);
}
/**
* Register a peephole optimisation function.
*/
static void register_peephole_optimisation(ir_op *op, peephole_opt_func func) {
assert(op->ops.generic == NULL);
op->ops.generic = (void*) func;
op->ops.generic = (op_func)func;
}
/* Perform peephole-optimizations. */
@@ -899,6 +961,8 @@ void ia32_peephole_optimization(ia32_code_gen_t *new_cg)
register_peephole_optimisation(op_ia32_Test, peephole_ia32_Test);
register_peephole_optimisation(op_ia32_Test8Bit, peephole_ia32_Test);
register_peephole_optimisation(op_be_Return, peephole_ia32_Return);
if (! ia32_cg_config.use_imul_mem_imm32)
register_peephole_optimisation(op_ia32_IMul, peephole_ia32_Imul_split);
be_peephole_opt(cg->birg);
}
......
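A before/after sketch of what peephole_ia32_Imul_split does at the instruction level (AT&T syntax, operands made up for illustration; the scratch register comes from get_free_gp_reg, and the peephole is only registered when ia32_cg_config.use_imul_mem_imm32 is disabled):

/* before: a single IMul in source address mode with an immediate operand
 *             imull $100000, 8(%ebp), %eax
 *
 * after:  a Load feeding a register-form IMul (op type reset to ia32_Normal)
 *             movl  8(%ebp), %edx
 *             imull $100000, %edx, %eax
 */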
@@ -1216,7 +1216,6 @@ static ir_node *gen_Mul(ir_node *node) {
ir_node *op1 = get_Mul_left(node);
ir_node *op2 = get_Mul_right(node);
ir_mode *mode = get_irn_mode(node);
unsigned flags;
if (mode_is_float(mode)) {
if (ia32_cg_config.use_sse2)
@@ -1226,14 +1225,9 @@ static ir_node *gen_Mul(ir_node *node) {
return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfmul,
match_commutative | match_am);
}
/* for the lower 32bit of the result it doesn't matter whether we use
* signed or unsigned multiplication so we use IMul as it has fewer
* constraints */
flags = match_commutative | match_am | match_mode_neutral | match_immediate;
if (ia32_cg_config.use_imul_mem_imm32)
flags |= match_am_and_immediates;
return gen_binop(node, op1, op2, new_rd_ia32_IMul, flags);
return gen_binop(node, op1, op2, new_rd_ia32_IMul,
match_commutative | match_am | match_mode_neutral |
match_immediate | match_am_and_immediates);
}
/**
......
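The "mode neutral" remark in the hunk above rests on the fact that signed and unsigned multiplication agree on the low 32 bits of the result; a small stand-alone check (illustrative only, not part of the commit):

#include <inttypes.h>
#include <stdio.h>

int main(void)
{
	int32_t  a  = -7, b = 100000;
	uint32_t ua = (uint32_t)a, ub = (uint32_t)b;

	/* both lines print the same 32-bit pattern: fff551a0 */
	printf("%08" PRIx32 "\n", (uint32_t)(a * b));
	printf("%08" PRIx32 "\n", ua * ub);
	return 0;
}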