Commit 080b413c authored by Matthias Braun's avatar Matthias Braun
Browse files

peephole optimisation: reuse zero flag from previous ops if possible

[r16392]
parent 883aa841
......@@ -710,8 +710,10 @@ static int determine_final_pnc(const ir_node *node, int flags_pos,
pnc = get_mirrored_pnc(pnc);
pnc |= ia32_pn_Cmp_float;
} else {
#if 0
assert(is_ia32_Cmp(flags) || is_ia32_Test(flags)
|| is_ia32_Cmp8Bit(flags) || is_ia32_Test8Bit(flags));
#endif
flags_attr = get_ia32_attr_const(flags);
if(flags_attr->data.ins_permuted)
......
......@@ -46,8 +46,8 @@ enum {
* need the result) */
enum {
pn_ia32_res = 0,
pn_ia32_mem = 1,
pn_ia32_flags = 2,
pn_ia32_flags = 1,
pn_ia32_mem = 2,
pn_ia32_add1 = 3,
pn_ia32_add2 = 4
};
......
......@@ -139,6 +139,162 @@ static void peephole_ia32_Store(ir_node *node)
}
#endif
/**
 * Tells whether the zero flag left behind by @p node reflects the value
 * obtained through result number @p pn.
 *
 * @param node  the node that computes the value
 * @param pn    Proj number through which the value is read; a negative
 *              number skips the Proj number check (peephole_ia32_Test
 *              passes -1 when the value is not read through a Proj)
 * @return 1 if the zero flag can be reused, 0 otherwise
 */
static int produces_zero_flag(ir_node *node, int pn)
{
	if (!is_ia32_irn(node))
		return 0;
	/* only the main result is described by the flags */
	if (pn >= 0 && pn != pn_ia32_res)
		return 0;

	switch (get_ia32_irn_opcode(node)) {
	case iro_ia32_Add:
	case iro_ia32_Adc:
	case iro_ia32_And:
	case iro_ia32_Or:
	case iro_ia32_Xor:
	case iro_ia32_Sub:
	case iro_ia32_Sbb:
	case iro_ia32_Neg:
	case iro_ia32_Inc:
	case iro_ia32_Dec:
		return 1;

	case iro_ia32_ShlD:
	case iro_ia32_ShrD:
	case iro_ia32_Shl:
	case iro_ia32_Shr:
	case iro_ia32_Sar: {
		const ia32_immediate_attr_t *amount_attr;
		ir_node                     *amount;
		int                          is_double_shift;

		/* the count input index is shared within each shift family */
		assert(n_ia32_ShlD_count == n_ia32_ShrD_count);
		assert(n_ia32_Shl_count == n_ia32_Shr_count
				&& n_ia32_Shl_count == n_ia32_Sar_count);

		is_double_shift = is_ia32_ShlD(node) || is_ia32_ShrD(node);
		if (is_double_shift) {
			amount = get_irn_n(node, n_ia32_ShlD_count);
		} else {
			amount = get_irn_n(node, n_ia32_Shl_count);
		}

		/* a shift by zero leaves the flags untouched, so the shift amount
		 * has to be a known constant that is non-zero in its low 5 bits */
		if (!is_ia32_Immediate(amount))
			return 0;
		amount_attr = get_ia32_immediate_attr_const(amount);
		return amount_attr->symconst == NULL
			&& (amount_attr->offset & 0x1f) != 0;
	}

	default:
		return 0;
	}
}
/**
 * Makes sure a node has mode_T so that additional Projs can be attached to it.
 *
 * A node that already has mode_T is returned unchanged. Otherwise the node
 * (asserted to be mode_Iu) is replaced by a mode_T copy of itself; all former
 * users are redirected to a new mode_Iu Proj (number pn_ia32_res) of that copy,
 * and the Proj takes over the register that was assigned to the original node.
 *
 * @param node  the node to convert
 * @return the mode_T node (either @p node itself or its mode_T copy)
 */
static ir_node *turn_into_mode_t(ir_node *node)
{
ir_node *block;
ir_node *res_proj;
ir_node *new_node;
const arch_register_t *reg;
/* nothing to do if the node already is a tuple */
if(get_irn_mode(node) == mode_T)
return node;
assert(get_irn_mode(node) == mode_Iu);
/* build the tuple variant from an exact copy of the node */
new_node = exact_copy(node);
set_irn_mode(new_node, mode_T);
block = get_nodes_block(new_node);
/* the Proj stands in for the old value and inherits its register */
res_proj = new_r_Proj(current_ir_graph, block, new_node, mode_Iu,
pn_ia32_res);
reg = arch_get_irn_register(arch_env, node);
arch_set_irn_register(arch_env, res_proj, reg);
/* the copy takes the schedule position of the original node, then the
 * original is replaced by the Proj; the peephole framework is notified
 * before and after the exchange */
be_peephole_before_exchange(node, res_proj);
sched_add_before(node, new_node);
sched_remove(node);
exchange(node, res_proj);
be_peephole_after_exchange(res_proj);
return new_node;
}
/**
 * Peephole optimisation for Test/Test8Bit nodes of the form test(x, x).
 *
 * If x is computed in the same block by a node whose zero flag describes x
 * (see produces_zero_flag()), no node scheduled between that producer and the
 * Test modifies the flags, and all users of the Test only evaluate Eq/Lg,
 * then the Test is removed and its users read a flags Proj of the producer
 * instead.
 *
 * NOTE(review): this handler is also registered for Test8Bit. Nothing here
 * compares the operand width of the Test with that of the producer -
 * confirm that reusing the producer's zero flag is valid for 8 bit tests.
 *
 * @param node  the Test or Test8Bit node to optimise
 */
static void peephole_ia32_Test(ir_node *node)
{
	ir_node         *left  = get_irn_n(node, n_ia32_Test_left);
	ir_node         *right = get_irn_n(node, n_ia32_Test_right);
	ir_node         *flags_proj;
	ir_node         *block;
	ir_mode         *flags_mode;
	int              pn    = -1;
	ir_node         *schedpoint;
	const ir_edge_t *edge;

	assert(n_ia32_Test_left == n_ia32_Test8Bit_left
			&& n_ia32_Test_right == n_ia32_Test8Bit_right);

	/* we need a test for 0 */
	if(left != right)
		return;

	block = get_nodes_block(node);
	if(get_nodes_block(left) != block)
		return;

	/* look through a result Proj, remembering which result is tested */
	if(is_Proj(left)) {
		pn   = get_Proj_proj(left);
		left = get_Proj_pred(left);
	}

	/* walk schedule up and abort when some node between left and the Test
	 * destroys the flags. Every node strictly between the two is checked,
	 * including the direct predecessor of the Test; left itself is
	 * recognised before the check because it is the node whose flags we
	 * want to reuse. */
	schedpoint = node;
	for(;;) {
		schedpoint = sched_prev(schedpoint);
		if(schedpoint == left)
			break;
		if(arch_irn_is(arch_env, schedpoint, modify_flags))
			return;
		if(schedpoint == block)
			panic("couldn't find left");
	}

	/* make sure only Lg/Eq tests are used */
	foreach_out_edge(node, edge) {
		ir_node *user = get_edge_src_irn(edge);
		int      pnc  = get_ia32_condcode(user);

		if(pnc != pn_Cmp_Eq && pnc != pn_Cmp_Lg) {
			return;
		}
	}

	if(!produces_zero_flag(left, pn))
		return;

	/* the producer needs mode_T so a flags Proj can be attached to it */
	left = turn_into_mode_t(left);
	ir_fprintf(stderr, "Optimizing test(x,x) %+F (-> %+F)\n", node, left);

	flags_mode = ia32_reg_classes[CLASS_ia32_flags].mode;
	flags_proj = new_r_Proj(current_ir_graph, block, left, flags_mode,
	                        pn_ia32_flags);
	arch_set_irn_register(arch_env, flags_proj, &ia32_flags_regs[REG_EFLAGS]);

	/* users of the Test now read the flags of the producer */
	assert(get_irn_mode(node) != mode_T);
	be_peephole_before_exchange(node, flags_proj);
	exchange(node, flags_proj);
	sched_remove(node);
	be_peephole_after_exchange(flags_proj);
}
// only optimize up to 48 stores behind IncSPs
#define MAXPUSH_OPTIMIZE 48
......@@ -659,6 +815,8 @@ void ia32_peephole_optimization(ia32_code_gen_t *new_cg)
//register_peephole_optimisation(op_ia32_Store, peephole_ia32_Store);
register_peephole_optimisation(op_be_IncSP, peephole_be_IncSP);
register_peephole_optimisation(op_ia32_Lea, peephole_ia32_Lea);
register_peephole_optimisation(op_ia32_Test, peephole_ia32_Test);
register_peephole_optimisation(op_ia32_Test8Bit, peephole_ia32_Test);
be_peephole_opt(cg->birg);
}
......
......@@ -370,7 +370,8 @@ ProduceVal => {
Add => {
irn_flags => "R",
state => "exc_pinned",
reg_req => { in => [ "gp", "gp", "none", "gp", "gp" ], out => [ "in_r4 in_r5", "none", "flags" ] },
reg_req => { in => [ "gp", "gp", "none", "gp", "gp" ],
out => [ "in_r4 in_r5", "flags", "none" ] },
ins => [ "base", "index", "mem", "left", "right" ],
emit => '. add%M %binop',
am => "full,binary",
......@@ -406,7 +407,8 @@ AddMem8Bit => {
Adc => {
state => "exc_pinned",
reg_req => { in => [ "gp", "gp", "none", "gp", "gp", "flags" ], out => [ "in_r4 in_r5" ] },
reg_req => { in => [ "gp", "gp", "none", "gp", "gp", "flags" ],
out => [ "in_r4 in_r5", "flags", "none" ] },
ins => [ "base", "index", "mem", "left", "right", "eflags" ],
emit => '. adc%M %binop',
am => "full,binary",
......@@ -431,10 +433,11 @@ Mul => {
# we should not rematerialize this node. It produces 2 results and has
# very strict constraints
state => "exc_pinned",
reg_req => { in => [ "gp", "gp", "none", "eax", "gp" ], out => [ "eax", "edx", "none" ] },
reg_req => { in => [ "gp", "gp", "none", "eax", "gp" ],
out => [ "eax", "edx", "none" ] },
ins => [ "base", "index", "mem", "val_high", "val_low" ],
emit => '. mul%M %unop4',
outs => [ "EAX", "EDX", "M" ],
outs => [ "res_low", "res_high", "M" ],
am => "source,binary",
latency => 10,
units => [ "GP" ],
......@@ -456,9 +459,9 @@ IMul => {
# TODO: adjust out requirements for the 3 operand form
# (no need for should_be_same then)
reg_req => { in => [ "gp", "gp", "none", "gp", "gp" ],
out => [ "in_r4 in_r5", "none", "flags" ] },
out => [ "in_r4 in_r5", "flags", "none" ] },
ins => [ "base", "index", "mem", "left", "right" ],
outs => [ "res", "M", "flags" ],
outs => [ "res", "flags", "M" ],
am => "source,binary",
latency => 5,
units => [ "GP" ],
......@@ -469,10 +472,11 @@ IMul => {
IMul1OP => {
irn_flags => "R",
state => "exc_pinned",
reg_req => { in => [ "gp", "gp", "none", "eax", "gp" ], out => [ "eax", "edx", "none" ] },
reg_req => { in => [ "gp", "gp", "none", "eax", "gp" ],
out => [ "eax", "edx", "none" ] },
ins => [ "base", "index", "mem", "val_high", "val_low" ],
emit => '. imul%M %unop4',
outs => [ "EAX", "EDX", "M" ],
outs => [ "res_low", "res_high", "M" ],
am => "source,binary",
latency => 5,
units => [ "GP" ],
......@@ -480,19 +484,19 @@ IMul1OP => {
},
l_IMul => {
# we should not rematerialize this node. It produces 2 results and has
# very strict constraints
op_flags => "C",
cmp_attr => "return 1;",
outs => [ "EAX", "EDX", "M" ],
outs => [ "res_low", "res_high", "M" ],
arity => 2
},
And => {
irn_flags => "R",
state => "exc_pinned",
reg_req => { in => [ "gp", "gp", "none", "gp", "gp" ], out => [ "in_r4 in_r5" ] },
reg_req => { in => [ "gp", "gp", "none", "gp", "gp" ],
out => [ "in_r4 in_r5", "flags", "none" ] },
ins => [ "base", "index", "mem", "left", "right" ],
outs => [ "res", "flags", "M" ],
op_modes => "commutative | am | immediate | mode_neutral",
am => "full,binary",
emit => '. and%M %binop',
......@@ -529,8 +533,10 @@ AndMem8Bit => {
Or => {
irn_flags => "R",
state => "exc_pinned",
reg_req => { in => [ "gp", "gp", "none", "gp", "gp" ], out => [ "in_r4 in_r5" ] },
reg_req => { in => [ "gp", "gp", "none", "gp", "gp" ],
out => [ "in_r4 in_r5", "flags", "none" ] },
ins => [ "base", "index", "mem", "left", "right" ],
outs => [ "res", "flags", "M" ],
am => "full,binary",
emit => '. or%M %binop',
units => [ "GP" ],
......@@ -566,8 +572,10 @@ OrMem8Bit => {
Xor => {
irn_flags => "R",
state => "exc_pinned",
reg_req => { in => [ "gp", "gp", "none", "gp", "gp" ], out => [ "in_r4 in_r5" ] },
reg_req => { in => [ "gp", "gp", "none", "gp", "gp" ],
out => [ "in_r4 in_r5", "flags", "none" ] },
ins => [ "base", "index", "mem", "left", "right" ],
outs => [ "res", "flags", "M" ],
am => "full,binary",
emit => '. xor%M %binop',
units => [ "GP" ],
......@@ -605,8 +613,10 @@ XorMem8Bit => {
Sub => {
irn_flags => "R",
state => "exc_pinned",
reg_req => { in => [ "gp", "gp", "none", "gp", "gp" ], out => [ "in_r4", "none", "flags" ] },
reg_req => { in => [ "gp", "gp", "none", "gp", "gp" ],
out => [ "in_r4", "flags", "none" ] },
ins => [ "base", "index", "mem", "left", "right" ],
outs => [ "res", "flags", "M" ],
am => "full,binary",
emit => '. sub%M %binop',
units => [ "GP" ],
......@@ -641,8 +651,10 @@ SubMem8Bit => {
Sbb => {
state => "exc_pinned",
reg_req => { in => [ "gp", "gp", "none", "gp", "gp", "flags" ], out => [ "in_r4 !in_r5" ] },
reg_req => { in => [ "gp", "gp", "none", "gp", "gp", "flags" ],
out => [ "in_r4 !in_r5", "flags", "none" ] },
ins => [ "base", "index", "mem", "left", "right", "eflags" ],
outs => [ "res", "flags", "M" ],
am => "full,binary",
emit => '. sbb%M %binop',
units => [ "GP" ],
......@@ -664,9 +676,10 @@ l_Sbb => {
IDiv => {
op_flags => "F|L",
state => "exc_pinned",
reg_req => { in => [ "gp", "gp", "none", "eax", "edx", "gp" ], out => [ "eax", "none", "none", "edx" ] },
reg_req => { in => [ "gp", "gp", "none", "eax", "edx", "gp" ],
out => [ "eax", "flags", "none", "edx" ] },
ins => [ "base", "index", "mem", "left_low", "left_high", "right" ],
outs => [ "div_res", "M", "unused", "mod_res" ],
outs => [ "div_res", "flags", "M", "mod_res" ],
am => "source,ternary",
emit => ". idiv%M %unop5",
latency => 25,
......@@ -677,9 +690,10 @@ IDiv => {
Div => {
op_flags => "F|L",
state => "exc_pinned",
reg_req => { in => [ "gp", "gp", "none", "eax", "edx", "gp" ], out => [ "eax", "none", "none", "edx" ] },
reg_req => { in => [ "gp", "gp", "none", "eax", "edx", "gp" ],
out => [ "eax", "flags", "none", "edx" ] },
ins => [ "base", "index", "mem", "left_low", "left_high", "right" ],
outs => [ "div_res", "M", "unused", "mod_res" ],
outs => [ "div_res", "flags", "M", "mod_res" ],
am => "source,ternary",
emit => ". div%M %unop5",
latency => 25,
......@@ -689,8 +703,10 @@ Div => {
Shl => {
irn_flags => "R",
reg_req => { in => [ "gp", "ecx" ], out => [ "in_r1 !in_r2" ] },
ins => [ "left", "right" ],
reg_req => { in => [ "gp", "ecx" ],
out => [ "in_r1 !in_r2", "flags" ] },
ins => [ "val", "count" ],
outs => [ "res", "flags" ],
emit => '. shl %SB1, %S0',
units => [ "GP" ],
latency => 1,
......@@ -711,16 +727,17 @@ ShlMem => {
},
l_ShlDep => {
cmp_attr => "return 1;",
ins => [ "left", "right", "dep" ],
# value, cnt, dependency
arity => 3
cmp_attr => "return 1;",
ins => [ "val", "count", "dep" ],
arity => 3
},
ShlD => {
irn_flags => "R",
reg_req => { in => [ "gp", "gp", "ecx" ], out => [ "in_r1 !in_r2 !in_r3" ] },
ins => [ "left_high", "left_low", "right" ],
reg_req => { in => [ "gp", "gp", "ecx" ],
out => [ "in_r1 !in_r2 !in_r3", "flags" ] },
ins => [ "val_high", "val_low", "count" ],
outs => [ "res", "flags" ],
emit => ". shld%M %SB2, %S1, %D0",
latency => 6,
units => [ "GP" ],
......@@ -730,14 +747,16 @@ ShlD => {
l_ShlD => {
cmp_attr => "return 1;",
ins => [ "high", "low", "count" ],
ins => [ "val_high", "val_low", "count" ],
arity => 3,
},
Shr => {
irn_flags => "R",
reg_req => { in => [ "gp", "ecx" ], out => [ "in_r1 !in_r2" ] },
reg_req => { in => [ "gp", "ecx" ],
out => [ "in_r1 !in_r2", "flags" ] },
ins => [ "val", "count" ],
outs => [ "res", "flags" ],
emit => '. shr %SB1, %S0',
units => [ "GP" ],
mode => $mode_gp,
......@@ -759,15 +778,16 @@ ShrMem => {
l_ShrDep => {
cmp_attr => "return 1;",
ins => [ "left", "right", "dep" ],
# value, cnt, dependency
ins => [ "val", "count", "dep" ],
arity => 3
},
ShrD => {
irn_flags => "R",
reg_req => { in => [ "gp", "gp", "ecx" ], out => [ "in_r1 !in_r2 !in_r3" ] },
ins => [ "left_high", "left_low", "right" ],
reg_req => { in => [ "gp", "gp", "ecx" ],
out => [ "in_r1 !in_r2 !in_r3", "flags" ] },
ins => [ "val_high", "val_low", "count" ],
outs => [ "res", "flags" ],
emit => ". shrd%M %SB2, %S1, %D0",
latency => 6,
units => [ "GP" ],
......@@ -778,13 +798,15 @@ ShrD => {
l_ShrD => {
cmp_attr => "return 1;",
arity => 3,
ins => [ "high", "low", "count" ],
ins => [ "val_high", "val_low", "count" ],
},
Sar => {
irn_flags => "R",
reg_req => { in => [ "gp", "ecx" ], out => [ "in_r1 !in_r2" ] },
reg_req => { in => [ "gp", "ecx" ],
out => [ "in_r1 !in_r2", "flags" ] },
ins => [ "val", "count" ],
outs => [ "res", "flags" ],
emit => '. sar %SB1, %S0',
units => [ "GP" ],
latency => 1,
......@@ -806,15 +828,16 @@ SarMem => {
l_SarDep => {
cmp_attr => "return 1;",
ins => [ "left", "right", "dep" ],
# value, cnt, dependency
ins => [ "val", "count", "dep" ],
arity => 3
},
Ror => {
irn_flags => "R",
reg_req => { in => [ "gp", "ecx" ], out => [ "in_r1 !in_r2" ] },
reg_req => { in => [ "gp", "ecx" ],
out => [ "in_r1 !in_r2", "flags" ] },
ins => [ "val", "count" ],
outs => [ "res", "flags" ],
emit => '. ror %SB1, %S0',
units => [ "GP" ],
latency => 1,
......@@ -836,8 +859,10 @@ RorMem => {
Rol => {
irn_flags => "R",
reg_req => { in => [ "gp", "ecx" ], out => [ "in_r1 !in_r2" ] },
reg_req => { in => [ "gp", "ecx" ],
out => [ "in_r1 !in_r2", "flags" ] },
ins => [ "val", "count" ],
outs => [ "res", "flags" ],
emit => '. rol %SB1, %S0',
units => [ "GP" ],
latency => 1,
......@@ -861,9 +886,11 @@ RolMem => {
Neg => {
irn_flags => "R",
reg_req => { in => [ "gp" ], out => [ "in_r1" ] },
reg_req => { in => [ "gp" ],
out => [ "in_r1", "flags" ] },
emit => '. neg %S0',
ins => [ "val" ],
outs => [ "res", "flags" ],
units => [ "GP" ],
latency => 1,
mode => $mode_gp,
......@@ -894,7 +921,9 @@ Minus64Bit => {
Inc => {
irn_flags => "R",
reg_req => { in => [ "gp" ], out => [ "in_r1" ] },
reg_req => { in => [ "gp" ],
out => [ "in_r1", "flags" ] },
outs => [ "res", "flags" ],
emit => '. inc %S0',
units => [ "GP" ],
mode => $mode_gp,
......@@ -916,7 +945,9 @@ IncMem => {
Dec => {
irn_flags => "R",
reg_req => { in => [ "gp" ], out => [ "in_r1" ] },
reg_req => { in => [ "gp" ],
out => [ "in_r1", "flags" ] },
outs => [ "res", "flags" ],
emit => '. dec %S0',
units => [ "GP" ],
mode => $mode_gp,
......@@ -938,12 +969,15 @@ DecMem => {
Not => {
irn_flags => "R",
reg_req => { in => [ "gp" ], out => [ "in_r1" ] },
reg_req => { in => [ "gp" ],
out => [ "in_r1", "flags" ] },
ins => [ "val" ],
outs => [ "res", "flags" ],
emit => '. not %S0',
units => [ "GP" ],
latency => 1,
mode => $mode_gp,
# no flags modified
},
NotMem => {
......@@ -955,6 +989,7 @@ NotMem => {
units => [ "GP" ],
latency => 1,
mode => "mode_M",
# no flags modified
},
# other operations
......
......@@ -1167,7 +1167,7 @@ static ir_node *gen_Mulh(ir_node *node)
ir_mode *mode = get_irn_mode(node);
ir_node *op1 = get_Mulh_left(node);
ir_node *op2 = get_Mulh_right(node);
ir_node *proj_EDX;
ir_node *proj_res_high;
ir_node *new_node;
ia32_address_mode_t am;
ia32_address_t *addr = &am.addr;
......@@ -1197,11 +1197,11 @@ static ir_node *gen_Mulh(ir_node *node)
fix_mem_proj(new_node, &am);
assert(pn_ia32_IMul1OP_EDX == pn_ia32_Mul_EDX);
proj_EDX = new_rd_Proj(dbgi, irg, block, new_node,
mode_Iu, pn_ia32_IMul1OP_EDX);
assert(pn_ia32_IMul1OP_res_high == pn_ia32_Mul_res_high);
proj_res_high = new_rd_Proj(dbgi, irg, block, new_node,
mode_Iu, pn_ia32_IMul1OP_res_high);
return proj_EDX;
return proj_res_high;
}
......@@ -3939,8 +3939,8 @@ static ir_node *gen_lowered_Store(ir_node *node, construct_store_func func)
static ir_node *gen_ia32_l_ShlDep(ir_node *node)
{
ir_node *left = get_irn_n(node, n_ia32_l_ShlDep_left);
ir_node *right = get_irn_n(node, n_ia32_l_ShlDep_right);
ir_node *left = get_irn_n(node, n_ia32_l_ShlDep_val);
ir_node *right = get_irn_n(node, n_ia32_l_ShlDep_count);
return gen_shift_binop(node, left, right, new_rd_ia32_Shl,
match_immediate | match_mode_neutral);
......@@ -3948,16 +3948,16 @@ static ir_node *gen_ia32_l_ShlDep(ir_node *node)
static ir_node *gen_ia32_l_ShrDep(ir_node *node)
{
ir_node *left = get_irn_n(node, n_ia32_l_ShrDep_left);
ir_node *right = get_irn_n(node, n_ia32_l_ShrDep_right);
ir_node *left = get_irn_n(node, n_ia32_l_ShrDep_val);
ir_node *right = get_irn_n(node, n_ia32_l_ShrDep_count);
return gen_shift_binop(node, left, right, new_rd_ia32_Shr,
match_immediate);
}
static ir_node *gen_ia32_l_SarDep(ir_node *node)
{
ir_node *left = get_irn_n(node, n_ia32_l_SarDep_left);
ir_node *right = get_irn_n(node, n_ia32_l_SarDep_right);
ir_node *left = get_irn_n(node, n_ia32_l_SarDep_val);
ir_node *right = get_irn_n(node, n_ia32_l_SarDep_count);
return gen_shift_binop(node, left, right, new_rd_ia32_Sar,
match_immediate);
}
......@@ -4142,16 +4142,16 @@ static ir_node *gen_lowered_64bit_shifts(ir_node *node, ir_node *high,
static ir_node *gen_ia32_l_ShlD(ir_node *node)
{
ir_node *high = get_irn_n(node, n_ia32_l_ShlD_high);
ir_node *low = get_irn_n(node, n_ia32_l_ShlD_low);
ir_node *high = get_irn_n(node, n_ia32_l_ShlD_val_high);
ir_node *low = get_irn_n(node, n_ia32_l_ShlD_val_low);
ir_node *count = get_irn_n(node, n_ia32_l_ShlD_count);
return gen_lowered_64bit_shifts(node, high, low, count);
}
static ir_node *gen_ia32_l_ShrD(ir_node *node)
{
ir_node *high = get_irn_n(node, n_ia32_l_ShrD_high);