Commit c0a9a341 authored by Christoph Mallon's avatar Christoph Mallon
Browse files

amd64: Add peephole optimization 'lea' -> 'add'.

parent 821ab9a4
......@@ -14,6 +14,8 @@ libFirm 1.22.1 (2016-01-07)
* Improve handling of negative overflow in float to int tarval conversion
* Improve matching of immediates during instruction selection (amd64)
* Add peephole optimization 'mov $0, %r' -> 'xorl %r, %r' (amd64)
* Add peephole optimization 'lea c(%r), %r' -> 'add $c, %r' (amd64)
* Add peephole optimization 'lea (%r1, %r2), %r1' -> 'add %r2, %r1' and the commutated case (amd64)
* Bugfixes
libFirm 1.22.0 (2015-12-31)
......
......@@ -10,10 +10,80 @@
#include "amd64_optimize.h"
#include "amd64_new_nodes.h"
#include "amd64_transform.h"
#include "benode.h"
#include "bepeephole.h"
#include "besched.h"
#include "gen_amd64_regalloc_if.h"
#include "util.h"
/**
 * Replace @p node by a freshly built amd64 add.
 *
 * The add is created in the block of @p node with the debug info of @p node,
 * scheduled directly before @p node, and its result Proj (assigned to
 * @p oreg) is exchanged for @p node via be_peephole_exchange().
 *
 * @param node  the node to be replaced
 * @param n_in  number of entries in @p in
 * @param in    inputs of the new add
 * @param reqs  input register requirements of the new add
 * @param attr  attributes handed to the add constructor
 * @param oreg  register assigned to the result of the new add
 */
static void make_add(ir_node *const node,
                     size_t const n_in,
                     ir_node *const *const in,
                     arch_register_req_t const **const reqs,
                     amd64_binop_addr_attr_t const *const attr,
                     arch_register_t const *const oreg)
{
	ir_node *const new_add = new_bd_amd64_add(get_irn_dbg_info(node), get_nodes_block(node), n_in, in, reqs, attr);

	/* Output 0 of the add gets the "should be same as input 0" requirement. */
	arch_set_irn_register_req_out(new_add, 0, &amd64_requirement_gp_same_0);

	/* Put the add into the schedule before swapping out the old node. */
	sched_add_before(node, new_add);

	ir_node *const result = be_new_Proj_reg(new_add, pn_amd64_add_res, oreg);
	be_peephole_exchange(node, result);
}
/**
 * Peephole optimization: turn an amd64 lea into an add when the result
 * register of the lea is also one of its address registers.
 *
 *   lea c(%r), %r        ->  add $c, %r
 *   lea (%r1, %r2), %r1  ->  add %r2, %r1
 *   lea (%r1, %r2), %r2  ->  add %r1, %r2
 *
 * The base+index forms are only handled for scale 1 (log_scale == 0) and
 * without any displacement (no entity, offset 0).
 *
 * @param node  the amd64 lea node to inspect
 */
static void peephole_amd64_lea(ir_node *const node)
{
	/* add writes the flags while lea does not, so do nothing when
	 * be_peephole_get_value() reports a value for EFLAGS at this point
	 * (presumably meaning the flags are live here -- confirm against
	 * the peephole framework). */
	if (be_peephole_get_value(REG_EFLAGS))
		return;

	arch_register_t const *const out_reg  = arch_get_irn_register_out(node, pn_amd64_lea_res);
	amd64_addr_attr_t const *const lea_attr = get_amd64_addr_attr_const(node);
	amd64_addr_t const *const addr = &lea_attr->addr;

	if (addr->variant == X86_ADDR_BASE) {
		/* lea c(%r), %r -> add $c, %r */
		ir_node *const base = get_irn_n(node, addr->base_input);
		if (out_reg != arch_get_irn_register(base))
			return;

		amd64_binop_addr_attr_t const imm_attr = {
			.base = {
				.base = { .op_mode = AMD64_OP_REG_IMM, },
				.size = lea_attr->size,
				.addr = {
					.base_input = 0,
					.variant    = X86_ADDR_REG,
				},
			},
			.u.immediate = addr->immediate,
		};
		ir_node *const operands[] = { base };
		make_add(node, ARRAY_SIZE(operands), operands, reg_reqs, &imm_attr, out_reg);
		return;
	}

	/* Everything below handles plain "base + index" only. */
	if (addr->variant != X86_ADDR_BASE_INDEX)
		return;
	if (addr->log_scale != 0 || addr->immediate.entity || addr->immediate.offset != 0)
		return;

	ir_node *const base_op  = get_irn_n(node, addr->base_input);
	ir_node *const index_op = get_irn_n(node, addr->index_input);

	/* The operand that shares the result register goes first; the new add
	 * is created with base_input 0 and a same-as-input-0 output
	 * requirement. */
	ir_node *lhs;
	ir_node *rhs;
	if (out_reg == arch_get_irn_register(base_op)) {
		/* lea (%r1, %r2), %r1 -> add %r2, %r1 */
		lhs = base_op;
		rhs = index_op;
	} else if (out_reg == arch_get_irn_register(index_op)) {
		/* lea (%r1, %r2), %r2 -> add %r1, %r2 */
		lhs = index_op;
		rhs = base_op;
	} else {
		return;
	}

	amd64_binop_addr_attr_t const reg_attr = {
		.base = {
			.base = { .op_mode = AMD64_OP_REG_REG, },
			.size = lea_attr->size,
			.addr = {
				.base_input = 0,
				.variant    = X86_ADDR_REG,
			},
		},
	};
	ir_node *const operands[] = { lhs, rhs };
	make_add(node, ARRAY_SIZE(operands), operands, amd64_reg_reg_reqs, &reg_attr, out_reg);
}
static void peephole_amd64_mov_imm(ir_node *const node)
{
......@@ -38,6 +108,7 @@ static void peephole_amd64_mov_imm(ir_node *const node)
/**
 * Run the amd64 peephole optimizations on a graph.
 *
 * Registers the handlers for amd64 lea and amd64 mov_imm nodes and then
 * invokes the generic peephole driver on @p irg.
 *
 * @param irg  the graph to optimize
 */
void amd64_peephole_optimization(ir_graph *const irg)
{
	/* Reset the per-opcode generic function slots before registering
	 * (presumably so that no handlers of an earlier pass remain). */
	ir_clear_opcodes_generic_func();

	register_peephole_optimization(op_amd64_lea,     peephole_amd64_lea);
	register_peephole_optimization(op_amd64_mov_imm, peephole_amd64_mov_imm);

	be_peephole_opt(irg);
}
......@@ -107,7 +107,7 @@ const x86_asm_constraint_list_t amd64_asm_constraints = {
#define BIT(x) (1u << x)
static const arch_register_req_t amd64_requirement_gp_same_0 = {
arch_register_req_t const amd64_requirement_gp_same_0 = {
.cls = &amd64_reg_classes[CLASS_amd64_gp],
.should_be_same = BIT(0),
.width = 1,
......
......@@ -16,13 +16,14 @@
extern const x86_clobber_name_t amd64_additional_clobber_names[];
extern const x86_asm_constraint_list_t amd64_asm_constraints;
extern arch_register_req_t const *amd64_xmm_reqs[];
extern arch_register_req_t const amd64_requirement_gp_same_0;
extern arch_register_req_t const *amd64_xmm_reqs[];
extern arch_register_req_t const **const gp_am_reqs[];
extern arch_register_req_t const *reg_reqs[];
extern arch_register_req_t const *rsp_reg_mem_reqs[];
extern arch_register_req_t const *xmm_reg_mem_reqs[];
extern arch_register_req_t const *amd64_reg_reg_reqs[];
extern arch_register_req_t const *amd64_xmm_xmm_reqs[];
extern arch_register_req_t const *reg_reqs[];
extern arch_register_req_t const *rsp_reg_mem_reqs[];
extern arch_register_req_t const *xmm_reg_mem_reqs[];
extern arch_register_req_t const *amd64_reg_reg_reqs[];
extern arch_register_req_t const *amd64_xmm_xmm_reqs[];
void amd64_init_transform(void);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment