Commit 043fc8b6 authored by Matthias Braun
Browse files

ia32: use smaller spillslots where possible, float spills mode_E by default

This should reduce memory requirements in several cases and at the same
time fix a long-standing correctness issue where we spilled float values
with 64-bit precision instead of the full 80-bit precision on x87.
parent 5caa43b7
......@@ -167,12 +167,24 @@ static ir_entity *ia32_get_frame_entity(const ir_node *irn)
return is_ia32_irn(irn) ? get_ia32_frame_ent(irn) : NULL;
}
static void ia32_set_frame_entity(ir_node *node, ir_entity *entity)
static void ia32_set_frame_entity(ir_node *node, ir_entity *entity,
const ir_type *type)
{
if (is_be_node(node))
be_node_set_frame_entity(node, entity);
else
set_ia32_frame_ent(node, entity);
set_ia32_frame_ent(node, entity);
ia32_attr_t *attr = get_ia32_attr(node);
/* set ls_mode based on entity unless we explicitly requested
* a certain mode */
if (attr->data.need_32bit_stackent || attr->data.need_64bit_stackent
|| is_ia32_Conv_I2I(node))
return;
ir_mode *mode = get_type_mode(type);
/* 8bit stores have a special register requirement, so we can't simply
* change the ls_mode to 8bit here. The "hack" in
* ia32_collect_frame_entity_nodes() should take care that it never happens
*/
assert(!is_ia32_Store(node) || get_mode_size_bits(mode) > 8);
set_ia32_ls_mode(node, mode);
}
static void ia32_set_frame_offset(ir_node *irn, int bias)
......@@ -310,11 +322,15 @@ static int ia32_get_op_estimated_cost(ir_node const *const irn)
return cost;
}
static ir_mode *get_spill_mode(const ir_mode *mode)
static ir_mode *get_spill_mode(const ir_node *value)
{
if (mode_is_float(mode))
return precise_x87_spills ? ia32_mode_E : ia32_mode_float64;
return ia32_mode_gp;
/* determine a sensible spill mode and try to make it small */
const ir_node *skipped = skip_Proj_const(value);
if (is_ia32_fld(skipped) || is_ia32_Load(skipped))
return get_ia32_ls_mode(skipped);
ir_mode *mode = get_irn_mode(value);
return mode_is_float(mode) ? ia32_mode_E : ia32_mode_gp;
}
/**
......@@ -549,13 +565,12 @@ static void ia32_before_ra(ir_graph *irg)
static ir_node *ia32_new_spill(ir_node *value, ir_node *after)
{
ir_graph *irg = get_irn_irg(value);
ir_node *block = get_block(after);
ir_node *frame = get_irg_frame(irg);
ir_mode *value_mode = get_irn_mode(value);
ir_mode *mode = get_spill_mode(value_mode);
ir_node *noreg = ia32_new_NoReg_gp(irg);
ir_node *nomem = get_irg_no_mem(irg);
ir_graph *irg = get_irn_irg(value);
ir_node *block = get_block(after);
ir_node *frame = get_irg_frame(irg);
ir_mode *mode = get_spill_mode(value);
ir_node *noreg = ia32_new_NoReg_gp(irg);
ir_node *nomem = get_irg_no_mem(irg);
ir_node *res;
ir_node *store;
......@@ -591,16 +606,17 @@ static ir_node *ia32_new_reload(ir_node *value, ir_node *spill, ir_node *before)
ir_graph *irg = get_irn_irg(before);
ir_node *block = get_block(before);
ir_mode *mode = get_irn_mode(value);
ir_mode *spillmode = get_spill_mode(mode);
ir_mode *spillmode = get_spill_mode(value);
ir_node *noreg = ia32_new_NoReg_gp(irg);
ir_node *frame = get_irg_frame(irg);
ir_node *load;
ir_node *load;
if (mode_is_float(spillmode)) {
if (ia32_cg_config.use_sse2)
if (ia32_cg_config.use_sse2) {
load = new_bd_ia32_xLoad(NULL, block, frame, noreg, spill, spillmode);
else
} else {
load = new_bd_ia32_fld(NULL, block, frame, noreg, spill, spillmode);
}
} else if (get_mode_size_bits(spillmode) == 128) {
/* Reload 128 bit SSE registers */
load = new_bd_ia32_xxLoad(NULL, block, frame, noreg, spill);
......@@ -615,7 +631,6 @@ static ir_node *ia32_new_reload(ir_node *value, ir_node *spill, ir_node *before)
sched_add_before(before, load);
ir_node *proj = new_r_Proj(load, mode, pn_ia32_res);
return proj;
}
......@@ -819,6 +834,13 @@ static void ia32_collect_frame_entity_nodes(ir_node *node, void *data)
mode = mode_Ls;
} else {
mode = get_ia32_ls_mode(node);
/* stupid hack: in some situations (like reloads folded into ConvI2I
* with 8bit mode), an 8bit entity and reload+spill would suffice, but
* an 8bit store has special register requirements on ia32 which we may
* not be able to fulfill anymore at this point, so extend the spillslot
* size to 16bit :-( */
if (get_mode_size_bits(mode) == 8)
mode = mode_Hu;
}
ir_type *type = get_type_for_mode(mode);
be_load_needs_frame_entity(env, node, type);
......@@ -867,6 +889,7 @@ static void introduce_epilog(ir_node *ret)
/* pop ebp */
pop = new_bd_ia32_PopEbp(NULL, block, curr_mem, curr_sp);
set_ia32_ls_mode(pop, ia32_mode_gp);
curr_bp = new_r_Proj(pop, mode_gp, pn_ia32_PopEbp_res);
curr_sp = new_r_Proj(pop, mode_gp, pn_ia32_PopEbp_stack);
curr_mem = new_r_Proj(pop, mode_M, pn_ia32_Pop_M);
......@@ -910,9 +933,10 @@ static void introduce_prolog_epilog(ir_graph *irg)
ir_node *mem = get_irg_initial_mem(irg);
ir_node *noreg = ia32_new_NoReg_gp(irg);
ir_node *initial_bp = be_get_initial_reg_value(irg, bp);
ir_node *push = new_bd_ia32_Push(NULL, block, noreg, noreg, mem, initial_bp, initial_sp);
ir_node *push = new_bd_ia32_Push(NULL, block, noreg, noreg, mem,
initial_bp, initial_sp);
ir_node *curr_sp = new_r_Proj(push, mode_gp, pn_ia32_Push_stack);
ir_node *incsp;
set_ia32_ls_mode(push, ia32_mode_gp);
arch_set_irn_register(curr_sp, sp);
sched_add_after(start, push);
......@@ -926,7 +950,7 @@ static void introduce_prolog_epilog(ir_graph *irg)
be_set_constr_single_reg_out(curr_sp, 0, sp, arch_register_req_type_produces_sp);
edges_reroute_except(initial_bp, curr_bp, push);
incsp = be_new_IncSP(sp, block, curr_sp, frame_size, 0);
ir_node *incsp = be_new_IncSP(sp, block, curr_sp, frame_size, 0);
edges_reroute_except(initial_sp, incsp, push);
sched_add_after(curr_sp, incsp);
......
......@@ -101,6 +101,12 @@ static ir_node *create_fpu_mode_spill(void *env, ir_node *state, bool force,
return NULL;
}
/**
 * Flag an ia32 node as needing a 32bit stack entity.
 *
 * Sets the need_32bit_stackent bit in the node's ia32 attribute so the
 * frame entity assigned to this node is not sized from its ls_mode.
 */
static void set_32bit_stackent(ir_node *node)
{
	get_ia32_attr(node)->data.need_32bit_stackent = true;
}
static ir_node *create_fldcw_ent(ir_node *block, ir_entity *entity)
{
ir_graph *irg = get_irn_irg(block);
......@@ -114,6 +120,7 @@ static ir_node *create_fldcw_ent(ir_node *block, ir_entity *entity)
set_ia32_am_ent(reload, entity);
set_ia32_use_frame(reload);
arch_set_irn_register(reload, &ia32_registers[REG_FPCW]);
set_32bit_stackent(reload);
return reload;
}
......@@ -147,6 +154,7 @@ static ir_node *create_fpu_mode_reload(void *env, ir_node *state,
set_ia32_op_type(reload, ia32_AddrModeS);
set_ia32_ls_mode(reload, ia32_reg_classes[CLASS_ia32_fp_cw].mode);
set_ia32_use_frame(reload);
set_32bit_stackent(reload);
arch_set_irn_register(reload, &ia32_registers[REG_FPCW]);
sched_add_before(before, reload);
......@@ -163,12 +171,14 @@ static ir_node *create_fpu_mode_reload(void *env, ir_node *state,
set_ia32_op_type(cwstore, ia32_AddrModeD);
set_ia32_ls_mode(cwstore, lsmode);
set_ia32_use_frame(cwstore);
set_32bit_stackent(cwstore);
sched_add_before(before, cwstore);
load = new_bd_ia32_Load(NULL, block, frame, noreg, cwstore);
set_ia32_op_type(load, ia32_AddrModeS);
set_ia32_ls_mode(load, lsmode);
set_ia32_use_frame(load);
set_32bit_stackent(load);
sched_add_before(before, load);
load_res = new_r_Proj(load, ia32_mode_gp, pn_ia32_Load_res);
......@@ -186,6 +196,7 @@ static ir_node *create_fpu_mode_reload(void *env, ir_node *state,
/* use ia32_mode_gp, as movl has a shorter opcode than movw */
set_ia32_ls_mode(store, ia32_mode_gp);
set_ia32_use_frame(store);
set_32bit_stackent(store);
store_proj = new_r_Proj(store, mode_M, pn_ia32_Store_M);
sched_add_before(before, store);
......@@ -193,6 +204,7 @@ static ir_node *create_fpu_mode_reload(void *env, ir_node *state,
set_ia32_op_type(fldcw, ia32_AddrModeS);
set_ia32_ls_mode(fldcw, lsmode);
set_ia32_use_frame(fldcw);
set_32bit_stackent(fldcw);
arch_set_irn_register(fldcw, &ia32_registers[REG_FPCW]);
sched_add_before(before, fldcw);
......
......@@ -2154,6 +2154,7 @@ static ir_node *gen_Load(ir_node *node)
new_mem, noreg_GP, mode);
} else {
new_node = new_bd_ia32_Load(dbgi, block, base, idx, new_mem);
mode = ia32_mode_gp;
}
}
int throws_exception = ir_throws_exception(node);
......@@ -3473,6 +3474,17 @@ static ir_node *gen_Mux(ir_node *node)
}
}
/**
 * Pin the size of the stack entity used by an ia32 node.
 *
 * Depending on the bit size of @p mode the node's attribute is flagged with
 * either need_64bit_stackent or need_32bit_stackent.
 *
 * @param node  ia32 node whose attribute gets flagged
 * @param mode  mode that determines the slot size; must be 32 or 64 bits wide
 */
static void force_int_stackent(ir_node *node, ir_mode *mode)
{
	ia32_attr_t *const attr = get_ia32_attr(node);
	const unsigned     bits = get_mode_size_bits(mode);

	if (bits != 64) {
		/* anything that is not 64bit has to be exactly 32bit */
		assert(bits == 32);
		attr->data.need_32bit_stackent = true;
		return;
	}
	attr->data.need_64bit_stackent = true;
}
/**
* Create a conversion from x87 state register to general purpose.
*/
......@@ -3498,11 +3510,12 @@ static ir_node *gen_x87_fp_to_gp(ir_node *node)
assert(get_mode_size_bits(mode) <= 32);
/* exception: we can only store signed 32 bit integers, so for unsigned
we store a 64bit (signed) integer and load the lower bits */
ir_mode *ls_mode = ia32_mode_gp;
if (get_mode_size_bits(mode) == 32 && !mode_is_signed(mode)) {
set_ia32_ls_mode(fist, mode_Ls);
} else {
set_ia32_ls_mode(fist, ia32_mode_gp);
ls_mode = mode_Ls;
}
set_ia32_ls_mode(fist, ls_mode);
force_int_stackent(fist, ls_mode);
SET_IA32_ORIG_NODE(fist, node);
/* do a Load */
......@@ -3512,13 +3525,7 @@ static ir_node *gen_x87_fp_to_gp(ir_node *node)
set_ia32_use_frame(load);
set_ia32_op_type(load, ia32_AddrModeS);
set_ia32_ls_mode(load, ia32_mode_gp);
if (get_ia32_ls_mode(fist) == mode_Ls) {
ia32_attr_t *attr = get_ia32_attr(load);
attr->data.need_64bit_stackent = 1;
} else {
ia32_attr_t *attr = get_ia32_attr(load);
attr->data.need_32bit_stackent = 1;
}
force_int_stackent(load, ls_mode);
SET_IA32_ORIG_NODE(load, node);
return new_r_Proj(load, ia32_mode_gp, pn_ia32_Load_res);
......@@ -3613,15 +3620,19 @@ static void store_gp(dbg_info *dbgi, ia32_address_mode_t *am, ir_node *block,
add_ia32_am_offs_int(zero_store, 4);
set_ia32_ls_mode(zero_store, ia32_mode_gp);
arch_add_irn_flags(zero_store, arch_irn_flag_spill);
ia32_attr_t *zero_store_attr = get_ia32_attr(zero_store);
zero_store_attr->data.need_64bit_stackent = true;
in[0] = zero_store_mem;
in[1] = store_mem;
store_mem = new_rd_Sync(dbgi, new_block, 2, in);
store_mode = mode_Ls;
force_int_stackent(zero_store, store_mode);
} else {
store_mode = ia32_mode_gp;
}
force_int_stackent(store, store_mode);
memset(am, 0, sizeof(*am));
x86_address_t *addr = &am->addr;
......@@ -3651,6 +3662,8 @@ static ir_node *gen_x87_gp_to_fp(ir_node *node)
ir_node *fild = new_bd_ia32_fild(dbgi, new_block, addr->base,
addr->index, addr->mem);
ir_node *new_node = new_r_Proj(fild, mode_fp, pn_ia32_fild_res);
if (addr->use_frame && addr->entity == NULL)
force_int_stackent(fild, am.ls_mode);
set_am_attributes(fild, &am);
SET_IA32_ORIG_NODE(fild, node);
......@@ -3825,6 +3838,7 @@ static void store_fp(dbg_info *dbgi, ia32_address_mode_t *am, ir_node *block,
set_ia32_use_frame(fst);
set_ia32_op_type(fst, ia32_AddrModeD);
arch_add_irn_flags(fst, arch_irn_flag_spill);
force_int_stackent(fst, mode);
ir_node *mem = new_r_Proj(fst, mode_M, pn_ia32_fst_M);
memset(am, 0, sizeof(*am));
......@@ -3870,6 +3884,7 @@ static ir_node *gen_Bitcast(ir_node *const node)
const x86_address_t *addr = &am.addr;
ir_node *fld = new_bd_ia32_fld(dbgi, new_block, addr->base,
addr->index, addr->mem, dst_mode);
force_int_stackent(fld, dst_mode);
res = new_r_Proj(fld, mode_fp, pn_ia32_fld_res);
am.ls_mode = dst_mode;
......@@ -3882,6 +3897,7 @@ static ir_node *gen_Bitcast(ir_node *const node)
const x86_address_t *addr = &am.addr;
ir_node *ld = new_bd_ia32_Load(dbgi, new_block, addr->base, addr->index,
addr->mem);
force_int_stackent(ld, dst_mode);
res = new_r_Proj(ld, ia32_mode_gp, pn_ia32_Load_res);
am.ls_mode = dst_mode;
set_am_attributes(ld, &am);
......@@ -4205,6 +4221,8 @@ static ir_node *gen_ia32_l_LLtoFloat(ir_node *node)
set_ia32_ls_mode(store_high, ia32_mode_gp);
arch_add_irn_flags(store_low, arch_irn_flag_spill);
arch_add_irn_flags(store_high, arch_irn_flag_spill);
force_int_stackent(store_low, mode_Ls);
force_int_stackent(store_high, mode_Ls);
add_ia32_am_offs_int(store_high, 4);
ir_node *in[2] = { mem_low, mem_high };
......@@ -4216,6 +4234,7 @@ static ir_node *gen_ia32_l_LLtoFloat(ir_node *node)
set_ia32_use_frame(fild);
set_ia32_op_type(fild, ia32_AddrModeS);
set_ia32_ls_mode(fild, mode_Ls);
force_int_stackent(fild, mode_Ls);
SET_IA32_ORIG_NODE(fild, node);
......@@ -4271,6 +4290,7 @@ static ir_node *gen_ia32_l_FloattoLL(ir_node *node)
set_ia32_op_type(fist, ia32_AddrModeD);
set_ia32_ls_mode(fist, mode_Ls);
arch_add_irn_flags(fist, arch_irn_flag_spill);
force_int_stackent(fist, mode_Ls);
assert((long)pn_ia32_fist_M == (long) pn_ia32_fisttp_M);
return new_r_Proj(fist, mode_M, pn_ia32_fist_M);
......@@ -4292,10 +4312,7 @@ static ir_node *gen_Proj_l_FloattoLL(ir_node *node)
set_ia32_use_frame(load);
set_ia32_op_type(load, ia32_AddrModeS);
set_ia32_ls_mode(load, ia32_mode_gp);
/* we need a 64bit stackslot (fist stores 64bit) even though we only load
* 32 bit from it with this particular load */
ia32_attr_t *attr = get_ia32_attr(load);
attr->data.need_64bit_stackent = 1;
force_int_stackent(load, mode_Ls);
if (pn == pn_ia32_l_FloattoLL_res_high) {
add_ia32_am_offs_int(load, 4);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment