Commit 3516aec5 authored by Matthias Braun's avatar Matthias Braun
Browse files

- fix x87 simulator not killing float copies that only move registers around
  when the operand is a constant
- work around a gas bug which swaps fsubp/fsubrp and fdivp/fdivrp
parent 4953a88a
......@@ -3,9 +3,8 @@
* @author Christian Wuerdig
* $Id$
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#include <config.h>
#endif
#ifdef HAVE_MALLOC_H
......@@ -425,6 +424,161 @@ static void ia32_abi_dont_save_regs(void *self, pset *s)
pset_insert_ptr(s, env->isa->bp);
}
#if 0
/**
 * Count the registers, over all register classes of the ISA, whose type
 * is callee_save.
 *
 * @param cg  the ia32 code generator (not read by the body as written)
 * @return    the number of callee-save registers found
 *
 * NOTE(review): `isa` is declared but never assigned before it is passed to
 * arch_isa_get_n_reg_class() and arch_isa_get_reg_class(). It presumably
 * has to be taken from `cg` -- confirm before enabling this code.
 */
static unsigned count_callee_saves(ia32_code_gen_t *cg)
{
unsigned callee_saves = 0;
int c, num_reg_classes;
arch_isa_if_t *isa;
num_reg_classes = arch_isa_get_n_reg_class(isa);
/* walk every register class ... */
for(c = 0; c < num_reg_classes; ++c) {
int r, num_registers;
arch_register_class_t *regclass = arch_isa_get_reg_class(isa, c);
num_registers = arch_register_class_n_regs(regclass);
/* ... and every register in it, counting the callee-save ones */
for(r = 0; r < num_registers; ++r) {
arch_register_t *reg = arch_register_for_index(regclass, r);
if(arch_register_type_is(reg, callee_save))
callee_saves++;
}
}
return callee_saves;
}
/**
 * Create one Proj of the regparams node for every callee-save register and
 * record it in cg->initial_regs[regclass][reg].
 *
 * Each Proj gets the next free output number `n` of regparams; that output
 * is constrained to the register and the Proj is assigned the register.
 *
 * @param cg         the ia32 code generator; cg->obst is used for the
 *                   allocation and cg->initial_regs is filled in
 * @param regparams  the node the Projs are created for
 *
 * NOTE(review): draft code, several problems are visible:
 *  - `isa` is never assigned before use.
 *  - `irg` and `start_block` are not declared in this function.
 *  - the second obstack_alloc() call has no obstack argument, unlike the
 *    first one which passes cg->obst.
 *  - slots of initial_regclass for registers that are not callee-save are
 *    never written (the `continue` skips the store); presumably they stay
 *    uninitialized -- confirm whether obstack_alloc() zeroes memory.
 */
static void create_callee_save_regprojs(ia32_code_gen_t *cg, ir_node *regparams)
{
int c, num_reg_classes;
arch_isa_if_t *isa;
long n = 0;
num_reg_classes = arch_isa_get_n_reg_class(isa);
/* one row of Proj pointers per register class */
cg->initial_regs = obstack_alloc(cg->obst,
num_reg_classes * sizeof(cg->initial_regs[0]));
for(c = 0; c < num_reg_classes; ++c) {
int r, num_registers;
ir_node **initial_regclass;
arch_register_class_t *regclass = arch_isa_get_reg_class(isa, c);
num_registers = arch_register_class_n_regs(regclass);
initial_regclass = obstack_alloc(num_registers * sizeof(initial_regclass[0]));
for(r = 0; r < num_registers; ++r) {
ir_node *proj;
arch_register_t *reg = arch_register_for_index(regclass, r);
/* only callee-save registers get a Proj */
if(!arch_register_type_is(reg, callee_save))
continue;
proj = new_r_Proj(irg, start_block, regparams, n);
be_set_constr_single_reg(regparams, n, reg);
arch_set_irn_register(cg->arch_env, proj, reg);
initial_regclass[r] = proj;
n++;
}
cg->initial_regs[c] = initial_regclass;
}
}
/**
 * Append the initial-value Proj of every callee-save register
 * (cg->initial_regs[c][r]) to the object currently growing on cg->obst.
 *
 * @param cg  the ia32 code generator providing initial_regs and obst
 *
 * NOTE(review): draft code, it cannot work as written:
 *  - `num_reg_classes` is read in the loop condition but never assigned.
 *  - `regclass` is used but not declared in this function.
 *  - `isa` is declared but never used.
 */
static void callee_saves_obstack_grow(ia32_code_gen_t *cg)
{
int c, num_reg_classes;
arch_isa_if_t *isa;
for(c = 0; c < num_reg_classes; ++c) {
int r, num_registers;
num_registers = arch_register_class_n_regs(regclass);
for(r = 0; r < num_registers; ++r) {
ir_node *proj;
arch_register_t *reg = arch_register_for_index(regclass, r);
/* registers that are not callee-save have no recorded Proj */
if(!arch_register_type_is(reg, callee_save))
continue;
proj = cg->initial_regs[c][r];
obstack_ptr_grow(cg->obst, proj);
}
}
}
/**
 * Report how many parameters are passed in registers.
 *
 * This is a stub: the code generator argument is ignored and the result
 * is always zero.
 *
 * @param cg  the ia32 code generator (unused)
 * @return    always 0
 */
static unsigned count_parameters_in_regs(ia32_code_gen_t *cg)
{
	const unsigned n_params_in_regs = 0;

	(void)cg;
	return n_params_in_regs;
}
/**
 * Generate the routine prologue (draft).
 *
 * Steps as written:
 *  1. create a RegParams node in the start block with one output per
 *     callee-save register plus one per register parameter, and create the
 *     callee-save Projs for it;
 *  2. unless the frame pointer is omitted, push ebp, copy esp to ebp and
 *     record the new sp/bp nodes in the register map;
 *  3. create an IncSP with BE_STACK_FRAME_SIZE_EXPAND and install it as the
 *     frame of the graph.
 *
 * @param cg  the ia32 code generator; cg->irg is the graph being processed
 *
 * NOTE(review): this does not compile as written. `omit_fp`, `env`,
 * `reg_map`, `mem` and `initialsp` are not declared in this function, and
 * `sp` is passed to be_new_IncSP() before it has been assigned. The
 * frame-pointer part looks as if it was taken over from another routine
 * with a different parameter list -- confirm.
 */
static void ia32_gen_prologue(ia32_code_gen_t *cg)
{
ir_graph *irg = cg->irg;
ir_node *start_block = get_irg_start_block(irg);
ir_node *sp;
ir_node *regparams;
int n_regparams_out;
/* Create the regparams node */
n_regparams_out = count_callee_saves(cg) + count_parameters_in_regs(cg);
regparams = be_new_RegParams(irg, start_block, n_regparams_out);
create_callee_save_regprojs(cg, regparams);
/* Setup the stack */
if(!omit_fp) {
ir_node *bl = get_irg_start_block(env->irg);
ir_node *curr_sp = be_abi_reg_map_get(reg_map, env->isa->sp);
ir_node *curr_bp = be_abi_reg_map_get(reg_map, env->isa->bp);
ir_node *noreg = ia32_new_NoReg_gp(cg);
ir_node *push;
/* push ebp */
push = new_rd_ia32_Push(NULL, env->irg, bl, noreg, noreg, curr_bp, curr_sp, *mem);
/* the Push yields the new stack pointer and the new memory state */
curr_sp = new_r_Proj(env->irg, bl, push, get_irn_mode(curr_sp), pn_ia32_Push_stack);
*mem = new_r_Proj(env->irg, bl, push, mode_M, pn_ia32_Push_M);
/* the push must have SP out register */
arch_set_irn_register(env->aenv, curr_sp, env->isa->sp);
set_ia32_flags(push, arch_irn_flags_ignore);
/* move esp to ebp */
curr_bp = be_new_Copy(env->isa->bp->reg_class, env->irg, bl, curr_sp);
be_set_constr_single_reg(curr_bp, BE_OUT_POS(0), env->isa->bp);
arch_set_irn_register(env->aenv, curr_bp, env->isa->bp);
be_node_set_flags(curr_bp, BE_OUT_POS(0), arch_irn_flags_ignore);
/* beware: the copy must be done before any other sp use */
curr_sp = be_new_CopyKeep_single(env->isa->sp->reg_class, env->irg, bl, curr_sp, curr_bp, get_irn_mode(curr_sp));
be_set_constr_single_reg(curr_sp, BE_OUT_POS(0), env->isa->sp);
arch_set_irn_register(env->aenv, curr_sp, env->isa->sp);
be_node_set_flags(curr_sp, BE_OUT_POS(0), arch_irn_flags_ignore);
/* publish the updated stack and base pointer nodes */
be_abi_reg_map_set(reg_map, env->isa->sp, curr_sp);
be_abi_reg_map_set(reg_map, env->isa->bp, curr_bp);
}
/* NOTE(review): `sp` is still unassigned when it is passed here */
sp = be_new_IncSP(sp, irg, start_block, initialsp, BE_STACK_FRAME_SIZE_EXPAND);
set_irg_frame(irg, sp);
}
/**
 * Generate the routine epilogue (draft).
 *
 * Intended steps, as far as the code shows: size a barrier for all
 * callee-save and result registers, create it in the end block, then
 * remove the stack frame with an IncSP using BE_STACK_FRAME_SIZE_SHRINK
 * that depends on the memory state.
 *
 * @param cg  the ia32 code generator
 *
 * NOTE(review): this is unfinished. The be_new_Barrier() call is never
 * closed (its argument list ends in a comma), `irg`, `env`, `bl`,
 * `curr_sp` and `mem` are not declared in this function, and `in` is
 * declared but never used.
 */
static void ia32_gen_epilogue(ia32_code_gen_t *cg)
{
int n_callee_saves = count_callee_saves(cg);
int n_results_regs = 0;
int barrier_size;
ir_node *barrier;
ir_node *end_block = get_irg_end_block(irg);
ir_node **in;
/* We have to make sure that all reloads occur before the stack frame
gets destroyed, so we create a barrier for all callee-save and return
values here */
barrier_size = n_callee_saves + n_results_regs;
barrier = be_new_Barrier(irg, end_block, barrier_size,
/* simply remove the stack frame here */
curr_sp = be_new_IncSP(env->isa->sp, env->irg, bl, curr_sp, BE_STACK_FRAME_SIZE_SHRINK);
add_irn_dep(curr_sp, *mem);
}
#endif
/**
* Generate the routine prologue.
*
......
......@@ -110,6 +110,11 @@ typedef struct _ia32_code_gen_t {
ir_node *noreg_gp; /**< unique NoReg_GP node */
ir_node *noreg_vfp; /**< unique NoReg_VFP node */
ir_node *noreg_xmm; /**< unique NoReg_XMM node */
ir_node ***initial_regs; /**< proj nodes that represent the initial register
values initial_regs[regclass][reg] */
struct obstack *obst;
DEBUG_ONLY(firm_dbg_module_t *mod;) /**< debugging module */
} ia32_code_gen_t;
......
......@@ -937,12 +937,14 @@ static void emit_ia32_x87CondJmp(ia32_emit_env_t *env, const ir_node *node) {
switch (get_ia32_irn_opcode(node)) {
case iro_ia32_fcomrJmp:
pnc = get_inversed_pnc(pnc);
reg = attr->x87[0]->name;
case iro_ia32_fcomJmp:
default:
ia32_emit_cstring(env, "\tfucom ");
break;
case iro_ia32_fcomrpJmp:
pnc = get_inversed_pnc(pnc);
reg = attr->x87[0]->name;
case iro_ia32_fcompJmp:
ia32_emit_cstring(env, "\tfucomp ");
break;
......
......@@ -1560,6 +1560,9 @@ if (get_ia32_immop_type(node) == ia32_ImmNone) {
# /_/\_\___//_/ |_| |_|\___/ \__,_|\__| |_| |_|\___/ \__,_|\___||___/ #
#------------------------------------------------------------------------#
# Note: gas has a bug: fdivrp and fdivp, as well as fsubrp and fsubp,
# are swapped. We work around this in the emitter...
"fadd" => {
"op_flags" => "R",
"rd_constructor" => "NONE",
......@@ -1605,7 +1608,8 @@ if (get_ia32_immop_type(node) == ia32_ImmNone) {
"rd_constructor" => "NONE",
"comment" => "x87 fp Sub: Sub(a, b) = a - b",
"reg_req" => { },
"emit" => '. fsubp %x87_binop',
# see note about gas bugs
"emit" => '. fsubrp %x87_binop',
},
"fsubr" => {
......@@ -1623,7 +1627,8 @@ if (get_ia32_immop_type(node) == ia32_ImmNone) {
"irn_flags" => "R",
"comment" => "x87 fp SubR: SubR(a, b) = b - a",
"reg_req" => { },
"emit" => '. fsubrp %x87_binop',
# see note about gas bugs
"emit" => '. fsubp %x87_binop',
},
"fprem" => {
......@@ -1657,7 +1662,8 @@ if (get_ia32_immop_type(node) == ia32_ImmNone) {
"rd_constructor" => "NONE",
"comment" => "x87 fp Div: Div(a, b) = a / b",
"reg_req" => { },
"emit" => '. fdivp %x87_binop',
# see note about gas bugs
"emit" => '. fdivrp %x87_binop',
},
"fdivr" => {
......@@ -1673,7 +1679,8 @@ if (get_ia32_immop_type(node) == ia32_ImmNone) {
"rd_constructor" => "NONE",
"comment" => "x87 fp DivR: DivR(a, b) = b / a",
"reg_req" => { },
"emit" => '. fdivrp %x87_binop',
# see note about gas bugs
"emit" => '. fdivp %x87_binop',
},
"fabs" => {
......
......@@ -6,10 +6,9 @@
*
* $Id$
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif /* HAVE_CONFIG_H */
#include <config.h>
#endif
#include <assert.h>
......@@ -36,11 +35,11 @@
#define N_x87_REGS 8
/* first and second binop index */
#define BINOP_IDX_1 2
#define BINOP_IDX_1 2
#define BINOP_IDX_2 3
/* the unop index */
#define UNOP_IDX 0
#define UNOP_IDX 0
/* the store val index */
#define STORE_VAL_IDX 2
......@@ -129,6 +128,7 @@ static int x87_get_depth(const x87_state *state) {
return state->depth;
}
#if 0
/**
* Check if the state is empty.
*
......@@ -139,6 +139,7 @@ static int x87_get_depth(const x87_state *state) {
/**
 * Tell whether the simulated x87 stack holds no values.
 *
 * @param state  the x87 state
 * @return       non-zero if the stack depth is zero, 0 otherwise
 */
static int x87_state_is_empty(const x87_state *state) {
	return !state->depth;
}
#endif
/**
* Return the virtual register index at st(pos).
......@@ -176,7 +177,8 @@ static void x87_dump_stack(const x87_state *state) {
int i;
for (i = state->depth - 1; i >= 0; --i) {
DB((dbg, LEVEL_2, "vf%d ", x87_get_st_reg(state, i)));
DB((dbg, LEVEL_2, "vf%d(%+F) ", x87_get_st_reg(state, i),
x87_get_st_node(state, i)));
}
DB((dbg, LEVEL_2, "<-- TOS\n"));
} /* x87_dump_stack */
......@@ -868,6 +870,7 @@ static int sim_binop(x87_state *state, ir_node *n, const exchange_tmpl *tmpl) {
int op2_idx, op1_idx;
int out_idx, do_pop = 0;
ia32_attr_t *attr;
ir_node *patched_insn;
ir_op *dst;
x87_simulator *sim = state->sim;
const arch_register_t *op1 = x87_get_irn_register(sim, get_irn_n(n, BINOP_IDX_1));
......@@ -916,8 +919,7 @@ static int sim_binop(x87_state *state, ir_node *n, const exchange_tmpl *tmpl) {
out_idx = 0;
dst = tmpl->normal_op;
}
}
else {
} else {
/* Second operand is dead. */
if (is_vfp_live(arch_register_get_index(op1), live)) {
/* First operand is live: bring second to tos. */
......@@ -930,8 +932,7 @@ static int sim_binop(x87_state *state, ir_node *n, const exchange_tmpl *tmpl) {
/* now do fxxxr (tos = op X tos) */
out_idx = 0;
dst = tmpl->reverse_op;
}
else {
} else {
/* Both operands are dead here, pop them from the stack. */
if (op2_idx == 0) {
if (op1_idx == 0) {
......@@ -939,22 +940,19 @@ static int sim_binop(x87_state *state, ir_node *n, const exchange_tmpl *tmpl) {
/* here fxxx (tos = tos X tos) */
dst = tmpl->normal_op;
out_idx = 0;
}
else {
} else {
/* now do fxxxp (op = op X tos, pop) */
dst = tmpl->normal_pop_op;
do_pop = 1;
out_idx = op1_idx;
}
}
else if (op1_idx == 0) {
} else if (op1_idx == 0) {
assert(op1_idx != op2_idx);
/* now do fxxxrp (op = tos X op, pop) */
dst = tmpl->reverse_pop_op;
do_pop = 1;
out_idx = op2_idx;
}
else {
} else {
/* Bring the second on top. */
x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
if (op1_idx == op2_idx) {
......@@ -964,8 +962,7 @@ static int sim_binop(x87_state *state, ir_node *n, const exchange_tmpl *tmpl) {
/* use fxxx (tos = tos X tos) */
dst = tmpl->normal_op;
out_idx = 0;
}
else {
} else {
/* op2 is on tos now */
op2_idx = 0;
/* use fxxxp (op = op X tos, pop) */
......@@ -976,8 +973,7 @@ static int sim_binop(x87_state *state, ir_node *n, const exchange_tmpl *tmpl) {
}
}
}
}
else {
} else {
/* second operand is an address mode */
if (is_vfp_live(arch_register_get_index(op1), live)) {
/* first operand is live: push it here */
......@@ -986,8 +982,7 @@ static int sim_binop(x87_state *state, ir_node *n, const exchange_tmpl *tmpl) {
/* use fxxx (tos = tos X mem) */
dst = tmpl->normal_op;
out_idx = 0;
}
else {
} else {
/* first operand is dead: bring it to tos */
if (op1_idx != 0) {
x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
......@@ -1000,7 +995,8 @@ static int sim_binop(x87_state *state, ir_node *n, const exchange_tmpl *tmpl) {
}
}
x87_set_st(state, arch_register_get_index(out), x87_patch_insn(n, dst), out_idx);
patched_insn = x87_patch_insn(n, dst);
x87_set_st(state, arch_register_get_index(out), patched_insn, out_idx);
if (do_pop) {
x87_pop(state);
}
......@@ -1261,7 +1257,6 @@ static int sim_Phi(x87_state *state, ir_node *n, const arch_env_t *env) {
return 0;
} /* sim_Phi */
#define _GEN_BINOP(op, rev) \
static int sim_##op(x87_state *state, ir_node *n) { \
exchange_tmpl tmpl = { op_ia32_##op, op_ia32_##rev, op_ia32_##op##p, op_ia32_##rev##p }; \
......@@ -1362,8 +1357,7 @@ static int sim_fCondJmp(x87_state *state, ir_node *n) {
op1_idx = 0;
dst = op_ia32_fcomJmp;
}
}
else {
} else {
/* second live, first operand is dead here, bring it to tos.
This means further, op1_idx != op2_idx. */
assert(op1_idx != op2_idx);
......@@ -1376,8 +1370,7 @@ static int sim_fCondJmp(x87_state *state, ir_node *n) {
dst = op_ia32_fcompJmp;
pop_cnt = 1;
}
}
else {
} else {
/* second operand is dead */
if (is_vfp_live(arch_register_get_index(op1), live)) {
/* first operand is live: bring second to tos.
......@@ -1391,8 +1384,7 @@ static int sim_fCondJmp(x87_state *state, ir_node *n) {
}
dst = op_ia32_fcomrpJmp;
pop_cnt = 1;
}
else {
} else {
/* both operands are dead here, check first for identity. */
if (op1_idx == op2_idx) {
/* identically, one pop needed */
......@@ -1415,8 +1407,7 @@ static int sim_fCondJmp(x87_state *state, ir_node *n) {
}
dst = op_ia32_fcomppJmp;
pop_cnt = 2;
}
else if (op1_idx == 1) {
} else if (op1_idx == 1) {
/* good, first operand is already in the right place, move the second */
if (op2_idx != 0) {
/* bring the first on top */
......@@ -1425,8 +1416,7 @@ static int sim_fCondJmp(x87_state *state, ir_node *n) {
}
dst = op_ia32_fcomrppJmp;
pop_cnt = 2;
}
else {
} else {
/* if one is already the TOS, we need two fxch */
if (op1_idx == 0) {
/* first one is TOS, move to st(1) */
......@@ -1436,8 +1426,7 @@ static int sim_fCondJmp(x87_state *state, ir_node *n) {
op2_idx = 0;
dst = op_ia32_fcomrppJmp;
pop_cnt = 2;
}
else if (op2_idx == 0) {
} else if (op2_idx == 0) {
/* second one is TOS, move to st(1) */
x87_create_fxch(state, n, 1, BINOP_IDX_2);
op2_idx = 1;
......@@ -1445,8 +1434,7 @@ static int sim_fCondJmp(x87_state *state, ir_node *n) {
op1_idx = 0;
dst = op_ia32_fcomrppJmp;
pop_cnt = 2;
}
else {
} else {
/* none of them is either TOS or st(1), 3 fxch needed */
x87_create_fxch(state, n, op2_idx, BINOP_IDX_2);
x87_create_fxch(state, n, 1, BINOP_IDX_2);
......@@ -1459,8 +1447,7 @@ static int sim_fCondJmp(x87_state *state, ir_node *n) {
}
}
}
}
else {
} else {
/* second operand is an address mode */
if (is_vfp_live(arch_register_get_index(op1), live)) {
/* first operand is live: bring it to TOS */
......@@ -1469,8 +1456,7 @@ static int sim_fCondJmp(x87_state *state, ir_node *n) {
op1_idx = 0;
}
dst = op_ia32_fcomJmp;
}
else {
} else {
/* first operand is dead: bring it to tos */
if (op1_idx != 0) {
x87_create_fxch(state, n, op1_idx, BINOP_IDX_1);
......@@ -1508,6 +1494,70 @@ static int sim_fCondJmp(x87_state *state, ir_node *n) {
return 0;
} /* sim_fCondJmp */
/**
 * Build the node that places a copy of a float value on the simulated
 * x87 stack.
 *
 * When the copied value comes from one of the x87 constant loads (fldz,
 * fld1, fldpi, fldl2e, fldl2t, fldlg2, fldln2) a new constant node of the
 * same kind is created instead of a copy. Otherwise an fpushCopy of the
 * predecessor is created, with its x87[0] attribute set to the stack slot
 * currently holding the predecessor. In both cases the result register is
 * ia32_st_regs[0]; the new node is assigned that register and pushed onto
 * the simulated stack.
 *
 * @param state  the x87 state
 * @param n      the copy node being simulated
 * @return       the newly created node
 */
static ir_node *create_Copy(x87_state *state, ir_node *n) {
	x87_simulator *sim   = state->sim;
	ir_graph      *irg   = get_irn_irg(n);
	dbg_info      *dbgi  = get_irn_dbg_info(n);
	ir_mode       *mode  = get_irn_mode(n);
	ir_node       *block = get_nodes_block(n);
	ir_node       *src   = get_irn_n(n, 0);
	ir_node     *(*make_const)(dbg_info *, ir_graph *, ir_node *, ir_mode *);
	const arch_register_t *dst_reg;
	const arch_register_t *src_reg;
	ir_node       *copy;
	ia32_attr_t   *copy_attr;

	/* Constants are recreated rather than copied: pick the constructor. */
	switch (get_ia32_irn_opcode(src)) {
	case iro_ia32_fldz:   make_const = new_rd_ia32_fldz;   break;
	case iro_ia32_fld1:   make_const = new_rd_ia32_fld1;   break;
	case iro_ia32_fldpi:  make_const = new_rd_ia32_fldpi;  break;
	case iro_ia32_fldl2e: make_const = new_rd_ia32_fldl2e; break;
	case iro_ia32_fldl2t: make_const = new_rd_ia32_fldl2t; break;
	case iro_ia32_fldlg2: make_const = new_rd_ia32_fldlg2; break;
	case iro_ia32_fldln2: make_const = new_rd_ia32_fldln2; break;
	default:              make_const = NULL;               break;
	}

	dst_reg = x87_get_irn_register(sim, n);
	src_reg = x87_get_irn_register(sim, src);

	if (make_const == NULL) {
		/* ordinary value: push a copy of the slot that holds it */
		int src_idx = x87_on_stack(state, arch_register_get_index(src_reg));

		copy      = new_rd_ia32_fpushCopy(dbgi, irg, block, src, mode);
		copy_attr = get_ia32_attr(copy);
		src_reg   = &ia32_st_regs[src_idx];
		copy_attr->x87[0] = src_reg;
	} else {
		/* constant: create a fresh load of the same constant */
		copy      = make_const(dbgi, irg, block, mode);
		copy_attr = get_ia32_attr(copy);
	}

	/* the result is always produced in ia32_st_regs[0] */
	dst_reg = &ia32_st_regs[0];
	copy_attr->x87[2] = dst_reg;

	arch_set_irn_register(sim->env, copy, dst_reg);
	x87_push(state, arch_register_get_index(dst_reg), copy);

	DB((dbg, LEVEL_1, ">>> %+F -> %s\n", copy, arch_register_get_name(dst_reg)));
	return copy;
}
/**
* Simulate a be_Copy.
*
......@@ -1515,158 +1565,87 @@ static int sim_fCondJmp(x87_state *state, ir_node *n) {
* @param n the node that should be simulated (and patched)
*/
static int sim_Copy(x87_state *state, ir_node *n) {
x87_simulator *sim;
ir_node *pred;
const arch_register_t *out;
const arch_register_t *op1;
ir_node *node, *next;
ia32_attr_t *attr;
int op1_idx, out_idx;
unsigned live;
ir_mode *mode = get_irn_mode(n);
if (mode_is_float(mode)) {
x87_simulator *sim = state->sim;
ir_node *pred = get_irn_n(n, 0);
const arch_register_t *out = x87_get_irn_register(sim, n);
const arch_register_t *op1 = x87_get_irn_register(sim, pred);
ir_node *node, *next;
ia32_attr_t *attr;
int op1_idx, out_idx;
unsigned live = vfp_live_args_after(sim, n, REGMASK(out));
ir_node *(*cnstr)(dbg_info *, ir_graph *, ir_node *, ir_mode *);
DB((dbg, LEVEL_1, ">>> %+F %s -> %s\n", n,
arch_register_get_name(op1), arch_register_get_name(out)));
DEBUG_ONLY(vfp_dump_live(live));
if (!mode_is_float(mode))
return 0;
/* Do not copy constants, recreate them. */
switch (get_ia32_irn_opcode(pred)) {
case iro_ia32_fldz:
cnstr = new_rd_ia32_fldz;
break;
case iro_ia32_fld1:
cnstr = new_rd_ia32_fld1;
break;
case iro_ia32_fldpi:
cnstr = new_rd_ia32_fldpi;
break;
case iro_ia32_fldl2e:
cnstr = new_rd_ia32_fldl2e;
break;
case iro_ia32_fldl2t:
cnstr = new_rd_ia32_fldl2t;
break;
case iro_ia32_fldlg2:
cnstr = new_rd_ia32_fldlg2;
break;
case iro_ia32_fldln2:
cnstr = new_rd_ia32_fldln2;
break;
default:
goto no_constant;
}
sim = state->sim;
pred = get_irn_n(n, 0);
out = x87_get_irn_register(sim, n);
op1 = x87_get_irn_register(sim, pred);
live = vfp_live_args_after(sim, n, REGMASK(out));
/* copy a constant */
node = (*cnstr)(get_irn_dbg_info(n), get_irn_irg(n), get_nodes_block(n), mode);
arch_set_irn_register(sim->env, node, out);
DB((dbg, LEVEL_1, ">>> %+F %s -> %s\n", n,
arch_register_get_name(op1), arch_register_get_name(out)));
DEBUG_ONLY(vfp_dump_live(live));
x87_push(state, arch_register_get_index(out), node);
/* handle the infamous unknown value */
if (arch_register_get_index(op1) == REG_VFP_UKNWN) {
/* Matze: copies of unknowns should not happen (anymore) */
assert(0);
}
attr = get_ia32_attr(node);
attr->x87[2] = out = &ia32_st_regs[0];
op1_idx = x87_on_stack(state, arch_register_get_index(op1));