ia32_fpu.c 8.39 KB
Newer Older
Christian Würdig's avatar
Christian Würdig committed
1
2
/*
 * This file is part of libFirm.
3
 * Copyright (C) 2012 University of Karlsruhe.
Christian Würdig's avatar
Christian Würdig committed
4
5
 */

6
7
8
9
10
11
12
13
14
15
/**
 * @file
 * @brief   Handles fpu rounding modes
 * @author  Matthias Braun
 *
 * The problem we deal with here is that the x86 ABI lets the user control the
 * fpu rounding mode. Operations such as float to int conversion, which the C
 * standard specifies as truncation, therefore require us to spill the current
 * fpu rounding mode, change it, and restore it afterwards.
 */
16
#include "bearch_ia32_t.h"
17
18
#include "ia32_fpu.h"
#include "ia32_new_nodes.h"
19
#include "ia32_architecture.h"
20
21
22
23
24
25
26
#include "gen_ia32_regalloc_if.h"

#include "ircons.h"
#include "irgwalk.h"
#include "tv.h"
#include "array.h"

27
28
29
30
31
32
33
#include "bearch.h"
#include "besched.h"
#include "beabi.h"
#include "benode.h"
#include "bestate.h"
#include "beutil.h"
#include "bessaconstr.h"
34

35
36
37
38
39
40
/*
 * Global entities holding constant fpu control words. Both start out as NULL
 * and are created together by create_fpcw_entities() the first time
 * create_fpu_mode_reload() needs them, which only happens when
 * ia32_cg_config.use_unsafe_floatconv is set.
 */
static ir_entity *fpcw_round    = NULL;
static ir_entity *fpcw_truncate = NULL;

static ir_entity *create_ent(int value, const char *name)
{
	ir_mode   *mode = mode_Hu;
41
	ir_type   *type = new_type_primitive(mode);
42
	set_type_alignment_bytes(type, 4);
Matthias Braun's avatar
Matthias Braun committed
43
44
	ir_type   *glob = get_glob_type();
	ir_entity *ent = new_entity(glob, new_id_from_str(name), type);
45
	set_entity_ld_ident(ent, get_entity_ident(ent));
46
47
	set_entity_visibility(ent, ir_visibility_local);
	add_entity_linkage(ent, IR_LINKAGE_CONSTANT);
48

Matthias Braun's avatar
Matthias Braun committed
49
50
	ir_graph *cnst_irg = get_const_code_irg();
	ir_node  *cnst     = new_r_Const_long(cnst_irg, mode, value);
51
52
53
54
55
56
57
58
59
60
61
	set_atomic_ent_value(ent, cnst);

	return ent;
}

/**
 * Creates the two constant control word entities that are loaded when
 * ia32_cg_config.use_unsafe_floatconv is set.
 *
 * Bits 10-11 of the x87 control word select the rounding mode: 00 rounds to
 * nearest, 11 truncates. create_fpu_mode_reload() sets exactly these bits
 * (3072 == 0xc00) to build its truncating control word. Hence 0x37f is the
 * rounding word and 0xc7f the truncating one.
 */
static void create_fpcw_entities(void)
{
	fpcw_round    = create_ent(0x37f, "_fpcw_round");
	fpcw_truncate = create_ent(0xc7f, "_fpcw_truncate");
}

Matthias Braun's avatar
Matthias Braun committed
62
/**
 * Spill callback passed to be_assure_state(): creates a node that saves the
 * fpu control word defined by @p state and schedules it after @p after.
 *
 * No spill is created (NULL is returned) when @p state is an ia32 ChangeCW
 * node, unless @p force is set.
 *
 * @param env    unused
 * @param state  node defining the control word to save
 * @param force  create the spill even if @p state is a ChangeCW node
 * @param after  node after which the spill is scheduled; if it is a Proj, the
 *               node behind the Proj is used instead
 * @return the spill node, or NULL if none was created
 */
static ir_node *create_fpu_mode_spill(void *env, ir_node *state, bool force,
                                      ir_node *after)
{
	(void)env;

	if (!force && is_ia32_ChangeCW(state))
		return NULL;

	ir_node *const block = get_nodes_block(state);
	/* never schedule relative to a Proj itself; apply skip_Proj() on both
	 * paths below so that they agree on the schedule point */
	ir_node *const sched_point = skip_Proj(after);

	/* we don't spill the fpcw in unsafe mode; a FnstCWNOP node is created
	 * instead */
	if (ia32_cg_config.use_unsafe_floatconv) {
		ir_node *const spill = new_bd_ia32_FnstCWNOP(NULL, block, state);
		sched_add_after(sched_point, spill);
		return spill;
	}

	ir_graph *const irg   = get_irn_irg(state);
	ir_node  *const noreg = ia32_new_NoReg_gp(irg);
	ir_node  *const nomem = get_irg_no_mem(irg);
	ir_node  *const frame = get_irg_frame(irg);
	ir_node  *const spill
		= new_bd_ia32_FnstCW(NULL, block, frame, noreg, nomem, state);
	set_ia32_op_type(spill, ia32_AddrModeD);
	/* use ia32_mode_gp, as movl has a shorter opcode than movw */
	set_ia32_ls_mode(spill, ia32_mode_gp);
	set_ia32_use_frame(spill);

	sched_add_after(sched_point, spill);
	return spill;
}

97
98
99
100
101
102
/**
 * Sets the need_32bit_stackent flag in the ia32 attributes of @p node.
 *
 * @param node  the ia32 node to flag
 */
static void set_32bit_stackent(ir_node *node)
{
	get_ia32_attr(node)->data.need_32bit_stackent = true;
}

103
/**
 * Creates a FldCW node in @p block that loads the fpu control word from
 * @p entity. The node gets the FPCW register assigned; it is not scheduled
 * here.
 *
 * @param block   block in which the node is created
 * @param entity  entity used as the address mode entity of the load
 * @return the new FldCW node
 */
static ir_node *create_fldcw_ent(ir_node *block, ir_entity *entity)
{
	ir_graph *const graph = get_irn_irg(block);
	ir_node  *const mem   = get_irg_no_mem(graph);
	ir_node  *const gp    = ia32_new_NoReg_gp(graph);

	/* neither base nor index register: the address is the entity alone */
	ir_node *const fldcw = new_bd_ia32_FldCW(NULL, block, gp, gp, mem);

	/* source address mode, accessed in the mode of the fp_cw class */
	set_ia32_op_type(fldcw, ia32_AddrModeS);
	set_ia32_ls_mode(fldcw, ia32_reg_classes[CLASS_ia32_fp_cw].mode);
	set_ia32_am_ent(fldcw, entity);
	set_ia32_use_frame(fldcw);

	arch_set_irn_register(fldcw, &ia32_registers[REG_FPCW]);
	set_32bit_stackent(fldcw);

	return fldcw;
}

119
120
121
122
/**
 * Reload callback passed to be_assure_state(): creates the nodes that load an
 * fpu control word and schedules them before @p before.
 *
 * Three cases are handled:
 *  - ia32_cg_config.use_unsafe_floatconv is set: the control word is loaded
 *    from a constant entity, fpcw_round if @p spill is given and
 *    fpcw_truncate otherwise (the entities are created on first use).
 *  - @p spill is given: the control word is loaded back from the frame.
 *  - otherwise: the control word of @p last_state is stored to the frame,
 *    loaded as a gp value, or'ed with 3072 (0xc00), stored again and finally
 *    loaded into the control word register.
 *
 * @param env         unused
 * @param state       state node; only used to determine the graph
 * @param spill       spill to reload from, or NULL if there is none
 * @param before      node before which all new nodes are scheduled
 * @param last_state  state node whose control word is stored and modified in
 *                    the third case; must not be NULL there
 * @return the node that loads the control word
 */
static ir_node *create_fpu_mode_reload(void *env, ir_node *state,
                                       ir_node *spill, ir_node *before,
                                       ir_node *last_state)
{
	(void)env;

	ir_graph *const irg   = get_irn_irg(state);
	ir_node  *const block = get_nodes_block(before);
	ir_node  *const frame = get_irg_frame(irg);
	ir_node  *const noreg = ia32_new_NoReg_gp(irg);

	/* unsafe mode: load one of the two constant control words */
	if (ia32_cg_config.use_unsafe_floatconv) {
		if (fpcw_round == NULL)
			create_fpcw_entities();

		ir_node *const ent_load = create_fldcw_ent(
			block, spill != NULL ? fpcw_round : fpcw_truncate);
		sched_add_before(before, ent_load);
		return ent_load;
	}

	/* a spill exists: simply load it back from the frame */
	if (spill != NULL) {
		ir_node *const spill_load
			= new_bd_ia32_FldCW(NULL, block, frame, noreg, spill);
		set_ia32_op_type(spill_load, ia32_AddrModeS);
		set_ia32_ls_mode(spill_load, ia32_reg_classes[CLASS_ia32_fp_cw].mode);
		set_ia32_use_frame(spill_load);
		set_32bit_stackent(spill_load);
		arch_set_irn_register(spill_load, &ia32_registers[REG_FPCW]);

		sched_add_before(before, spill_load);
		return spill_load;
	}

	/* no spill: build the control word from the one of last_state */
	ir_mode *const cw_mode = mode_Hu;
	ir_node *const nomem   = get_irg_no_mem(irg);

	assert(last_state != NULL);

	/* 1. store the control word of last_state to the frame */
	ir_node *const cw_store
		= new_bd_ia32_FnstCW(NULL, block, frame, noreg, nomem, last_state);
	set_ia32_op_type(cw_store, ia32_AddrModeD);
	set_ia32_ls_mode(cw_store, cw_mode);
	set_ia32_use_frame(cw_store);
	set_32bit_stackent(cw_store);
	sched_add_before(before, cw_store);

	/* 2. load the stored word; the store serves as memory input */
	ir_node *const cw_load
		= new_bd_ia32_Load(NULL, block, frame, noreg, cw_store);
	set_ia32_op_type(cw_load, ia32_AddrModeS);
	set_ia32_ls_mode(cw_load, cw_mode);
	set_ia32_use_frame(cw_load);
	set_32bit_stackent(cw_load);
	sched_add_before(before, cw_load);

	ir_node *const loaded = new_r_Proj(cw_load, ia32_mode_gp, pn_ia32_Load_res);

	/* 3. or the constant 3072 into the loaded word */
	/* TODO: make the actual mode configurable in ChangeCW... */
	ir_node *const mask = new_bd_ia32_Immediate(NULL, get_irg_start_block(irg),
	                                            NULL, 0, 3072);
	arch_set_irn_register(mask, &ia32_registers[REG_GP_NOREG]);
	ir_node *const changed
		= new_bd_ia32_Or(NULL, block, noreg, noreg, nomem, loaded, mask);
	sched_add_before(before, changed);

	/* 4. store the modified word back to the frame */
	ir_node *const new_store
		= new_bd_ia32_Store(NULL, block, frame, noreg, nomem, changed);
	set_ia32_op_type(new_store, ia32_AddrModeD);
	/* use ia32_mode_gp, as movl has a shorter opcode than movw */
	set_ia32_ls_mode(new_store, ia32_mode_gp);
	set_ia32_use_frame(new_store);
	set_32bit_stackent(new_store);
	ir_node *const new_store_mem
		= new_r_Proj(new_store, mode_M, pn_ia32_Store_M);
	sched_add_before(before, new_store);

	/* 5. load the modified word into the control word register */
	ir_node *const fldcw
		= new_bd_ia32_FldCW(NULL, block, frame, noreg, new_store_mem);
	set_ia32_op_type(fldcw, ia32_AddrModeS);
	set_ia32_ls_mode(fldcw, cw_mode);
	set_ia32_use_frame(fldcw);
	set_32bit_stackent(fldcw);
	arch_set_irn_register(fldcw, &ia32_registers[REG_FPCW]);
	sched_add_before(before, fldcw);

	return fldcw;
}

typedef struct collect_fpu_mode_nodes_env_t {
Matthias Braun's avatar
Matthias Braun committed
208
	ir_node **state_nodes;
209
210
} collect_fpu_mode_nodes_env_t;

211
/**
 * Graph walker: appends @p node to the state_nodes array of the environment
 * if it has a data mode, is assigned the FPCW register and is not an ia32
 * ChangeCW node.
 *
 * @param node  the node currently visited
 * @param data  pointer to a collect_fpu_mode_nodes_env_t
 */
static void collect_fpu_mode_nodes_walker(ir_node *node, void *data)
{
	if (!mode_is_data(get_irn_mode(node)))
		return;
	if (arch_get_irn_register(node) != &ia32_registers[REG_FPCW])
		return;
	if (is_ia32_ChangeCW(node))
		return;

	collect_fpu_mode_nodes_env_t *const walk_env
		= (collect_fpu_mode_nodes_env_t*)data;
	ARR_APP1(ir_node*, walk_env->state_nodes, node);
}

222
/**
 * Does ssa construction for the fpu control word register.
 *
 * All nodes collected by collect_fpu_mode_nodes_walker() are registered as
 * copies of the initial FPCW register value, the users are fixed up, and the
 * Phis created on the way get the FPCW register assigned. If liveness
 * information is available it is updated first; the live sets of the graph
 * are invalidated at the end in any case.
 *
 * @param irg  the graph to process
 */
static void rewire_fpu_mode_nodes(ir_graph *irg)
{
	/* collect the nodes taking part in the ssa construction */
	collect_fpu_mode_nodes_env_t walk_env;
	walk_env.state_nodes = NEW_ARR_F(ir_node*, 0);
	irg_walk_graph(irg, collect_fpu_mode_nodes_walker, NULL, &walk_env);

	size_t const n_states = ARR_LEN(walk_env.state_nodes);
	if (n_states == 0) {
		/* Nothing needs to be done. In fact we must not continue: for
		 * endless loops no one is using the initial_value and it will point
		 * to a bad node now. */
		DEL_ARR_F(walk_env.state_nodes);
		return;
	}

	const arch_register_t *const fpcw = &ia32_registers[REG_FPCW];
	ir_node *const initial_value = be_get_initial_reg_value(irg, fpcw);

	be_ssa_construction_env_t ssa_env;
	be_ssa_construction_init(&ssa_env, irg);
	be_ssa_construction_add_copies(&ssa_env, walk_env.state_nodes, n_states);
	be_ssa_construction_fix_users(&ssa_env, initial_value);

	be_lv_t *const liveness = be_get_irg_liveness(irg);
	if (liveness == NULL) {
		be_invalidate_live_sets(irg);
	} else {
		be_ssa_construction_update_liveness_phis(&ssa_env, liveness);
		be_liveness_update(liveness, initial_value);
		for (size_t s = 0; s < n_states; ++s) {
			be_liveness_update(liveness, walk_env.state_nodes[s]);
		}
	}

	/* set registers for the phis */
	ir_node **const new_phis = be_ssa_construction_get_new_phis(&ssa_env);
	size_t    const n_phis   = ARR_LEN(new_phis);
	for (size_t p = 0; p < n_phis; ++p) {
		arch_set_irn_register(new_phis[p], fpcw);
	}

	be_ssa_construction_destroy(&ssa_env);
	DEL_ARR_F(walk_env.state_nodes);

	be_invalidate_live_sets(irg);
}

268
/**
 * Sets up the fpu control word handling for @p irg.
 *
 * @param irg  the graph to process
 */
void ia32_setup_fpu_mode(ir_graph *irg)
{
	/* step 1: ssa construction for the nodes defining the FPCW register */
	rewire_fpu_mode_nodes(irg);

	/* step 2: let be_assure_state() ensure the correct fpu mode for
	 * operations, using the spill and reload callbacks of this file */
	be_assure_state(irg,
	                &ia32_registers[REG_FPCW],
	                NULL,
	                create_fpu_mode_spill,
	                create_fpu_mode_reload);
}