ia32_x87.c 44.4 KB
Newer Older
Christian Würdig's avatar
Christian Würdig committed
1
2
/*
 * This file is part of libFirm.
3
 * Copyright (C) 2012 University of Karlsruhe.
Christian Würdig's avatar
Christian Würdig committed
4
5
 */

6
/**
Christian Würdig's avatar
Christian Würdig committed
7
8
9
10
 * @file
 * @brief       This file implements the x87 support and virtual to stack
 *              register translation for the ia32 backend.
 * @author      Michael Beck
11
12
13
 */
#include <assert.h>

14
#include "bearch_ia32_t.h"
15
#include "beirg.h"
16
#include "beutil.h"
17
18
19
20
#include "irnode_t.h"
#include "irop_t.h"
#include "irprog.h"
#include "iredges_t.h"
21
#include "irgmod.h"
22
#include "ircons.h"
23
#include "irgwalk.h"
24
#include "irtools.h"
25
26
#include "obst.h"
#include "pmap.h"
27
#include "array.h"
28
#include "pdeq.h"
29
#include "debug.h"
Matthias Braun's avatar
Matthias Braun committed
30
#include "panic.h"
31

Matthias Braun's avatar
Matthias Braun committed
32
#include "belive.h"
33
34
#include "besched.h"
#include "benode.h"
35
#include "bessaconstr.h"
36
37
38
39
#include "ia32_new_nodes.h"
#include "gen_ia32_new_nodes.h"
#include "gen_ia32_regalloc_if.h"
#include "ia32_x87.h"
40
#include "ia32_architecture.h"
41

42
43
#define N_FLOAT_REGS  (N_ia32_fp_REGS-1)  // exclude NOREG

44
45
46
47
48
static bool is_x87_req(arch_register_req_t const *const req)
{
	return req->cls == &ia32_reg_classes[CLASS_ia32_fp];
}

49
50
51
52
53
54
static bool requested_x87_sim(ir_graph const *const irg)
{
	ia32_irg_data_t const *const irg_data = ia32_get_irg_data(irg);
	return irg_data->do_x87_sim;
}

55
/** the debug handle */
56
DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
57

Sebastian Hack's avatar
Sebastian Hack committed
58
/* Forward declaration. */
59
typedef struct x87_simulator x87_simulator;
Sebastian Hack's avatar
Sebastian Hack committed
60

61
62
63
/**
 * An entry on the simulated x87 stack.
 */
64
typedef struct st_entry {
Matthias Braun's avatar
Matthias Braun committed
65
	unsigned reg_idx; /**< the virtual register index of this stack value */
Christoph Mallon's avatar
Christoph Mallon committed
66
	ir_node *node;    /**< the node that produced this value */
67
68
} st_entry;

69
70
71
/**
 * The x87 state.
 */
72
typedef struct x87_state {
73
	st_entry       st[N_FLOAT_REGS]; /**< the register stack */
Matthias Braun's avatar
Matthias Braun committed
74
	unsigned       depth;            /**< the current stack depth */
75
	x87_simulator *sim;              /**< The simulator. */
76
77
} x87_state;

Michael Beck's avatar
Michael Beck committed
78
79
80
81
82
83
/**
 * The type of an instruction simulator function.
 *
 * @param state  the x87 state
 * @param n      the node to be simulated
 */
84
typedef void (*sim_func)(x87_state *state, ir_node *n);
85
86
87
88

/**
 * A block state: Every block has a x87 state at the beginning and at the end.
 */
89
typedef struct blk_state {
Christoph Mallon's avatar
Christoph Mallon committed
90
91
	x87_state const *begin; /**< state at the begin or NULL if not assigned */
	x87_state const *end;   /**< state at the end or NULL if not assigned */
92
93
} blk_state;

94
95
/** liveness bitset for fp registers. */
typedef unsigned char fp_liveness;
Matthias Braun's avatar
Matthias Braun committed
96

97
98
99
/**
 * The x87 simulator.
 */
100
struct x87_simulator {
Christoph Mallon's avatar
Christoph Mallon committed
101
102
103
	struct obstack obst;       /**< An obstack for fast allocating. */
	pmap          *blk_states; /**< Map blocks to states. */
	be_lv_t       *lv;         /**< intrablock liveness. */
104
	fp_liveness   *live;       /**< Liveness information. */
Christoph Mallon's avatar
Christoph Mallon committed
105
106
	unsigned       n_idx;      /**< The cached get_irg_last_idx() result. */
	waitq         *worklist;   /**< Worklist of blocks that must be processed. */
Sebastian Hack's avatar
Sebastian Hack committed
107
};
108

Michael Beck's avatar
Michael Beck committed
109
/**
Michael Beck's avatar
BugFix:    
Michael Beck committed
110
 * Returns the current stack depth.
Michael Beck's avatar
Michael Beck committed
111
112
113
114
 *
 * @param state  the x87 state
 * @return the x87 stack depth
 */
Matthias Braun's avatar
Matthias Braun committed
115
static unsigned x87_get_depth(const x87_state *state)
Christoph Mallon's avatar
Christoph Mallon committed
116
{
Michael Beck's avatar
Michael Beck committed
117
	return state->depth;
118
}
119

Matthias Braun's avatar
Matthias Braun committed
120
static st_entry *x87_get_entry(x87_state *const state, unsigned const pos)
121
{
Matthias Braun's avatar
Matthias Braun committed
122
	assert(pos < state->depth);
123
	return &state->st[N_FLOAT_REGS - state->depth + pos];
124
125
}

126
127
128
129
130
131
132
133
134
135
136
137
/**
 * Return the node at st(pos).
 *
 * @param state  the x87 state
 * @param pos    a stack position
 * @return the node that produced the value at st(pos)
 */
static ir_node *x87_get_st_node(x87_state const *const state, unsigned const pos)
{
	return x87_get_entry((x87_state*)state, pos)->node;
}

138
/**
139
 * Return the virtual register index at st(pos).
Michael Beck's avatar
Michael Beck committed
140
141
142
 *
 * @param state  the x87 state
 * @param pos    a stack position
143
 * @return the fp register index that produced the value at st(pos)
144
 */
Matthias Braun's avatar
Matthias Braun committed
145
static unsigned x87_get_st_reg(const x87_state *state, unsigned pos)
Christoph Mallon's avatar
Christoph Mallon committed
146
{
147
	return x87_get_entry((x87_state*)state, pos)->reg_idx;
148
}
149

Matthias Braun's avatar
Matthias Braun committed
150
#ifdef DEBUG_libfirm
151
152
/**
 * Dump the stack for debugging.
Michael Beck's avatar
Michael Beck committed
153
154
 *
 * @param state  the x87 state
155
 */
Christoph Mallon's avatar
Christoph Mallon committed
156
157
static void x87_dump_stack(const x87_state *state)
{
Matthias Braun's avatar
Matthias Braun committed
158
	for (unsigned i = state->depth; i-- > 0;) {
Christoph Mallon's avatar
Christoph Mallon committed
159
160
		st_entry const *const entry = x87_get_entry((x87_state*)state, i);
		DB((dbg, LEVEL_2, "vf%d(%+F) ", entry->reg_idx, entry->node));
161
	}
162
	DB((dbg, LEVEL_2, "<-- TOS\n"));
163
}
164
#endif /* DEBUG_libfirm */
165
166

/**
167
 * Set a node to st(pos).
Michael Beck's avatar
Michael Beck committed
168
169
 *
 * @param state    the x87 state
170
 * @param node     the IR node that produces the value of the fp register
Michael Beck's avatar
Michael Beck committed
171
 * @param pos      the stack position where the new value should be entered
172
 */
173
static void x87_set_st(x87_state *const state, ir_node *const node, unsigned const pos)
Christoph Mallon's avatar
Christoph Mallon committed
174
{
175
	st_entry *const entry = x87_get_entry(state, pos);
176
	entry->reg_idx = arch_get_irn_register(node)->index;
177
	entry->node    = node;
178

Matthias Braun's avatar
Matthias Braun committed
179
	DB((dbg, LEVEL_2, "After SET_REG: "));
180
	DEBUG_ONLY(x87_dump_stack(state);)
181
}
182
183
184

/**
 * Swap st(0) with st(pos).
Michael Beck's avatar
Michael Beck committed
185
186
187
 *
 * @param state    the x87 state
 * @param pos      the stack position to change the tos with
188
 */
Matthias Braun's avatar
Matthias Braun committed
189
static void x87_fxch(x87_state *state, unsigned pos)
Christoph Mallon's avatar
Christoph Mallon committed
190
{
191
192
193
194
195
	st_entry *const a = x87_get_entry(state, pos);
	st_entry *const b = x87_get_entry(state, 0);
	st_entry  const t = *a;
	*a = *b;
	*b = t;
196

197
	DB((dbg, LEVEL_2, "After FXCH: "));
198
	DEBUG_ONLY(x87_dump_stack(state);)
199
}
200
201

/**
202
 * Convert a node to the stack index.
Michael Beck's avatar
Michael Beck committed
203
204
 *
 * @param state    the x87 state
205
206
207
 * @param node     the node to find
 * @return the stack position where the node is stacked
 *         or -1 if the node was not found
208
 */
209
static unsigned x87_on_stack(x87_state const *const state, ir_node const *const node)
Christoph Mallon's avatar
Christoph Mallon committed
210
{
211
	unsigned const reg_idx = arch_get_irn_register(node)->index;
Matthias Braun's avatar
Matthias Braun committed
212
	for (unsigned i = 0; i < state->depth; ++i) {
213
		if (x87_get_st_reg(state, i) == reg_idx)
214
			return i;
215
	}
Matthias Braun's avatar
Matthias Braun committed
216
	return (unsigned)-1;
217
}
218

219
220
221
222
223
224
static unsigned is_at_pos(x87_state const *const state, ir_node const *const val, unsigned const pos)
{
	arch_register_t const *const reg = arch_get_irn_register(val);
	return x87_get_st_reg(state, pos) == reg->index;
}

225
/**
226
 * Push a node onto the stack, double pushes are NOT allowed.
Michael Beck's avatar
Michael Beck committed
227
 *
228
 * @param state     the x87 state
229
 * @param node      the node that produces the value of the fp register
230
 */
231
static void x87_push(x87_state *const state, ir_node *const node)
Christoph Mallon's avatar
Christoph Mallon committed
232
{
233
	assert(requested_x87_sim(get_irn_irg(node)));
234
	assert(x87_on_stack(state, node) == (unsigned)-1 && "double push");
235
	assert(state->depth < N_FLOAT_REGS && "stack overrun");
236
237

	++state->depth;
238
	st_entry *const entry = x87_get_entry(state, 0);
239
	entry->reg_idx = arch_get_irn_register(node)->index;
240
	entry->node    = node;
241

Matthias Braun's avatar
Matthias Braun committed
242
243
	DB((dbg, LEVEL_2, "After PUSH: "));
	DEBUG_ONLY(x87_dump_stack(state);)
244
}
245
246
247

/**
 * Pop a virtual Register from the stack.
Michael Beck's avatar
Michael Beck committed
248
249
 *
 * @param state     the x87 state
250
 */
Christoph Mallon's avatar
Christoph Mallon committed
251
252
static void x87_pop(x87_state *state)
{
253
254
	assert(state->depth > 0 && "stack underrun");
	--state->depth;
Matthias Braun's avatar
Matthias Braun committed
255
256
	DB((dbg, LEVEL_2, "After POP: "));
	DEBUG_ONLY(x87_dump_stack(state);)
257
}
258

259
260
261
262
263
/**
 * Empty the fpu stack
 *
 * @param state     the x87 state
 */
Christoph Mallon's avatar
Christoph Mallon committed
264
265
static void x87_emms(x87_state *state)
{
266
267
268
	state->depth = 0;
}

269
270
/**
 * Returns the block state of a block.
Michael Beck's avatar
Michael Beck committed
271
272
273
274
 *
 * @param sim    the x87 simulator handle
 * @param block  the current block
 * @return the block state
275
 */
Christoph Mallon's avatar
Christoph Mallon committed
276
277
static blk_state *x87_get_bl_state(x87_simulator *sim, ir_node *block)
{
278
	blk_state *res = pmap_get(blk_state, sim->blk_states, block);
279
280
281
282
	if (res == NULL) {
		res = OALLOC(&sim->obst, blk_state);
		res->begin = NULL;
		res->end   = NULL;
283

284
		pmap_insert(sim->blk_states, block, res);
285
	}
286
	return res;
287
}
288
289
290

/**
 * Clone a x87 state.
Michael Beck's avatar
Michael Beck committed
291
292
293
294
 *
 * @param sim    the x87 simulator handle
 * @param src    the x87 state that will be cloned
 * @return a cloned copy of the src state
295
 */
Christoph Mallon's avatar
Christoph Mallon committed
296
297
static x87_state *x87_clone_state(x87_simulator *sim, const x87_state *src)
{
298
	x87_state *const res = OALLOC(&sim->obst, x87_state);
299
	*res = *src;
300
	return res;
301
}
302

Matthias Braun's avatar
Matthias Braun committed
303
static inline const arch_register_t *get_st_reg(unsigned index)
304
305
306
307
{
	return &ia32_registers[REG_ST0 + index];
}

308
/**
309
 * Create a fxch node before another node.
310
 *
311
312
313
 * @param state   the x87 state
 * @param n       the node after the fxch
 * @param pos     exchange st(pos) with st(0)
314
 */
Matthias Braun's avatar
Matthias Braun committed
315
static void x87_create_fxch(x87_state *state, ir_node *n, unsigned pos)
Christoph Mallon's avatar
Christoph Mallon committed
316
{
317
	x87_fxch(state, pos);
318

319
320
321
	ir_node               *const block = get_nodes_block(n);
	arch_register_t const *const reg   = get_st_reg(pos);
	ir_node               *const fxch  = new_bd_ia32_fxch(NULL, block, reg);
322

Michael Beck's avatar
Michael Beck committed
323
	keep_alive(fxch);
324

325
	sched_add_before(n, fxch);
326
	DB((dbg, LEVEL_1, "<<< %s %s\n", get_irn_opname(fxch), reg->name));
327
}
328

329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
static void move_to_pos(x87_state *const state, ir_node *const before, ir_node *const val, unsigned const to)
{
	unsigned const from = x87_on_stack(state, val);
	assert(from != (unsigned)-1);
	if (from != to) {
		if (from != 0)
			x87_create_fxch(state, before, from);
		if (to != 0)
			x87_create_fxch(state, before, to);
	}
}

static void move_to_tos(x87_state *const state, ir_node *const before, ir_node *const val)
{
	move_to_pos(state, before, val, 0);
}

346
347
/* -------------- x87 perm --------------- */

348
349
/**
 * Calculate the necessary permutations to reach dst_state.
350
351
352
353
354
355
356
357
358
359
360
 *
 * These permutations are done with fxch instructions and placed
 * at the end of the block.
 *
 * Note that critical edges are removed here, so we need only
 * a shuffle if the current block has only one successor.
 *
 * @param block      the current block
 * @param state      the current x87 stack state, might be modified
 * @param dst_state  destination state
 * @return state
361
 */
362
static x87_state *x87_shuffle(ir_node *block, x87_state *state, const x87_state *dst_state)
Matthias Braun's avatar
Matthias Braun committed
363
{
364
365
	assert(state->depth == dst_state->depth);

366
	/* Some mathematics here:
Christoph Mallon's avatar
Christoph Mallon committed
367
368
369
370
371
372
373
374
375
376
377
378
	 * If we have a cycle of length n that includes the tos,
	 * we need n-1 exchange operations.
	 * We can always add the tos and restore it, so we need
	 * n+1 exchange operations for a cycle not containing the tos.
	 * So, the maximum of needed operations is for a cycle of 7
	 * not including the tos == 8.
	 * This is the same number of ops we would need for using stores,
	 * so exchange is cheaper (we save the loads).
	 * On the other hand, we might need an additional exchange
	 * in the next block to bring one operand on top, so the
	 * number of ops in the first case is identical.
	 * Further, no more than 4 cycles can exists (4 x 2). */
Matthias Braun's avatar
Matthias Braun committed
379
	unsigned all_mask = (1 << (state->depth)) - 1;
380

Matthias Braun's avatar
Matthias Braun committed
381
382
383
384
	unsigned      cycles[4];
	unsigned char cycle_idx[4][8];
	unsigned      n_cycles;
	for (n_cycles = 0; all_mask != 0; ++n_cycles) {
385
		/* find the first free slot */
Matthias Braun's avatar
Matthias Braun committed
386
		unsigned i;
387
388
389
390
391
		for (i = 0; i < state->depth; ++i) {
			if (all_mask & (1 << i)) {
				all_mask &= ~(1 << i);

				/* check if there are differences here */
392
				if (x87_get_st_reg(state, i) != x87_get_st_reg(dst_state, i))
393
394
395
396
					break;
			}
		}

Matthias Braun's avatar
Matthias Braun committed
397
		if (all_mask == 0) {
Michael Beck's avatar
Michael Beck committed
398
			/* no more cycles found */
399
400
401
			break;
		}

Matthias Braun's avatar
Matthias Braun committed
402
		unsigned k = 0;
Michael Beck's avatar
Michael Beck committed
403
404
		cycles[n_cycles] = (1 << i);
		cycle_idx[n_cycles][k++] = i;
Matthias Braun's avatar
Matthias Braun committed
405
		for (unsigned src_idx = i, dst_idx; ; src_idx = dst_idx) {
406
			dst_idx = x87_on_stack(dst_state, x87_get_st_node(state, src_idx));
407
408
409
410

			if ((all_mask & (1 << dst_idx)) == 0)
				break;

Michael Beck's avatar
Michael Beck committed
411
412
			cycle_idx[n_cycles][k++] = dst_idx;
			cycles[n_cycles] |=  (1 << dst_idx);
413
414
			all_mask       &= ~(1 << dst_idx);
		}
Matthias Braun's avatar
Matthias Braun committed
415
		cycle_idx[n_cycles][k] = (unsigned char)-1;
416
417
	}

Matthias Braun's avatar
Matthias Braun committed
418
	if (n_cycles == 0) {
419
420
421
422
423
		/* no permutation needed */
		return state;
	}

	/* Hmm: permutation needed */
424
	DB((dbg, LEVEL_2, "\n%+F needs permutation: from\n", block));
425
	DEBUG_ONLY(x87_dump_stack(state);)
426
	DB((dbg, LEVEL_2, "                  to\n"));
427
	DEBUG_ONLY(x87_dump_stack(dst_state);)
428
429

#ifdef DEBUG_libfirm
Michael Beck's avatar
Michael Beck committed
430
	DB((dbg, LEVEL_2, "Need %d cycles\n", n_cycles));
Matthias Braun's avatar
Matthias Braun committed
431
	for (unsigned ri = 0; ri < n_cycles; ++ri) {
432
		DB((dbg, LEVEL_2, " Ring %d:\n ", ri));
Matthias Braun's avatar
Matthias Braun committed
433
		for (unsigned k = 0; cycle_idx[ri][k] != (unsigned char)-1; ++k)
Michael Beck's avatar
Michael Beck committed
434
			DB((dbg, LEVEL_2, " st%d ->", cycle_idx[ri][k]));
435
436
437
438
		DB((dbg, LEVEL_2, "\n"));
	}
#endif

Matthias Braun's avatar
Matthias Braun committed
439
	/* Find the place node must be insert.
440
	 * We have only one successor block, so the last instruction should
Matthias Braun's avatar
Matthias Braun committed
441
	 * be a jump. */
442
	ir_node *const before = sched_last(block);
443
444
	assert(is_cfop(before));

445
	/* now do the permutations */
Matthias Braun's avatar
Matthias Braun committed
446
447
	for (unsigned ri = 0; ri < n_cycles; ++ri) {
		if ((cycles[ri] & 1) == 0)
448
			x87_create_fxch(state, before, cycle_idx[ri][0]);
Matthias Braun's avatar
Matthias Braun committed
449
		for (unsigned k = 1; cycle_idx[ri][k] != (unsigned char)-1; ++k) {
450
			x87_create_fxch(state, before, cycle_idx[ri][k]);
451
		}
Matthias Braun's avatar
Matthias Braun committed
452
		if ((cycles[ri] & 1) == 0)
453
			x87_create_fxch(state, before, cycle_idx[ri][0]);
454
	}
455
	return state;
456
}
457

458
459
460
static ir_node *x87_create_fdup(x87_state *const state, ir_node *const block,
                                ir_node *const val,
                                arch_register_t const *const out)
461
{
462
463
464
	unsigned               const pos  = x87_on_stack(state, val);
	arch_register_t const *const reg  = get_st_reg(pos);
	ir_node               *const fdup = new_bd_ia32_fdup(NULL, block, val, reg);
465
	arch_set_irn_register(fdup, out);
466
	x87_push(state, fdup);
467
	DB((dbg, LEVEL_1, "<<< %s %s\n", get_irn_opname(fdup), reg->name));
468
469
470
	return fdup;
}

471
/**
472
 * Create an fdup before node @p n replacing operand @p op_n.
Michael Beck's avatar
Michael Beck committed
473
 *
474
 * @param state     the x87 state
475
476
 * @param n         the node after the fdup
 * @param val       the value to duplicate
477
 */
478
static ir_node *x87_dup_operand(x87_state *const state, ir_node *const n, unsigned const op_n, ir_node *const val, arch_register_t const *const out)
Christoph Mallon's avatar
Christoph Mallon committed
479
{
480
481
482
483
	ir_node *const block = get_nodes_block(n);
	ir_node *const fdup  = x87_create_fdup(state, block, val, out);
	sched_add_before(n, fdup);
	set_irn_n(n, op_n, fdup);
484
	return fdup;
485
}
486

Michael Beck's avatar
Michael Beck committed
487
488
/**
 * Create a fpop before node n.
489
 * This overwrites st(pos) with st(0) and pops st(0).
Michael Beck's avatar
Michael Beck committed
490
491
 *
 * @param state   the x87 state
492
 * @param n       the node after which to schedule the fpop
493
 * @param pos     the index of the entry to remove the register stack
Michael Beck's avatar
Michael Beck committed
494
495
 * @return the fpop node
 */
Matthias Braun's avatar
Matthias Braun committed
496
497
static ir_node *x87_create_fpop(x87_state *const state, ir_node *const n,
                                unsigned const pos)
Matthias Braun's avatar
Matthias Braun committed
498
{
499
500
501
502
503
	if (pos != 0) {
		st_entry *const dst = x87_get_entry(state, pos);
		st_entry *const src = x87_get_entry(state, 0);
		*dst = *src;
	}
504
	x87_pop(state);
505
506
507
508
509
	ir_node               *const block = get_block(n);
	arch_register_t const *const reg = get_st_reg(pos);
	ir_node *const fpop = pos == 0 && ia32_cg_config.use_ffreep ?
		new_bd_ia32_ffreep(NULL, block, reg) :
		new_bd_ia32_fpop(  NULL, block, reg);
510
511

	keep_alive(fpop);
512
	sched_add_after(n, fpop);
513
	DB((dbg, LEVEL_1, "<<< %s %s\n", get_irn_opname(fpop), reg->name));
Michael Beck's avatar
Michael Beck committed
514
	return fpop;
515
}
Michael Beck's avatar
Michael Beck committed
516

517
518
519
520
521
522
/* --------------------------------- liveness ------------------------------------------ */

/**
 * The liveness transfer function.
 * Updates a live set over a single step from a given node to its predecessor.
 * Everything defined at the node is removed from the set, the uses of the node get inserted.
Michael Beck's avatar
BugFix:    
Michael Beck committed
523
 *
524
525
526
 * @param irn      The node at which liveness should be computed.
 * @param live     The bitset of registers live before @p irn. This set gets modified by updating it to
 *                 the registers live after irn.
Michael Beck's avatar
BugFix:    
Michael Beck committed
527
 * @return The live bitset.
528
 */
529
static fp_liveness fp_liveness_transfer(ir_node *irn, fp_liveness live)
530
{
531
	const arch_register_class_t *cls = &ia32_reg_classes[CLASS_ia32_fp];
532
	be_foreach_definition(irn, cls, def, req,
Matthias Braun's avatar
Matthias Braun committed
533
		const arch_register_t *reg = arch_get_irn_register(def);
534
		live &= ~(1 << reg->index);
535
	);
536
	be_foreach_use(irn, cls, in_req_, op, op_req_,
Matthias Braun's avatar
Matthias Braun committed
537
		const arch_register_t *reg = arch_get_irn_register(op);
538
539
		live |= 1 << reg->index;
	);
540
	return live;
541
}
542
543
544

/**
 * Put all live virtual registers at the end of a block into a bitset.
Michael Beck's avatar
BugFix:    
Michael Beck committed
545
 *
Michael Beck's avatar
Michael Beck committed
546
 * @param sim      the simulator handle
Michael Beck's avatar
BugFix:    
Michael Beck committed
547
548
 * @param bl       the block
 * @return The live bitset at the end of this block
549
 */
550
static fp_liveness fp_liveness_end_of_block(x87_simulator *sim, const ir_node *block)
551
{
Matthias Braun's avatar
Matthias Braun committed
552
553
554
	fp_liveness                  live = 0;
	const arch_register_class_t *cls  = &ia32_reg_classes[CLASS_ia32_fp];
	const be_lv_t               *lv   = sim->lv;
555

556
	be_lv_foreach_cls(lv, block, be_lv_state_end, cls, node) {
Matthias Braun's avatar
Matthias Braun committed
557
		const arch_register_t *reg = arch_get_irn_register(node);
558
		live |= 1 << reg->index;
559
560
561
	}

	return live;
562
}
563

564
/** get the register mask from an arch_register */
565
#define REGMASK(reg)    (1 << (reg->index))
566

567
/**
568
 * Return a bitset of argument registers which are live at the end of a node.
Michael Beck's avatar
BugFix:    
Michael Beck committed
569
570
571
 *
 * @param sim    the simulator handle
 * @param pos    the node
572
 * @param kill   kill mask for the output registers
573
574
 * @return The live bitset.
 */
Matthias Braun's avatar
Matthias Braun committed
575
576
static fp_liveness fp_live_args_after(x87_simulator *sim, const ir_node *pos,
                                      fp_liveness kill)
577
{
Michael Beck's avatar
BugFix:    
Michael Beck committed
578
579
	unsigned idx = get_irn_idx(pos);
	assert(idx < sim->n_idx);
580
	return sim->live[idx] & ~kill;
581
}
582

Michael Beck's avatar
BugFix:    
Michael Beck committed
583
584
585
/**
 * Calculate the liveness for a whole block and cache it.
 *
Matthias Braun's avatar
Matthias Braun committed
586
587
 * @param sim   the simulator handle
 * @param block the block
Michael Beck's avatar
BugFix:    
Michael Beck committed
588
 */
Christoph Mallon's avatar
Christoph Mallon committed
589
590
static void update_liveness(x87_simulator *sim, ir_node *block)
{
591
	fp_liveness live = fp_liveness_end_of_block(sim, block);
592
593
594
	/* Now iterate through the block backward and cache the results.
	 * Stop at the first Phi: this produces the live-in. */
	sched_foreach_non_phi_reverse(block, irn) {
Matthias Braun's avatar
Matthias Braun committed
595
		unsigned idx = get_irn_idx(irn);
Michael Beck's avatar
BugFix:    
Michael Beck committed
596
		sim->live[idx] = live;
597

598
		live = fp_liveness_transfer(irn, live);
599
	}
Matthias Braun's avatar
Matthias Braun committed
600
	unsigned idx = get_irn_idx(block);
Michael Beck's avatar
BugFix:    
Michael Beck committed
601
	sim->live[idx] = live;
602
}
603
604

/**
605
 * Returns true if node is live in the set.
Michael Beck's avatar
Michael Beck committed
606
 *
607
608
 * @param node  the node to check
 * @param live  a live bitset
609
 */
610
static inline bool is_fp_live(ir_node const *const node, fp_liveness const live)
Matthias Braun's avatar
Matthias Braun committed
611
{
612
	unsigned const reg_idx = arch_get_irn_register(node)->index;
Matthias Braun's avatar
Matthias Braun committed
613
614
	return live & (1 << reg_idx);
}
615

616
617
#ifdef DEBUG_libfirm
/**
Michael Beck's avatar
Michael Beck committed
618
619
620
 * Dump liveness info.
 *
 * @param live  the live bitset
621
 */
622
static void fp_dump_live(fp_liveness live)
Christoph Mallon's avatar
Christoph Mallon committed
623
{
Matthias Braun's avatar
Matthias Braun committed
624
	DB((dbg, LEVEL_2, "Live after: "));
Matthias Braun's avatar
Matthias Braun committed
625
	for (unsigned i = 0; i < N_FLOAT_REGS; ++i) {
626
		if (live & (1 << i)) {
Matthias Braun's avatar
Matthias Braun committed
627
			DB((dbg, LEVEL_2, "vf%d ", i));
628
629
		}
	}
630
	DB((dbg, LEVEL_2, "\n"));
631
}
632
#endif /* DEBUG_libfirm */
633
634
635

/* --------------------------------- simulators ---------------------------------------- */

636
637
638
639
640
static ir_node *get_result_node(ir_node *const n)
{
	return get_irn_mode(n) != mode_T ? n : get_Proj_for_pn(n, pn_ia32_res);
}

641
/**
Michael Beck's avatar
Michael Beck committed
642
643
644
645
 * Simulate a virtual binop.
 *
 * @param state  the x87 state
 * @param n      the node that should be simulated (and patched)
646
 */
647
static void sim_binop(x87_state *const state, ir_node *const n)
Christoph Mallon's avatar
Christoph Mallon committed
648
{
649
650
651
652
653
	ir_node                     *op1            = get_irn_n(n, n_ia32_binary_left);
	ir_node               *const op2            = get_irn_n(n, n_ia32_binary_right);
	arch_register_t const *const out            = arch_get_irn_register_out(n, pn_ia32_res);
	fp_liveness            const live           = fp_live_args_after(state->sim, n, REGMASK(out));
	bool                   const op1_live_after = is_fp_live(op1, live);
654
655

	DB((dbg, LEVEL_1, ">>> %+F %+F, %+F -> %s\n", n, op1, op2, out->name));
656
	DEBUG_ONLY(fp_dump_live(live);)
Matthias Braun's avatar
Matthias Braun committed
657
	DB((dbg, LEVEL_1, "Stack before: "));
658
	DEBUG_ONLY(x87_dump_stack(state);)
659

660
661
662
663
664
665
666
667
668
669
670
671
672
673
	if (is_ia32_NoReg_FP(op2)) {
		/* Second operand is an address mode. */
		if (op1_live_after) {
			/* First operand is live: Duplicate it. */
			x87_dup_operand(state, n, n_ia32_binary_left, op1, out);
		} else {
			/* First operand is dead: Move it to tos. */
			move_to_tos(state, n, op1);
		}
		x87_set_st(state, get_result_node(n), 0);
	} else {
		bool       reverse;
		bool       to_reg;
		bool       pop            = false;
674
		bool const op2_live_after = is_fp_live(op2, live);
675
676
		if (op1_live_after) {
			if (op2_live_after) {
Michael Beck's avatar
Michael Beck committed
677
				/* Both operands are live: push the first one.
678
				 * This works even for op1 == op2. */
679
680
681
				op1     = x87_dup_operand(state, n, n_ia32_binary_left, op1, out);
				reverse = false;
				to_reg  = false;
Matthias Braun's avatar
Matthias Braun committed
682
			} else {
683
684
685
686
687
688
689
690
				/* First live, second dead: Overwrite second. */
				if (is_at_pos(state, op1, 0)) {
					reverse = false;
					to_reg  = true;
				} else {
					move_to_tos(state, n, op2);
					reverse = true;
					to_reg  = false;
691
692
				}
			}
693
		} else {
694
695
696
697
698
699
700
701
702
			if (op2_live_after) {
				/* First dead, Second live: Overwrite first. */
				if (is_at_pos(state, op2, 0)) {
					reverse = true;
					to_reg  = true;
				} else {
					move_to_tos(state, n, op1);
					reverse = false;
					to_reg  = false;
703
				}
704
			} else {
705
706
707
				/* Both dead. */
				if (op1 == op2) {
					/* Operands are identical: No pop. */
708
					move_to_tos(state, n, op1);
709
710
					reverse = false;
					to_reg  = false;
711
				} else {
712
713
714
715
716
717
718
719
720
721
722
723
724
725
					/* Bring one operand to tos. Heuristically swap the operand not at
					 * st(1) to tos. This way, if any operand was at st(1), the result
					 * will end up in the new st(0) after the implicit pop. If the next
					 * operation uses the result, then no fxch will be necessary. */
					if (is_at_pos(state, op1, 0)) {
						reverse = false;
					} else if (is_at_pos(state, op2, 0)) {
						reverse = true;
					} else if (is_at_pos(state, op1, 1)) {
						move_to_tos(state, n, op2);
						reverse = true;
					} else {
						move_to_tos(state, n, op1);
						reverse = false;
726
					}
727
728
					to_reg = true;
					pop    = true;
729
				}
730
			}
731
732
		}

733
734
735
736
737
738
739
		unsigned const op_reg = x87_on_stack(state, reverse ? op1 : op2);
		/* Patch the operation. */
		ia32_x87_attr_t *const attr = get_ia32_x87_attr(n);
		attr->reg               = get_st_reg(op_reg);
		attr->attr.ins_permuted = reverse;
		attr->res_in_reg        = to_reg;
		attr->pop               = pop;
740

741
742
743
744
		x87_set_st(state, get_result_node(n), to_reg ? op_reg : 0);
		if (pop)
			x87_pop(state);
	}
745
746

	DEBUG_ONLY(
747
748
749
750
751
752
753
754
		ia32_x87_attr_t const *const attr = get_ia32_x87_attr(n);
		char            const *const st0  = get_st_reg(0)->name;
		char            const *const reg  =  attr->reg ? attr->reg->name : "[AM]";
		char            const *const l    =  attr->attr.ins_permuted ? reg : st0;
		char            const *const r    = !attr->attr.ins_permuted ? reg : st0;
		char            const *const o    =  attr->res_in_reg        ? reg : st0;
		char            const *const pop  =  attr->pop ? " [pop]" : "";
		DB((dbg, LEVEL_1, "<<< %s %s, %s -> %s%s\n", get_irn_opname(n), l, r, o, pop));
755
	)
756
}
757
758

/**
Michael Beck's avatar
Michael Beck committed
759
760
761
762
 * Simulate a virtual Unop.
 *
 * @param state  the x87 state
 * @param n      the node that should be simulated (and patched)
763
 */
764
static void sim_unop(x87_state *state, ir_node *n)
Christoph Mallon's avatar
Christoph Mallon committed
765
{
Matthias Braun's avatar
Matthias Braun committed
766
767
	arch_register_t const *const out  = arch_get_irn_register(n);
	fp_liveness            const live = fp_live_args_after(state->sim, n, REGMASK(out));
768
	DB((dbg, LEVEL_1, ">>> %+F -> %s\n", n, out->name));
769
	DEBUG_ONLY(fp_dump_live(live);)
770

771
772
	ir_node *const val = get_irn_n(n, 0);
	if (is_fp_live(val, live)) {
773
		/* push the operand here */
774
		x87_dup_operand(state, n, 0, val, out);
775
	} else {
776
		/* operand is dead, bring it to tos */
777
		move_to_tos(state, n, val);
778
	}
779

780
	x87_set_st(state, n, 0);
781
	DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), get_st_reg(0)->name));
782
}
783
784

/**
Michael Beck's avatar
Michael Beck committed
785
786
787
788
 * Simulate a virtual Load instruction.
 *
 * @param state  the x87 state
 * @param n      the node that should be simulated (and patched)
789
 */
790
static void sim_load(x87_state *state, ir_node *n)
Christoph Mallon's avatar
Christoph Mallon committed
791
{
Matthias Braun's avatar
Matthias Braun committed
792
793
794
	assert((unsigned)pn_ia32_fld_res == (unsigned)pn_ia32_fild_res
	    && (unsigned)pn_ia32_fld_res == (unsigned)pn_ia32_fld1_res
	    && (unsigned)pn_ia32_fld_res == (unsigned)pn_ia32_fldz_res);
795

796
797
	DB((dbg, LEVEL_1, ">>> %+F\n", n));
	x87_push(state, get_result_node(n));
798
	DB((dbg, LEVEL_1, "<<< %s -> %s\n", get_irn_opname(n), get_st_reg(0)->name));
799
}
800

801
/**
Michael Beck's avatar
Michael Beck committed
802
803
804
805
 * Simulate a virtual Store.
 *
 * @param state  the x87 state
 * @param n      the node that should be simulated (and patched)
806
 */
807
static void sim_store(x87_state *state, ir_node *n)
Christoph Mallon's avatar
Christoph Mallon committed
808
{
809
810
	ir_node *const val = get_irn_n(n, n_ia32_fst_val);
	DB((dbg, LEVEL_1, ">>> %+F %+F ->\n", n, val));
811

812
	fp_liveness const live = fp_live_args_after(state->sim, n, 0);
813
	if (!is_fp_live(val, live)) {
814
		/* we can only store the tos to memory */
815
		move_to_tos(state, n, val);
816
817
		goto do_pop;
	} else {
Christoph Mallon's avatar
Christoph Mallon committed
818
		ir_mode *const mode = get_ia32_ls_mode(n);
819
820
		assert(!mode_is_int(mode) || get_mode_size_bits(mode) <= 32);
		if (get_mode_size_bits(mode) > 64) {
821
822
823
			/* Problem: fst doesn't support 80bit modes. Code selection chooses
			 * an explicit fstp in this case which is fine, however if we create
			 * an 80bit fst because of a spill we may need some fixup here.
824
825
826
			 * Solution:
			 *   - stack not full: push value and fstp
			 *   - stack full: fstp value and load again */
827
			if (x87_get_depth(state) < N_FLOAT_REGS) {
828
				/* ok, we have a free register: push + fstp */
829
830
				arch_register_t const *const out = get_st_reg(REG_FP_FP_NOREG);
				x87_dup_operand(state, n, n_ia32_fst_val, val, out);
831
832
do_pop:
				x87_pop(state);