/*
 * This file is part of libFirm.
 * Copyright (C) 2012 University of Karlsruhe.
 */

/**
 * @file
 * @brief       This is the main ia32 firm backend driver.
 * @author      Christian Wuerdig
 */
#include "ia32_bearch_t.h"

#include "be_t.h"
#include "beflags.h"
#include "begnuas.h"
#include "bemodule.h"
#include "bera.h"
#include "besched.h"
#include "bespillslots.h"
#include "bestack.h"
#include "beutil.h"
#include "bevarargs.h"
#include "gen_ia32_regalloc_if.h"
#include "ia32_architecture.h"
#include "ia32_emitter.h"
#include "ia32_encode.h"
#include "ia32_new_nodes.h"
#include "ia32_optimize.h"
#include "ia32_transform.h"
#include "ident_t.h"
#include "instrument.h"
#include "ircons.h"
#include "iredges_t.h"
#include "irgmod.h"
#include "irgopt.h"
#include "irgwalk.h"
#include "iropt_t.h"
#include "irtools.h"
#include "lc_opts_enum.h"
#include "lower_alloc.h"
#include "lower_builtins.h"
#include "lower_calls.h"
#include "lower_mode_b.h"
#include "lower_softfloat.h"
#include "lowering.h"
#include "panic.h"
#include "x86_x87.h"

pmap *ia32_tv_ent; /**< A map of entities that store const tarvals */

ir_mode *ia32_mode_fpcw;
ir_mode *ia32_mode_flags;
ir_mode *ia32_mode_gp;
ir_mode *ia32_mode_float64;
ir_mode *ia32_mode_float32;

static bool return_small_struct_in_regs;

typedef ir_node *(*create_const_node_func) (dbg_info *dbgi, ir_node *block);

/**
 * Used to create per-graph unique pseudo nodes.
 */
static inline ir_node *create_const(ir_graph *irg, ir_node **place,
                                    create_const_node_func func,
                                    const arch_register_t* reg)
{
	if (*place != NULL)
		return *place;

	ir_node *block = get_irg_start_block(irg);
	ir_node *res   = func(NULL, block);
	arch_set_irn_register(res, reg);
	*place = res;
	/* We need a keep edge on our cached nodes, so that following firm
	 * irgwalks will not miss them. */
	keep_alive(res);
	return res;
}

ir_node *ia32_new_NoReg_gp(ir_graph *irg)
{
	ia32_irg_data_t *irg_data = ia32_get_irg_data(irg);
	return create_const(irg, &irg_data->noreg_gp, new_bd_ia32_NoReg_GP,
	                    &ia32_registers[REG_GP_NOREG]);
}

ir_node *ia32_new_NoReg_fp(ir_graph *irg)
{
	ia32_irg_data_t *irg_data = ia32_get_irg_data(irg);
	return create_const(irg, &irg_data->noreg_fp, new_bd_ia32_NoReg_FP,
	                    &ia32_registers[REG_FP_NOREG]);
}

ir_node *ia32_new_NoReg_xmm(ir_graph *irg)
{
	ia32_irg_data_t *irg_data = ia32_get_irg_data(irg);
	return create_const(irg, &irg_data->noreg_xmm, new_bd_ia32_NoReg_XMM,
	                    &ia32_registers[REG_XMM_NOREG]);
}

ir_node *ia32_new_Fpu_truncate(ir_graph *irg)
{
	ia32_irg_data_t *irg_data = ia32_get_irg_data(irg);
	return create_const(irg, &irg_data->fpu_trunc_mode, new_bd_ia32_ChangeCW,
	                    &ia32_registers[REG_FPCW]);
}

/**
 * Returns the admissible noreg register node for input register pos of node irn.
 */
static ir_node *ia32_get_admissible_noreg(ir_node *irn, int pos)
{
	ir_graph                  *irg = get_irn_irg(irn);
	const arch_register_req_t *req = arch_get_irn_register_req_in(irn, pos);
	if (req->cls == &ia32_reg_classes[CLASS_ia32_gp])
		return ia32_new_NoReg_gp(irg);

	if (ia32_cg_config.use_sse2) {
		return ia32_new_NoReg_xmm(irg);
	} else {
		return ia32_new_NoReg_fp(irg);
	}
}

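/**
 * Assigns the frame entity referenced by @p node and, for auto-sized frame
 * accesses, derives the access size from the given entity size.
 */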
static void ia32_set_frame_entity(ir_node *node, ir_entity *entity,
                                  unsigned size, unsigned po2align)
{
	ia32_attr_t *const attr = get_ia32_attr(node);
	attr->addr.immediate = (x86_imm32_t) {
		.kind   = X86_IMM_FRAMEENT,
		.entity = entity,
		.offset = attr->addr.immediate.offset,
	};
	assert(get_ia32_frame_use(node) != IA32_FRAME_USE_NONE);

	/* set ls_mode based on entity unless we explicitly requested
	 * a certain mode */
	if (get_ia32_frame_use(node) != IA32_FRAME_USE_AUTO
	 || is_ia32_Cmp(node) || is_ia32_Conv_I2I(node))
		return;
	(void)po2align;
	attr->size = size == 12 ? X86_SIZE_80 : x86_size_from_bytes(size);
	/* 8bit stores have a special register requirement, so we can't simply
	 * change size to 8bit here. The "hack" in ia32_collect_frame_entity_nodes()
	 * should take care that it never happens. */
	assert(!is_ia32_Store(node) || attr->size > X86_SIZE_8);
}

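/** Returns whether the base register of @p node is the stack pointer (ESP). */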
static bool node_has_sp_base(ir_node const *const node)
{
	assert(is_ia32_irn(node));
	arch_register_t const *const base_reg
		= arch_get_irn_register_in(node, n_ia32_base);
	return base_reg == &ia32_registers[REG_ESP];
}

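/**
 * Replaces a frame entity reference in the address of @p node by a concrete
 * offset relative to the stack or frame pointer.
 */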
static void ia32_determine_frameoffset(ir_node *node, int sp_offset)
{
	if (!is_ia32_irn(node))
		return;

	ia32_attr_t *const attr = get_ia32_attr(node);
	if (attr->addr.immediate.kind == X86_IMM_FRAMEENT) {
#ifndef NDEBUG
		attr->old_frame_ent = attr->addr.immediate.entity;
#endif
		attr->addr.immediate.offset
			+= get_entity_offset(attr->addr.immediate.entity);
		attr->addr.immediate.entity  = NULL;
		attr->addr.immediate.kind    = X86_IMM_FRAMEOFFSET;
	}

	if (attr->addr.immediate.kind == X86_IMM_FRAMEOFFSET) {
		if (node_has_sp_base(node))
			attr->addr.immediate.offset += sp_offset;
		else {
			assert(arch_get_irn_register_in(node, n_ia32_base)
			       == &ia32_registers[REG_EBP]);
			/* we calculate offsets relative to the SP value at function begin,
			 * but EBP points after the saved old frame pointer */
			attr->addr.immediate.offset += IA32_REGISTER_SIZE;
		}
		attr->addr.immediate.kind = X86_IMM_VALUE;
	}
}

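/**
 * Stack pointer simulation callback: tracks the stack pointer offset across
 * @p node and resolves frame offsets relative to it.
 */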
static void ia32_sp_sim(ir_node *const node, stack_pointer_state_t *state)
{
	/* Pop nodes modify the stack pointer before calculating destination
	 * address, so do this first */
	if (is_ia32_Pop(node) || is_ia32_PopMem(node)) {
		ia32_attr_t const *const attr = get_ia32_attr_const(node);
		state->offset -= x86_bytes_from_size(attr->size);
	}

	if (!state->no_change)
		ia32_determine_frameoffset(node, state->offset);

	if (is_ia32_Call(node)) {
		state->offset -= get_ia32_call_attr_const(node)->pop;
	} else if (is_ia32_Push(node)) {
		ia32_attr_t const *const attr = get_ia32_attr_const(node);
		state->offset += x86_bytes_from_size(attr->size);
	} else if (is_ia32_Leave(node) || is_ia32_CopyEbpEsp(node)) {
		state->offset        = 0;
		state->align_padding = 0;
	} else if (is_ia32_SubSP(node)) {
		state->align_padding = 0;
	}
}

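/** Returns the change of the stack pointer value (in bytes) caused by @p node. */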
int ia32_get_sp_change(ir_node *const node)
{
	if (be_is_IncSP(node))
		return -be_get_IncSP_offset(node);
	stack_pointer_state_t state = {
		.offset    = 160,
		.no_change = true,
	};
	ia32_sp_sim(node, &state);
	int res = 160 - state.offset;
	assert(-16 <= res && res <= 16);
	return res;
}

/**
 * Get the estimated cycle count for @p irn.
 *
 * @param irn  The node.
 *
 * @return     The estimated cycle count for this operation
 */
static unsigned ia32_get_op_estimated_cost(ir_node const *const irn)
{
	if (!is_ia32_irn(irn))
		return 1;

	if (is_ia32_CopyB_i(irn)) {
		unsigned const size = get_ia32_copyb_size(irn);
		return 20 + size * 4 / 3;
	}

	unsigned cost = get_ia32_latency(irn);

	/* in case of address mode operations add additional cycles */
	if (get_ia32_op_type(irn) != ia32_Normal) {
		if (get_ia32_frame_use(irn) != IA32_FRAME_USE_NONE || (
		      is_ia32_NoReg_GP(get_irn_n(irn, n_ia32_base)) &&
		      is_ia32_NoReg_GP(get_irn_n(irn, n_ia32_index)))) {
			/* Stack access, assume it is cached. */
			cost += 5;
		} else {
			/* Access probably elsewhere. */
			cost += 20;
		}
	}

	return cost;
}

/**
 * Check if irn can load its operand at position i from memory (source address mode).
 *
 * @param irn    The irn to be checked
 * @param i      The operand's position
 * @return whether operand can be loaded
 */
static bool ia32_possible_memory_operand(const ir_node *irn, unsigned int i)
{
	if (!is_ia32_irn(irn)                    || /* must be an ia32 irn */
	    get_ia32_op_type(irn) != ia32_Normal || /* must not already be an address mode irn */
	    get_ia32_frame_use(irn) != IA32_FRAME_USE_NONE) /* must not already use frame */
		return false;

	switch (get_ia32_am_support(irn)) {
	case ia32_am_none:
		return false;

	case ia32_am_unary:
		if (i != n_ia32_unary_op)
			return false;
		break;

	case ia32_am_binary:
		switch (i) {
		case n_ia32_binary_left: {
			if (!is_ia32_commutative(irn))
				return false;

			/* we can't swap left/right for limited registers
			 * (As this (currently) breaks constraint handling copies) */
			arch_register_req_t const *const req = arch_get_irn_register_req_in(irn, n_ia32_binary_left);
			if (req->limited != NULL)
				return false;
			break;
		}

		case n_ia32_binary_right:
			break;

		default:
			return false;
		}
		break;

	default:
		panic("unknown AM type");
	}

	/* HACK: must not already use "real" memory.
	 * This can happen for Call and Div. While we should be able to use Sync
	 * this currently confuses the spillslot coalescing code. */
	if (!is_NoMem(get_irn_n(irn, n_ia32_mem)))
		return false;

	ir_node           *const op   = get_irn_n(irn, i);
	ir_node     const *const load = get_Proj_pred(op);
	ia32_attr_t const *const attr = get_ia32_attr_const(load);
	if (attr->size > X86_SIZE_64)
		return false;
	/* Don't do reload folding for x87 nodes for now, as we can't predict yet
	 * whether the spillslot must be widened to 80bit for which no AM operations
	 * exist. */
	if (is_ia32_fld(load))
		return false;

	return true;
}

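/**
 * Folds the reload feeding operand @p i of @p irn into the node by switching
 * it to source address mode.
 */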
static void ia32_perform_memory_operand(ir_node *irn, unsigned int i)
{
	if (!ia32_possible_memory_operand(irn, i))
		return;

	ir_node           *const op           = get_irn_n(irn, i);
	ir_node           *const load         = get_Proj_pred(op);
	x86_insn_size_t    const load_size    = get_ia32_attr_const(load)->size;
	ir_node           *const spill        = get_irn_n(load, n_ia32_mem);
	ia32_attr_t       *const attr         = get_ia32_attr(irn);
	x86_insn_size_t    const dest_op_size = attr->size;
	if (load_size <= dest_op_size)
		attr->size = load_size;
	set_ia32_op_type(irn, ia32_AddrModeS);
	set_ia32_frame_use(irn, IA32_FRAME_USE_AUTO);

	if (i == n_ia32_binary_left                    &&
	    get_ia32_am_support(irn) == ia32_am_binary &&
	    /* immediates are only allowed on the right side */
	    !is_ia32_Immediate(get_irn_n(irn, n_ia32_binary_right))) {
		ia32_swap_left_right(irn);
		i = n_ia32_binary_right;
	}

	assert(is_NoMem(get_irn_n(irn, n_ia32_mem)));

	set_irn_n(irn, n_ia32_base, get_irg_frame(get_irn_irg(irn)));
	set_irn_n(irn, n_ia32_mem,  spill);
	set_irn_n(irn, i,           ia32_get_admissible_noreg(irn, i));
	attr->addr.variant = X86_ADDR_BASE;
	set_ia32_is_reload(irn);

	/* kill the reload */
	assert(get_irn_n_edges(op) == 0);
	assert(get_irn_n_edges(load) == 1);
	sched_remove(load);
	kill_node(op);
	kill_node(load);
}

static bool gprof;

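/**
 * Turns a destination address mode node back into an explicit
 * Load / operation / Store sequence.
 */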
static ir_node *ia32_turn_back_dest_am(ir_node *node)
{
	typedef ir_node *construct_binop_func(
		dbg_info *db, ir_node *block,
		ir_node *base, ir_node *index, ir_node *mem,
		ir_node *op1, ir_node *op2, x86_insn_size_t size);

	x86_insn_size_t const size = get_ia32_attr_const(node)->size;
	bool is_8bit = size == X86_SIZE_8;

	construct_binop_func *func;
	switch (get_ia32_irn_opcode(node)) {
	case iro_ia32_AddMem: func = is_8bit ? new_bd_ia32_Add_8bit : new_bd_ia32_Add; break;
	case iro_ia32_AndMem: func = is_8bit ? new_bd_ia32_And_8bit : new_bd_ia32_And; break;
	case iro_ia32_OrMem:  func = is_8bit ? new_bd_ia32_Or_8bit  : new_bd_ia32_Or;  break;
	case iro_ia32_SubMem: func = is_8bit ? new_bd_ia32_Sub_8bit : new_bd_ia32_Sub; break;
	case iro_ia32_XorMem: func = is_8bit ? new_bd_ia32_Xor_8bit : new_bd_ia32_Xor; break;
	default: panic("cannot turn back DestAM for %+F", node);
	}

	dbg_info *const dbgi  = get_irn_dbg_info(node);
	ir_node  *const block = get_nodes_block(node);
	ir_node  *const base  = get_irn_n(node, n_ia32_base);
	ir_node  *const idx   = get_irn_n(node, n_ia32_index);
	ir_node  *const mem   = get_irn_n(node, n_ia32_mem);
	ir_node  *const load  = new_bd_ia32_Load(dbgi, block, base, idx, mem, size, false);
	ia32_copy_am_attrs(load, node);
	if (is_ia32_is_reload(node))
		set_ia32_is_reload(load);
	sched_add_before(node, load);
	ir_node *const load_res = be_new_Proj(load, pn_ia32_Load_res);
	ir_node *const load_mem = be_new_Proj(load, pn_ia32_Load_M);

	ir_graph *const irg      = get_irn_irg(node);
	ir_node  *const noreg    = ia32_new_NoReg_gp(irg);
	ir_node  *const nomem    = get_irg_no_mem(irg);
	ir_node  *const operand  = get_irn_n(node, n_ia32_binary_left);
	ir_node  *const new_node = func(dbgi, block, noreg, noreg, nomem, load_res, operand, size);
	set_irn_mode(new_node, mode_T);

	arch_set_irn_register_out(new_node, pn_ia32_flags, &ia32_registers[REG_EFLAGS]);

	ir_node *const res_proj = be_new_Proj(new_node, pn_ia32_res);
	ir_node *const store    = is_8bit ? new_bd_ia32_Store_8bit(dbgi, block, base, idx, load_mem, res_proj, size)
	                                  : new_bd_ia32_Store(dbgi, block, base, idx, load_mem, res_proj, size);
	ia32_copy_am_attrs(store, node);
	set_ia32_op_type(store, ia32_AddrModeD);
	sched_add_after(node, store);

	ir_node *const mem_proj = get_Proj_for_pn(node, pn_ia32_M);
	set_Proj_pred(mem_proj, store);
	set_Proj_num(mem_proj, pn_ia32_Store_M);

	sched_replace(node, new_node);
	exchange(node, new_node);
	return new_node;
}

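/**
 * Turns a source address mode node back into a normal node with an explicit
 * Load in front of it; returns the result Proj of that Load.
 */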
ir_node *ia32_turn_back_am(ir_node *node)
{
	ia32_attr_t *const attr = get_ia32_attr(node);
	dbg_info *dbgi     = get_irn_dbg_info(node);
	ir_graph *irg      = get_irn_irg(node);
	ir_node  *block    = get_nodes_block(node);
	ir_node  *base     = get_irn_n(node, n_ia32_base);
	ir_node  *idx      = get_irn_n(node, n_ia32_index);
	ir_node  *mem      = get_irn_n(node, n_ia32_mem);
	ir_node  *load     = new_bd_ia32_Load(dbgi, block, base, idx, mem,
	                                      attr->size, attr->sign_extend);
	ir_node  *load_res = be_new_Proj(load, pn_ia32_Load_res);

	ia32_copy_am_attrs(load, node);
	if (is_ia32_is_reload(node))
		set_ia32_is_reload(load);
	set_irn_n(node, n_ia32_mem, get_irg_no_mem(irg));

	switch (get_ia32_am_support(node)) {
	case ia32_am_unary:
		set_irn_n(node, n_ia32_unary_op, load_res);
		break;

	case ia32_am_binary:
		if (is_ia32_Immediate(get_irn_n(node, n_ia32_binary_right))) {
			set_irn_n(node, n_ia32_binary_left, load_res);
		} else {
			set_irn_n(node, n_ia32_binary_right, load_res);
		}
		break;

	default:
		panic("unknown AM type");
	}
	ir_node *noreg = ia32_new_NoReg_gp(irg);
	set_irn_n(node, n_ia32_base,  noreg);
	set_irn_n(node, n_ia32_index, noreg);
	attr->addr.immediate = (x86_imm32_t) { .kind = X86_IMM_VALUE, .offset = 0 };
	attr->addr.log_scale = 0;
	attr->frame_use      = IA32_FRAME_USE_NONE;

	/* rewire mem-proj */
	if (get_irn_mode(node) == mode_T) {
		foreach_out_edge(node, edge) {
			ir_node *out = get_edge_src_irn(edge);
			if (get_irn_mode(out) == mode_M) {
				set_Proj_pred(out, load);
				set_Proj_num(out, pn_ia32_Load_M);
				break;
			}
		}
	}

	set_ia32_op_type(node, ia32_Normal);
	if (sched_is_scheduled(node))
		sched_add_before(node, load);

	return load_res;
}

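/** Rematerializes a flags-producing node by placing a copy of it after @p after. */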
static ir_node *flags_remat(ir_node *node, ir_node *after)
{
	/* we should turn back address modes when rematerializing nodes */
	ir_node *const block = get_block(after);

	ia32_op_type_t type = get_ia32_op_type(node);
	switch (type) {
	case ia32_AddrModeS:
		ia32_turn_back_am(node);
		break;

	case ia32_AddrModeD:
		node = ia32_turn_back_dest_am(node);
		break;

	default:
		assert(type == ia32_Normal);
		break;
	}

	ir_node *copy = exact_copy(node);
	set_nodes_block(copy, block);
	sched_add_after(after, copy);
	return copy;
}

COMPILETIME_ASSERT((int)(n_ia32_Sub_minuend)    == (int)(n_ia32_Cmp_left) &&
                   (int)(n_ia32_Sub_subtrahend) == (int)(n_ia32_Cmp_right),
                   Cmp_and_Sub_operand_numbers_equal)

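/**
 * Tries to replace the flags value @p flags used by @p consumers with the
 * already available value @p available, which compares the same operands in
 * swapped order; the consumers' condition codes are inverted accordingly.
 */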
static bool ia32_try_replace_flags(ir_node *consumers, ir_node *flags, ir_node *available)
{
	if (!is_ia32_Sub(flags) && !is_ia32_Cmp(flags))
		return false;

	unsigned pn;
	if (is_ia32_Sub(available)) {
		pn = pn_ia32_Sub_flags;
	} else if (is_ia32_Cmp(available)) {
		pn = pn_ia32_Cmp_eflags;
	} else {
		return false;
	}

	/* Assuming CSE would have found the more obvious case */
	ir_node *const flags_left  = get_irn_n(flags,     n_ia32_binary_left);
	ir_node *const avail_right = get_irn_n(available, n_ia32_binary_right);
	if (flags_left != avail_right)
		return false;
	ir_node *const avail_left  = get_irn_n(available, n_ia32_binary_left);
	ir_node *const flags_right = get_irn_n(flags,     n_ia32_binary_right);
	if (avail_left != flags_right)
		return false;

	/* We can use available if we reverse the consumers' condition codes. */
	arch_set_irn_register_out(available, pn, &ia32_registers[REG_EFLAGS]);
	ir_node *const proj      = get_irn_mode(available) == mode_T ? be_get_or_make_Proj_for_pn(available, pn) : available;
	ir_mode *const flag_mode = ia32_reg_classes[CLASS_ia32_flags].mode;
	for (ir_node *c = consumers; c != NULL; c = get_irn_link(c)) {
		x86_condition_code_t cc = get_ia32_condcode(c);
		set_ia32_condcode(c, x86_invert_condition_code(cc));

		foreach_irn_in(c, i, in) {
			if (get_irn_mode(in) == flag_mode)
				set_irn_n(c, i, proj);
		}
	}
	return true;
}

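/**
 * Walker: replaces a Sub whose result is unused (except for a possible Keep)
 * by an equivalent Cmp.
 */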
static void remat_simplifier(ir_node *node, void *env)
{
	(void)env;

	/* A Sub with unused result is a Cmp. */
	if (is_ia32_Sub(node) && get_irn_mode(node) == mode_T) {
		ir_node *projs[] = { [pn_ia32_Sub_M] = NULL };
		foreach_out_edge(node, out) {
			ir_node *const proj = get_edge_src_irn(out);
			unsigned const num  = get_Proj_num(proj);
			assert(num < ARRAY_SIZE(projs));
			assert(!projs[num] && "duplicate Proj");
			projs[num] = proj;
		}

		ir_node       *res_keep = NULL;
		ir_node *const sub_res  = projs[pn_ia32_Sub_res];
		if (sub_res) {
			foreach_out_edge(sub_res, out) {
				ir_node *const user = get_edge_src_irn(out);
				if (be_is_Keep(user)) {
					assert(!res_keep && "Proj has two be_Keep");
					res_keep = user;
				} else {
					return;
				}
			}
		}

		dbg_info *const dbgi    = get_irn_dbg_info(node);
		ir_node  *const block   = get_nodes_block(node);
		ir_node  *const base    = get_irn_n(node, n_ia32_Sub_base);
		ir_node  *const idx     = get_irn_n(node, n_ia32_Sub_index);
		ir_node  *const mem     = get_irn_n(node, n_ia32_Sub_mem);
		ir_node  *const minu    = get_irn_n(node, n_ia32_Sub_minuend);
		ir_node  *const subt    = get_irn_n(node, n_ia32_Sub_subtrahend);
		x86_insn_size_t const size = get_ia32_attr_const(node)->size;
		bool            is_8bit = size == X86_SIZE_8;
		ir_node        *cmp     = is_8bit ? new_bd_ia32_Cmp_8bit(dbgi, block, base, idx, mem, minu, subt, size, false)
		                                  : new_bd_ia32_Cmp(dbgi, block, base, idx, mem, minu, subt, size, false);
		arch_set_irn_register(cmp, &ia32_registers[REG_EFLAGS]);
		ia32_copy_am_attrs(cmp, node);

		sched_replace(node, cmp);

		if (get_ia32_op_type(node) == ia32_AddrModeS) {
			set_ia32_op_type(cmp, ia32_AddrModeS);
			set_irn_mode(cmp, mode_T);

			ir_node *const sub_mem = projs[pn_ia32_Sub_M];
			if (sub_mem) {
				ir_node *const proj_M = be_new_Proj(cmp, pn_ia32_Cmp_M);
				exchange(sub_mem, proj_M);
			}

			cmp = be_new_Proj(cmp, pn_ia32_Cmp_eflags);
		} else {
			assert(get_ia32_op_type(node) == ia32_Normal);
		}

		exchange(projs[pn_ia32_Sub_flags], cmp);

		if (res_keep) {
			sched_remove(res_keep);
			remove_keep_alive(res_keep);
			kill_node(res_keep);
		}
		kill_node(node);
	}
}

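/**
 * Applies remat_simplifier to all nodes of @p irg and removes Bads from the
 * End node afterwards.
 */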
static void simplify_remat_nodes(ir_graph *irg)
{
	irg_walk_graph(irg, remat_simplifier, NULL, NULL);
	remove_End_Bads_and_doublets(get_irg_end(irg));
}

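/**
 * Creates a spill store of @p value to the stack frame, scheduled after
 * @p after; returns the store's memory Proj.
 */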
static ir_node *ia32_new_spill(ir_node *value, ir_node *after)
{
	ir_graph       *irg   = get_irn_irg(value);
	ir_node        *block = get_block(after);
	ir_node        *frame = get_irg_frame(irg);
	ir_node        *noreg = ia32_new_NoReg_gp(irg);
	ir_node        *nomem = get_irg_no_mem(irg);

	// FIXME: Find a way to not duplicate logic with ia32_new_reload()
	arch_register_req_t   const *const req  = arch_get_irn_register_req(value);
	arch_register_class_t const *const cls  = req->cls;
	ir_node               const *const skip = skip_Proj_const(value);
	ir_node         *res;
	ir_node         *store;
	if (cls == &ia32_reg_classes[CLASS_ia32_gp]) {
		x86_insn_size_t size = X86_SIZE_32;
		if (is_ia32_Load(skip))
			size = get_ia32_attr_const(skip)->size;

		store = size == X86_SIZE_8
			? new_bd_ia32_Store_8bit(NULL, block, frame, noreg, nomem, value, size)
			: new_bd_ia32_Store     (NULL, block, frame, noreg, nomem, value, size);
		res   = be_new_Proj(store, pn_ia32_Store_M);
	} else if (cls == &ia32_reg_classes[CLASS_ia32_fp]) {
		x86_insn_size_t size  = X86_SIZE_80;
		if (is_ia32_fld(skip))
			size = get_ia32_attr_const(skip)->size;
		store = new_bd_ia32_fst(NULL, block, frame, noreg, nomem, value, size);
		res   = be_new_Proj(store, pn_ia32_fst_M);
	} else {
		assert(cls == &ia32_reg_classes[CLASS_ia32_xmm]);
		// TODO: find out when we can use xStore and only store 64bit
		store = new_bd_ia32_xxStore(NULL, block, frame, noreg, nomem, value,
		                            X86_SIZE_128);
		res   = be_new_Proj(store, pn_ia32_xxStore_M);
	}

	ia32_attr_t *const attr = get_ia32_attr(store);
	attr->addr.variant = X86_ADDR_BASE;
	set_ia32_op_type(store, ia32_AddrModeD);
	set_ia32_frame_use(store, IA32_FRAME_USE_AUTO);
	set_ia32_is_spill(store);
	sched_add_after(after, store);

	return res;
}

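/**
 * Creates a load of the spilled @p value from its @p spill memory, scheduled
 * before @p before; returns the reloaded value.
 */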
static ir_node *ia32_new_reload(ir_node *value, ir_node *spill, ir_node *before)
{
	ir_graph *const irg   = get_irn_irg(before);
	ir_node  *const block = get_block(before);
	ir_node  *const noreg = ia32_new_NoReg_gp(irg);
	ir_node  *const frame = get_irg_frame(irg);

	// FIXME: Find a way to not duplicate logic with ia32_new_spill()
	arch_register_req_t   const *const req  = arch_get_irn_register_req(value);
	arch_register_class_t const *const cls  = req->cls;
	ir_node               const *const skip = skip_Proj_const(value);
	ir_node        *load;
	if (cls == &ia32_reg_classes[CLASS_ia32_gp]) {
		x86_insn_size_t size = X86_SIZE_32;
		if (is_ia32_Load(skip))
			size = get_ia32_attr_const(skip)->size;
		load = new_bd_ia32_Load(NULL, block, frame, noreg, spill, size, false);
	} else if (cls == &ia32_reg_classes[CLASS_ia32_fp]) {
		x86_insn_size_t size = X86_SIZE_80;
		if (is_ia32_fld(skip))
			size = get_ia32_attr_const(skip)->size;
		load = new_bd_ia32_fld(NULL, block, frame, noreg, spill, size);
	} else {
		assert(cls == &ia32_reg_classes[CLASS_ia32_xmm]);
		// TODO: find out when we can use xLoad and only load 64bit
		/* Reload 128 bit SSE registers */
		load = new_bd_ia32_xxLoad(NULL, block, frame, noreg, spill,
		                          X86_SIZE_128);
	}
	ia32_attr_t *const attr = get_ia32_attr(load);
	attr->addr.variant = X86_ADDR_BASE;
	set_ia32_op_type(load, ia32_AddrModeS);
	set_ia32_frame_use(load, IA32_FRAME_USE_AUTO);
	set_ia32_is_reload(load);
	arch_add_irn_flags(load, arch_irn_flag_reload);
	sched_add_before(before, load);

	return be_new_Proj(load, pn_ia32_res);
}

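/**
 * Creates a Push of a frame entity (used by the MemPerm lowering), scheduled
 * before @p schedpoint.
 */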
static ir_node *create_push(ir_node *node, ir_node *schedpoint, ir_node *sp,
                            ir_node *mem, ir_entity *ent,
                            x86_insn_size_t const size)
{
	dbg_info *dbgi  = get_irn_dbg_info(node);
	ir_node  *block = get_nodes_block(node);
	ir_graph *irg   = get_irn_irg(node);
	ir_node  *noreg = ia32_new_NoReg_gp(irg);
	ir_node  *frame = get_irg_frame(irg);

	ir_node *const push = new_bd_ia32_Push(dbgi, block, frame, noreg, mem,
	                                       noreg, sp, size);
	ia32_attr_t *const attr = get_ia32_attr(push);
	attr->addr = (x86_addr_t) {
		.immediate = (x86_imm32_t) {
			.kind   = X86_IMM_FRAMEENT,
			.entity = ent,
		},
		.variant = X86_ADDR_BASE,
	};
	set_ia32_frame_use(push, IA32_FRAME_USE_AUTO);
	set_ia32_op_type(push, ia32_AddrModeS);
	set_ia32_is_spill(push);

	sched_add_before(schedpoint, push);
	return push;
}

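/**
 * Creates a PopMem that pops into a frame entity (used by the MemPerm
 * lowering), scheduled before @p schedpoint.
 */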
static ir_node *create_pop(ir_node *node, ir_node *schedpoint, ir_node *sp,
                           ir_entity *ent, x86_insn_size_t size)
{
	dbg_info *dbgi  = get_irn_dbg_info(node);
	ir_node  *block = get_nodes_block(node);
	ir_graph *irg   = get_irn_irg(node);
	ir_node  *noreg = ia32_new_NoReg_gp(irg);
	ir_node  *frame = get_irg_frame(irg);
	ir_node  *pop   = new_bd_ia32_PopMem(dbgi, block, frame, noreg,
	                                     get_irg_no_mem(irg), sp, size);
	ia32_attr_t *const attr = get_ia32_attr(pop);
	attr->addr = (x86_addr_t) {
		.immediate = (x86_imm32_t) {
			.kind   = X86_IMM_FRAMEENT,
			.entity = ent,
		},
		.variant = X86_ADDR_BASE,
	};
	set_ia32_frame_use(pop, IA32_FRAME_USE_AUTO);
	set_ia32_op_type(pop, ia32_AddrModeD);
	set_ia32_is_reload(pop);
	sched_add_before(schedpoint, pop);
	return pop;
}

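/** Creates a Proj yielding the new stack pointer value of @p pred. */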
static ir_node *create_spproj(ir_node *const pred, unsigned const pos)
{
	return be_new_Proj_reg(pred, pos, &ia32_registers[REG_ESP]);
}

/**
 * Transform a MemPerm: currently we do this the ugly way and produce
 * push/pop cascades into/from memory. This is possible without using
 * any registers.
 */
static void transform_MemPerm(ir_node *node)
{
	ir_graph *irg   = get_irn_irg(node);
	ir_node  *sp    = be_get_Start_proj(irg, &ia32_registers[REG_ESP]);
	int       arity = be_get_MemPerm_entity_arity(node);
	ir_node **pops  = ALLOCAN(ir_node*, arity);

	/* create Pushs */
	for (int i = 0; i < arity; ++i) {
		ir_entity *inent = be_get_MemPerm_in_entity(node, i);
		ir_entity *outent = be_get_MemPerm_out_entity(node, i);
		assert(inent->kind == IR_ENTITY_SPILLSLOT);
		assert(outent->kind == IR_ENTITY_SPILLSLOT);
		unsigned entsize = inent->attr.spillslot.size;
		unsigned entsize2 = outent->attr.spillslot.size;
		ir_node *mem = get_irn_n(node, i);

		/* work around cases where entities have different sizes */
		if (entsize2 < entsize)
			entsize = entsize2;

		int offset = 0;
		do {
			x86_insn_size_t size;
			if (entsize%2 == 1) {
				size = X86_SIZE_8;
			} else if (entsize % 4 == 2) {
				size = X86_SIZE_16;
			} else {
				assert(entsize%4 == 0);
				size = X86_SIZE_32;
			}

			ir_node *push = create_push(node, node, sp, mem, inent, size);
			sp = create_spproj(push, pn_ia32_Push_stack);
			ia32_attr_t *const attr = get_ia32_attr(push);
			attr->addr.immediate.offset = offset;

			unsigned size_bytes = x86_bytes_from_size(size);
			offset  += size_bytes;
			entsize -= size_bytes;
		} while(entsize > 0);
		set_irn_n(node, i, new_r_Bad(irg, mode_X));
	}

	/* create pops */
	for (int i = arity; i-- > 0; ) {
		ir_entity *inent = be_get_MemPerm_in_entity(node, i);
		ir_entity *outent = be_get_MemPerm_out_entity(node, i);
		assert(inent->kind == IR_ENTITY_SPILLSLOT);
		assert(outent->kind == IR_ENTITY_SPILLSLOT);
		unsigned entsize = outent->attr.spillslot.size;
		unsigned entsize2 = inent->attr.spillslot.size;

		/* work around cases where entities have different sizes */
		if (entsize2 < entsize)
			entsize = entsize2;

		int      offset = entsize;
		ir_node *pop;
		do {
			x86_insn_size_t size;
			if (entsize%2 == 1) {
				size = X86_SIZE_8;
			} else if (entsize%4 == 2) {
				size = X86_SIZE_16;
			} else {
				assert(entsize%4 == 0);
				size = X86_SIZE_32;
			}
			pop = create_pop(node, node, sp, outent, size);