/*
 * This file is part of libFirm.
 * Copyright (C) 2012 University of Karlsruhe.
 */

/**
 * @file
 * @brief       This is the main ia32 firm backend driver.
 * @author      Christian Wuerdig
 */
#include "ia32_bearch_t.h"

#include "be_t.h"
#include "beflags.h"
#include "begnuas.h"
#include "bemodule.h"
#include "bera.h"
#include "besched.h"
#include "bespillslots.h"
#include "bestack.h"
#include "betranshlp.h"
#include "beutil.h"
#include "bevarargs.h"
#include "gen_ia32_regalloc_if.h"
#include "ia32_architecture.h"
#include "ia32_emitter.h"
#include "ia32_encode.h"
#include "ia32_new_nodes.h"
#include "ia32_optimize.h"
#include "ia32_transform.h"
#include "ident_t.h"
#include "instrument.h"
#include "ircons.h"
#include "iredges_t.h"
#include "irgmod.h"
#include "irgopt.h"
#include "irgwalk.h"
#include "iropt_t.h"
#include "irtools.h"
#include "lc_opts_enum.h"
#include "lower_alloc.h"
#include "lower_builtins.h"
#include "lower_calls.h"
#include "lower_mode_b.h"
#include "lower_softfloat.h"
#include "lowering.h"
#include "panic.h"
#include "x86_x87.h"

pmap *ia32_tv_ent; /**< A map of entities that store const tarvals */

ir_mode *ia32_mode_fpcw;
ir_mode *ia32_mode_flags;
ir_mode *ia32_mode_gp;
ir_mode *ia32_mode_float64;
ir_mode *ia32_mode_float32;

static bool return_small_struct_in_regs;

typedef ir_node *(*create_const_node_func) (dbg_info *dbgi, ir_node *block);

/**
 * Used to create per-graph unique pseudo nodes.
 */
static inline ir_node *create_const(ir_graph *irg, ir_node **place,
                                    create_const_node_func func,
                                    const arch_register_t* reg)
{
	if (*place != NULL)
		return *place;

	ir_node *block = get_irg_start_block(irg);
	ir_node *res   = func(NULL, block);
	arch_set_irn_register(res, reg);
	*place = res;
	/* We need a keep edge on our cached nodes, so that following firm
	 * irgwalks will not miss them. */
	keep_alive(res);
	return res;
}

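/** Returns this graph's cached NoReg pseudo node for the gp register class. */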
ir_node *ia32_new_NoReg_gp(ir_graph *irg)
{
	ia32_irg_data_t *irg_data = ia32_get_irg_data(irg);
	return create_const(irg, &irg_data->noreg_gp, new_bd_ia32_NoReg_GP,
	                    &ia32_registers[REG_GP_NOREG]);
}

ir_node *ia32_new_NoReg_fp(ir_graph *irg)
{
	ia32_irg_data_t *irg_data = ia32_get_irg_data(irg);
	return create_const(irg, &irg_data->noreg_fp, new_bd_ia32_NoReg_FP,
	                    &ia32_registers[REG_FP_NOREG]);
}

ir_node *ia32_new_NoReg_xmm(ir_graph *irg)
{
	ia32_irg_data_t *irg_data = ia32_get_irg_data(irg);
	return create_const(irg, &irg_data->noreg_xmm, new_bd_ia32_NoReg_XMM,
	                    &ia32_registers[REG_XMM_NOREG]);
}

ir_node *ia32_new_Fpu_truncate(ir_graph *irg)
{
	ia32_irg_data_t *irg_data = ia32_get_irg_data(irg);
	return create_const(irg, &irg_data->fpu_trunc_mode, new_bd_ia32_ChangeCW,
	                    &ia32_registers[REG_FPCW]);
}

/**
 * Returns the admissible noreg register node for the input at position pos of node irn.
 */
static ir_node *ia32_get_admissible_noreg(ir_node *irn, int pos)
{
	ir_graph                  *irg = get_irn_irg(irn);
	const arch_register_req_t *req = arch_get_irn_register_req_in(irn, pos);
	if (req->cls == &ia32_reg_classes[CLASS_ia32_gp])
		return ia32_new_NoReg_gp(irg);

	if (ia32_cg_config.use_sse2) {
		return ia32_new_NoReg_xmm(irg);
	} else {
		return ia32_new_NoReg_fp(irg);
	}
}

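/**
 * Sets the frame entity of a node that accesses the stack frame; for
 * IA32_FRAME_USE_AUTO nodes the access size is updated from @p size as well.
 */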
static void ia32_set_frame_entity(ir_node *node, ir_entity *entity,
                                  unsigned size, unsigned po2align)
{
	ia32_attr_t *const attr = get_ia32_attr(node);
	attr->addr.immediate = (x86_imm32_t) {
		.kind   = X86_IMM_FRAMEENT,
		.entity = entity,
		.offset = attr->addr.immediate.offset,
	};
	assert(get_ia32_frame_use(node) != IA32_FRAME_USE_NONE);

	/* set the access size based on the entity unless we explicitly requested
	 * a certain size */
	if (get_ia32_frame_use(node) != IA32_FRAME_USE_AUTO
	 || is_ia32_Cmp(node) || is_ia32_Conv_I2I(node))
		return;
	(void)po2align;
	attr->size = size == 12 ? X86_SIZE_80 : x86_size_from_bytes(size);
	/* 8bit stores have a special register requirement, so we can't simply
	 * change size to 8bit here. The "hack" in ia32_collect_frame_entity_nodes()
	 * should take care that it never happens. */
	assert(!is_ia32_Store(node) || attr->size > X86_SIZE_8);
}

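/** Returns whether the base register of @p node is the stack pointer (%esp). */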
static bool node_has_sp_base(ir_node const *const node)
{
	assert(is_ia32_irn(node));
	arch_register_t const *const base_reg
		= arch_get_irn_register_in(node, n_ia32_base);
	return base_reg == &ia32_registers[REG_ESP];
}

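/**
 * Translates a frame entity reference in the address of @p node into a
 * concrete offset relative to the stack or frame pointer.
 */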
static void ia32_determine_frameoffset(ir_node *node, int sp_offset)
{
	if (!is_ia32_irn(node))
		return;

	ia32_attr_t *const attr = get_ia32_attr(node);
	if (attr->addr.immediate.kind == X86_IMM_FRAMEENT) {
#ifndef NDEBUG
		attr->old_frame_ent = attr->addr.immediate.entity;
#endif
		attr->addr.immediate.offset
			+= get_entity_offset(attr->addr.immediate.entity);
		attr->addr.immediate.entity  = NULL;
		attr->addr.immediate.kind    = X86_IMM_FRAMEOFFSET;
	}

	if (attr->addr.immediate.kind == X86_IMM_FRAMEOFFSET) {
		if (node_has_sp_base(node))
			attr->addr.immediate.offset += sp_offset;
		else {
			assert(arch_get_irn_register_in(node, n_ia32_base)
			       == &ia32_registers[REG_EBP]);
			/* we calculate offsets relative to the SP value at function begin,
			 * but EBP points after the saved old frame pointer */
			attr->addr.immediate.offset += IA32_REGISTER_SIZE;
		}
		attr->addr.immediate.kind = X86_IMM_VALUE;
	}
}

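/**
 * Stack pointer simulation callback: tracks how @p node changes the stack
 * pointer and fixes up frame offsets using the current simulated state.
 */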
static void ia32_sp_sim(ir_node *const node, stack_pointer_state_t *state)
{
	/* Pop nodes modify the stack pointer before calculating the destination
	 * address, so do this first */
	if (is_ia32_Pop(node) || is_ia32_PopMem(node)) {
		ia32_attr_t const *const attr = get_ia32_attr_const(node);
		state->offset -= x86_bytes_from_size(attr->size);
	}

	if (!state->no_change)
		ia32_determine_frameoffset(node, state->offset);

	if (is_ia32_Call(node)) {
		state->offset -= get_ia32_call_attr_const(node)->pop;
	} else if (is_ia32_Push(node)) {
		ia32_attr_t const *const attr = get_ia32_attr_const(node);
		state->offset += x86_bytes_from_size(attr->size);
	} else if (is_ia32_Leave(node) || is_ia32_CopyEbpEsp(node)) {
		state->offset        = 0;
		state->align_padding = 0;
	} else if (is_ia32_SubSP(node)) {
		state->align_padding = 0;
	}
}

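/** Returns the stack pointer change caused by @p node, in bytes. */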
int ia32_get_sp_change(ir_node *const node)
{
	if (be_is_IncSP(node))
		return -be_get_IncSP_offset(node);
	stack_pointer_state_t state = {
		.offset    = 160,
		.no_change = true,
	};
	ia32_sp_sim(node, &state);
	int res = 160 - state.offset;
	assert(-16 <= res && res <= 16);
	return res;
}

/**
 * Get the estimated cycle count for @p irn.
 *
 * @param irn  The node.
 *
 * @return     The estimated cycle count for this operation
 */
static unsigned ia32_get_op_estimated_cost(ir_node const *const irn)
{
	if (!is_ia32_irn(irn))
		return 1;

	if (is_ia32_CopyB_i(irn)) {
		unsigned const size = get_ia32_copyb_size(irn);
		return 20 + size * 4 / 3;
	}

	unsigned cost = get_ia32_latency(irn);

	/* in case of address mode operations add additional cycles */
	if (get_ia32_op_type(irn) != ia32_Normal) {
		if (get_ia32_frame_use(irn) != IA32_FRAME_USE_NONE || (
		      is_ia32_NoReg_GP(get_irn_n(irn, n_ia32_base)) &&
		      is_ia32_NoReg_GP(get_irn_n(irn, n_ia32_index)))) {
			/* Stack access, assume it is cached. */
			cost += 5;
		} else {
			/* Access probably elsewhere. */
			cost += 20;
		}
	}

	return cost;
}

/**
 * Check if irn can load its operand at position i from memory (source address mode).
 * @param irn    The irn to be checked
 * @param i      The operand's position
 * @return whether the operand can be loaded
 */
static bool ia32_possible_memory_operand(const ir_node *irn, unsigned int i)
{
	if (!is_ia32_irn(irn)                    || /* must be an ia32 irn */
	    get_ia32_op_type(irn) != ia32_Normal || /* must not already be an address mode irn */
	    get_ia32_frame_use(irn) != IA32_FRAME_USE_NONE) /* must not already use frame */
		return false;

	switch (get_ia32_am_support(irn)) {
	case ia32_am_none:
		return false;

	case ia32_am_unary:
		if (i != n_ia32_unary_op)
			return false;
		break;

	case ia32_am_binary:
		switch (i) {
		case n_ia32_binary_left: {
			if (!is_ia32_commutative(irn))
				return false;

			/* we can't swap left/right for limited registers
			 * (As this (currently) breaks constraint handling copies) */
			arch_register_req_t const *const req = arch_get_irn_register_req_in(irn, n_ia32_binary_left);
			if (req->limited != NULL)
				return false;
			break;
		}

		case n_ia32_binary_right:
			break;

		default:
			return false;
		}
		break;

	default:
		panic("unknown AM type");
	}

	/* HACK: must not already use "real" memory.
	 * This can happen for Call and Div. While we should be able to use Sync
	 * this currently confuses the spillslot coalescing code. */
	if (!is_NoMem(get_irn_n(irn, n_ia32_mem)))
		return false;

	ir_node           *const op   = get_irn_n(irn, i);
	ir_node     const *const load = get_Proj_pred(op);
	ia32_attr_t const *const attr = get_ia32_attr_const(load);
	if (attr->size > X86_SIZE_64)
		return false;
	/* Don't do reload folding for x87 nodes for now, as we can't predict yet
	 * whether the spillslot must be widened to 80bit for which no AM operations
	 * exist. */
	if (is_ia32_fld(load))
		return false;

	return true;
}

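/**
 * Folds the reload feeding operand @p i of @p irn into the node itself by
 * switching it to source address mode, then kills the obsolete load.
 */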
static void ia32_perform_memory_operand(ir_node *irn, unsigned int i)
{
	if (!ia32_possible_memory_operand(irn, i))
		return;

	ir_node           *const op           = get_irn_n(irn, i);
	ir_node           *const load         = get_Proj_pred(op);
	x86_insn_size_t    const load_size    = get_ia32_attr_const(load)->size;
	ir_node           *const spill        = get_irn_n(load, n_ia32_mem);
	ia32_attr_t       *const attr         = get_ia32_attr(irn);
	x86_insn_size_t    const dest_op_size = attr->size;
	if (load_size <= dest_op_size)
		attr->size = load_size;
	set_ia32_op_type(irn, ia32_AddrModeS);
	set_ia32_frame_use(irn, IA32_FRAME_USE_AUTO);

	if (i == n_ia32_binary_left                    &&
	    get_ia32_am_support(irn) == ia32_am_binary &&
	    /* immediates are only allowed on the right side */
	    !is_ia32_Immediate(get_irn_n(irn, n_ia32_binary_right))) {
		ia32_swap_left_right(irn);
		i = n_ia32_binary_right;
	}

	assert(is_NoMem(get_irn_n(irn, n_ia32_mem)));

	set_irn_n(irn, n_ia32_base, get_irg_frame(get_irn_irg(irn)));
	set_irn_n(irn, n_ia32_mem,  spill);
	set_irn_n(irn, i,           ia32_get_admissible_noreg(irn, i));
	attr->addr.variant = X86_ADDR_BASE;
	set_ia32_is_reload(irn);

	/* kill the reload */
	assert(get_irn_n_edges(op) == 0);
	assert(get_irn_n_edges(load) == 1);
	sched_remove(load);
	kill_node(op);
	kill_node(load);
}

static bool gprof;

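/**
 * Turns a destination address mode node (e.g. AddMem) back into an explicit
 * Load / operation / Store sequence and returns the new operation node.
 */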
static ir_node *ia32_turn_back_dest_am(ir_node *node)
{
	typedef ir_node *construct_binop_func(
		dbg_info *db, ir_node *block,
		ir_node *base, ir_node *index, ir_node *mem,
		ir_node *op1, ir_node *op2, x86_insn_size_t size);

	x86_insn_size_t const size = get_ia32_attr_const(node)->size;
	bool is_8bit = size == X86_SIZE_8;

	construct_binop_func *func;
	switch (get_ia32_irn_opcode(node)) {
	case iro_ia32_AddMem: func = is_8bit ? new_bd_ia32_Add_8bit : new_bd_ia32_Add; break;
	case iro_ia32_AndMem: func = is_8bit ? new_bd_ia32_And_8bit : new_bd_ia32_And; break;
	case iro_ia32_OrMem:  func = is_8bit ? new_bd_ia32_Or_8bit  : new_bd_ia32_Or;  break;
	case iro_ia32_SubMem: func = is_8bit ? new_bd_ia32_Sub_8bit : new_bd_ia32_Sub; break;
	case iro_ia32_XorMem: func = is_8bit ? new_bd_ia32_Xor_8bit : new_bd_ia32_Xor; break;
	default: panic("cannot turn back DestAM for %+F", node);
	}

	dbg_info *const dbgi  = get_irn_dbg_info(node);
	ir_node  *const block = get_nodes_block(node);
	ir_node  *const base  = get_irn_n(node, n_ia32_base);
	ir_node  *const idx   = get_irn_n(node, n_ia32_index);
	ir_node  *const mem   = get_irn_n(node, n_ia32_mem);
	ir_node  *const load  = new_bd_ia32_Load(dbgi, block, base, idx, mem, size, false);
	ia32_copy_am_attrs(load, node);
	if (is_ia32_is_reload(node))
		set_ia32_is_reload(load);
	sched_add_before(node, load);
	ir_node *const load_res = be_new_Proj(load, pn_ia32_Load_res);
	ir_node *const load_mem = be_new_Proj(load, pn_ia32_Load_M);

	ir_graph *const irg      = get_irn_irg(node);
	ir_node  *const noreg    = ia32_new_NoReg_gp(irg);
	ir_node  *const nomem    = get_irg_no_mem(irg);
	ir_node  *const operand  = get_irn_n(node, n_ia32_binary_left);
	ir_node  *const new_node = func(dbgi, block, noreg, noreg, nomem, load_res, operand, size);
	set_irn_mode(new_node, mode_T);

	arch_set_irn_register_out(new_node, pn_ia32_flags, &ia32_registers[REG_EFLAGS]);

	ir_node *const res_proj = be_new_Proj(new_node, pn_ia32_res);
	ir_node *const store    = is_8bit ? new_bd_ia32_Store_8bit(dbgi, block, base, idx, load_mem, res_proj, size)
	                                  : new_bd_ia32_Store(dbgi, block, base, idx, load_mem, res_proj, size);
	ia32_copy_am_attrs(store, node);
	set_ia32_op_type(store, ia32_AddrModeD);
	sched_add_after(node, store);

	ir_node *const mem_proj = get_Proj_for_pn(node, pn_ia32_M);
	set_Proj_pred(mem_proj, store);
	set_Proj_num(mem_proj, pn_ia32_Store_M);

	sched_replace(node, new_node);
	exchange(node, new_node);
	return new_node;
}

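/**
 * Turns a source address mode node back into an explicit Load and returns the
 * Proj of the load result that now feeds the node.
 */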
ir_node *ia32_turn_back_am(ir_node *node)
{
	ia32_attr_t *const attr = get_ia32_attr(node);
	dbg_info *dbgi     = get_irn_dbg_info(node);
	ir_graph *irg      = get_irn_irg(node);
	ir_node  *block    = get_nodes_block(node);
	ir_node  *base     = get_irn_n(node, n_ia32_base);
	ir_node  *idx      = get_irn_n(node, n_ia32_index);
	ir_node  *mem      = get_irn_n(node, n_ia32_mem);
	ir_node  *load     = new_bd_ia32_Load(dbgi, block, base, idx, mem,
	                                      attr->size, attr->sign_extend);
	ir_node  *load_res = be_new_Proj(load, pn_ia32_Load_res);

	ia32_copy_am_attrs(load, node);
	if (is_ia32_is_reload(node))
		set_ia32_is_reload(load);
	set_irn_n(node, n_ia32_mem, get_irg_no_mem(irg));

	switch (get_ia32_am_support(node)) {
	case ia32_am_unary:
		set_irn_n(node, n_ia32_unary_op, load_res);
		break;

	case ia32_am_binary:
		if (is_ia32_Immediate(get_irn_n(node, n_ia32_binary_right))) {
			set_irn_n(node, n_ia32_binary_left, load_res);
		} else {
			set_irn_n(node, n_ia32_binary_right, load_res);
		}
		break;

	default:
		panic("unknown AM type");
	}
	ir_node *noreg = ia32_new_NoReg_gp(irg);
	set_irn_n(node, n_ia32_base,  noreg);
	set_irn_n(node, n_ia32_index, noreg);
	attr->addr.immediate = (x86_imm32_t) { .kind = X86_IMM_VALUE, .offset = 0 };
	attr->addr.log_scale = 0;
	attr->frame_use      = IA32_FRAME_USE_NONE;

	/* rewire mem-proj */
	if (get_irn_mode(node) == mode_T) {
		foreach_out_edge(node, edge) {
			ir_node *out = get_edge_src_irn(edge);
			if (get_irn_mode(out) == mode_M) {
				set_Proj_pred(out, load);
				set_Proj_num(out, pn_ia32_Load_M);
				break;
			}
		}
	}

	set_ia32_op_type(node, ia32_Normal);
	if (sched_is_scheduled(node))
		sched_add_before(node, load);

	return load_res;
}

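/**
 * Rematerializes a flags-producing node after @p after; address mode nodes
 * are turned back into plain operations before being copied.
 */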
static ir_node *flags_remat(ir_node *node, ir_node *after)
{
	/* we should turn back address modes when rematerializing nodes */
	ir_node *const block = get_block(after);

	ia32_op_type_t type = get_ia32_op_type(node);
	switch (type) {
	case ia32_AddrModeS:
		ia32_turn_back_am(node);
		break;

	case ia32_AddrModeD:
		node = ia32_turn_back_dest_am(node);
		break;

	default:
		assert(type == ia32_Normal);
		break;
	}

	ir_node *copy = exact_copy(node);
	set_nodes_block(copy, block);
	sched_add_after(after, copy);
	return copy;
}

COMPILETIME_ASSERT((int)(n_ia32_Sub_minuend)    == (int)(n_ia32_Cmp_left) &&
                   (int)(n_ia32_Sub_subtrahend) == (int)(n_ia32_Cmp_right),
                   Cmp_and_Sub_operand_numbers_equal)

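/**
 * Tries to use the flags produced by @p available (a Sub/Cmp with swapped
 * operands) instead of @p flags by inverting the consumers' condition codes.
 */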
static bool ia32_try_replace_flags(ir_node *consumers, ir_node *flags, ir_node *available)
{
	if (!is_ia32_Sub(flags) && !is_ia32_Cmp(flags))
		return false;
	unsigned pn;
	if (is_ia32_Sub(available)) {
		pn = pn_ia32_Sub_flags;
	} else if (is_ia32_Cmp(available)) {
		pn = pn_ia32_Cmp_eflags;
	} else {
		return false;
	}
	/* Assuming CSE would have found the more obvious case */
	ir_node *const flags_left  = get_irn_n(flags,     n_ia32_binary_left);
	ir_node *const avail_right = get_irn_n(available, n_ia32_binary_right);
	if (flags_left != avail_right)
		return false;
	ir_node *const avail_left  = get_irn_n(available, n_ia32_binary_left);
	ir_node *const flags_right = get_irn_n(flags,     n_ia32_binary_right);
	if (avail_left != flags_right)
		return false;

	/* We can use available if we reverse the consumers' condition codes. */
	arch_set_irn_register_out(available, pn, &ia32_registers[REG_EFLAGS]);
	ir_node *const proj      = get_irn_mode(available) == mode_T ? be_get_or_make_Proj_for_pn(available, pn) : available;
	ir_mode *const flag_mode = ia32_reg_classes[CLASS_ia32_flags].mode;
	for (ir_node *c = consumers; c != NULL; c = get_irn_link(c)) {
		x86_condition_code_t cc = get_ia32_condcode(c);
		set_ia32_condcode(c, x86_invert_condition_code(cc));

		foreach_irn_in(c, i, in) {
			if (get_irn_mode(in) == flag_mode)
				set_irn_n(c, i, proj);
		}
	}
	return true;
}

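/**
 * Walker: turns a Sub whose result is unused (except for a Keep) into an
 * equivalent Cmp, as only the flags are needed.
 */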
static void remat_simplifier(ir_node *node, void *env)
{
	(void)env;

	/* A Sub with unused result is a Cmp. */
	if (is_ia32_Sub(node) && get_irn_mode(node) == mode_T) {
		ir_node *projs[] = { [pn_ia32_Sub_M] = NULL };
		foreach_out_edge(node, out) {
			ir_node *const proj = get_edge_src_irn(out);
			unsigned const num  = get_Proj_num(proj);
			assert(num < ARRAY_SIZE(projs));
			assert(!projs[num] && "duplicate Proj");
			projs[num] = proj;
		}

		ir_node       *res_keep = NULL;
		ir_node *const sub_res  = projs[pn_ia32_Sub_res];
		if (sub_res) {
			foreach_out_edge(sub_res, out) {
				ir_node *const user = get_edge_src_irn(out);
				if (be_is_Keep(user)) {
					assert(!res_keep && "Proj has two be_Keep");
					res_keep = user;
				} else {
					return;
				}
			}
		}

		dbg_info *const dbgi    = get_irn_dbg_info(node);
		ir_node  *const block   = get_nodes_block(node);
		ir_node  *const base    = get_irn_n(node, n_ia32_Sub_base);
		ir_node  *const idx     = get_irn_n(node, n_ia32_Sub_index);
		ir_node  *const mem     = get_irn_n(node, n_ia32_Sub_mem);
		ir_node  *const minu    = get_irn_n(node, n_ia32_Sub_minuend);
		ir_node  *const subt    = get_irn_n(node, n_ia32_Sub_subtrahend);
		x86_insn_size_t const size = get_ia32_attr_const(node)->size;
		bool            is_8bit = size == X86_SIZE_8;
		ir_node        *cmp     = is_8bit ? new_bd_ia32_Cmp_8bit(dbgi, block, base, idx, mem, minu, subt, size, false)
		                                  : new_bd_ia32_Cmp(dbgi, block, base, idx, mem, minu, subt, size, false);
		arch_set_irn_register(cmp, &ia32_registers[REG_EFLAGS]);
		ia32_copy_am_attrs(cmp, node);

		sched_replace(node, cmp);

		if (get_ia32_op_type(node) == ia32_AddrModeS) {
			set_ia32_op_type(cmp, ia32_AddrModeS);
			set_irn_mode(cmp, mode_T);

			ir_node *const sub_mem = projs[pn_ia32_Sub_M];
			if (sub_mem) {
				ir_node *const proj_M = be_new_Proj(cmp, pn_ia32_Cmp_M);
				exchange(sub_mem, proj_M);
			}

			cmp = be_new_Proj(cmp, pn_ia32_Cmp_eflags);
		} else {
			assert(get_ia32_op_type(node) == ia32_Normal);
		}

		exchange(projs[pn_ia32_Sub_flags], cmp);

		if (res_keep) {
			sched_remove(res_keep);
			remove_keep_alive(res_keep);
			kill_node(res_keep);
		}
		kill_node(node);
	}
}

static void simplify_remat_nodes(ir_graph *irg)
{
	irg_walk_graph(irg, remat_simplifier, NULL, NULL);
	remove_End_Bads_and_doublets(get_irg_end(irg));
}
636
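/**
 * Creates a spill by storing @p value to a frame spill slot after @p after and
 * returns the resulting memory Proj.
 */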
static ir_node *ia32_new_spill(ir_node *value, ir_node *after)
637
{
638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657
	ir_graph       *irg   = get_irn_irg(value);
	ir_node        *block = get_block(after);
	ir_node        *frame = get_irg_frame(irg);
	ir_node        *noreg = ia32_new_NoReg_gp(irg);
	ir_node        *nomem = get_irg_no_mem(irg);

	// FIXME: Find a way to not duplicate logic with ia32_new_reload()
	arch_register_req_t   const *const req  = arch_get_irn_register_req(value);
	arch_register_class_t const *const cls  = req->cls;
	ir_node               const *const skip = skip_Proj_const(value);
	ir_node         *res;
	ir_node         *store;
	if (cls == &ia32_reg_classes[CLASS_ia32_gp]) {
		x86_insn_size_t size = X86_SIZE_32;
		if (is_ia32_Load(skip))
			size = get_ia32_attr_const(skip)->size;

		store = size == X86_SIZE_8
			? new_bd_ia32_Store_8bit(NULL, block, frame, noreg, nomem, value, size)
			: new_bd_ia32_Store     (NULL, block, frame, noreg, nomem, value, size);
658
		res   = be_new_Proj(store, pn_ia32_Store_M);
659 660 661 662 663 664 665 666 667 668 669 670
	} else if (cls == &ia32_reg_classes[CLASS_ia32_fp]) {
		x86_insn_size_t size  = X86_SIZE_80;
		if (is_ia32_fld(skip))
			size = get_ia32_attr_const(skip)->size;
		store = new_bd_ia32_fst(NULL, block, frame, noreg, nomem, value, size);
		res   = be_new_Proj(store, pn_ia32_fst_M);
	} else {
		assert(cls == &ia32_reg_classes[CLASS_ia32_xmm]);
		// TODO: find out when we can use xStore and only store 64bit
		store = new_bd_ia32_xxStore(NULL, block, frame, noreg, nomem, value,
		                            X86_SIZE_128);
		res   = be_new_Proj(store, pn_ia32_xxStore_M);
Christian Würdig's avatar
Christian Würdig committed
671
	}
672

673 674
	ia32_attr_t *const attr = get_ia32_attr(store);
	attr->addr.variant = X86_ADDR_BASE;
675
	set_ia32_op_type(store, ia32_AddrModeD);
676
	set_ia32_frame_use(store, IA32_FRAME_USE_AUTO);
677
	set_ia32_is_spill(store);
678
	sched_add_after(after, store);
Christian Würdig's avatar
Christian Würdig committed
679

680 681 682 683 684
	return res;
}

static ir_node *ia32_new_reload(ir_node *value, ir_node *spill, ir_node *before)
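/**
 * Creates a reload of @p value from the spill slot reached via @p spill before
 * @p before and returns the result Proj.
 */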
{
685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704
	ir_graph *const irg   = get_irn_irg(before);
	ir_node  *const block = get_block(before);
	ir_node  *const noreg = ia32_new_NoReg_gp(irg);
	ir_node  *const frame = get_irg_frame(irg);

	// FIXME: Find a way to not duplicate logic with ia32_new_spill()
	arch_register_req_t   const *const req  = arch_get_irn_register_req(value);
	arch_register_class_t const *const cls  = req->cls;
	ir_node               const *const skip = skip_Proj_const(value);
	ir_node        *load;
	if (cls == &ia32_reg_classes[CLASS_ia32_gp]) {
		x86_insn_size_t size = X86_SIZE_32;
		if (is_ia32_Load(skip))
			size = get_ia32_attr_const(skip)->size;
		load = new_bd_ia32_Load(NULL, block, frame, noreg, spill, size, false);
	} else if (cls == &ia32_reg_classes[CLASS_ia32_fp]) {
		x86_insn_size_t size = X86_SIZE_80;
		if (is_ia32_fld(skip))
			size = get_ia32_attr_const(skip)->size;
		load = new_bd_ia32_fld(NULL, block, frame, noreg, spill, size);
705
	} else {
706 707 708 709 710
		assert(cls == &ia32_reg_classes[CLASS_ia32_xmm]);
		// TODO: find out when we can use xLoad and only load 64bit
		/* Reload 128 bit SSE registers */
		load = new_bd_ia32_xxLoad(NULL, block, frame, noreg, spill,
		                          X86_SIZE_128);
711
	}
712 713
	ia32_attr_t *const attr = get_ia32_attr(load);
	attr->addr.variant = X86_ADDR_BASE;
714
	set_ia32_op_type(load, ia32_AddrModeS);
715
	set_ia32_frame_use(load, IA32_FRAME_USE_AUTO);
716 717 718 719
	set_ia32_is_reload(load);
	arch_add_irn_flags(load, arch_irn_flag_reload);
	sched_add_before(before, load);

720
	return be_new_Proj(load, pn_ia32_res);
Christian Würdig's avatar
Christian Würdig committed
721 722
}

Matthias Braun's avatar
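/**
 * Creates a Push reading from frame entity @p ent (source address mode) and
 * schedules it before @p schedpoint.
 */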
Matthias Braun committed
723
static ir_node *create_push(ir_node *node, ir_node *schedpoint, ir_node *sp,
724 725
                            ir_node *mem, ir_entity *ent,
                            x86_insn_size_t const size)
726
{
727 728 729 730 731
	dbg_info *dbgi  = get_irn_dbg_info(node);
	ir_node  *block = get_nodes_block(node);
	ir_graph *irg   = get_irn_irg(node);
	ir_node  *noreg = ia32_new_NoReg_gp(irg);
	ir_node  *frame = get_irg_frame(irg);
732

733
	ir_node *const push = new_bd_ia32_Push(dbgi, block, frame, noreg, mem,
734
	                                       noreg, sp, size);
735
	ia32_attr_t *const attr = get_ia32_attr(push);
736 737 738 739 740 741
	attr->addr = (x86_addr_t) {
		.immediate = (x86_imm32_t) {
			.kind   = X86_IMM_FRAMEENT,
			.entity = ent,
		},
		.variant = X86_ADDR_BASE,
742 743
	};
	set_ia32_frame_use(push, IA32_FRAME_USE_AUTO);
744
	set_ia32_op_type(push, ia32_AddrModeS);
745
	set_ia32_is_spill(push);
746 747 748 749 750

	sched_add_before(schedpoint, push);
	return push;
}

static ir_node *create_pop(ir_node *node, ir_node *schedpoint, ir_node *sp,
                           ir_entity *ent, x86_insn_size_t size)
{
	dbg_info *dbgi  = get_irn_dbg_info(node);
	ir_node  *block = get_nodes_block(node);
	ir_graph *irg   = get_irn_irg(node);
	ir_node  *noreg = ia32_new_NoReg_gp(irg);
	ir_node  *frame = get_irg_frame(irg);
	ir_node  *pop   = new_bd_ia32_PopMem(dbgi, block, frame, noreg,
	                                     get_irg_no_mem(irg), sp, size);
	ia32_attr_t *const attr = get_ia32_attr(pop);