/*
 * This file is part of libFirm.
 * Copyright (C) 2012 University of Karlsruhe.
 */

/**
 * @file
 * @brief    The main amd64 backend driver file.
 */
#include "amd64_bearch_t.h"
#include "amd64_emitter.h"
#include "amd64_finish.h"
#include "amd64_new_nodes.h"
#include "amd64_optimize.h"
#include "amd64_transform.h"
#include "amd64_varargs.h"
#include "beflags.h"
#include "beirg.h"
#include "bemodule.h"
#include "bera.h"
#include "besched.h"
#include "bespillslots.h"
#include "bestack.h"
#include "beutil.h"
#include "debug.h"
#include "gen_amd64_regalloc_if.h"
#include "irarch_t.h"
#include "ircons.h"
#include "iredges_t.h"
#include "irgmod.h"
#include "irgopt.h"
#include "irgwalk.h"
#include "iropt_t.h"
#include "irtools.h"
#include "lower_alloc.h"
#include "lower_builtins.h"
#include "lower_calls.h"
#include "lower_mode_b.h"
#include "lowering.h"
#include "panic.h"

DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)

pmap *amd64_constants;

ir_mode *amd64_mode_xmm;

static int get_insn_size_bytes(amd64_insn_size_t size)
{
	switch (size) {
	case INSN_SIZE_8:       return 1;
	case INSN_SIZE_16:      return 2;
	case INSN_SIZE_32:      return 4;
	case INSN_SIZE_64:      return 8;
	case INSN_SIZE_128:     return 16;
	case INSN_SIZE_80:      break;
	}
	panic("bad insn mode");
}

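/**
 * Create a push_am node that pushes the value stored in frame entity @p ent
 * onto the stack and schedule it before @p schedpoint.
 */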
static ir_node *create_push(ir_node *node, ir_node *schedpoint, ir_node *sp,
                            ir_node *mem, ir_entity *ent,
                            amd64_insn_size_t size)
{
	dbg_info *dbgi  = get_irn_dbg_info(node);
	ir_node  *block = get_nodes_block(node);
	ir_graph *irg   = get_irn_irg(node);
	ir_node  *frame = get_irg_frame(irg);

	amd64_addr_t addr = {
		.immediate = {
			.kind   = X86_IMM_FRAMEENT,
			.entity = ent,
		},
		.variant    = X86_ADDR_BASE,
		.base_input = 1,
	};
	ir_node *in[] = { sp, frame, mem };
	ir_node *const push = new_bd_amd64_push_am(dbgi, block, ARRAY_SIZE(in), in, rsp_reg_mem_reqs, size, addr);
	sched_add_before(schedpoint, push);
	return push;
}

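/**
 * Create a pop_am node that pops the top of the stack into frame entity
 * @p ent and schedule it before @p schedpoint.
 */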
static ir_node *create_pop(ir_node *node, ir_node *schedpoint, ir_node *sp,
                           ir_entity *ent, amd64_insn_size_t size)
{
	dbg_info *dbgi  = get_irn_dbg_info(node);
	ir_node  *block = get_nodes_block(node);
	ir_graph *irg   = get_irn_irg(node);
	ir_node  *frame = get_irg_frame(irg);

	amd64_addr_t addr = {
		.immediate = {
			.kind   = X86_IMM_FRAMEENT,
			.entity = ent,
		},
		.variant     = X86_ADDR_BASE,
		.base_input  = 1,
	};
	ir_node *in[] = { sp, frame, get_irg_no_mem(irg) };

	ir_node *const pop = new_bd_amd64_pop_am(dbgi, block, ARRAY_SIZE(in), in, rsp_reg_mem_reqs, size, addr);
	sched_add_before(schedpoint, pop);

	return pop;
}

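/** Create a Proj for the stack pointer result of @p pred. */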
static ir_node* create_spproj(ir_node *pred, int pos)
{
	return be_new_Proj_reg(pred, pos, &amd64_registers[REG_RSP]);
}

/**
 * Transform a MemPerm. Currently we do this the ugly way and produce
 * cascades of pushes into/pops from memory. This works without using
 * any registers.
 */
static void transform_MemPerm(ir_node *node)
{
	ir_graph *irg   = get_irn_irg(node);
	ir_node  *sp    = be_get_Start_proj(irg, &amd64_registers[REG_RSP]);
	int       arity = be_get_MemPerm_entity_arity(node);
	ir_node **pops  = ALLOCAN(ir_node*, arity);

	/* create Pushs */
	for (int i = 0; i < arity; ++i) {
		ir_entity *inent = be_get_MemPerm_in_entity(node, i);
		ir_entity *outent = be_get_MemPerm_out_entity(node, i);
		assert(inent->kind == IR_ENTITY_SPILLSLOT);
		assert(outent->kind == IR_ENTITY_SPILLSLOT);
		unsigned entsize = inent->attr.spillslot.size;
		unsigned entsize2 = outent->attr.spillslot.size;
		ir_node *mem = get_irn_n(node, i);

		/* work around cases where entities have different sizes */
		if (entsize2 < entsize)
			entsize = entsize2;

		int offset = 0;
		do {
			amd64_insn_size_t size;
			if (entsize%2 == 1) {
				size = INSN_SIZE_8;
			} else if (entsize % 4 == 2) {
				size = INSN_SIZE_16;
			} else if (entsize % 8 == 4) {
				size = INSN_SIZE_32;
			} else {
				assert(entsize%8 == 0);
				size = INSN_SIZE_64;
			}

			ir_node *push = create_push(node, node, sp, mem, inent, size);
			sp = create_spproj(push, pn_amd64_push_am_stack);
			get_amd64_addr_attr(push)->addr.immediate.offset = offset;

			unsigned bytes = get_insn_size_bytes(size);
			offset  += bytes;
			entsize -= bytes;
		} while(entsize > 0);
		set_irn_n(node, i, new_r_Bad(irg, mode_X));
	}

	/* create pops */
	for (int i = arity; i-- > 0; ) {
		ir_entity *inent = be_get_MemPerm_in_entity(node, i);
		ir_entity *outent = be_get_MemPerm_out_entity(node, i);
		assert(inent->kind == IR_ENTITY_SPILLSLOT);
		assert(outent->kind == IR_ENTITY_SPILLSLOT);
		unsigned entsize = outent->attr.spillslot.size;
		unsigned entsize2 = inent->attr.spillslot.size;

		/* work around cases where entities have different sizes */
		if (entsize2 < entsize)
			entsize = entsize2;

		int      offset = entsize;
		ir_node *pop;
		do {
			amd64_insn_size_t size;
			if (entsize%2 == 1) {
				size = INSN_SIZE_8;
			} else if (entsize % 4 == 2) {
				size = INSN_SIZE_16;
			} else if (entsize % 8 == 4) {
				size = INSN_SIZE_32;
			} else {
				assert(entsize%8 == 0);
				size = INSN_SIZE_64;
			}

			pop = create_pop(node, node, sp, outent, size);
			sp  = create_spproj(pop, pn_amd64_pop_am_stack);

			unsigned bytes = get_insn_size_bytes(size);
			offset  -= bytes;
			entsize -= bytes;
			get_amd64_addr_attr(pop)->addr.immediate.offset = offset;
		} while(entsize > 0);
		pops[i] = pop;
	}

	ir_node *const keep = be_new_Keep_one(sp);
	sched_replace(node, keep);

	/* exchange memprojs */
	foreach_out_edge_safe(node, edge) {
		ir_node *proj = get_edge_src_irn(edge);
		int p = get_Proj_num(proj);

		assert(p < arity);

		set_Proj_pred(proj, pops[p]);
		set_Proj_num(proj, pn_amd64_pop_am_M);
	}

	/* remove memperm */
	kill_node(node);
}

static void amd64_after_ra_walker(ir_node *block, void *data)
{
	(void) data;

	sched_foreach_reverse_safe(block, node) {
		if (be_is_MemPerm(node)) {
			transform_MemPerm(node);
		}
	}
}

/**
 * rewrite unsigned long -> float/double conversion
 * x86_64 only has a signed conversion, so we use a crazy SSE construction
 * instead (we first saw this pattern in LLVM): We split the 64-bit value into
 * two 32-bit values and place them into the mantissa parts of appropriately
 * chosen floating-point values, then add the two values together. In pseudo code:
 *
 * a = (vector unsigned long, unsigned long) x;
 * b = (vector unsigned, unsigned, unsigned, unsigned)
 *       upper half of 0x1p+52, upper half of 0x1p+84, 0, 0
 * c = repack (a[0], b[0], a[1], b[1])
 * d = (vector double) 0x1p+52, 0x1p+84
 * e = c - d
 * f = e[0] + e[1]
 */
static void rewrite_unsigned_float_Conv(ir_node *node)
{
	ir_graph  *irg    = get_irn_irg(node);
	dbg_info  *dbgi   = get_irn_dbg_info(node);
	ir_node   *block  = get_nodes_block(node);
	ir_node   *in     = get_Conv_op(node);
	ir_node   *in_xmm = new_r_Conv(block, in, amd64_mode_xmm);
	ir_tarval *magic0
		= new_integer_tarval_from_str("4530000043300000", 16, 0, 16,
		                              amd64_mode_xmm);
	ir_node   *const0 = new_r_Const(irg, magic0);
	collect_new_start_block_node(const0);
	ir_node   *punpck = new_bd_amd64_l_punpckldq(dbgi, block, in_xmm, const0);
	ir_tarval *magic1
		= new_integer_tarval_from_str("45300000000000004330000000000000", 32,
		                              0, 16, amd64_mode_xmm);
	ir_node   *const1 = new_r_Const(irg, magic1);
	collect_new_start_block_node(const1);
	ir_node   *subpd  = new_bd_amd64_l_subpd(dbgi, block, punpck, const1);
	ir_node   *haddpd = new_bd_amd64_l_haddpd(dbgi, block, subpd, subpd);
	ir_mode   *mode   = get_irn_mode(node);
	ir_node   *conv   = new_r_Conv(block, haddpd, mode);
	exchange(node, conv);
}

/* Creates a 64-bit constant with only the sign bit set,
 * i.e. returns 0x8000000000000000
 */
static ir_node *create_sign_bit_const(ir_graph *irg)
{
	ir_tarval *sign_tv = create_sign_tv(mode_Ls);
	return new_r_Const(irg, sign_tv);
}

/* rewrite float/double -> unsigned long conversion
 * x86_64 only has a signed conversion so we rewrite to the following:
 *
 * if (x >= 9223372036854775808.) {
 *   converted = (int)(x-9223372036854775808.) ^ 0x8000000000000000;
 * } else {
 *   converted = (int)x;
 * }
 * return (unsigned)converted;
 */
static void rewrite_float_unsigned_Conv(ir_node *node)
{
	ir_graph *irg        = get_irn_irg(node);
	dbg_info *dbgi       = get_irn_dbg_info(node);
	ir_node *lower_block = get_nodes_block(node);
	ir_mode *dest_mode   = get_irn_mode(node);

	part_block(node);

	ir_node   *block    = get_nodes_block(node);
	ir_node   *fp_x     = get_Conv_op(node);
	ir_mode   *src_mode = get_irn_mode(fp_x);
	double     d_const  = 9223372036854775808.;
	ir_tarval *tv       = new_tarval_from_double(d_const, src_mode);
	ir_node   *fp_const = new_r_Const(irg, tv);
	collect_new_start_block_node(fp_const);

	/* Test if the sign bit is needed */
	ir_node *cmp         = new_rd_Cmp(dbgi, block, fp_x, fp_const,
	                                 ir_relation_greater_equal);
	ir_node *cond        = new_rd_Cond(dbgi, block, cmp);
	ir_node *proj_true   = new_r_Proj(cond, mode_X, pn_Cond_true);
	ir_node *proj_false  = new_r_Proj(cond, mode_X, pn_Cond_false);
	ir_node *in_true[1]  = { proj_true };
	ir_node *in_false[1] = { proj_false };

	/* true block: Do some arithmetic to use the signed conversion */
	ir_node *true_block  = new_r_Block(irg, ARRAY_SIZE(in_true), in_true);
	ir_node *true_jmp    = new_r_Jmp(true_block);
	ir_node *sub         = new_r_Sub(true_block, fp_const, fp_x, src_mode);
	ir_node *sub_conv    = new_rd_Conv(dbgi, true_block, sub, mode_Ls);
	ir_node *sign_bit    = create_sign_bit_const(irg);
	collect_new_start_block_node(sign_bit);
	ir_node *xor         = new_r_Eor(true_block, sub_conv, sign_bit, mode_Ls);
	ir_node *true_res    = new_rd_Conv(dbgi, true_block, xor, dest_mode);

	/* false block: Simply convert */
	ir_node *false_block  = new_r_Block(irg, ARRAY_SIZE(in_false), in_false);
	ir_node *false_jmp    = new_r_Jmp(false_block);
	ir_node *false_signed = new_rd_Conv(dbgi, false_block, fp_x, mode_Ls);
	ir_node *false_res    = new_rd_Conv(dbgi, false_block, false_signed,
	                                    dest_mode);

	/* lower block */
	ir_node *lower_in[2] = { true_jmp, false_jmp };
	ir_node *phi_in[2]   = { true_res, false_res };

	set_irn_in(lower_block, ARRAY_SIZE(lower_in), lower_in);
	ir_node *phi = new_r_Phi(lower_block, ARRAY_SIZE(phi_in), phi_in,
	                         dest_mode);
	collect_new_phi_node(phi);
	exchange(node, phi);
}

static bool amd64_rewrite_Conv(ir_node *node)
{
	ir_mode *to_mode    = get_irn_mode(node);
	ir_node *op         = get_Conv_op(node);
	ir_mode *from_mode  = get_irn_mode(op);
	bool     to_float   = mode_is_float(to_mode);
	bool     from_float = mode_is_float(from_mode);

	if (to_float && !from_float && !mode_is_signed(from_mode)
	    && get_mode_size_bits(from_mode) == 64
	    && to_mode != x86_mode_E) {
		rewrite_unsigned_float_Conv(node);
		return true;
	} else if (from_float && !to_float && !mode_is_signed(to_mode)
	           && get_mode_size_bits(to_mode) == 64
	           && from_mode != x86_mode_E) {
		rewrite_float_unsigned_Conv(node);
		return true;
	}

	return false;
}

static void amd64_intrinsics_walker(ir_node *node, void *data)
{
	bool *changed = (bool*)data;
	if (is_Conv(node)) {
		if (amd64_rewrite_Conv(node))
			*changed = true;
	}
}

static void amd64_handle_intrinsics(ir_graph *irg)
{
	ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_PHI_LIST);
	collect_phiprojs_and_start_block_nodes(irg);
	bool changed = false;
	irg_walk_graph(irg, amd64_intrinsics_walker, NULL, &changed);
	ir_free_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_PHI_LIST);

	if (changed) {
		confirm_irg_properties(irg,
		        IR_GRAPH_PROPERTY_NO_BADS
		        | IR_GRAPH_PROPERTY_NO_CRITICAL_EDGES
		        | IR_GRAPH_PROPERTY_MANY_RETURNS
		        | IR_GRAPH_PROPERTY_ONE_RETURN);
	}
}

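/**
 * Callback for the frame entity coalescer: assign the chosen frame entity
 * to the address immediate of @p node.
 */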
static void amd64_set_frame_entity(ir_node *node, ir_entity *entity,
                                   const ir_type *type)
{
	(void)type;
	amd64_addr_attr_t *attr = get_amd64_addr_attr(node);
	attr->addr.immediate.entity = entity;
}

static ir_type *get_type_for_insn_size(amd64_insn_size_t const size)
{
	/* TODO: do not hardcode node names here */
	switch (size) {
	case INSN_SIZE_128: return get_type_for_mode(amd64_mode_xmm);
	case INSN_SIZE_80:  return x86_type_E;
	default:            return get_type_for_mode(mode_Lu);
	}
}

/**
 * Collects nodes that need frame entities assigned.
 */
static void amd64_collect_frame_entity_nodes(ir_node *node, void *data)
{
	if (!is_amd64_irn(node))
		return;

	/* Disable coalescing for "returns twice" calls: In case of setjmp/longjmp
	 * our control flow graph isn't completely correct: There are no backedges
	 * from longjmp to the setjmp => coalescing would produce wrong results. */
	be_fec_env_t *const env = (be_fec_env_t*)data;
	if (is_amd64_call(node)) {
		const amd64_call_addr_attr_t    *attrs = get_amd64_call_addr_attr_const(node);
		const ir_type                   *type  = attrs->call_tp;
		const mtp_additional_properties  mtp
			= get_method_additional_properties(type);
		if (mtp & mtp_property_returns_twice)
			be_forbid_coalescing(env);
	}

	/* we are only interested in reporting Load nodes */
	if (!amd64_loads(node))
		return;

	const amd64_addr_attr_t *attr = get_amd64_addr_attr_const(node);
	x86_imm32_t       const *imm  = &attr->addr.immediate;
	if (imm->kind == X86_IMM_FRAMEENT && imm->entity == NULL) {
		const ir_type *type = get_type_for_insn_size(attr->size);
		be_load_needs_frame_entity(env, node, type);
	}
}

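/** Determine the input position of @p ret that carries the frame pointer (rbp). */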
static int determine_rbp_input(ir_node *ret)
{
	arch_register_t const *const bp = &amd64_registers[REG_RBP];
	foreach_irn_in(ret, i, input) {
		if (arch_get_irn_register(input) == bp)
			return i;
	}
	panic("no rbp input found at %+F", ret);
}

/**
 * prepare graph and perform code selection.
 */
static void amd64_select_instructions(ir_graph *irg)
{
	amd64_adjust_pic(irg);

	be_timer_push(T_CODEGEN);
	amd64_transform_graph(irg);
	be_timer_pop(T_CODEGEN);

	be_dump(DUMP_BE, irg, "code-selection");

	optimize_graph_df(irg);

	be_dump(DUMP_BE, irg, "opt");
}

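/**
 * Introduce the function epilogue before the return node @p ret: emit a leave
 * when a frame pointer is used, otherwise release the frame with an IncSP.
 */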
static void introduce_epilogue(ir_node *ret, bool omit_fp)
{
	ir_graph *irg      = get_irn_irg(ret);
	ir_node  *block    = get_nodes_block(ret);
	ir_node  *first_sp = get_irn_n(ret, n_amd64_ret_stack);
	ir_node  *curr_sp  = first_sp;

	if (!omit_fp) {
		int      const n_rbp    = determine_rbp_input(ret);
		ir_node       *curr_bp  = get_irn_n(ret, n_rbp);
		ir_node       *curr_mem = get_irn_n(ret, n_amd64_ret_mem);
		ir_node *const leave    = new_bd_amd64_leave(NULL, block, curr_bp, curr_mem);
		curr_mem = be_new_Proj(leave, pn_amd64_leave_M);
		curr_bp = be_new_Proj_reg(leave, pn_amd64_leave_frame, &amd64_registers[REG_RBP]);
		curr_sp = be_new_Proj_reg(leave, pn_amd64_leave_stack, &amd64_registers[REG_RSP]);
		sched_add_before(ret, leave);

		set_irn_n(ret, n_amd64_ret_mem, curr_mem);
		set_irn_n(ret, n_rbp,           curr_bp);
	} else {
		ir_type *frame_type = get_irg_frame_type(irg);
		unsigned frame_size = get_type_size(frame_type);
		ir_node *incsp = amd64_new_IncSP(block, curr_sp, -(int)frame_size,
										 true);
		sched_add_before(ret, incsp);
		curr_sp = incsp;
	}
	set_irn_n(ret, n_amd64_ret_stack, curr_sp);

	/* keep verifier happy... */
	if (get_irn_n_edges(first_sp) == 0 && is_Proj(first_sp)) {
		kill_node(first_sp);
	}
}

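/**
 * Introduce the function prologue after the Start node: push rbp and set up
 * the frame pointer, or just reserve the frame with an IncSP when the frame
 * pointer is omitted.
 */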
static void introduce_prologue(ir_graph *const irg, bool omit_fp)
{
	const arch_register_t *sp         = &amd64_registers[REG_RSP];
	const arch_register_t *bp         = &amd64_registers[REG_RBP];
	ir_node               *start      = get_irg_start(irg);
	ir_node               *block      = get_nodes_block(start);
	ir_type               *frame_type = get_irg_frame_type(irg);
	unsigned               frame_size = get_type_size(frame_type);
	ir_node               *initial_sp = be_get_Start_proj(irg, sp);

	if (!omit_fp) {
		/* push rbp */
		ir_node *const mem        = get_irg_initial_mem(irg);
		ir_node *const initial_bp = be_get_Start_proj(irg, bp);
		ir_node *const push       = new_bd_amd64_push_reg(NULL, block, initial_sp, mem, initial_bp);
		sched_add_after(start, push);
		ir_node *const curr_mem   = be_new_Proj(push, pn_amd64_push_reg_M);
		edges_reroute_except(mem, curr_mem, push);
		ir_node *const curr_sp    = be_new_Proj_reg(push, pn_amd64_push_reg_stack, sp);

		/* move rsp to rbp */
		ir_node *const curr_bp = be_new_Copy(block, curr_sp);
		sched_add_after(push, curr_bp);
		arch_copy_irn_out_info(curr_bp, 0, initial_bp);
		edges_reroute_except(initial_bp, curr_bp, push);

		ir_node *incsp = amd64_new_IncSP(block, curr_sp, frame_size, false);
		sched_add_after(curr_bp, incsp);
		edges_reroute_except(initial_sp, incsp, push);

		/* make sure the initial IncSP is really used by someone */
		be_keep_if_unused(incsp);
	} else {
		ir_node *const incsp = amd64_new_IncSP(block, initial_sp,
											   frame_size, false);
		sched_add_after(start, incsp);
		edges_reroute_except(initial_sp, incsp, incsp);
	}
}

static void introduce_prologue_epilogue(ir_graph *irg, bool omit_fp)
{
	/* introduce epilogue for every return node */
	foreach_irn_in(get_irg_end_block(irg), i, ret) {
		assert(is_amd64_ret(ret));
		introduce_epilogue(ret, omit_fp);
	}

	introduce_prologue(irg, omit_fp);
}

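/** Check whether the address @p addr of @p node uses the stack pointer as its
 * base register. */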
static bool node_has_sp_base(ir_node const *const node,
                             amd64_addr_t const *const addr)
{
	if (!x86_addr_variant_has_base(addr->variant))
		return false;
	arch_register_t const *const base_reg
		= arch_get_irn_register_in(node, addr->base_input);
	return base_reg == &amd64_registers[REG_RSP];
}

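/**
 * Translate frame entity references in the address of @p node into concrete
 * offsets relative to the stack or frame pointer.
 */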
static void amd64_determine_frameoffset(ir_node *node, int sp_offset)
{
	if (!is_amd64_irn(node)
	 || !amd64_has_addr_attr(get_amd64_attr_const(node)->op_mode))
		return;

	amd64_addr_t *const addr = &get_amd64_addr_attr(node)->addr;
	if (addr->immediate.kind == X86_IMM_FRAMEENT) {
		addr->immediate.offset += get_entity_offset(addr->immediate.entity);
		addr->immediate.entity  = NULL;
		addr->immediate.kind    = X86_IMM_FRAMEOFFSET;
	}

	if (addr->immediate.kind == X86_IMM_FRAMEOFFSET) {
		if (node_has_sp_base(node, addr))
			addr->immediate.offset += sp_offset;
		else {
			/* we calculate offsets relative to the SP value at function begin,
			 * but RBP points after the saved old frame pointer */
			addr->immediate.offset += AMD64_REGISTER_SIZE;
		}
		addr->immediate.kind = X86_IMM_VALUE;
	}
}

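/**
 * Stack pointer simulation callback: account for the stack pointer changes
 * caused by @p node and resolve its frame offsets.
 */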
static void amd64_sp_sim(ir_node *const node, stack_pointer_state_t *state)
{
	/* Pop nodes modify the stack pointer before calculating destination
	 * address, so do this first */
	if (is_amd64_pop_am(node)) {
		const amd64_addr_attr_t *attr = get_amd64_addr_attr_const(node);
		state->offset -= get_insn_size_bytes(attr->size);
	}

	amd64_determine_frameoffset(node, state->offset);

	if (is_amd64_push_am(node)) {
		const amd64_addr_attr_t *attr = get_amd64_addr_attr_const(node);
		state->offset       += get_insn_size_bytes(attr->size);
	} else if (is_amd64_push_reg(node)) {
		/* 64-bit register size */
		state->offset       += AMD64_REGISTER_SIZE;
	} else if (is_amd64_leave(node)) {
		state->offset        = 0;
		state->align_padding = 0;
	} else if (is_amd64_sub_sp(node)) {
		state->align_padding = 0;
	}
}

/**
 * Called immediately before the emit phase.
 */
static void amd64_finish_and_emit(ir_graph *irg)
{
	bool omit_fp = amd64_get_irg_data(irg)->omit_fp;

	/* create and coalesce frame entities */
	be_fec_env_t *fec_env = be_new_frame_entity_coalescer(irg);
	irg_walk_graph(irg, NULL, amd64_collect_frame_entity_nodes, fec_env);
	be_assign_entities(fec_env, amd64_set_frame_entity, omit_fp);
	be_free_frame_entity_coalescer(fec_env);

	ir_type *const frame = get_irg_frame_type(irg);
	be_sort_frame_entities(frame, omit_fp);
	unsigned const misalign = AMD64_REGISTER_SIZE; /* return address on stack */
	int      const begin    = omit_fp ? 0 : -AMD64_REGISTER_SIZE;
	be_layout_frame_type(frame, begin, misalign);

	irg_block_walk_graph(irg, NULL, amd64_after_ra_walker, NULL);

	introduce_prologue_epilogue(irg, omit_fp);

	/* fix stack entity offsets */
	be_fix_stack_nodes(irg, &amd64_registers[REG_RSP]);
	be_birg_from_irg(irg)->non_ssa_regs = NULL;
	unsigned const p2align = AMD64_PO2_STACK_ALIGNMENT;
	be_sim_stack_pointer(irg, misalign, p2align, amd64_sp_sim);

	/* Fix 2-address code constraints. */
	amd64_finish_irg(irg);

	amd64_simulate_graph_x87(irg);

	amd64_peephole_optimization(irg);

	/* emit code */
	be_timer_push(T_EMIT);
	amd64_emit_function(irg);
	be_timer_pop(T_EMIT);
}

static void amd64_finish(void)
{
	amd64_free_opcodes();
}

static const regalloc_if_t amd64_regalloc_if = {
	.spill_cost  = 7,
	.reload_cost = 5,
	.new_spill   = amd64_new_spill,
	.new_reload  = amd64_new_reload,
};

static void amd64_generate_code(FILE *output, const char *cup_name)
{
	amd64_constants = pmap_create();
	be_begin(output, cup_name);
	unsigned *const sp_is_non_ssa = rbitset_alloca(N_AMD64_REGISTERS);
	rbitset_set(sp_is_non_ssa, REG_RSP);

	foreach_irp_irg(i, irg) {
		if (!be_step_first(irg))
			continue;

		struct obstack *obst = be_get_be_obst(irg);
		be_birg_from_irg(irg)->isa_link = OALLOCZ(obst, amd64_irg_data_t);

		be_birg_from_irg(irg)->non_ssa_regs = sp_is_non_ssa;
		amd64_select_instructions(irg);

		be_step_schedule(irg);

		be_timer_push(T_RA_PREPARATION);
		be_sched_fix_flags(irg, &amd64_reg_classes[CLASS_amd64_flags], NULL,
						   NULL, NULL);
		be_timer_pop(T_RA_PREPARATION);

		be_step_regalloc(irg, &amd64_regalloc_if);

		amd64_finish_and_emit(irg);

		be_step_last(irg);
	}

	be_finish();
	pmap_destroy(amd64_constants);
}

static void amd64_lower_for_target(void)
{
	/* lower compound param handling */
	lower_calls_with_compounds(LF_RETURN_HIDDEN, NULL);
	be_after_irp_transform("lower-calls");

	foreach_irp_irg(i, irg) {
		lower_switch(irg, 4, 256, mode_Iu);
		be_after_transform(irg, "lower-switch");
	}

	foreach_irp_irg(i, irg) {
		/* lower for mode_b stuff */
		ir_lower_mode_b(irg, mode_Lu);
		be_after_transform(irg, "lower-modeb");
		lower_alloc(irg, AMD64_PO2_STACK_ALIGNMENT);
		be_after_transform(irg, "lower-alloc");
	}

	foreach_irp_irg(i, irg) {
		/* Turn all small CopyBs into loads/stores, and turn all bigger
		 * CopyBs into memcpy calls, because we cannot handle CopyB nodes
		 * during code generation yet.
		 * TODO:  Adapt this once custom CopyB handling is implemented. */
		lower_CopyB(irg, 64, 65, true);
		be_after_transform(irg, "lower-copyb");
	}

	ir_builtin_kind supported[6];
	size_t  s = 0;
	supported[s++] = ir_bk_ffs;
	supported[s++] = ir_bk_clz;
	supported[s++] = ir_bk_ctz;
	supported[s++] = ir_bk_compare_swap;
	supported[s++] = ir_bk_saturating_increment;
	supported[s++] = ir_bk_va_start;

	assert(s <= ARRAY_SIZE(supported));
	lower_builtins(s, supported);
	be_after_irp_transform("lower-builtins");
}

static int amd64_is_mux_allowed(ir_node *sel, ir_node *mux_false,
                                ir_node *mux_true)
{
	/* optimizable by middleend */
	if (ir_is_optimizable_mux(sel, mux_false, mux_true))
		return true;
	return false;
}

static const ir_settings_arch_dep_t amd64_arch_dep = {
	.also_use_subs        = true,
	.maximum_shifts       = 4,
	.highest_shift_amount = 63,
	.evaluate             = NULL,
	.allow_mulhs          = true,
	.allow_mulhu          = true,
	.max_bits_for_mulh    = 32,
};

static backend_params amd64_backend_params = {
	.experimental                  = "the amd64 backend is highly experimental and unfinished (consider the ia32 backend)",
	.byte_order_big_endian         = false,
	.pic_supported                 = true,
	.unaligned_memaccess_supported = true,
	.modulo_shift                  = 32,
	.dep_param                     = &amd64_arch_dep,
	.allow_ifconv                  = amd64_is_mux_allowed,
	.machine_size                  = 64,
	.mode_float_arithmetic         = NULL,  /* will be set later */
	.type_long_long                = NULL,  /* will be set later */
	.type_unsigned_long_long       = NULL,  /* will be set later */
	.type_long_double              = NULL,  /* will be set later */
	.stack_param_align             = 8,
	.float_int_overflow            = ir_overflow_indefinite,
	.vararg                        = {
		.va_list_type = NULL,  /* Will be set later */
		.lower_va_arg = amd64_lower_va_arg,
	},
};

static const backend_params *amd64_get_backend_params(void)
{
	return &amd64_backend_params;
}

static int amd64_is_valid_clobber(const char *clobber)
{
	return x86_parse_clobber(amd64_additional_clobber_names, clobber) != NULL;
}

static void amd64_init_types(void)
{
	ir_mode *const ptr_mode = new_reference_mode("p64", irma_twos_complement, 64, 64);
	set_modeP(ptr_mode);

	/* use an int128 mode for xmm registers for now, so that firm allows us to
	 * create constants with the xmm mode... */
	amd64_mode_xmm = new_int_mode("x86_xmm", irma_twos_complement, 128, 0, 0);

	x86_init_x87_type();
	amd64_backend_params.type_long_double = x86_type_E;

	amd64_backend_params.vararg.va_list_type = amd64_build_va_list_type();
}

static void amd64_init(void)
{
	amd64_init_types();
	amd64_register_init();
	amd64_create_opcodes();
	amd64_cconv_init();
	x86_set_be_asm_constraint_support(&amd64_asm_constraints);
}

static unsigned amd64_get_op_estimated_cost(const ir_node *node)
{
	(void)node;/* TODO */
	return 1;
}

static arch_isa_if_t const amd64_isa_if = {
	.n_registers           = N_AMD64_REGISTERS,
	.registers             = amd64_registers,
	.n_register_classes    = N_AMD64_CLASSES,
	.register_classes      = amd64_reg_classes,
	.init                  = amd64_init,
	.finish                = amd64_finish,
	.get_params            = amd64_get_backend_params,
	.generate_code         = amd64_generate_code,
	.lower_for_target      = amd64_lower_for_target,
	.is_valid_clobber      = amd64_is_valid_clobber,
	.handle_intrinsics     = amd64_handle_intrinsics,
	.get_op_estimated_cost = amd64_get_op_estimated_cost,
};

BE_REGISTER_MODULE_CONSTRUCTOR(be_init_arch_amd64)
void be_init_arch_amd64(void)
{
	be_register_isa_if("amd64", &amd64_isa_if);
	FIRM_DBG_REGISTER(dbg, "firm.be.amd64.cg");

	static const lc_opt_table_entry_t options[] = {
		LC_OPT_ENT_BOOL("x64abi",      "Use x64 ABI (otherwise system V)", &amd64_use_x64_abi),
		LC_OPT_ENT_BOOL("no-red-zone", "gcc compatibility",                &amd64_use_red_zone),
		LC_OPT_LAST
	};
	lc_opt_entry_t *be_grp    = lc_opt_get_grp(firm_opt_get_root(), "be");
	lc_opt_entry_t *amd64_grp = lc_opt_get_grp(be_grp, "amd64");
	lc_opt_add_table(amd64_grp, options);

	amd64_init_transform();
}