/*
 * This file is part of libFirm.
 * Copyright (C) 2012 University of Karlsruhe.
 */

/**
 * @file
 * @brief    The main amd64 backend driver file.
 */
#include "amd64_emitter.h"
#include "amd64_finish.h"
#include "amd64_new_nodes.h"
#include "amd64_transform.h"
#include "amd64_varargs.h"
#include "bearch_amd64_t.h"
#include "beflags.h"
#include "beirg.h"
#include "bemodule.h"
#include "bera.h"
#include "besched.h"
#include "bespillslots.h"
#include "bestack.h"
#include "beutil.h"
#include "debug.h"
#include "gen_amd64_regalloc_if.h"
#include "irarch_t.h"
#include "ircons.h"
#include "irgmod.h"
#include "irgopt.h"
#include "irgwalk.h"
#include "iropt_t.h"
#include "irtools.h"
#include "lower_alloc.h"
#include "lower_builtins.h"
#include "lower_calls.h"
#include "lower_mode_b.h"
#include "lowering.h"
#include "panic.h"

DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)

pmap *amd64_constants;

ir_mode *amd64_mode_E;
ir_type *amd64_type_E;
ir_mode *amd64_mode_xmm;

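/**
 * Return the entity referenced by @p node's address immediate if it lies on
 * the stack frame or in the argument area, NULL otherwise.
 */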
static ir_entity *amd64_get_frame_entity(const ir_node *node)
{
	if (!is_amd64_irn(node))
		return NULL;
	if (!amd64_has_addr_attr(node))
		return NULL;
	const amd64_addr_attr_t *attr = get_amd64_addr_attr_const(node);
	ir_entity *entity = attr->addr.immediate.entity;
	if (entity == NULL)
		return NULL;
	ir_type *owner = get_entity_owner(entity);
	if (is_frame_type(owner))
		return entity;
	ir_graph *irg = get_irn_irg(node);
	be_stack_layout_t *layout = be_get_irg_stack_layout(irg);
	if (owner == layout->arg_type)
		return entity;
	return NULL;
}

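/** Return the operand size in bytes for the given instruction mode. */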
static int get_insn_mode_bytes(amd64_insn_mode_t insn_mode)
{
	switch (insn_mode) {
	case INSN_MODE_8:       return 1;
	case INSN_MODE_16:      return 2;
	case INSN_MODE_32:      return 4;
	case INSN_MODE_64:      return 8;
	case INSN_MODE_128:     return 16;
	case INSN_MODE_INVALID: break;
	}
	panic("bad insn mode");
}

/**
 * This function is called by the generic backend to correct offsets for
 * nodes accessing the stack.
 */
static void amd64_set_frame_offset(ir_node *node, int offset)
{
	if (!is_amd64_irn(node))
		return;
	amd64_addr_attr_t *attr = get_amd64_addr_attr(node);
	attr->addr.immediate.offset += offset;
	if (is_amd64_pop_am(node)) {
		ir_graph          *irg    = get_irn_irg(node);
		be_stack_layout_t *layout = be_get_irg_stack_layout(irg);
		if (layout->sp_relative)
			attr->addr.immediate.offset -= get_insn_mode_bytes(attr->insn_mode);
	}
	attr->addr.immediate.kind = X86_IMM_VALUE;
	attr->addr.immediate.entity = NULL;
}

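/**
 * Return by how many bytes @p node changes the stack pointer
 * (SP_BIAS_RESET for leave).
 */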
static int amd64_get_sp_bias(const ir_node *node)
{
	if (is_amd64_push_am(node)) {
		const amd64_addr_attr_t *attr = get_amd64_addr_attr_const(node);
		return get_insn_mode_bytes(attr->insn_mode);
	} else if (is_amd64_push_reg(node)) {
		/* 64-bit register size */
		return AMD64_REGISTER_SIZE;
	} else if (is_amd64_pop_am(node)) {
		const amd64_addr_attr_t *attr = get_amd64_addr_attr_const(node);
		return -get_insn_mode_bytes(attr->insn_mode);
	} else if (is_amd64_leave(node)) {
		return SP_BIAS_RESET;
	}

	return 0;
}

static const arch_register_req_t *am_pushpop_base_reqs[] = {
	&amd64_single_reg_req_gp_rsp,
	&amd64_class_reg_req_gp,
	&arch_memory_requirement,
};

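/**
 * Create a push of the value at frame entity @p ent and schedule it before
 * @p schedpoint.
 */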
static ir_node *create_push(ir_node *node, ir_node *schedpoint, ir_node *sp,
                            ir_node *mem, ir_entity *ent,
                            amd64_insn_mode_t insn_mode)
{
	dbg_info *dbgi  = get_irn_dbg_info(node);
	ir_node  *block = get_nodes_block(node);
	ir_graph *irg   = get_irn_irg(node);
	ir_node  *frame = get_irg_frame(irg);

	amd64_addr_t addr;
	memset(&addr, 0, sizeof(addr));
	addr.base_input       = 1;
	addr.index_input      = NO_INPUT;
	addr.immediate.entity = ent;
	ir_node *in[] = { sp, frame, mem };
	ir_node *push = new_bd_amd64_push_am(dbgi, block, ARRAY_SIZE(in), in,
	                                     insn_mode, addr);
	arch_set_irn_register_reqs_in(push, am_pushpop_base_reqs);
	sched_add_before(schedpoint, push);
	return push;
}

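/**
 * Create a pop storing to frame entity @p ent and schedule it before
 * @p schedpoint.
 */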
static ir_node *create_pop(ir_node *node, ir_node *schedpoint, ir_node *sp,
                           ir_entity *ent, amd64_insn_mode_t insn_mode)
{
	dbg_info *dbgi  = get_irn_dbg_info(node);
	ir_node  *block = get_nodes_block(node);
	ir_graph *irg   = get_irn_irg(node);
	ir_node  *frame = get_irg_frame(irg);

	amd64_addr_t addr;
	memset(&addr, 0, sizeof(addr));
	addr.base_input  = 1;
	addr.index_input = NO_INPUT;
	addr.immediate.entity = ent;
	ir_node *in[] = { sp, frame, get_irg_no_mem(irg) };

	ir_node *pop = new_bd_amd64_pop_am(dbgi, block, ARRAY_SIZE(in), in,
	                                   insn_mode, addr);
	arch_set_irn_register_reqs_in(pop, am_pushpop_base_reqs);
	sched_add_before(schedpoint, pop);

	return pop;
}

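/** Create a Proj for the stack pointer result @p pos of @p pred. */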
static ir_node* create_spproj(ir_node *pred, int pos)
{
	return be_new_Proj_reg(pred, pos, &amd64_registers[REG_RSP]);
}

/**
 * Transform MemPerm: currently we do this the ugly way and produce
 * push/pop cascades into/from memory. This is possible without using
 * any registers.
 */
static void transform_MemPerm(ir_node *node)
{
	ir_graph *irg   = get_irn_irg(node);
	ir_node  *sp    = be_get_Start_proj(irg, &amd64_registers[REG_RSP]);
	int       arity = be_get_MemPerm_entity_arity(node);
	ir_node **pops  = ALLOCAN(ir_node*, arity);
	int       i;

	/* create Pushs */
	for (i = 0; i < arity; ++i) {
		ir_entity *inent = be_get_MemPerm_in_entity(node, i);
		ir_entity *outent = be_get_MemPerm_out_entity(node, i);
		ir_type *enttype = get_entity_type(inent);
		unsigned entsize = get_type_size_bytes(enttype);
		unsigned entsize2 = get_type_size_bytes(get_entity_type(outent));
		ir_node *mem = get_irn_n(node, i);

		/* work around cases where entities have different sizes */
		if (entsize2 < entsize)
			entsize = entsize2;

		int offset = 0;
		do {
			amd64_insn_mode_t insn_mode;
			if (entsize%2 == 1) {
				insn_mode = INSN_MODE_8;
			} else if (entsize % 4 == 2) {
				insn_mode = INSN_MODE_16;
			} else if (entsize % 8 == 4) {
				insn_mode = INSN_MODE_32;
			} else {
				assert(entsize%8 == 0);
				insn_mode = INSN_MODE_64;
			}

			ir_node *push = create_push(node, node, sp, mem, inent, insn_mode);
			sp = create_spproj(push, pn_amd64_push_am_stack);
			get_amd64_addr_attr(push)->addr.immediate.offset = offset;

			unsigned size = get_insn_mode_bytes(insn_mode);
			offset  += size;
			entsize -= size;
		} while(entsize > 0);
		set_irn_n(node, i, new_r_Bad(irg, mode_X));
	}

	/* create pops */
	for (i = arity; i-- > 0; ) {
		ir_entity *inent = be_get_MemPerm_in_entity(node, i);
		ir_entity *outent = be_get_MemPerm_out_entity(node, i);
		ir_type *enttype = get_entity_type(outent);
		unsigned entsize = get_type_size_bytes(enttype);
		unsigned entsize2 = get_type_size_bytes(get_entity_type(inent));

		/* work around cases where entities have different sizes */
		if (entsize2 < entsize)
			entsize = entsize2;

		int      offset = entsize;
		ir_node *pop;
		do {
			amd64_insn_mode_t insn_mode;
			if (entsize%2 == 1) {
				insn_mode = INSN_MODE_8;
			} else if (entsize % 4 == 2) {
				insn_mode = INSN_MODE_16;
			} else if (entsize % 8 == 4) {
				insn_mode = INSN_MODE_32;
			} else {
				assert(entsize%8 == 0);
				insn_mode = INSN_MODE_64;
			}

			pop = create_pop(node, node, sp, outent, insn_mode);
			sp  = create_spproj(pop, pn_amd64_pop_am_stack);

			unsigned size = get_insn_mode_bytes(insn_mode);
			offset  -= size;
			entsize -= size;
			get_amd64_addr_attr(pop)->addr.immediate.offset = offset;
		} while(entsize > 0);
		pops[i] = pop;
	}

	ir_node *const keep = be_new_Keep_one(sp);
	sched_replace(node, keep);

	/* exchange memprojs */
	foreach_out_edge_safe(node, edge) {
		ir_node *proj = get_edge_src_irn(edge);
		int p = get_Proj_num(proj);

		assert(p < arity);

		set_Proj_pred(proj, pops[p]);
		set_Proj_num(proj, pn_amd64_pop_am_M);
	}

	/* remove memperm */
	kill_node(node);
}

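/**
 * Block walker: replace MemPerm nodes by push/pop sequences after register
 * allocation.
 */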
static void amd64_after_ra_walker(ir_node *block, void *data)
{
	(void) data;

	sched_foreach_reverse_safe(block, node) {
		if (be_is_MemPerm(node)) {
			transform_MemPerm(node);
		}
	}
}

/**
 * rewrite unsigned long -> float/double conversion
 * x86_64 only has a signed conversion, so we do some crazy SSE construction
 * instead (this pattern was first seen in LLVM): We split the 64-bit value
 * into two 32-bit values and place them into the mantissa parts of
 * appropriately chosen float values, then add the two floats together.
 * In pseudo code:
 *
 * a = (vector unsigned long, unsigned long) x;
 * b = (vector unsigned, unsigned, unsigned, unsigned)
 *       upper half of 0x1p+52, upper half of 0x1p+84, 0, 0
 * c = repack (a[0], b[0], a[1], b[1])
 * d = (vector double) 0x1p+52, 0x1p+84
 * e = c - d
 * f = e[0] + e[1]
 */
static void rewrite_unsigned_float_Conv(ir_node *node)
{
	ir_graph  *irg    = get_irn_irg(node);
	dbg_info  *dbgi   = get_irn_dbg_info(node);
	ir_node   *block  = get_nodes_block(node);
	ir_node   *in     = get_Conv_op(node);
	ir_node   *in_xmm = new_r_Conv(block, in, amd64_mode_xmm);
	ir_tarval *magic0
		= new_integer_tarval_from_str("4530000043300000", 16, 0, 16,
		                              amd64_mode_xmm);
	ir_node   *const0 = new_r_Const(irg, magic0);
	collect_new_start_block_node(const0);
	ir_node   *punpck = new_bd_amd64_l_punpckldq(dbgi, block, in_xmm, const0);
	ir_tarval *magic1
		= new_integer_tarval_from_str("45300000000000004330000000000000", 32,
		                              0, 16, amd64_mode_xmm);
	ir_node   *const1 = new_r_Const(irg, magic1);
	collect_new_start_block_node(const1);
	ir_node   *subpd  = new_bd_amd64_l_subpd(dbgi, block, punpck, const1);
	ir_node   *haddpd = new_bd_amd64_l_haddpd(dbgi, block, subpd, subpd);
	ir_mode   *mode   = get_irn_mode(node);
	ir_node   *conv   = new_r_Conv(block, haddpd, mode);
	exchange(node, conv);
}

/* Creates a 64-bit constant with only the sign bit set,
 * i.e. returns 0x8000000000000000
 */
static ir_node *create_sign_bit_const(ir_graph *irg)
{
	ir_tarval *sign_tv = create_sign_tv(mode_Ls);
	return new_r_Const(irg, sign_tv);
}

/* rewrite float/double -> unsigned long conversion
 * x86_64 only has a signed conversion so we rewrite to the following:
 *
 * if (x >= 9223372036854775808.) {
 *   converted ^= (int)(x-9223372036854775808.) ^ 0x8000000000000000;
 * } else {
 *   converted = (int)x;
 * }
 * return (unsigned)converted;
 */
static void rewrite_float_unsigned_Conv(ir_node *node)
{
	ir_graph *irg        = get_irn_irg(node);
	dbg_info *dbgi       = get_irn_dbg_info(node);
	ir_node *lower_block = get_nodes_block(node);
	ir_mode *dest_mode   = get_irn_mode(node);

	part_block(node);

	ir_node   *block    = get_nodes_block(node);
	ir_node   *fp_x     = get_Conv_op(node);
	ir_mode   *src_mode = get_irn_mode(fp_x);
	double     d_const  = 9223372036854775808.;
	ir_tarval *tv       = new_tarval_from_double(d_const, src_mode);
	ir_node   *fp_const = new_r_Const(irg, tv);
	collect_new_start_block_node(fp_const);

	/* Test if the sign bit is needed */
	ir_node *cmp         = new_rd_Cmp(dbgi, block, fp_x, fp_const,
	                                 ir_relation_greater_equal);
	ir_node *cond        = new_rd_Cond(dbgi, block, cmp);
	ir_node *proj_true   = new_r_Proj(cond, mode_X, pn_Cond_true);
	ir_node *proj_false  = new_r_Proj(cond, mode_X, pn_Cond_false);
	ir_node *in_true[1]  = { proj_true };
	ir_node *in_false[1] = { proj_false };

	/* true block: Do some arithmetic to use the signed conversion */
	ir_node *true_block  = new_r_Block(irg, ARRAY_SIZE(in_true), in_true);
	ir_node *true_jmp    = new_r_Jmp(true_block);
	ir_node *sub         = new_r_Sub(true_block, fp_const, fp_x, src_mode);
	ir_node *sub_conv    = new_rd_Conv(dbgi, true_block, sub, mode_Ls);
	ir_node *sign_bit    = create_sign_bit_const(irg);
	collect_new_start_block_node(sign_bit);
	ir_node *xor         = new_r_Eor(true_block, sub_conv, sign_bit, mode_Ls);
	ir_node *true_res    = new_rd_Conv(dbgi, true_block, xor, dest_mode);

	/* false block: Simply convert */
	ir_node *false_block  = new_r_Block(irg, ARRAY_SIZE(in_false), in_false);
	ir_node *false_jmp    = new_r_Jmp(false_block);
	ir_node *false_signed = new_rd_Conv(dbgi, false_block, fp_x, mode_Ls);
	ir_node *false_res    = new_rd_Conv(dbgi, false_block, false_signed,
	                                    dest_mode);

	/* lower block */
	ir_node *lower_in[2] = { true_jmp, false_jmp };
	ir_node *phi_in[2]   = { true_res, false_res };

	set_irn_in(lower_block, ARRAY_SIZE(lower_in), lower_in);
	ir_node *phi = new_r_Phi(lower_block, ARRAY_SIZE(phi_in), phi_in,
	                         dest_mode);
	collect_new_phi_node(phi);
	exchange(node, phi);
}

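/**
 * Rewrite Convs between 64-bit unsigned integers and floating-point values,
 * which x86_64 cannot express directly. Returns true if the graph changed.
 */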
static bool amd64_rewrite_Conv(ir_node *node)
{
	ir_mode *to_mode    = get_irn_mode(node);
	ir_node *op         = get_Conv_op(node);
	ir_mode *from_mode  = get_irn_mode(op);
	bool     to_float   = mode_is_float(to_mode);
	bool     from_float = mode_is_float(from_mode);

	if (to_float && !from_float && !mode_is_signed(from_mode)
	    && get_mode_size_bits(from_mode) == 64) {
		rewrite_unsigned_float_Conv(node);
		return true;
	} else if (from_float && !to_float && !mode_is_signed(to_mode)
	           && get_mode_size_bits(to_mode) == 64) {
		rewrite_float_unsigned_Conv(node);
		return true;
	}

	return false;
}

static void amd64_intrinsics_walker(ir_node *node, void *data)
{
	bool *changed = (bool*)data;
	if (is_Conv(node)) {
		if (amd64_rewrite_Conv(node))
			*changed = true;
	}
}

static void amd64_handle_intrinsics(ir_graph *irg)
{
	ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_PHI_LIST);
	collect_phiprojs_and_start_block_nodes(irg);
	bool changed = false;
	irg_walk_graph(irg, amd64_intrinsics_walker, NULL, &changed);
	ir_free_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_PHI_LIST);

	if (changed) {
		confirm_irg_properties(irg,
		        IR_GRAPH_PROPERTY_NO_BADS
		        | IR_GRAPH_PROPERTY_NO_CRITICAL_EDGES
		        | IR_GRAPH_PROPERTY_MANY_RETURNS
		        | IR_GRAPH_PROPERTY_ONE_RETURN);
	}
}

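/** Callback of the frame entity coalescer: assign @p entity to @p node. */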
static void amd64_set_frame_entity(ir_node *node, ir_entity *entity,
                                   const ir_type *type)
{
	(void)type;
	amd64_addr_attr_t *attr = get_amd64_addr_attr(node);
	attr->addr.immediate.entity = entity;
}

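/** Return true for the amd64 load nodes which may reference a frame entity. */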
static bool is_frame_load(const ir_node *node)
{
	return is_amd64_mov_gp(node) || is_amd64_movs(node)
	    || is_amd64_movs_xmm(node) || is_amd64_movdqu(node);
}

/**
 * Collects nodes that need frame entities assigned.
 */
static void amd64_collect_frame_entity_nodes(ir_node *node, void *data)
{
	/* we are only interested in reporting Load nodes */
	if (!is_frame_load(node))
		return;

	const amd64_addr_attr_t *attr = get_amd64_addr_attr_const(node);
	if (attr->needs_frame_ent) {
		be_fec_env_t  *env  = (be_fec_env_t*)data;
		/* TODO: improve this */
		const ir_mode *mode = is_amd64_movdqu(node) ? amd64_mode_xmm
		                                            : mode_Lu;
		const ir_type *type = get_type_for_mode(mode);
		be_load_needs_frame_entity(env, node, type);
	}
}

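/** Return the index of the input of @p ret that uses the rbp register. */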
static int determine_rbp_input(ir_node *ret)
{
	arch_register_t const *const bp = &amd64_registers[REG_RBP];
	foreach_irn_in(ret, i, input) {
		if (arch_get_irn_register(input) == bp)
			return i;
	}
    panic("no rbp input found at %+F", ret);
}

/**
 * prepare graph and perform code selection.
 */
static void amd64_select_instructions(ir_graph *irg)
{
	amd64_adjust_pic(irg);

	be_timer_push(T_CODEGEN);
	amd64_transform_graph(irg);
	be_timer_pop(T_CODEGEN);

	be_dump(DUMP_BE, irg, "code-selection");

	optimize_graph_df(irg);

	be_dump(DUMP_BE, irg, "opt");
}

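/**
 * Create the function epilogue in front of the given return node: either a
 * leave or an IncSP freeing the stack frame.
 */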
static void introduce_epilogue(ir_node *ret)
{
	ir_graph          *irg        = get_irn_irg(ret);
	ir_node           *block      = get_nodes_block(ret);
	ir_type           *frame_type = get_irg_frame_type(irg);
	unsigned           frame_size = get_type_size_bytes(frame_type);
	be_stack_layout_t *layout     = be_get_irg_stack_layout(irg);
	ir_node           *first_sp   = get_irn_n(ret, n_amd64_ret_stack);
	ir_node           *curr_sp    = first_sp;

	if (!layout->sp_relative) {
		int      const n_rbp    = determine_rbp_input(ret);
		ir_node       *curr_bp  = get_irn_n(ret, n_rbp);
		ir_node       *curr_mem = get_irn_n(ret, n_amd64_ret_mem);
		ir_node *const leave    = new_bd_amd64_leave(NULL, block, curr_bp, curr_mem);
		curr_mem = be_new_Proj(leave, pn_amd64_leave_M);
		curr_bp = be_new_Proj_reg(leave, pn_amd64_leave_frame, &amd64_registers[REG_RBP]);
		curr_sp = be_new_Proj_reg(leave, pn_amd64_leave_stack, &amd64_registers[REG_RSP]);
		sched_add_before(ret, leave);

		set_irn_n(ret, n_amd64_ret_mem, curr_mem);
		set_irn_n(ret, n_rbp,           curr_bp);
	} else {
		if (frame_size > 0) {
			ir_node *incsp = amd64_new_IncSP(block, curr_sp,
			                                 -(int)frame_size, 0);
			sched_add_before(ret, incsp);
			curr_sp = incsp;
		}
	}
	set_irn_n(ret, n_amd64_ret_stack, curr_sp);

	/* keep verifier happy... */
	if (get_irn_n_edges(first_sp) == 0 && is_Proj(first_sp)) {
		kill_node(first_sp);
	}
}

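/**
 * Create the function prologue: push rbp and set up the frame pointer, or
 * merely reserve the frame when working sp-relative.
 */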
static void introduce_prologue(ir_graph *const irg)
{
	const arch_register_t *sp         = &amd64_registers[REG_RSP];
	const arch_register_t *bp         = &amd64_registers[REG_RBP];
	ir_node               *start      = get_irg_start(irg);
	ir_node               *block      = get_nodes_block(start);
	ir_type               *frame_type = get_irg_frame_type(irg);
	unsigned               frame_size = get_type_size_bytes(frame_type);
	be_stack_layout_t     *layout     = be_get_irg_stack_layout(irg);
	ir_node               *initial_sp = be_get_Start_proj(irg, sp);

	if (!layout->sp_relative) {
		/* push rbp */
		ir_node *const mem        = get_irg_initial_mem(irg);
		ir_node *const initial_bp = be_get_Start_proj(irg, bp);
		ir_node *const push       = new_bd_amd64_push_reg(NULL, block, initial_sp, mem, initial_bp);
		sched_add_after(start, push);
		ir_node *const curr_mem   = be_new_Proj(push, pn_amd64_push_reg_M);
		edges_reroute_except(mem, curr_mem, push);
		ir_node *const curr_sp    = be_new_Proj_reg(push, pn_amd64_push_reg_stack, sp);

		/* move rsp to rbp */
		ir_node *const curr_bp = be_new_Copy(block, curr_sp);
		sched_add_after(push, curr_bp);
		arch_copy_irn_out_info(curr_bp, 0, initial_bp);
		edges_reroute_except(initial_bp, curr_bp, push);

		ir_node *incsp = amd64_new_IncSP(block, curr_sp, frame_size, 0);
		sched_add_after(curr_bp, incsp);
		edges_reroute_except(initial_sp, incsp, push);

		/* make sure the initial IncSP is really used by someone */
		be_keep_if_unused(incsp);

		layout->initial_bias = -8;
	} else {
		if (frame_size > 0) {
			ir_node *const incsp = amd64_new_IncSP(block, initial_sp,
			                                       frame_size, 0);
			sched_add_after(start, incsp);
			edges_reroute_except(initial_sp, incsp, incsp);
		}
	}
}

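/** Add prologue and epilogue code to the given graph. */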
static void introduce_prologue_epilogue(ir_graph *irg)
{
	/* introduce epilogue for every return node */
	foreach_irn_in(get_irg_end_block(irg), i, ret) {
		assert(is_amd64_ret(ret));
		introduce_epilogue(ret);
	}

	introduce_prologue(irg);
}

/**
 * Called immediately before the emit phase.
 */
static void amd64_finish_and_emit(ir_graph *irg)
{
	be_stack_layout_t *stack_layout = be_get_irg_stack_layout(irg);
	bool               at_begin     = stack_layout->sp_relative;
	be_fec_env_t      *fec_env      = be_new_frame_entity_coalescer(irg);

	/* create and coalesce frame entities */
	irg_walk_graph(irg, NULL, amd64_collect_frame_entity_nodes, fec_env);
	be_assign_entities(fec_env, amd64_set_frame_entity, at_begin);
	be_free_frame_entity_coalescer(fec_env);

	irg_block_walk_graph(irg, NULL, amd64_after_ra_walker, NULL);

	introduce_prologue_epilogue(irg);

	/* fix stack entity offsets */
	be_fix_stack_nodes(irg, &amd64_registers[REG_RSP]);
	be_birg_from_irg(irg)->non_ssa_regs = NULL;
	be_abi_fix_stack_bias(irg, amd64_get_sp_bias, amd64_set_frame_offset,
	                      amd64_get_frame_entity);

	/* Fix 2-address code constraints. */
	amd64_finish_irg(irg);

	/* emit code */
	be_timer_push(T_EMIT);
	amd64_emit_function(irg);
	be_timer_pop(T_EMIT);
}

static void amd64_finish(void)
{
	amd64_free_opcodes();
}

static const regalloc_if_t amd64_regalloc_if = {
	.spill_cost  = 7,
	.reload_cost = 5,
	.new_spill   = amd64_new_spill,
	.new_reload  = amd64_new_reload,
};

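/**
 * Drive code generation: select instructions, schedule, allocate registers
 * and emit code for each graph.
 */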
static void amd64_generate_code(FILE *output, const char *cup_name)
{
	amd64_constants = pmap_create();
	be_begin(output, cup_name);
	unsigned *const sp_is_non_ssa = rbitset_malloc(N_AMD64_REGISTERS);
	rbitset_set(sp_is_non_ssa, REG_RSP);

	foreach_irp_irg(i, irg) {
		if (!be_step_first(irg))
			continue;

		be_birg_from_irg(irg)->non_ssa_regs = sp_is_non_ssa;
		amd64_select_instructions(irg);

		be_step_schedule(irg);

		be_timer_push(T_RA_PREPARATION);
		be_sched_fix_flags(irg, &amd64_reg_classes[CLASS_amd64_flags], NULL,
						   NULL, NULL);
		be_timer_pop(T_RA_PREPARATION);

		be_step_regalloc(irg, &amd64_regalloc_if);

		amd64_finish_and_emit(irg);

		be_step_last(irg);
	}

	be_finish();
	pmap_destroy(amd64_constants);
}

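/** Perform the target-specific lowerings before instruction selection. */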
static void amd64_lower_for_target(void)
{
	/* lower compound param handling */
	lower_calls_with_compounds(LF_RETURN_HIDDEN);
	be_after_irp_transform("lower-calls");

	foreach_irp_irg(i, irg) {
		lower_switch(irg, 4, 256, mode_Iu);
		be_after_transform(irg, "lower-switch");
	}

	foreach_irp_irg(i, irg) {
		/* lower for mode_b stuff */
		ir_lower_mode_b(irg, mode_Lu);
		be_after_transform(irg, "lower-modeb");
		lower_alloc(irg, AMD64_PO2_STACK_ALIGNMENT);
		be_after_transform(irg, "lower-alloc");
	}

	foreach_irp_irg(i, irg) {
		/* Turn all small CopyBs into loads/stores, and turn all bigger
		 * CopyBs into memcpy calls, because we cannot handle CopyB nodes
		 * during code generation yet.
		 * TODO:  Adapt this once custom CopyB handling is implemented. */
		lower_CopyB(irg, 64, 65, true);
		be_after_transform(irg, "lower-copyb");
	}

	ir_builtin_kind supported[2];
	size_t  s = 0;
	supported[s++] = ir_bk_saturating_increment;
	supported[s++] = ir_bk_va_start;

	assert(s <= ARRAY_SIZE(supported));
	lower_builtins(s, supported);
	be_after_irp_transform("lower-builtins");
}

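/** If-conversion callback: only allow Mux nodes the middleend can optimize. */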
static int amd64_is_mux_allowed(ir_node *sel, ir_node *mux_false,
                                ir_node *mux_true)
{
	/* optimizable by middleend */
	if (ir_is_optimizable_mux(sel, mux_false, mux_true))
		return true;
	return false;
}

static const ir_settings_arch_dep_t amd64_arch_dep = {
	.also_use_subs        = true,
	.maximum_shifts       = 4,
	.highest_shift_amount = 63,
	.evaluate             = NULL,
	.allow_mulhs          = true,
	.allow_mulhu          = true,
	.max_bits_for_mulh    = 32,
};

static backend_params amd64_backend_params = {
	.byte_order_big_endian         = false,
	.pic_supported                 = false,
	.unaligned_memaccess_supported = true,
	.modulo_shift                  = 32,
	.dep_param                     = &amd64_arch_dep,
	.allow_ifconv                  = amd64_is_mux_allowed,
	.machine_size                  = 64,
	.mode_float_arithmetic         = NULL,  /* will be set later */
	.type_long_long                = NULL,  /* will be set later */
	.type_unsigned_long_long       = NULL,  /* will be set later */
	.type_long_double              = NULL,  /* will be set later */
	.stack_param_align             = 8,
	.float_int_overflow            = ir_overflow_indefinite,
	.vararg                        = {
		.va_list_type = NULL,  /* Will be set later */
		.lower_va_arg = amd64_lower_va_arg,
	},
};

static const backend_params *amd64_get_backend_params(void)
{
	return &amd64_backend_params;
}

static int amd64_is_valid_clobber(const char *clobber)
{
	return x86_parse_clobber(amd64_additional_clobber_names, clobber) != NULL;
}

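/**
 * Create the amd64-specific modes and types (pointer mode, 80-bit extended
 * float, xmm) and register them with the backend parameters.
 */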
static void amd64_init_types(void)
{
	ir_mode *const ptr_mode = new_reference_mode("p64", irma_twos_complement, 64, 64);
	set_modeP(ptr_mode);

	amd64_mode_E = new_float_mode("E", irma_x86_extended_float, 15, 64,
	                              ir_overflow_indefinite);
	amd64_type_E = new_type_primitive(amd64_mode_E);
	set_type_size_bytes(amd64_type_E, 16);
	set_type_alignment_bytes(amd64_type_E, 16);

	/* use an int128 mode for xmm registers for now, so that firm allows us to
	 * create constants with the xmm mode... */
	amd64_mode_xmm = new_int_mode("x86_xmm", irma_twos_complement, 128, 0, 0);

	amd64_backend_params.type_long_double = amd64_type_E;

	amd64_backend_params.vararg.va_list_type = amd64_build_va_list_type();
}

static void amd64_init(void)
{
	amd64_init_types();
	amd64_register_init();
	amd64_create_opcodes();
	amd64_cconv_init();
	x86_set_be_asm_constraint_support(&amd64_asm_constraints);
}

static unsigned amd64_get_op_estimated_cost(const ir_node *node)
{
	(void)node;/* TODO */
	return 1;
}

static arch_isa_if_t const amd64_isa_if = {
	.n_registers           = N_AMD64_REGISTERS,
	.registers             = amd64_registers,
	.n_register_classes    = N_AMD64_CLASSES,
	.register_classes      = amd64_reg_classes,
	.init                  = amd64_init,
	.finish                = amd64_finish,
	.get_params            = amd64_get_backend_params,
	.generate_code         = amd64_generate_code,
	.lower_for_target      = amd64_lower_for_target,
	.is_valid_clobber      = amd64_is_valid_clobber,
	.handle_intrinsics     = amd64_handle_intrinsics,
	.get_op_estimated_cost = amd64_get_op_estimated_cost,
};

BE_REGISTER_MODULE_CONSTRUCTOR(be_init_arch_amd64)
void be_init_arch_amd64(void)
{
	be_register_isa_if("amd64", &amd64_isa_if);
	FIRM_DBG_REGISTER(dbg, "firm.be.amd64.cg");

	static const lc_opt_table_entry_t options[] = {
		LC_OPT_ENT_BOOL("x64abi", "Use x64 ABI (otherwise system V)",
						&amd64_use_x64_abi),
		LC_OPT_LAST
	};
	lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be");
	lc_opt_entry_t *x86_64_grp = lc_opt_get_grp(be_grp, "x86_64");
	lc_opt_add_table(x86_64_grp, options);

	amd64_init_finish();
	amd64_init_transform();
}