/*
 * This file is part of libFirm.
 * Copyright (C) 2015 University of Karlsruhe.
 */

/**
 * @file
 * @brief       Implements vararg handling for AMD64
 * @author      Andreas Fried
 */
#include "amd64_varargs.h"

#include "amd64_bearch_t.h"
#include "amd64_new_nodes.h"
#include "amd64_nodes_attr.h"
#include "amd64_transform.h"
#include "be.h"
#include "besched.h"
#include "bitfiddle.h"
#include "gen_amd64_regalloc_if.h"
#include "ident.h"
#include "ircons.h"
#include "iredges_t.h"
#include "irgmod.h"
#include "irgraph_t.h"
#include "panic.h"
#include "tv.h"
#include "typerep.h"
#include "util.h"

static struct va_list_members {
	ir_entity *gp_offset;
	ir_entity *xmm_offset;
	ir_entity *reg_save_ptr;
	ir_entity *stack_args_ptr;
} va_list_members;

static size_t            n_gp_params;
static size_t            n_xmm_params;
/* The register save area, and the slots for the GP and XMM registers
 * inside it. */
static ir_entity        *reg_save_area;
static ir_entity       **gp_save_slots;
static ir_entity       **xmm_save_slots;
/* Parameter entity pointing to the first variadic parameter on the
 * stack. */
static ir_entity        *stack_args_param;

void amd64_set_va_stack_args_param(ir_entity *param)
{
	stack_args_param = param;
}

ir_type *amd64_build_va_list_type(void)
{
	ir_type *const int_type     = new_type_primitive(mode_Is);
	ir_type *const ptr_type     = new_type_pointer(new_type_primitive(mode_ANY));
	ir_type *const va_list_type = new_type_struct(new_id_from_str("builtin:va_list"));

	va_list_members.gp_offset      = new_entity(va_list_type, new_id_from_str("gp_offset"),      int_type);
	va_list_members.xmm_offset     = new_entity(va_list_type, new_id_from_str("xmm_offset"),     int_type);
	va_list_members.stack_args_ptr = new_entity(va_list_type, new_id_from_str("stack_args_ptr"), ptr_type);
	va_list_members.reg_save_ptr   = new_entity(va_list_type, new_id_from_str("reg_save_ptr"),   ptr_type);

	default_layout_compound_type(va_list_type);
	return va_list_type;
}
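
/* For reference, the type built above corresponds to the following C struct
 * (a sketch; the System V AMD64 ABI names the members gp_offset, fp_offset,
 * overflow_arg_area and reg_save_area):
 *
 *     struct va_list {
 *         int   gp_offset;       // bytes of the save area consumed by GP args
 *         int   xmm_offset;      // offset of the next XMM slot in the save area
 *         void *stack_args_ptr;  // next stack-passed ("overflow") argument
 *         void *reg_save_ptr;    // start of the register save area
 *     };
 */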

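/* Count the named register parameters of each class and mark the remaining
 * parameter registers, which may carry variadic arguments, as outputs of the
 * Start node. */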
void amd64_collect_variadic_params(be_start_out *const outs, x86_cconv_t *const cconv)
{
	size_t gp_params  = 0;
	size_t xmm_params = 0;
	size_t p          = 0;
	for (size_t const n = cconv->n_parameters; p < n; p++) {
		const arch_register_t *reg = cconv->parameters[p].reg;
		if (reg) {
			if (reg->cls == &amd64_reg_classes[CLASS_amd64_gp]) {
				++gp_params;
			} else if (reg->cls == &amd64_reg_classes[CLASS_amd64_xmm]) {
				++xmm_params;
			} else {
				panic("unexpected register class");
			}
		}
	}

	n_gp_params  = gp_params;
	n_xmm_params = xmm_params;

	/* amd64_decide_calling_convention has appended the registers
	 * which might hold variadic arguments to the parameters
	 * array, first GP, then XMM. Get them out now. */
	for (size_t i = gp_params + xmm_params, n = cconv->n_param_regs; i < n; i++, p++) {
		const arch_register_t *reg = cconv->parameters[p].reg;
		outs[reg->global_index] = BE_START_REG;
	}
}

void amd64_insert_reg_save_area(ir_graph *irg, x86_cconv_t *cconv)
{
	ir_entity *const irg_ent = get_irg_entity(irg);
	ident     *const irg_id  = get_entity_ident(irg_ent);

	ident   *reg_save_type_id = new_id_fmt("__va_reg_save_%s_t", irg_id);
	ir_type *reg_save_type    = new_type_struct(reg_save_type_id);

	const size_t max_xmm_params = cconv->n_xmm_regs;
	const size_t max_gp_params  = cconv->n_param_regs - max_xmm_params;

	gp_save_slots = XMALLOCNZ(ir_entity*, max_gp_params);
	for (size_t i = 0; i < max_gp_params; i++) {
		ident *id = new_id_fmt("save_gp%d", i);
		gp_save_slots[i] = new_entity(reg_save_type, id, get_type_for_mode(mode_Lu));
	}

	xmm_save_slots = XMALLOCNZ(ir_entity*, max_xmm_params);
	for (size_t i = 0; i < max_xmm_params; i++) {
		ident *id = new_id_fmt("save_xmm%d", i);
		xmm_save_slots[i] = new_entity(reg_save_type, id, get_type_for_mode(amd64_mode_xmm));
	}

	default_layout_compound_type(reg_save_type);

	ir_type *frame_type  = get_irg_frame_type(irg);
	ident   *reg_save_id = new_id_fmt("__va_reg_save_%s", irg_id);
	reg_save_area = new_entity(frame_type, reg_save_id, reg_save_type);
}
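
/* After default_layout_compound_type the save area consists of max_gp_params
 * 8-byte GP slots followed by max_xmm_params 16-byte XMM slots; with the SysV
 * calling convention (6 GP and 8 XMM argument registers) that amounts to
 * 6*8 + 8*16 = 176 bytes. */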

/*
 * Lowering of va_arg
 *
 * For explanation see e.g.:
 * http://andrewl.dreamhosters.com/blog/variadic_functions_in_amd64_linux/index.html
 * and the AMD64 ABI (http://www.x86-64.org/documentation/abi.pdf)
 *
 * Given:
 * va_list ap;
 * va_start(ap);
 *
 * Generate one of the following for "va_arg(ap, T)":
 *
 * If T is an integral or pointer type:
 * if (ap.gp_offset < 6*8) {
 *         T result = *(T*)((char*)ap.reg_save_ptr + ap.gp_offset);
 *         ap.gp_offset += 8;
 *         return result;
 * } else {
 *         T result = *(T*)ap.stack_args_ptr;
 *         ap.stack_args_ptr += (sizeof(T) rounded up to a multiple of 8);
 *         return result;
 * }
 *
 * If T is an SSE floating point type:
 * if (ap.xmm_offset < 6*8 + 8*16) {
 *         T result = *(T*)((char*)ap.reg_save_ptr + ap.xmm_offset);
 *         ap.xmm_offset += 16;
 *         return result;
 * } else {
 *         T result = *(T*)ap.stack_args_ptr;
 *         ap.stack_args_ptr += (sizeof(T) rounded up to a multiple of 8);
 *         return result;
 * }
 *
 * If T is an x87 floating point type (i.e. long double):
 * T result = *(T*)ap.stack_args_ptr;
 * ap.stack_args_ptr += sizeof(T);
 * return result;
 */

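/* Build a Load of the given type at ptr, threading the memory edge through
 * *mem. Types without a mode (compounds) are loaded as pointers. */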
static ir_node *load_result(dbg_info *dbgi, ir_node *block, ir_node *ptr, ir_type *type, ir_node **mem)
{
	ir_mode *mode    = get_type_mode(type);
	if (mode == NULL) {
		mode = mode_P;
	}
	ir_node *load    = new_rd_Load(dbgi, block, *mem, ptr, mode, type, cons_none);
	ir_node *result  = new_rd_Proj(dbgi, load, mode, pn_Load_res);
	ir_node *new_mem = new_rd_Proj(dbgi, load, mode_M, pn_Load_M);
	*mem = new_mem;
	return result;
}

static void make_store(dbg_info *dbgi, ir_node *block, ir_node *ptr, ir_node *value, ir_type *type, ir_node **mem)
{
	ir_node *store   = new_rd_Store(dbgi, block, *mem, ptr, value, type, cons_none);
	ir_node *new_mem = new_rd_Proj(dbgi, store, mode_M, pn_Store_M);
	*mem = new_mem;
}

static ir_node *load_va_from_stack(dbg_info *dbgi, ir_node *block, ir_mode *resmode, ir_type *restype, ir_node *ap, ir_node **mem)
{
	ir_graph *irg = get_irn_irg(block);

	// Load stack_args_ptr
	ir_node *stack_args_ptr  = new_rd_Member(dbgi, block, ap, va_list_members.stack_args_ptr);
	ir_type *stack_args_type = get_entity_type(va_list_members.stack_args_ptr);
	ir_node *stack_args      = load_result(dbgi, block, stack_args_ptr, stack_args_type, mem);

	// Load result from stack
	ir_node *result          = load_result(dbgi, block, stack_args, restype, mem);

	// Increment stack_args and write back
	long     increment       = round_up2(get_mode_size_bytes(resmode), 8);
	ir_mode *offset_mode     = get_reference_offset_mode(mode_P);
	ir_node *sizeof_resmode  = new_r_Const_long(irg, offset_mode, increment);
	ir_node *stack_args_inc  = new_rd_Add(dbgi, block, stack_args, sizeof_resmode);
	make_store(dbgi, block, stack_args_ptr, stack_args_inc, stack_args_type, mem);

	return result;
}

static ir_node *load_va_from_register_or_stack(dbg_info *dbgi, ir_node *block,
                                               ir_mode *resmode, ir_type *restype,
                                               ir_node *max, ir_entity *offset_entity, ir_node *stride,
                                               ir_node *ap, ir_node **mem)
{
	ir_graph *irg = get_irn_irg(block);

	// Load the current register offset
	ir_node *offset_ptr  = new_rd_Member(dbgi, block, ap, offset_entity);
	ir_type *offset_type = get_entity_type(offset_entity);
	ir_node *offset      = load_result(dbgi, block, offset_ptr, offset_type, mem);

	// Compare it to the maximum value
	ir_node *cmp = new_rd_Cmp(dbgi, block, offset, max, ir_relation_less);

	// Construct the if-diamond
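	// part_block_edges() splits the block at cmp: cmp and the code it depends
	// on end up in upper_block, while the continuation becomes lower_block,
	// whose control-flow inputs we replace with the diamond's two jumps.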
	ir_node *lower_block = part_block_edges(cmp);
	ir_node *upper_block = get_nodes_block(cmp);
	ir_node *cond        = new_rd_Cond(dbgi, upper_block, cmp);
	ir_node *proj_true   = new_r_Proj(cond, mode_X, pn_Cond_true);
	ir_node *proj_false  = new_r_Proj(cond, mode_X, pn_Cond_false);
	ir_node *in_true[1]  = { proj_true };
	ir_node *in_false[1] = { proj_false };
	ir_node *true_block  = new_r_Block(irg, ARRAY_SIZE(in_true),  in_true);
	ir_node *false_block = new_r_Block(irg, ARRAY_SIZE(in_false), in_false);
	ir_node *true_jmp    = new_r_Jmp(true_block);
	ir_node *false_jmp   = new_r_Jmp(false_block);
	ir_node *lower_in[2] = { true_jmp, false_jmp };
	set_irn_in(lower_block, ARRAY_SIZE(lower_in), lower_in);

	// True side: Load from the register save area
	// Load reg_save_ptr
	ir_node *true_mem        = *mem;
	ir_node *reg_save_ptr    = new_rd_Member(dbgi, true_block, ap, va_list_members.reg_save_ptr);
	ir_type *reg_save_type   = get_entity_type(va_list_members.reg_save_ptr);
	ir_node *reg_save        = load_result(dbgi, true_block, reg_save_ptr, reg_save_type, &true_mem);

	// Load from reg_save + offset
	ir_mode *mode_reg_save   = get_irn_mode(reg_save);
	ir_mode *offset_mode     = get_reference_offset_mode(mode_reg_save);
	ir_node *conv_offset     = new_r_Conv(true_block, offset, offset_mode);
	ir_node *true_result_ptr = new_rd_Add(dbgi, true_block, reg_save, conv_offset);
	ir_node *true_result     = load_result(dbgi, true_block, true_result_ptr, restype, &true_mem);

	// Increment offset and write back
	ir_node *offset_inc      = new_rd_Add(dbgi, true_block, offset, stride);
	make_store(dbgi, true_block, offset_ptr, offset_inc, offset_type, &true_mem);

	// False side: Load from the stack
	ir_node *false_mem    = *mem;
	ir_node *false_result = load_va_from_stack(dbgi, false_block, resmode, restype, ap, &false_mem);

	// Phi both sides together
	ir_node *phiM_in[]  = { true_mem, false_mem };
	ir_node *phiM       = new_rd_Phi(dbgi, lower_block, ARRAY_SIZE(phiM_in), phiM_in, mode_M);
	ir_node *phi_in[]   = { true_result, false_result };
	ir_node *phi        = new_rd_Phi(dbgi, lower_block, ARRAY_SIZE(phi_in), phi_in, resmode);

	*mem = phiM;
	return phi;
}

void amd64_lower_va_arg(ir_node *node)
{
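	/* Limits and strides from the SysV AMD64 calling convention: 6 GP
	 * argument registers saved in 8-byte slots, followed by 8 XMM argument
	 * registers saved in 16-byte slots. */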
	static const size_t n_gp_args  = 6;
	static const size_t n_xmm_args = 8;
	static const size_t gp_size    = 8;
	static const size_t xmm_size   = 16;

	ir_type *restype = get_method_res_type(get_Builtin_type(node), 0);
	ir_mode *resmode = get_type_mode(restype);
	if (resmode == NULL) {
		resmode = mode_P;
	}
	ir_mode *mode_long_double = get_type_mode(be_get_backend_param()->type_long_double);

	dbg_info *dbgi  = get_irn_dbg_info(node);
	ir_graph *irg   = get_irn_irg(node);
	ir_node  *block = get_nodes_block(node);
	ir_node  *ap    = get_irn_n(node, pn_Builtin_max + 1);
	ir_node  *mem   = get_Builtin_mem(node);
	ir_node  *result;
	if (resmode == mode_long_double) {
		result = load_va_from_stack(dbgi, block, resmode, restype, ap, &mem);
	} else {
		ir_node   *max;
		ir_entity *offset_entity;
		ir_node   *stride;
		if (mode_is_int(resmode) || mode_is_reference(resmode)) {
			max           = new_r_Const_long(irg, mode_Is, n_gp_args * gp_size);
			offset_entity = va_list_members.gp_offset;
			stride        = new_r_Const_long(irg, mode_Is, gp_size);
		} else if (mode_is_float(resmode)) {
			max           = new_r_Const_long(irg, mode_Is,
			                                 n_gp_args * gp_size + n_xmm_args * xmm_size);
			offset_entity = va_list_members.xmm_offset;
			stride        = new_r_Const_long(irg, mode_Is, xmm_size);
		} else {
			panic("amd64_lower_va_arg does not support mode %+F", resmode);
		}
		result = load_va_from_register_or_stack(dbgi, block,
		                                        resmode, restype,
		                                        max, offset_entity, stride,
		                                        ap, &mem);
	}
	ir_node *tuple_in[] = { mem, result };
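	/* Replace the Builtin by a Tuple so that its existing Projs now select
	 * the new memory and the loaded result. */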
	turn_into_tuple(node, ARRAY_SIZE(tuple_in), tuple_in);

	clear_irg_properties(irg, IR_GRAPH_PROPERTY_NO_TUPLES | IR_GRAPH_PROPERTY_NO_BADS);
}

/*
 * Make a mov to store the given immediate value at the offset of offset_ent
 * relative to base, i.e. mov $value, offset(%base_reg)
 */
static ir_node *make_mov_imm32_to_offset_mem(dbg_info *dbgi, ir_node *block, ir_node *mem, ir_node *base, ir_entity *offset_ent, long value)
{
	ir_node *const mov_in[] = { base, mem };
	int32_t  const offset   = get_entity_offset(offset_ent);
	amd64_binop_addr_attr_t mov_attr = {
		.base = {
			.base = {
				.op_mode = AMD64_OP_ADDR_IMM,
				.size    = X86_SIZE_32,
			},
			.addr = {
				.immediate = {
					.offset = offset,
					.kind   = X86_IMM_VALUE,
				},
				.variant    = X86_ADDR_BASE,
				.base_input = 0,
				.mem_input  = 1,
			},
		},
		.u = {
			.immediate = {
				.offset = value,
				.kind   = X86_IMM_VALUE,
			},
		},
	};
	return new_bd_amd64_mov_store(dbgi, block, ARRAY_SIZE(mov_in), mov_in, gp_am_reqs[1], &mov_attr);
}

/*
 * Make a mov to store the given value at the offset of offset_ent relative
 * to base, i.e. mov %value_reg, offset(%base_reg)
 */
static ir_node *make_mov_val64_to_offset_mem(dbg_info *dbgi, ir_node *block, ir_node *mem, ir_node *base, ir_entity *entity, ir_entity *offset_ent, ir_node *value)
{
	ir_node *const mov_in[] = { value, base, mem };
	int32_t  const offset   = get_entity_offset(offset_ent);
	amd64_binop_addr_attr_t mov_attr = {
		.base = {
			.base = {
				.op_mode = AMD64_OP_ADDR_REG,
				.size    = X86_SIZE_64,
			},
			.addr = {
				.immediate = {
					.entity = entity,
					.offset = offset,
					.kind   = entity ? X86_IMM_FRAMEENT : X86_IMM_VALUE,
				},
				.variant    = X86_ADDR_BASE,
				.base_input = 1,
				.mem_input  = 2,
			},
		},
		.u = {
			.reg_input = 0,
		},
	};
	return new_bd_amd64_mov_store(dbgi, block, ARRAY_SIZE(mov_in), mov_in, gp_am_reqs[2], &mov_attr);
}

/*
 * Make a mov to store the given XMM value at the offset of offset_ent
 * relative to base, i.e. movsd %xmm_value_reg, offset(%base_reg)
 */
static ir_node *make_mov_xmmval64_to_offset_mem(dbg_info *dbgi, ir_node *block, ir_node *mem, ir_node *base, ir_entity *entity, ir_entity *offset_ent, ir_node *value)
{
	ir_node *const mov_in[] = { value, base, mem };
	int32_t  const offset   = get_entity_offset(offset_ent);
	amd64_binop_addr_attr_t mov_attr = {
		.base = {
			.base = {
				.op_mode = AMD64_OP_ADDR_REG,
				.size    = X86_SIZE_64,
			},
			.addr = {
				.immediate = {
					.entity = entity,
					.offset = offset,
					.kind   = X86_IMM_FRAMEENT,
				},
				.variant    = X86_ADDR_BASE,
				.base_input = 1,
				.mem_input  = 2,
			},
		},
		.u = {
			.reg_input = 0,
		},
	};
	return new_bd_amd64_movs_store_xmm(dbgi, block, ARRAY_SIZE(mov_in), mov_in, xmm_reg_mem_reqs, &mov_attr);
}

/*
 * Make a lea to compute the address of the given entity,
 * i.e. lea entity_offset(%base_reg), %result_reg
 */
static ir_node *make_lea_with_offset_entity(dbg_info *dbgi, ir_node *block,
                                            ir_node *base, ir_entity *offset)
{
	ir_node *lea_in[] = { base };
	x86_addr_t lea_addr = {
		.immediate = {
			.entity = offset,
			.kind   = X86_IMM_FRAMEENT,
		},
		.variant    = X86_ADDR_BASE,
		.base_input = 0,
	};
	return new_bd_amd64_lea(dbgi, block, ARRAY_SIZE(lea_in), lea_in, reg_reqs, X86_SIZE_64, lea_addr);
}

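/* Implements va_start: the two offsets are initialized past the slots already
 * occupied by named register parameters (the XMM section begins after all GP
 * slots), and the two pointers are set to the register save area and to the
 * first variadic stack argument, respectively. */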
ir_node *amd64_initialize_va_list(dbg_info *dbgi, ir_node *block, x86_cconv_t *cconv,
                                  ir_node *mem, ir_node *ap, ir_node *frame)
{
	const size_t max_xmm_params = cconv->n_xmm_regs;
	const size_t max_gp_params  = cconv->n_param_regs - max_xmm_params;

	size_t const initial_gp_offset = n_gp_params * 8;
	mem = make_mov_imm32_to_offset_mem(dbgi, block, mem, ap, va_list_members.gp_offset, initial_gp_offset);

	// The XMM parameters lie behind the GP parameters in the reg_save_area.
	size_t const initial_xmm_offset = max_gp_params * 8 + n_xmm_params * 16;
	mem = make_mov_imm32_to_offset_mem(dbgi, block, mem, ap, va_list_members.xmm_offset, initial_xmm_offset);

	ir_node *const reg_save_ptr = make_lea_with_offset_entity(dbgi, block, frame, reg_save_area);
	mem = make_mov_val64_to_offset_mem(dbgi, block, mem, ap, NULL, va_list_members.reg_save_ptr, reg_save_ptr);

	ir_node *const stack_args = make_lea_with_offset_entity(dbgi, block, frame, stack_args_param);
	mem = make_mov_val64_to_offset_mem(dbgi, block, mem, ap, NULL, va_list_members.stack_args_ptr, stack_args);

	return mem;
}

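/* At function entry, spill every parameter register that may hold a variadic
 * argument into its slot in the register save area. The stores are placed
 * directly behind the initial memory, so all other memory users observe
 * them. */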
void amd64_save_vararg_registers(ir_graph *const irg, x86_cconv_t const *const cconv, ir_node *const frame)
{
	size_t         gp_params      = n_gp_params;
	size_t         xmm_params     = n_xmm_params;
	size_t         reg_params     = gp_params + xmm_params;
	size_t   const max_reg_params = cconv->n_param_regs;
	ir_node *const block          = get_irg_start_block(irg);
	ir_node *const initial_mem    = get_irg_initial_mem(irg);
	ir_node       *mem            = initial_mem;
	ir_node       *first_mov      = NULL;
	for (size_t p = cconv->n_parameters; reg_params != max_reg_params; ++p, ++reg_params) {
		arch_register_t const *const reg       = cconv->parameters[p].reg;
		ir_node               *const reg_value = be_get_Start_proj(irg, reg);
		if (reg->cls == &amd64_reg_classes[CLASS_amd64_gp]) {
			mem = make_mov_val64_to_offset_mem(NULL, block, mem, frame, reg_save_area, gp_save_slots[gp_params++], reg_value);
		} else if (reg->cls == &amd64_reg_classes[CLASS_amd64_xmm]) {
			mem = make_mov_xmmval64_to_offset_mem(NULL, block, mem, frame, reg_save_area, xmm_save_slots[xmm_params++], reg_value);
		} else {
			panic("unexpected register class");
		}
		if (!first_mov)
			first_mov = mem;
	}

	if (mem != initial_mem) {
		edges_reroute_except(initial_mem, mem, first_mov);
		set_irg_initial_mem(irg, initial_mem);
	}

	// We are now done with vararg handling for this irg; free the memory.
	free(gp_save_slots);
	free(xmm_save_slots);
}