/*
 * This file is part of libFirm.
 * Copyright (C) 2012 University of Karlsruhe.
 */

/**
 * @file
 * @brief       Implements several optimizations for IA32.
 * @author      Matthias Braun, Christian Wuerdig
 */
#include "debug.h"
#include "irnode_t.h"
#include "irprog_t.h"
#include "ircons.h"
#include "irtools.h"
#include "firm_types.h"
#include "iredges_t.h"
#include "tv.h"
#include "irgmod.h"
#include "irgwalk.h"
#include "heights.h"
#include "irprintf.h"
#include "irdump.h"
#include "panic.h"

#include "be_t.h"
#include "bediagnostic.h"
#include "benode.h"
#include "besched.h"
#include "bepeephole.h"

#include "ia32_new_nodes.h"
#include "ia32_optimize.h"
#include "bearch_ia32_t.h"
#include "gen_ia32_regalloc_if.h"
#include "ia32_transform.h"
#include "ia32_architecture.h"
#include "util.h"

DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)

static bool is_hl_register(arch_register_t const *const reg)
{
	return
		reg == &ia32_registers[REG_EAX] ||
		reg == &ia32_registers[REG_EBX] ||
		reg == &ia32_registers[REG_ECX] ||
		reg == &ia32_registers[REG_EDX];
}

static void copy_mark(const ir_node *old, ir_node *newn)
{
	if (is_ia32_is_reload(old))
		set_ia32_is_reload(newn);
	if (is_ia32_is_spill(old))
		set_ia32_is_spill(newn);
	if (is_ia32_is_remat(old))
		set_ia32_is_remat(newn);
}

static void replace(ir_node *const old, ir_node *const newn)
{
	copy_mark(old, newn);
	be_peephole_replace(old, newn);
}

typedef enum produces_flag_t {
	produces_no_flag,
	produces_zero_sign,
	produces_zero_in_carry
} produces_flag_t;

/**
 * Return which usable flag the given node produces about the result.
 * That is zero (ZF) and sign (SF).
 * We do not check for carry (CF) or overflow (OF).
 *
 * @param node  the node to check
 * @param pn    the projection number of the used result
 */
static produces_flag_t check_produces_zero_sign(ir_node *node, unsigned pn)
{
	if (!is_ia32_irn(node))
		return produces_no_flag;

	ir_node *count;

	switch (get_ia32_irn_opcode(node)) {
		case iro_ia32_Adc:
		case iro_ia32_Add:
		case iro_ia32_And:
		case iro_ia32_Dec:
		case iro_ia32_Inc:
		case iro_ia32_Neg:
		case iro_ia32_Or:
		case iro_ia32_Popcnt:
		case iro_ia32_Sbb:
		case iro_ia32_Sub:
		case iro_ia32_Xor:
			break;

		case iro_ia32_ShlD:
		case iro_ia32_ShrD:
			assert((int)n_ia32_ShlD_count == (int)n_ia32_ShrD_count);
			count = get_irn_n(node, n_ia32_ShlD_count);
			goto check_shift_amount;

		case iro_ia32_Sar:
		case iro_ia32_Shl:
		case iro_ia32_Shr:
			assert((int)n_ia32_Shl_count == (int)n_ia32_Shr_count
					&& (int)n_ia32_Shl_count == (int)n_ia32_Sar_count);
			count = get_irn_n(node, n_ia32_Shl_count);
check_shift_amount:
			/* when the shift count is zero the flags are not affected, so we can
			 * only do this for constants != 0 */
			if (!is_ia32_Immediate(count))
				return produces_no_flag;

			const ia32_immediate_attr_t *imm_attr = get_ia32_immediate_attr_const(count);
			if (imm_attr->imm.entity != NULL)
				return produces_no_flag;
			if ((imm_attr->imm.offset & 0x1f) == 0)
				return produces_no_flag;
			break;

		case iro_ia32_Mul:
			return pn == pn_ia32_Mul_res_high ?
				produces_zero_in_carry : produces_no_flag;

		default:
			return produces_no_flag;
	}

	return pn == pn_ia32_res ? produces_zero_sign : produces_no_flag;
}

static void make_xor(ir_node *const and, ir_node *(*const cons)(dbg_info*, ir_node*, ir_node*, ir_node*, ir_node*, ir_node*, ir_node*), ir_mode *const mode, arch_register_t const *const reg)
{
	dbg_info *const dbgi  = get_irn_dbg_info(and);
	ir_node  *const block = get_nodes_block(and);
	ir_node  *const noreg = get_irn_n(and, n_ia32_And_base);
	ir_node  *const nomem = get_irn_n(and, n_ia32_And_mem);
	ir_node  *const left  = get_irn_n(and, n_ia32_And_left);
	ir_node  *const xor   = cons(dbgi, block, noreg, noreg, nomem, left, left);
	set_ia32_ls_mode(xor, mode);
	arch_set_irn_register_out(xor, pn_ia32_Xor_res, reg);
	replace(and, xor);
}

/**
 * Use shorter instructions:
 * r & 0xFFFFFF00 -> xorb rl, rl (6 bytes -> 2 bytes)
 * r & 0xFFFF00FF -> xorb rh, rh (6 bytes -> 2 bytes)
 * r & 0xFFFF0000 -> xorw rx, rx (6 bytes -> 3 bytes)
 */
static void peephole_ia32_And(ir_node *const node)
{
	ir_node *const right = get_irn_n(node, n_ia32_And_right);
	if (!is_ia32_Immediate(right))
		return;

	ia32_immediate_attr_t const *const imm = get_ia32_immediate_attr_const(right);
	if (imm->imm.entity)
		return;

	/* Perform the replacement only if the flags are unused.
	 * Xor sets the flags differently than And. */
	if (get_irn_mode(node) == mode_T && get_Proj_for_pn(node, pn_ia32_And_flags))
		return;

	arch_register_t const *const reg = arch_get_irn_register_out(node, pn_ia32_And_res);
	uint32_t               const val = imm->imm.offset;
	if (val == 0xFFFF0000) {
		make_xor(node, &new_bd_ia32_Xor, mode_Hu, reg);
	} else if (is_hl_register(reg)) {
		if (val == 0xFFFFFF00) {
			make_xor(node, &new_bd_ia32_Xor_8bit, mode_Bu, reg);
		} else if (val == 0xFFFF00FF) {
			make_xor(node, &new_bd_ia32_Xor_8bit, ia32_mode_8h, reg);
		}
	}
}

/**
 * Replace Cmp(x, 0) by a Test(x, x)
 */
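/* Illustration (not from the original source): at the assembly level this
 * rewrites a comparison against zero into a test of the operand with itself,
 * e.g. cmpl $0, %eax -> testl %eax, %eax, which encodes shorter and yields
 * the same ZF/SF (and CF = OF = 0) as a compare against 0. */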
static void peephole_ia32_Cmp(ir_node *const node)
{
	if (get_ia32_op_type(node) != ia32_Normal)
		return;

	ir_node *const right = get_irn_n(node, n_ia32_Cmp_right);
	if (!is_ia32_Immediate(right))
		return;

	ia32_immediate_attr_t const *const imm = get_ia32_immediate_attr_const(right);
	if (imm->imm.entity != NULL || imm->imm.offset != 0)
		return;

	dbg_info *const dbgi         = get_irn_dbg_info(node);
	ir_node  *const block        = get_nodes_block(node);
	ir_graph *const irg          = get_irn_irg(node);
	ir_node  *const noreg        = ia32_new_NoReg_gp(irg);
	ir_node  *const nomem        = get_irg_no_mem(irg);
	ir_node  *const op           = get_irn_n(node, n_ia32_Cmp_left);
	int       const ins_permuted = get_ia32_attr(node)->ins_permuted;

	ir_mode *const ls_mode = get_ia32_ls_mode(node);
	ir_node *const test    = get_mode_size_bits(ls_mode) == 8
		? new_bd_ia32_Test_8bit(dbgi, block, noreg, noreg, nomem, op, op, ins_permuted)
		: new_bd_ia32_Test     (dbgi, block, noreg, noreg, nomem, op, op, ins_permuted);
	set_ia32_ls_mode(test, ls_mode);
	arch_set_irn_register_out(test, pn_ia32_Test_eflags, &ia32_registers[REG_EFLAGS]);

	foreach_out_edge_safe(node, edge) {
		ir_node *const user = get_edge_src_irn(edge);

		if (is_Proj(user))
			exchange(user, test);
	}

	replace(node, test);
}

/**
 * Peephole optimization for Test instructions.
 * - Remove the Test, if an appropriate flag was produced which is still live
 * - Change a Test(x, c) to 8Bit, if 0 <= c < 128 (3 byte shorter opcode)
 */
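/* Illustration (not from the original source): the second point corresponds to
 * e.g. testl $0x40, %eax (5 bytes) -> testb $0x40, %al (2 bytes); restricting
 * the constant to 0..127 keeps the sign flag meaningful in the 8-bit form. */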
static void peephole_ia32_Test(ir_node *node)
{
	ir_node *left  = get_irn_n(node, n_ia32_Test_left);
	ir_node *right = get_irn_n(node, n_ia32_Test_right);

	if (left == right) { /* we need a test for 0 */
		ir_node *block = get_nodes_block(node);
		if (get_nodes_block(left) != block)
			return;

		unsigned pn = pn_ia32_res;
		ir_node *op = left;
		if (is_Proj(op)) {
			pn = get_Proj_num(op);
			op = get_Proj_pred(op);
		}

		/* walk schedule up and abort when we find left or some other node
		 * destroys the flags */
		ir_node *schedpoint = node;
		for (;;) {
			schedpoint = sched_prev(schedpoint);
			if (schedpoint == op)
				break;
			if (arch_irn_is(schedpoint, modify_flags))
				return;
			if (schedpoint == block)
				panic("couldn't find left");
		}

		produces_flag_t produced = check_produces_zero_sign(op, pn);
		if (produced == produces_no_flag)
			return;

		/* make sure users only look at the sign/zero flag */
		foreach_out_edge(node, edge) {
			ir_node *user = get_edge_src_irn(edge);
			if (is_ia32_CMovcc(user) || is_ia32_Jcc(user) ||
			    is_ia32_Setcc(user) || is_ia32_SetccMem(user)) {
				x86_condition_code_t cc = get_ia32_condcode(user);

				if (cc == x86_cc_equal || cc == x86_cc_not_equal)
					continue;
				if (produced == produces_zero_sign
					&& (cc == x86_cc_sign || cc == x86_cc_not_sign)) {
					continue;
				}
			}
			return;
		}

		ir_mode *op_mode = get_ia32_ls_mode(op);
		if (op_mode == NULL)
			op_mode = get_irn_mode(op);

		/* Make sure we operate on the same bit size */
		if (get_mode_size_bits(op_mode) != get_mode_size_bits(get_ia32_ls_mode(node)))
			return;

		if (produced == produces_zero_in_carry) {
			/* patch users to look at the carry instead of the zero flag */
			foreach_out_edge(node, edge) {
				ir_node              *user = get_edge_src_irn(edge);
				x86_condition_code_t  cc   = get_ia32_condcode(user);

				switch (cc) {
				case x86_cc_equal:     cc = x86_cc_above_equal; break;
				case x86_cc_not_equal: cc = x86_cc_below;       break;
				default: panic("unexpected pn");
				}
				set_ia32_condcode(user, cc);
			}
		}

		if (get_irn_mode(op) != mode_T) {
			/* If there are other users, reroute them to result proj */
			if (get_irn_n_edges(op) != 2) {
				be_peephole_to_tuple(op);
			} else {
				set_irn_mode(op, mode_T);
			}
		} else {
			if (get_irn_n_edges(left) == 2)
				kill_node(left);
		}

		ir_node *const flags_proj = be_new_Proj_reg(op, pn_ia32_flags, &ia32_registers[REG_EFLAGS]);
		be_peephole_exchange(node, flags_proj);
	} else if (is_ia32_Immediate(right)) {
		ia32_immediate_attr_t const *const imm = get_ia32_immediate_attr_const(right);

		/* A test with an entity is rather strange, but better safe than sorry */
		if (imm->imm.entity != NULL)
			return;

		/*
		 * We have to take care that we end up with the same sign flag:
		 * testl(128, 128) -> SF=0
		 * testb(128, 128) -> SF=1
		 */
		unsigned offset = imm->imm.offset;
		if (get_ia32_op_type(node) == ia32_AddrModeS) {
			ia32_attr_t *const attr = get_ia32_attr(node);
			ir_graph    *const irg  = get_irn_irg(node);

			if ((offset & 0xFFFFFF80) == 0) {
				/* attr->am_imm.offset += 0; */
			} else if ((offset & 0xFFFF80FF) == 0) {
				ir_node *const imm_node = ia32_create_Immediate(irg, offset >>  8);
				set_irn_n(node, n_ia32_Test_right, imm_node);
				attr->am_imm.offset += 1;
			} else if ((offset & 0xFF80FFFF) == 0) {
				ir_node *const imm_node = ia32_create_Immediate(irg, offset >> 16);
				set_irn_n(node, n_ia32_Test_right, imm_node);
				attr->am_imm.offset += 2;
			} else if ((offset & 0x00FFFFFF) == 0) {
				ir_node *const imm_node = ia32_create_Immediate(irg, offset >> 24);
				set_irn_n(node, n_ia32_Test_right, imm_node);
				attr->am_imm.offset += 3;
			} else {
				return;
			}
		} else if ((offset & 0xFFFFFF80) == 0) {
			arch_register_t const *const reg = arch_get_irn_register(left);
			if (!is_hl_register(reg))
				return;
		} else {
			return;
		}

		/* Technically we should build a Test8Bit because of the register
		 * constraints, but nobody changes registers at this point anymore. */
		set_ia32_ls_mode(node, mode_Bu);
	}
}

/**
 * AMD Athlon works faster when RET is not the destination of a
 * conditional jump or directly preceded by another jump instruction.
 * Can be avoided by placing a Rep prefix before the return.
 */
static void peephole_ia32_Return(ir_node *node)
{
	if (!ia32_cg_config.use_pad_return)
		return;

	/* check if this return is the first on the block */
	sched_foreach_non_phi_reverse_before(node, irn) {
		if (be_is_Start(irn))
			continue;
		/* arg, IncSP 0 nodes might occur, ignore these */
		if (be_is_IncSP(irn) && be_get_IncSP_offset(irn) == 0)
			continue;
		return;
	}

	/* ensure that the 3 byte return is generated */
	ia32_return_attr_t *attr = get_ia32_return_attr(node);
	attr->emit_pop = true;
}

/* only optimize up to 48 stores behind IncSPs */
#define MAXPUSH_OPTIMIZE    48

/**
 * Tries to create Push's from IncSP, Store combinations.
 * The Stores are replaced by Push's, the IncSP is modified
 * (possibly into IncSP 0, but not removed).
 */
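/* Conceptual example (not from the original source), for an IncSP of 8 bytes:
 *     subl $8, %esp
 *     movl %eax, 4(%esp)
 *     movl %ebx, (%esp)
 * can become
 *     pushl %eax
 *     pushl %ebx
 * with the IncSP shrinking by 4 for every Store turned into a Push. */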
static void peephole_IncSP_Store_to_push(ir_node *irn)
{
	int inc_ofs = be_get_IncSP_offset(irn);
	if (inc_ofs < 4)
		return;

	int      maxslot                  = -1;
	ir_node *stores[MAXPUSH_OPTIMIZE];

	memset(stores, 0, sizeof(stores));

	/*
	 * We first walk the schedule after the IncSP node as long as we find
	 * suitable Stores that could be transformed to a Push.
	 * We save them into the stores array which is sorted by the frame offset/4
	 * attached to the node
	 */
	sched_foreach_after(irn, node) {
		/* it has to be a Store */
		if (!is_ia32_Store(node))
			break;

		/* it has to use our sp value */
		if (get_irn_n(node, n_ia32_base) != irn)
			continue;
		/* Store has to be attached to NoMem */
		ir_node *mem = get_irn_n(node, n_ia32_mem);
		if (!is_NoMem(mem))
			continue;

		/* unfortunately we can't support the full AMs possible for push at the
		 * moment. TODO: fix this */
		if (!is_ia32_NoReg_GP(get_irn_n(node, n_ia32_index)))
			break;

		int offset = get_ia32_am_offs_int(node);
		/* we should NEVER access uninitialized stack BELOW the current SP */
		assert(offset >= 0);

		/* storing at half-slots is bad */
		if ((offset & 3) != 0)
			break;

		if (inc_ofs - 4 < offset || offset >= MAXPUSH_OPTIMIZE * 4)
			continue;

		int storeslot = offset >> 2;

		/* storing into the same slot twice is bad (and shouldn't happen...) */
		if (stores[storeslot] != NULL)
			break;

		stores[storeslot] = node;
		if (storeslot > maxslot)
			maxslot = storeslot;
	}

	ir_node *curr_sp = irn;
	int      i;
	for (i = -1; i < maxslot; ++i) {
		if (stores[i + 1] == NULL)
			break;
	}

	/* walk through the Stores and create Pushs for them */
	ir_node  *const block      = get_nodes_block(irn);
	ir_graph *const irg        = get_irn_irg(irn);
	ir_node  *const noreg      = ia32_new_NoReg_gp(irg);
	ir_node        *first_push = NULL;
	for (; i >= 0; --i) {
		ir_node  *const store = stores[i];
		dbg_info *const dbgi  = get_irn_dbg_info(store);
		ir_node  *const mem   = get_irn_n(store, n_ia32_mem);
		ir_node  *const val   = get_irn_n(store, n_ia32_unary_op);
		ir_node  *const push  = new_bd_ia32_Push(dbgi, block, noreg, noreg, mem, val, curr_sp, ia32_mode_gp);
		copy_mark(store, push);

		if (first_push == NULL)
			first_push = push;

		sched_add_after(skip_Proj(curr_sp), push);

		/* create stackpointer Proj */
		curr_sp = be_new_Proj_reg(push, pn_ia32_Push_stack, &ia32_registers[REG_ESP]);

		/* use the memproj now */
		be_peephole_exchange(store, push);

		inc_ofs -= 4;
	}

	edges_reroute_except(irn, curr_sp, first_push);
	be_set_IncSP_offset(irn, inc_ofs);
}

/**
 * Tries to create Pops from Load, IncSP combinations.
 * The Loads are replaced by Pops, the IncSP is modified
 * (possibly into IncSP 0, but not removed).
 */
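/* Conceptual example (not from the original source), for an IncSP of -8 bytes:
 *     movl (%esp), %ebx
 *     movl 4(%esp), %eax
 *     addl $8, %esp
 * can become
 *     popl %ebx
 *     popl %eax
 * with the IncSP shrinking by 4 for every Load turned into a Pop. */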
static void peephole_Load_IncSP_to_pop(ir_node *irn)
{
	int inc_ofs = -be_get_IncSP_offset(irn);
	if (inc_ofs < 4)
		return;

	ir_node  *loads[MAXPUSH_OPTIMIZE];
	unsigned  regmask                 = 0;
	unsigned  copymask                = ~0;

	memset(loads, 0, sizeof(loads));

	/*
	 * We first walk the schedule before the IncSP node as long as we find
	 * suitable Loads that could be transformed to a Pop.
	 * We save them into the loads array which is sorted by the frame offset/4
	 * attached to the node
	 */
	int      maxslot = -1;
	ir_node *pred_sp = be_get_IncSP_pred(irn);
	sched_foreach_reverse_before(irn, node) {
		/* it has to be a Load */
		if (!is_ia32_Load(node)) {
			if (be_is_Copy(node)) {
				const arch_register_t *dreg = arch_get_irn_register(node);
				if (dreg->cls != &ia32_reg_classes[CLASS_ia32_gp]) {
					/* not a GP copy, ignore */
					continue;
				}
				const arch_register_t *sreg = arch_get_irn_register(be_get_Copy_op(node));
				if (regmask & copymask & (1 << sreg->index)) {
					break;
				}
				if (regmask & copymask & (1 << dreg->index)) {
					break;
				}
				/* we CAN skip Copies if neither the destination nor the source
				 * is in our regmask, ie none of our future Pops will overwrite it */
				regmask |= (1 << dreg->index) | (1 << sreg->index);
				copymask &= ~((1 << dreg->index) | (1 << sreg->index));
				continue;
			}
			break;
		}

		/* it has to use our predecessor sp value */
		if (get_irn_n(node, n_ia32_base) != pred_sp) {
			/* it would be ok if this load does not use a Pop result,
			 * but we do not check this */
			break;
		}

		/* should have NO index */
		if (!is_ia32_NoReg_GP(get_irn_n(node, n_ia32_index)))
			break;

		int offset = get_ia32_am_offs_int(node);
		/* we should NEVER access uninitialized stack BELOW the current SP */
		assert(offset >= 0);

		/* loading from half-slots is bad */
		if ((offset & 3) != 0)
			break;

		if (offset < 0 || offset >= MAXPUSH_OPTIMIZE * 4)
			continue;

		/* ignore those outside the possible window */
		if (offset > inc_ofs - 4)
			continue;
		int loadslot = offset >> 2;

		/* loading from the same slot twice is bad (and shouldn't happen...) */
		if (loads[loadslot] != NULL)
			break;

		const arch_register_t *dreg = arch_get_irn_register_out(node, pn_ia32_Load_res);
		if (regmask & (1 << dreg->index)) {
			/* this register is already used */
			break;
		}
		regmask |= 1 << dreg->index;

		loads[loadslot] = node;
		if (loadslot > maxslot)
			maxslot = loadslot;
	}

	if (maxslot < 0)
		return;

	/* find the first slot */
	int i;
	for (i = maxslot; i >= 0; --i) {
		ir_node *load = loads[i];

		if (load == NULL)
			break;
	}

	int ofs = inc_ofs - (maxslot + 1) * 4;
	inc_ofs = (i + 1) * 4;

	/* create a new IncSP if needed */
	ir_node *const block = get_nodes_block(irn);
	if (inc_ofs > 0) {
		pred_sp = ia32_new_IncSP(block, pred_sp, -inc_ofs, be_get_IncSP_align(irn));
		sched_add_before(irn, pred_sp);
	}

	/* walk through the Loads and create Pops for them */
	for (++i; i <= maxslot; ++i) {
		ir_node               *load = loads[i];
		ir_node               *mem  = get_irn_n(load, n_ia32_mem);
		const arch_register_t *reg  = arch_get_irn_register_out(load, pn_ia32_Load_res);

		ir_node *pop = new_bd_ia32_Pop(get_irn_dbg_info(load), block, mem, pred_sp);
		arch_set_irn_register_out(pop, pn_ia32_Load_res, reg);

		copy_mark(load, pop);

		/* create stackpointer Proj */
		pred_sp = be_new_Proj_reg(pop, pn_ia32_Pop_stack, &ia32_registers[REG_ESP]);

		sched_add_before(irn, pop);
		be_peephole_exchange(load, pop);
	}

	be_set_IncSP_offset(irn, -ofs);
	be_set_IncSP_pred(irn, pred_sp);
}


/**
 * Find a free GP register if possible, else return NULL.
 */
static const arch_register_t *get_free_gp_reg(ir_graph *irg)
{
	be_irg_t *birg = be_birg_from_irg(irg);

	for (int i = 0; i < N_ia32_gp_REGS; ++i) {
		const arch_register_t *reg = &ia32_reg_classes[CLASS_ia32_gp].regs[i];
		if (!rbitset_is_set(birg->allocatable_regs, reg->global_index))
			continue;

		if (be_peephole_get_value(reg->global_index) == NULL)
			return reg;
	}

	return NULL;
}

/**
 * Creates a Pop instruction before the given schedule point.
 *
 * @param dbgi        debug info
 * @param block       the block
 * @param stack       the previous stack value
 * @param schedpoint  the new node is added before this node
 * @param reg         the register to pop
 *
 * @return the new stack value
 */
static ir_node *create_pop(dbg_info *dbgi, ir_node *block,
                           ir_node *stack, ir_node *schedpoint,
                           const arch_register_t *reg)
{
	ir_graph *const irg = get_irn_irg(block);
	ir_node  *const mem = get_irg_no_mem(irg);
	ir_node  *const pop = new_bd_ia32_Pop(dbgi, block, mem, stack);
	sched_add_before(schedpoint, pop);

	ir_node *const val  = be_new_Proj_reg(pop, pn_ia32_Pop_res, reg);
	ir_node *const keep = be_new_Keep_one(val);
	sched_add_before(schedpoint, keep);

	return be_new_Proj_reg(pop, pn_ia32_Pop_stack, &ia32_registers[REG_ESP]);
}

/**
 * Optimize an IncSP by replacing it with Push/Pop.
 */
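/* Illustration (not from the original source): on targets where the plain esp
 * adjustment is dispreferred (see the ia32_cg_config.use_add_esp_*/use_sub_esp_*
 * flags), a small IncSP can be replaced, e.g.
 *     addl $4, %esp   ->   popl %ecx    (any free register, value is discarded)
 *     subl $4, %esp   ->   pushl %eax   (the pushed value is irrelevant)
 * where %ecx stands for whatever register get_free_gp_reg() returns. */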
static void peephole_be_IncSP(ir_node *node)
{
	/* first optimize incsp->incsp combinations */
	if (be_peephole_IncSP_IncSP(node))
		return;

	/* transform IncSP->Store combinations to Push where possible */
	peephole_IncSP_Store_to_push(node);

	/* transform Load->IncSP combinations to Pop where possible */
	peephole_Load_IncSP_to_pop(node);

	/* replace IncSP -4 by Pop freereg when possible */
	int offset = be_get_IncSP_offset(node);
	if ((offset != -8 || ia32_cg_config.use_add_esp_8) &&
	    (offset != -4 || ia32_cg_config.use_add_esp_4) &&
	    (offset != +4 || ia32_cg_config.use_sub_esp_4) &&
	    (offset != +8 || ia32_cg_config.use_sub_esp_8))
		return;

	ir_node  *stack = be_get_IncSP_pred(node);
	dbg_info *dbgi  = get_irn_dbg_info(node);
	ir_node  *block = get_nodes_block(node);
	if (offset < 0) {
		/* we need a free register for pop */
		arch_register_t const *const reg = get_free_gp_reg(get_irn_irg(node));
		if (!reg)
			return;

		do {
			stack = create_pop(dbgi, block, stack, node, reg);
		} while ((offset += 4) != 0);
	} else {
		do {
			stack = new_bd_ia32_PushEax(dbgi, block, stack);
			arch_set_irn_register(stack, &ia32_registers[REG_ESP]);
			sched_add_before(node, stack);
		} while ((offset -= 4) != 0);
	}

	be_peephole_exchange(node, stack);
}

/**
 * Peephole optimization for ia32_Const's
 */
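/* Illustration (not from the original source): a zero constant
 *     movl $0, %eax    (5 bytes)
 * becomes
 *     xorl %eax, %eax  (2 bytes)
 * which is only legal while no other node relies on the current EFLAGS. */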
static void peephole_ia32_Const(ir_node *node)
{
	const ia32_immediate_attr_t *attr = get_ia32_immediate_attr_const(node);

	/* try to transform a mov 0, reg to xor reg reg */
	if (attr->imm.offset != 0 || attr->imm.entity != NULL)
		return;
	if (ia32_cg_config.use_mov_0)
		return;
	/* xor destroys the flags, so no-one must be using them */
	if (be_peephole_get_value(REG_EFLAGS) != NULL)
		return;

	const arch_register_t *reg = arch_get_irn_register(node);
	assert(be_peephole_get_reg_value(reg) == NULL);

	ir_node  *block = get_nodes_block(node);
	dbg_info *dbgi  = get_irn_dbg_info(node);
	ir_node  *xorn  = new_bd_ia32_Xor0(dbgi, block);
	arch_set_irn_register(xorn, reg);
	replace(node, xorn);
}

static bool is_disp_const(ir_node const *const node, int32_t const val)
{
	return !get_ia32_am_ent(node) && get_ia32_am_offs_int(node) == val;
}

/**
 * Transforms a Lea into an Add or Shl if possible.
 */
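/* Illustration (not from the original source), possible rewrites when the Lea
 * result lands in its base or index register:
 *     leal 4(%eax), %eax       ->   addl $4, %eax   (or incl/decl for +/-1)
 *     leal (%eax,%ebx), %eax   ->   addl %ebx, %eax
 *     leal (,%ebx,4), %ebx     ->   shll $2, %ebx
 */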
static void peephole_ia32_Lea(ir_node *node)
{
	assert(is_ia32_Lea(node));

	/* We can only do this if it is allowed to clobber the flags. */
	if (be_peephole_get_value(REG_EFLAGS))
		return;

	/* Frame entities should already be expressed in the offsets. */
	assert(get_ia32_attr_const(node)->am_imm.kind != X86_IMM_FRAMEOFFSET);

	/* We can transform Leas where the out register is the same as either the
	 * base or index register back to an Add or Shl. */
	ir_node                     *op1;
	ir_node                     *op2;
	ir_node               *const base    = get_irn_n(node, n_ia32_Lea_base);
	ir_node               *const index   = get_irn_n(node, n_ia32_Lea_index);
	arch_register_t const *const out_reg = arch_get_irn_register(node);
	if (out_reg == arch_get_irn_register(base)) {
		op1 = base;
		op2 = index;
	} else if (out_reg == arch_get_irn_register(index)) {
		op1 = index;
		op2 = base;
	} else {
#ifdef DEBUG_libfirm
		/* We shouldn't construct these in the first place. */
		if (is_ia32_NoReg_GP(base) && is_ia32_NoReg_GP(index))
			be_warningf(node, "found unoptimized Lea which is a Const");
#endif
		return; /* Neither base nor index use the same register as the Lea. */
	}

	ir_node       *res;
	bool     const has_2ops = !is_ia32_NoReg_GP(op2);
	bool     const has_disp = !is_disp_const(node, 0);
	unsigned const scale    = get_ia32_am_scale(node);
	if (scale == 0) {
		if (!has_2ops) {
			/* Lea base/index + disp -> Add+Imm. */
#ifdef DEBUG_libfirm
			if (!has_disp)
				be_warningf(node, "found unoptimized Lea which is a Copy");
#endif
			if (ia32_cg_config.use_incdec) {
				if (is_disp_const(node, 1)) {
					dbg_info *const dbgi  = get_irn_dbg_info(node);
					ir_node  *const block = get_nodes_block(node);
					res = new_bd_ia32_Inc(dbgi, block, op1);
					goto exchange;
				} else if (is_disp_const(node, -1)) {
					dbg_info *const dbgi  = get_irn_dbg_info(node);
					ir_node  *const block = get_nodes_block(node);
					res = new_bd_ia32_Dec(dbgi, block, op1);
					goto exchange;
				}
			}

			ir_graph          *const irg  = get_irn_irg(node);
			ia32_attr_t const *const attr = get_ia32_attr_const(node);
			op2 = ia32_create_Immediate_full(irg, &attr->am_imm);
		} else if (has_disp) {
			return; /* Lea has base, index and displacement. */
		}
		/* Lea base + index, base/index + disp -> Add. */
		dbg_info *const dbgi  = get_irn_dbg_info(node);
		ir_node  *const block = get_nodes_block(node);
		ir_graph *const irg   = get_irn_irg(node);
		ir_node  *const noreg = ia32_new_NoReg_gp(irg);
		ir_node  *const nomem = get_irg_no_mem(irg);
		res = new_bd_ia32_Add(dbgi, block, noreg, noreg, nomem, op1, op2);
		set_ia32_commutative(res);
	} else if (!has_2ops && !has_disp) {
		/* Lea index * scale -> Shl+Imm. */
		assert(op1 == index);
		dbg_info *const dbgi  = get_irn_dbg_info(node);
		ir_node  *const block = get_nodes_block(node);
		ir_graph *const irg   = get_irn_irg(node);
		ir_node  *const amt   = ia32_create_Immediate(irg, scale);
		res = new_bd_ia32_Shl(dbgi, block, op1, amt);
	} else {
		return; /* Lea has scaled index as well as base and/or displacement. */
	}

exchange:
	/* Replace the Lea by Add/Shl. */
	arch_set_irn_register(res, out_reg);
	SET_IA32_ORIG_NODE(res, node);
	replace(node, res);
}

/**
 * Split a Imul mem, imm into a Load mem and Imul reg, imm if possible.
 */
static void peephole_ia32_ImulImm_split(ir_node *imul)
{
	/* Ignore, if no memory, imm form. */
	if (get_ia32_op_type(imul) != ia32_AddrModeS)
		return;
	/* we need a free register */
	const arch_register_t *reg = get_free_gp_reg(get_irn_irg(imul));
	if (reg == NULL)
		return;

	/* fine, we can rebuild it */
	ir_node *res = ia32_turn_back_am(imul);