/*
 * This file is part of libFirm.
 * Copyright (C) 2012 University of Karlsruhe.
 */

/**
 * @file
 * @brief       Implements several optimizations for IA32.
 * @author      Matthias Braun, Christian Wuerdig
 */
#include "irnode.h"
#include "irprog_t.h"
#include "ircons.h"
#include "irtools.h"
#include "firm_types.h"
#include "iredges.h"
#include "tv.h"
#include "irgmod.h"
#include "irgwalk.h"
#include "heights.h"
#include "irprintf.h"
#include "irdump.h"
#include "panic.h"
#include "firmstat_t.h"

#include "be_t.h"
#include "beabi.h"
#include "benode.h"
#include "besched.h"
#include "bepeephole.h"

#include "ia32_new_nodes.h"
#include "ia32_optimize.h"
#include "bearch_ia32_t.h"
#include "gen_ia32_regalloc_if.h"
#include "ia32_common_transform.h"
#include "ia32_transform.h"
#include "ia32_dbg_stat.h"
#include "ia32_architecture.h"

DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)

static void copy_mark(const ir_node *old, ir_node *newn)
{
	if (is_ia32_is_reload(old))
		set_ia32_is_reload(newn);
	if (is_ia32_is_spill(old))
		set_ia32_is_spill(newn);
	if (is_ia32_is_remat(old))
		set_ia32_is_remat(newn);
}

typedef enum produces_flag_t {
	produces_no_flag,
	produces_zero_sign,
	produces_zero_in_carry
} produces_flag_t;
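
/* Worked example (illustrative, not from the original sources): after
 * "addl %ebx, %eax" the ZF/SF bits already describe the result, so a
 * following "testl %eax, %eax" is redundant (produces_zero_sign). For the
 * high half of a widening Mul only "is zero" is observable, and it shows up
 * in the carry flag rather than in ZF (produces_zero_in_carry). */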

/**
 * Return which usable flag the given node produces about the result.
 * That is zero (ZF) and sign (SF).
 * We do not check for carry (CF) or overflow (OF).
 *
 * @param node  the node to check
 * @param pn    the projection number of the used result
 */
static produces_flag_t check_produces_zero_sign(ir_node *node, int pn)
{
	if (!is_ia32_irn(node))
		return produces_no_flag;

	ir_node *count;

	switch (get_ia32_irn_opcode(node)) {
		case iro_ia32_Add:
		case iro_ia32_Adc:
		case iro_ia32_And:
		case iro_ia32_Or:
		case iro_ia32_Xor:
		case iro_ia32_Sub:
		case iro_ia32_Sbb:
		case iro_ia32_Neg:
		case iro_ia32_Inc:
		case iro_ia32_Dec:
			break;

		case iro_ia32_ShlD:
		case iro_ia32_ShrD:
			assert((int)n_ia32_ShlD_count == (int)n_ia32_ShrD_count);
			count = get_irn_n(node, n_ia32_ShlD_count);
			goto check_shift_amount;

		case iro_ia32_Shl:
		case iro_ia32_Shr:
		case iro_ia32_Sar:
			assert((int)n_ia32_Shl_count == (int)n_ia32_Shr_count
					&& (int)n_ia32_Shl_count == (int)n_ia32_Sar_count);
			count = get_irn_n(node, n_ia32_Shl_count);
check_shift_amount:
			/* when shift count is zero the flags are not affected, so we can only
			 * do this for constants != 0 */
			if (!is_ia32_Immediate(count))
				return produces_no_flag;

			const ia32_immediate_attr_t *imm_attr = get_ia32_immediate_attr_const(count);
			if (imm_attr->entity != NULL)
				return produces_no_flag;
			if ((imm_attr->offset & 0x1f) == 0)
				return produces_no_flag;
			break;

		case iro_ia32_Mul:
			return pn == pn_ia32_Mul_res_high ?
				produces_zero_in_carry : produces_no_flag;

		default:
			return produces_no_flag;
	}

	return pn == pn_ia32_res ? produces_zero_sign : produces_no_flag;
}
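
/* Encoding sketch for the rewrite below (illustrative):
 *
 *     cmpl $0, %eax      ; 83 f8 00   (3 bytes)
 *     testl %eax, %eax   ; 85 c0      (2 bytes)
 *
 * Both set ZF iff %eax is zero and SF to the sign bit, so every Proj user
 * of the eflags result stays valid. */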

/**
 * Replace Cmp(x, 0) by a Test(x, x)
 */
static void peephole_ia32_Cmp(ir_node *const node)
{
	if (get_ia32_op_type(node) != ia32_Normal)
		return;

	ir_node *const right = get_irn_n(node, n_ia32_Cmp_right);
	if (!is_ia32_Immediate(right))
		return;

	ia32_immediate_attr_t const *const imm = get_ia32_immediate_attr_const(right);
	if (imm->entity != NULL || imm->offset != 0)
		return;

	dbg_info *const dbgi         = get_irn_dbg_info(node);
	ir_node  *const block        = get_nodes_block(node);
	ir_graph *const irg          = get_Block_irg(block);
	ir_node  *const noreg        = ia32_new_NoReg_gp(irg);
	ir_node  *const nomem        = get_irg_no_mem(irg);
	ir_node  *const op           = get_irn_n(node, n_ia32_Cmp_left);
	int       const ins_permuted = get_ia32_attr(node)->data.ins_permuted;

	ir_mode *const ls_mode = get_ia32_ls_mode(node);
	ir_node *const test    = get_mode_size_bits(ls_mode) == 8
		? new_bd_ia32_Test_8bit(dbgi, block, noreg, noreg, nomem, op, op, ins_permuted)
		: new_bd_ia32_Test     (dbgi, block, noreg, noreg, nomem, op, op, ins_permuted);
	set_ia32_ls_mode(test, ls_mode);

	arch_register_t const *const reg = arch_get_irn_register_out(node, pn_ia32_Cmp_eflags);
	arch_set_irn_register_out(test, pn_ia32_Test_eflags, reg);

	foreach_out_edge_safe(node, edge) {
		ir_node *const user = get_edge_src_irn(edge);

		if (is_Proj(user))
			exchange(user, test);
	}

	sched_add_before(node, test);
	copy_mark(node, test);
	be_peephole_exchange(node, test);
}
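
/* Illustrative sketch of the immediate narrowing done below: when only one
 * byte of the immediate is populated, the wide memory test can address that
 * byte directly, e.g.
 *
 *     testl $0x4000, 8(%esp)   ->   testb $0x40, 9(%esp)
 *
 * (am_offs is bumped by the byte index, the immediate is shifted right).
 * Bit 7 of each byte is excluded by the masks so SF means the same thing
 * before and after the narrowing. */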

/**
 * Peephole optimization for Test instructions.
 * - Remove the Test, if an appropriate flag was produced which is still live
 * - Change a Test(x, c) to 8Bit, if 0 <= c < 128 (3 byte shorter opcode)
 */
static void peephole_ia32_Test(ir_node *node)
{
	ir_node *left  = get_irn_n(node, n_ia32_Test_left);
	ir_node *right = get_irn_n(node, n_ia32_Test_right);

	if (left == right) { /* we need a test for 0 */
		ir_node *block = get_nodes_block(node);
		if (get_nodes_block(left) != block)
			return;

		int      pn = pn_ia32_res;
		ir_node *op = left;
		if (is_Proj(op)) {
			pn = get_Proj_proj(op);
			op = get_Proj_pred(op);
		}

		/* walk schedule up and abort when we find left or some other node
		 * destroys the flags */
		ir_node *schedpoint = node;
		for (;;) {
			schedpoint = sched_prev(schedpoint);
			if (schedpoint == op)
				break;
			if (arch_irn_is(schedpoint, modify_flags))
				return;
			if (schedpoint == block)
				panic("couldn't find left");
		}

		produces_flag_t produced = check_produces_zero_sign(op, pn);
		if (produced == produces_no_flag)
			return;

		/* make sure users only look at the sign/zero flag */
		foreach_out_edge(node, edge) {
			ir_node *user = get_edge_src_irn(edge);
			if (is_ia32_CMovcc(user) || is_ia32_Jcc(user) ||
			    is_ia32_Setcc(user) || is_ia32_SetccMem(user)) {
				x86_condition_code_t cc = get_ia32_condcode(user);

				if (cc == x86_cc_equal || cc == x86_cc_not_equal)
					continue;
				if (produced == produces_zero_sign
					&& (cc == x86_cc_sign || cc == x86_cc_not_sign)) {
					continue;
				}
			}
			return;
		}

		ir_mode *op_mode = get_ia32_ls_mode(op);
		if (op_mode == NULL)
			op_mode = get_irn_mode(op);

		/* Make sure we operate on the same bit size */
		if (get_mode_size_bits(op_mode) != get_mode_size_bits(get_ia32_ls_mode(node)))
			return;

		if (produced == produces_zero_in_carry) {
			/* patch users to look at the carry instead of the zero flag */
			foreach_out_edge(node, edge) {
				ir_node              *user = get_edge_src_irn(edge);
				x86_condition_code_t  cc   = get_ia32_condcode(user);

				switch (cc) {
				case x86_cc_equal:     cc = x86_cc_above_equal; break;
				case x86_cc_not_equal: cc = x86_cc_below;       break;
				default: panic("unexpected pn");
				}
				set_ia32_condcode(user, cc);
			}
		}

		if (get_irn_mode(op) != mode_T) {
			set_irn_mode(op, mode_T);

			/* If there are other users, reroute them to result proj */
			if (get_irn_n_edges(op) != 2) {
				ir_node *res = new_r_Proj(op, ia32_mode_gp, pn_ia32_res);
				edges_reroute_except(op, res, res);
			}
		} else {
			if (get_irn_n_edges(left) == 2)
				kill_node(left);
		}

		ir_mode *flags_mode = ia32_reg_classes[CLASS_ia32_flags].mode;
		ir_node *flags_proj = new_r_Proj(op, flags_mode, pn_ia32_flags);
		arch_set_irn_register(flags_proj, &ia32_registers[REG_EFLAGS]);

		assert(get_irn_mode(node) != mode_T);

		be_peephole_exchange(node, flags_proj);
	} else if (is_ia32_Immediate(right)) {
		ia32_immediate_attr_t const *const imm = get_ia32_immediate_attr_const(right);

		/* A test with an entity is rather strange, but better safe than sorry */
		if (imm->entity != NULL)
			return;

		/*
		 * We have to take care that we end up with the same sign flag:
		 * testl(128, 128) -> SF=0
		 * testb(128, 128) -> SF=1
		 */
		unsigned offset = imm->offset;
		if (get_ia32_op_type(node) == ia32_AddrModeS) {
			ia32_attr_t *const attr = get_ia32_attr(node);
			ir_graph    *const irg  = get_irn_irg(node);

			if ((offset & 0xFFFFFF80) == 0) {
				/* attr->am_offs += 0; */
			} else if ((offset & 0xFFFF80FF) == 0) {
				ir_node *imm_node = ia32_create_Immediate(irg, NULL, offset >>  8);
				set_irn_n(node, n_ia32_Test_right, imm_node);
				attr->am_offs += 1;
			} else if ((offset & 0xFF80FFFF) == 0) {
				ir_node *imm_node = ia32_create_Immediate(irg, NULL, offset >> 16);
				set_irn_n(node, n_ia32_Test_right, imm_node);
				attr->am_offs += 2;
			} else if ((offset & 0x00FFFFFF) == 0) {
				ir_node *imm_node = ia32_create_Immediate(irg, NULL, offset >> 24);
				set_irn_n(node, n_ia32_Test_right, imm_node);
				attr->am_offs += 3;
			} else {
				return;
			}
		} else if ((offset & 0xFFFFFF80) == 0) {
			arch_register_t const* const reg = arch_get_irn_register(left);

			if (reg != &ia32_registers[REG_EAX] &&
					reg != &ia32_registers[REG_EBX] &&
					reg != &ia32_registers[REG_ECX] &&
					reg != &ia32_registers[REG_EDX]) {
				return;
			}
		} else {
			return;
		}

		/* Technically we should build a Test8Bit because of the register
		 * constraints, but nobody changes registers at this point anymore. */
		set_ia32_ls_mode(node, mode_Bu);
	}
}
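
/* Background sketch (assumption, encodings for illustration): the usual
 * paddings for this erratum are a two-byte "rep ret" or a three-byte
 * "ret $0"; both present the branch predictor with a wider return than the
 * single-byte "ret". The code below requests the padded (3 byte) form. */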

/**
 * AMD Athlon works faster when RET is not the destination of a
 * conditional jump or directly preceded by another jump instruction.
 * Can be avoided by placing a Rep prefix before the return.
 */
static void peephole_ia32_Return(ir_node *node)
{
	if (!ia32_cg_config.use_pad_return)
		return;

	/* check if this return is the first on the block */
	sched_foreach_reverse_before(node, irn) {
		if (is_Phi(irn) || be_is_Start(irn))
			continue;
		/* arg, IncSP 0 nodes might occur, ignore these */
		if (be_is_IncSP(irn) && be_get_IncSP_offset(irn) == 0)
			continue;
		return;
	}

	/* ensure that the 3 byte return is generated */
	be_Return_set_emit_pop(node, 1);
}

/* only optimize up to 48 stores behind IncSPs */
#define MAXPUSH_OPTIMIZE    48
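
/* Illustrative effect of the transformation below (assembly sketch):
 *
 *     subl $8, %esp              pushl %ebx
 *     movl %ebx, 4(%esp)   ->    pushl %eax
 *     movl %eax, (%esp)
 *
 * Each Push both stores a value and decrements %esp by 4, so the IncSP
 * shrinks by 4 per converted Store; the slot at the higher offset must be
 * pushed first. */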

/**
 * Tries to create Push's from IncSP, Store combinations.
 * The Stores are replaced by Push's, the IncSP is modified
 * (possibly into IncSP 0, but not removed).
 */
static void peephole_IncSP_Store_to_push(ir_node *irn)
{
	int inc_ofs = be_get_IncSP_offset(irn);
	if (inc_ofs < 4)
		return;

	int      maxslot                  = -1;
	ir_node *stores[MAXPUSH_OPTIMIZE];

	memset(stores, 0, sizeof(stores));

	/*
	 * We first walk the schedule after the IncSP node as long as we find
	 * suitable Stores that could be transformed to a Push.
	 * We save them into the stores array which is sorted by the frame offset/4
	 * attached to the node
	 */
	sched_foreach_after(irn, node) {
		/* it has to be a Store */
		if (!is_ia32_Store(node))
			break;

		/* it has to use our sp value */
		if (get_irn_n(node, n_ia32_base) != irn)
			continue;
		/* Store has to be attached to NoMem */
		ir_node *mem = get_irn_n(node, n_ia32_mem);
		if (!is_NoMem(mem))
			continue;

		/* unfortunately we can't support the full AMs possible for push at the
		 * moment. TODO: fix this */
		if (!is_ia32_NoReg_GP(get_irn_n(node, n_ia32_index)))
			break;

		int offset = get_ia32_am_offs_int(node);
		/* we should NEVER access uninitialized stack BELOW the current SP */
		assert(offset >= 0);

		/* storing at half-slots is bad */
		if ((offset & 3) != 0)
			break;

		if (inc_ofs - 4 < offset || offset >= MAXPUSH_OPTIMIZE * 4)
			continue;

		int storeslot = offset >> 2;

		/* storing into the same slot twice is bad (and shouldn't happen...) */
		if (stores[storeslot] != NULL)
			break;

		stores[storeslot] = node;
		if (storeslot > maxslot)
			maxslot = storeslot;
	}

	ir_node *curr_sp = irn;
	int      i;
	for (i = -1; i < maxslot; ++i) {
		if (stores[i + 1] == NULL)
			break;
	}

	/* walk through the Stores and create Pushs for them */
	ir_node  *block      = get_nodes_block(irn);
	ir_mode  *spmode     = get_irn_mode(irn);
	ir_graph *irg        = get_irn_irg(irn);
	ir_node  *first_push = NULL;
	for (; i >= 0; --i) {
		ir_node               *store = stores[i];
		ir_node               *noreg = ia32_new_NoReg_gp(irg);
		ir_node               *val   = get_irn_n(store, n_ia32_unary_op);
		ir_node               *mem   = get_irn_n(store, n_ia32_mem);
		const arch_register_t *spreg = arch_get_irn_register(curr_sp);

		ir_node *push = new_bd_ia32_Push(get_irn_dbg_info(store), block, noreg, noreg,
		                                 mem, val, curr_sp);
		set_ia32_ls_mode(push, ia32_mode_gp);
		copy_mark(store, push);

		if (first_push == NULL)
			first_push = push;

		sched_add_after(skip_Proj(curr_sp), push);

		/* create stackpointer Proj */
		curr_sp = new_r_Proj(push, spmode, pn_ia32_Push_stack);
		arch_set_irn_register(curr_sp, spreg);

		/* create memory Proj */
		ir_node *mem_proj = new_r_Proj(push, mode_M, pn_ia32_Push_M);

		/* rewire Store Projs */
		foreach_out_edge_safe(store, edge) {
			ir_node *proj = get_edge_src_irn(edge);
			if (!is_Proj(proj))
				continue;
			switch (get_Proj_proj(proj)) {
			case pn_ia32_Store_M:
				exchange(proj, mem_proj);
				break;
			default:
				panic("unexpected Proj on Store->IncSp");
			}
		}

		/* use the memproj now */
		be_peephole_exchange(store, push);

		inc_ofs -= 4;
	}

	foreach_out_edge_safe(irn, edge) {
		ir_node *const src = get_edge_src_irn(edge);

		if (src == first_push)
			continue;

		const int pos = get_edge_src_pos(edge);
		set_irn_n(src, pos, curr_sp);
	}

	be_set_IncSP_offset(irn, inc_ofs);
}
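
/* Illustrative counterpart to the Push case above (assembly sketch):
 *
 *     movl (%esp), %eax          popl %eax
 *     movl 4(%esp), %ebx   ->    popl %ebx
 *     addl $8, %esp
 *
 * Each Pop both loads a value and increments %esp by 4; the slot at offset 0
 * must be popped first. */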

/**
 * Tries to create Pops from Load, IncSP combinations.
 * The Loads are replaced by Pops, the IncSP is modified
 * (possibly into IncSP 0, but not removed).
 */
static void peephole_Load_IncSP_to_pop(ir_node *irn)
{
	int inc_ofs = -be_get_IncSP_offset(irn);
	if (inc_ofs < 4)
		return;

	ir_node  *loads[MAXPUSH_OPTIMIZE];
	unsigned  regmask                 = 0;
	unsigned  copymask                = ~0;

	memset(loads, 0, sizeof(loads));

	/*
	 * We first walk the schedule before the IncSP node as long as we find
	 * suitable Loads that could be transformed to a Pop.
	 * We save them into the loads array which is sorted by the frame offset/4
	 * attached to the node
	 */
	int      maxslot = -1;
	ir_node *pred_sp = be_get_IncSP_pred(irn);
	sched_foreach_reverse_before(irn, node) {
		/* it has to be a Load */
		if (!is_ia32_Load(node)) {
			if (be_is_Copy(node)) {
				const arch_register_t *dreg = arch_get_irn_register(node);
				if (dreg->reg_class != &ia32_reg_classes[CLASS_ia32_gp]) {
					/* not a GP copy, ignore */
					continue;
				}
				const arch_register_t *sreg = arch_get_irn_register(be_get_Copy_op(node));
				if (regmask & copymask & (1 << sreg->index)) {
					break;
				}
				if (regmask & copymask & (1 << dreg->index)) {
					break;
				}
				/* we CAN skip Copies if neither the destination nor the source
				 * is in our regmask, i.e. none of our future Pops will overwrite it */
				regmask |= (1 << dreg->index) | (1 << sreg->index);
				copymask &= ~((1 << dreg->index) | (1 << sreg->index));
				continue;
			}
			break;
		}

		/* it has to use our predecessor sp value */
		if (get_irn_n(node, n_ia32_base) != pred_sp) {
			/* it would be ok if this load does not use a Pop result,
			 * but we do not check this */
			break;
		}

		/* should have NO index */
		if (!is_ia32_NoReg_GP(get_irn_n(node, n_ia32_index)))
			break;

		int offset = get_ia32_am_offs_int(node);
		/* we should NEVER access uninitialized stack BELOW the current SP */
		assert(offset >= 0);

		/* loading at half-slots is bad */
		if ((offset & 3) != 0)
			break;

		if (offset < 0 || offset >= MAXPUSH_OPTIMIZE * 4)
			continue;
		/* ignore those outside the possible window */
		if (offset > inc_ofs - 4)
			continue;
		int loadslot = offset >> 2;

		/* loading from the same slot twice is bad (and shouldn't happen...) */
		if (loads[loadslot] != NULL)
			break;

		const arch_register_t *dreg = arch_get_irn_register_out(node, pn_ia32_Load_res);
		if (regmask & (1 << dreg->index)) {
			/* this register is already used */
			break;
		}
		regmask |= 1 << dreg->index;

		loads[loadslot] = node;
		if (loadslot > maxslot)
			maxslot = loadslot;
	}

	if (maxslot < 0)
		return;

	/* find the first slot */
	int i;
	for (i = maxslot; i >= 0; --i) {
		ir_node *load = loads[i];

		if (load == NULL)
			break;
	}

	int ofs = inc_ofs - (maxslot + 1) * 4;
	inc_ofs = (i + 1) * 4;

	/* create a new IncSP if needed */
	const arch_register_t *esp   = &ia32_registers[REG_ESP];
	ir_node               *block = get_nodes_block(irn);
	if (inc_ofs > 0) {
		pred_sp = be_new_IncSP(esp, block, pred_sp, -inc_ofs, be_get_IncSP_align(irn));
		sched_add_before(irn, pred_sp);
	}

	/* walk through the Loads and create Pops for them */
	for (++i; i <= maxslot; ++i) {
		ir_node               *load = loads[i];
		ir_node               *mem  = get_irn_n(load, n_ia32_mem);
		const arch_register_t *reg  = arch_get_irn_register_out(load, pn_ia32_Load_res);

		ir_node *pop = new_bd_ia32_Pop(get_irn_dbg_info(load), block, mem, pred_sp);
		arch_set_irn_register_out(pop, pn_ia32_Load_res, reg);
		set_ia32_ls_mode(pop, ia32_mode_gp);

		copy_mark(load, pop);

		/* create stackpointer Proj */
		pred_sp = new_r_Proj(pop, ia32_mode_gp, pn_ia32_Pop_stack);
		arch_set_irn_register(pred_sp, esp);

		sched_add_before(irn, pop);

		/* rewire now */
		foreach_out_edge_safe(load, edge) {
			ir_node *proj = get_edge_src_irn(edge);

			set_Proj_pred(proj, pop);
		}

		/* we can remove the Load now */
		sched_remove(load);
		kill_node(load);
	}

	be_set_IncSP_offset(irn, -ofs);
	be_set_IncSP_pred(irn, pred_sp);
}


/**
 * Find a free GP register if possible, else return NULL.
 */
static const arch_register_t *get_free_gp_reg(ir_graph *irg)
{
	be_irg_t *birg = be_birg_from_irg(irg);

	for (int i = 0; i < N_ia32_gp_REGS; ++i) {
		const arch_register_t *reg = &ia32_reg_classes[CLASS_ia32_gp].regs[i];
		if (!rbitset_is_set(birg->allocatable_regs, reg->global_index))
			continue;

		if (be_peephole_get_value(reg->global_index) == NULL)
			return reg;
	}

	return NULL;
}
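
/* Note (explanatory assumption, not from the original sources): the value
 * popped by the helper below has no real consumer; the be_new_Keep in
 * create_pop gives the result Proj an artificial user so later passes do
 * not treat the Pop, whose side effect on %esp is the whole point, as dead
 * code. */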

/**
 * Creates a Pop instruction before the given schedule point.
 *
 * @param dbgi        debug info
 * @param block       the block
 * @param stack       the previous stack value
 * @param schedpoint  the new node is added before this node
 * @param reg         the register to pop
 *
 * @return the new stack value
 */
static ir_node *create_pop(dbg_info *dbgi, ir_node *block,
                           ir_node *stack, ir_node *schedpoint,
                           const arch_register_t *reg)
{
	const arch_register_t *esp = &ia32_registers[REG_ESP];
	ir_graph              *irg = get_irn_irg(block);

	ir_node *pop = new_bd_ia32_Pop(dbgi, block, get_irg_no_mem(irg), stack);
	set_ia32_ls_mode(pop, ia32_mode_gp);

	stack = new_r_Proj(pop, ia32_mode_gp, pn_ia32_Pop_stack);
	arch_set_irn_register(stack, esp);
	ir_node *val = new_r_Proj(pop, ia32_mode_gp, pn_ia32_Pop_res);
	arch_set_irn_register(val, reg);

	sched_add_before(schedpoint, pop);

	ir_node *in[1] = { val };
	ir_node *keep  = be_new_Keep(block, 1, in);
	sched_add_before(schedpoint, keep);

	return stack;
}
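
/* Size sketch for the replacement below (illustrative ia32 encodings):
 *
 *     addl $4, %esp    ; 83 c4 04 (3 bytes)        popl %ecx    ; 59 (1 byte)
 *     subl $4, %esp    ; 83 ec 04 (3 bytes)        pushl %eax   ; 50 (1 byte)
 *
 * so on CPUs where ia32_cg_config prefers it, small constant stack
 * adjustments are replaced by one or two Push/Pop instructions. */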

/**
 * Optimize an IncSP by replacing it with Push/Pop.
 */
static void peephole_be_IncSP(ir_node *node)
{
	const arch_register_t *esp = &ia32_registers[REG_ESP];

	/* first optimize incsp->incsp combinations */
	node = be_peephole_IncSP_IncSP(node);

	/* transform IncSP->Store combinations to Push where possible */
	peephole_IncSP_Store_to_push(node);

	/* transform Load->IncSP combinations to Pop where possible */
	peephole_Load_IncSP_to_pop(node);

	if (arch_get_irn_register(node) != esp)
		return;

	/* replace IncSP -4 by Pop freereg when possible */
	int offset = be_get_IncSP_offset(node);
	if ((offset != -8 || ia32_cg_config.use_add_esp_8) &&
	    (offset != -4 || ia32_cg_config.use_add_esp_4) &&
	    (offset != +4 || ia32_cg_config.use_sub_esp_4) &&
	    (offset != +8 || ia32_cg_config.use_sub_esp_8))
		return;

	ir_node *stack;

	if (offset < 0) {
		/* we need a free register for pop */
		const arch_register_t *reg = get_free_gp_reg(get_irn_irg(node));
		if (reg == NULL)
			return;

		dbg_info *dbgi  = get_irn_dbg_info(node);
		ir_node  *block = get_nodes_block(node);

		stack = be_get_IncSP_pred(node);
		stack = create_pop(dbgi, block, stack, node, reg);

		if (offset == -8) {
			stack = create_pop(dbgi, block, stack, node, reg);
		}
	} else {
		dbg_info *dbgi  = get_irn_dbg_info(node);
		ir_node  *block = get_nodes_block(node);

		stack = be_get_IncSP_pred(node);
		stack = new_bd_ia32_PushEax(dbgi, block, stack);
		arch_set_irn_register(stack, esp);
		sched_add_before(node, stack);

		if (offset == +8) {
			stack = new_bd_ia32_PushEax(dbgi, block, stack);
			arch_set_irn_register(stack, esp);
			sched_add_before(node, stack);
		}
	}

	be_peephole_exchange(node, stack);
}
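
/* Illustrative encoding comparison for the rewrite below:
 *
 *     movl $0, %eax     ; b8 00 00 00 00 (5 bytes)
 *     xorl %eax, %eax   ; 31 c0          (2 bytes), but clobbers EFLAGS
 *
 * hence the check that nobody is currently relying on the flags. */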

/**
 * Peephole optimization for ia32_Const's
 */
static void peephole_ia32_Const(ir_node *node)
{
	const ia32_immediate_attr_t *attr = get_ia32_immediate_attr_const(node);

	/* try to transform a mov 0, reg to xor reg reg */
	if (attr->offset != 0 || attr->entity != NULL)
		return;
	if (ia32_cg_config.use_mov_0)
		return;
	/* xor destroys the flags, so no-one must be using them */
	if (be_peephole_get_value(REG_EFLAGS) != NULL)
		return;

	const arch_register_t *reg = arch_get_irn_register(node);
	assert(be_peephole_get_reg_value(reg) == NULL);

	ir_node  *block = get_nodes_block(node);
	dbg_info *dbgi  = get_irn_dbg_info(node);
	ir_node  *xorn  = new_bd_ia32_Xor0(dbgi, block);
	arch_set_irn_register(xorn, reg);

	sched_add_before(node, xorn);

	copy_mark(node, xorn);
	be_peephole_exchange(node, xorn);
}

static inline int is_noreg(const ir_node *node)
{
	return is_ia32_NoReg_GP(node);
}

ir_node *ia32_immediate_from_long(ir_graph *const irg, long const val)
{
	ir_node *start_block = get_irg_start_block(irg);
	ir_node *immediate   = new_bd_ia32_Immediate(NULL, start_block, NULL, 0, val);
	arch_set_irn_register(immediate, &ia32_registers[REG_GP_NOREG]);

	return immediate;
}

static ir_node *create_immediate_from_am(const ir_node *node)
{
	ir_node           *block            = get_nodes_block(node);
	int                offset           = get_ia32_am_offs_int(node);
	const ia32_attr_t *attr             = get_ia32_attr_const(node);
	int                sc_no_pic_adjust = attr->data.am_sc_no_pic_adjust;
	ir_entity         *entity           = get_ia32_am_ent(node);

	ir_node *res = new_bd_ia32_Immediate(NULL, block, entity, sc_no_pic_adjust, offset);
	arch_set_irn_register(res, &ia32_registers[REG_GP_NOREG]);
	return res;
}

static int is_am_one(const ir_node *node)
{
	int        offset  = get_ia32_am_offs_int(node);
	ir_entity *entity  = get_ia32_am_ent(node);

	return offset == 1 && entity == NULL;
}

static int is_am_minus_one(const ir_node *node)
{
	int        offset  = get_ia32_am_offs_int(node);
	ir_entity *entity  = get_ia32_am_ent(node);

	return offset == -1 && entity == NULL;
}
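
/* Sketch of the rewrites this pass aims for (illustrative):
 *
 *     leal (%eax,%ebx), %eax   ->   addl %ebx, %eax
 *     leal 4(%eax), %eax       ->   addl $4, %eax
 *     leal (,%eax,4), %eax     ->   shll $2, %eax
 *
 * legal only when the output register equals the base or index register
 * and EFLAGS may be clobbered. */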

/**
 * Transforms a LEA into an Add or SHL if possible.
 */
static void peephole_ia32_Lea(ir_node *node)
{
	assert(is_ia32_Lea(node));

	/* we can only do this if it is allowed to clobber the flags */
	if (be_peephole_get_value(REG_EFLAGS) != NULL)
		return;

	ir_node *base  = get_irn_n(node, n_ia32_Lea_base);
	ir_node *index = get_irn_n(node, n_ia32_Lea_index);
	const arch_register_t *base_reg;
	const arch_register_t *index_reg;

	if (is_noreg(base)) {
		base     = NULL;
		base_reg = NULL;
	} else {
		base_reg = arch_get_irn_register(base);
	}
	if (is_noreg(index)) {
		index     = NULL;
		index_reg = NULL;
	} else {
		index_reg = arch_get_irn_register(index);
	}

	if (base == NULL && index == NULL) {
		/* we shouldn't construct these in the first place... */
#ifdef DEBUG_libfirm
		ir_fprintf(stderr, "Optimization warning: found immediate only lea\n");
#endif
		return;
	}

	const arch_register_t *out_reg        = arch_get_irn_register(node);
	int                    scale          = get_ia32_am_scale(node);
	int                    has_immediates;
	ir_node               *op1;
	ir_node               *op2;

	assert(!is_ia32_need_stackent(node) || get_ia32_frame_ent(node) != NULL);
	/* check if we have immediate values (frame entities should already be
	 * expressed in the offsets) */
	if (get_ia32_am_offs_int(node) != 0 || get_ia32_am_ent(node) != NULL) {
		has_immediates = 1;
	} else {
		has_immediates = 0;
	}

	/* we can transform leas where the out register is the same as either the
	 * base or index register back to an Add or Shl */
	if (out_reg == base_reg) {
		if (index == NULL) {
#ifdef DEBUG_libfirm
			if (!has_immediates) {
				ir_fprintf(stderr, "Optimization warning: found lea which is "
				           "just a copy\n");
			}
#endif
			op1 = base;
			goto make_add_immediate;
		}
		if (scale == 0 && !has_immediates) {