/*
 * This file is part of libFirm.
 * Copyright (C) 2012 University of Karlsruhe.
 */

/**
 * @file
 * @brief       Implements several optimizations for IA32.
 * @author      Matthias Braun, Christian Wuerdig
 */
#include "irnode.h"
#include "irprog_t.h"
#include "ircons.h"
#include "irtools.h"
#include "firm_types.h"
#include "iredges.h"
#include "tv.h"
#include "irgmod.h"
#include "irgwalk.h"
#include "heights.h"
#include "irprintf.h"
#include "irdump.h"
#include "panic.h"
#include "firmstat_t.h"

#include "be_t.h"
#include "benode.h"
#include "besched.h"
#include "bepeephole.h"

#include "ia32_new_nodes.h"
#include "ia32_optimize.h"
#include "bearch_ia32_t.h"
#include "gen_ia32_regalloc_if.h"
#include "ia32_common_transform.h"
#include "ia32_transform.h"
#include "ia32_architecture.h"

DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)

static void copy_mark(const ir_node *old, ir_node *newn)
{
	if (is_ia32_is_reload(old))
		set_ia32_is_reload(newn);
	if (is_ia32_is_spill(old))
		set_ia32_is_spill(newn);
	if (is_ia32_is_remat(old))
		set_ia32_is_remat(newn);
}

typedef enum produces_flag_t {
	produces_no_flag,
	produces_zero_sign,
	produces_zero_in_carry
} produces_flag_t;

/**
 * Return which usable flag the given node produces for its result.
 * That is zero (ZF) and sign (SF).
 * We do not check for carry (CF) or overflow (OF).
 *
 * @param node  the node to check
 * @param pn    the projection number of the used result
 */
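/* For example (a sketch, AT&T syntax): after "andl %ebx, %eax", ZF and SF
 * reflect the result in %eax, so a subsequent "testl %eax, %eax" would be
 * redundant. A shift such as "shll %cl, %eax" only qualifies for a constant,
 * non-zero count, because a count of zero leaves the flags untouched. */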
static produces_flag_t check_produces_zero_sign(ir_node *node, unsigned pn)
{
	if (!is_ia32_irn(node))
		return produces_no_flag;

	ir_node *count;

	switch (get_ia32_irn_opcode(node)) {
		case iro_ia32_Add:
		case iro_ia32_Adc:
		case iro_ia32_And:
		case iro_ia32_Or:
		case iro_ia32_Xor:
		case iro_ia32_Sub:
		case iro_ia32_Sbb:
		case iro_ia32_Neg:
		case iro_ia32_Inc:
		case iro_ia32_Dec:
			break;

		case iro_ia32_ShlD:
		case iro_ia32_ShrD:
			assert((int)n_ia32_ShlD_count == (int)n_ia32_ShrD_count);
			count = get_irn_n(node, n_ia32_ShlD_count);
			goto check_shift_amount;

		case iro_ia32_Shl:
		case iro_ia32_Shr:
		case iro_ia32_Sar:
			assert((int)n_ia32_Shl_count == (int)n_ia32_Shr_count
					&& (int)n_ia32_Shl_count == (int)n_ia32_Sar_count);
			count = get_irn_n(node, n_ia32_Shl_count);
check_shift_amount:
			/* When the shift count is zero the flags are not affected, so we
			 * can only do this for constants != 0. */
			if (!is_ia32_Immediate(count))
				return produces_no_flag;

			const ia32_immediate_attr_t *imm_attr = get_ia32_immediate_attr_const(count);
			if (imm_attr->entity != NULL)
				return produces_no_flag;
			if ((imm_attr->offset & 0x1f) == 0)
				return produces_no_flag;
			break;

		case iro_ia32_Mul:
			return pn == pn_ia32_Mul_res_high ?
				produces_zero_in_carry : produces_no_flag;

		default:
			return produces_no_flag;
	}

	return pn == pn_ia32_res ? produces_zero_sign : produces_no_flag;
}

/**
 * Replace Cmp(x, 0) by a Test(x, x)
 */
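/* Illustrative effect (a sketch, AT&T syntax):
 *   cmpl $0, %eax   ->   testl %eax, %eax
 * Both set the flags identically for a compare against zero, but the Test
 * needs no immediate bytes. */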
static void peephole_ia32_Cmp(ir_node *const node)
{
	if (get_ia32_op_type(node) != ia32_Normal)
		return;

	ir_node *const right = get_irn_n(node, n_ia32_Cmp_right);
	if (!is_ia32_Immediate(right))
		return;

	ia32_immediate_attr_t const *const imm = get_ia32_immediate_attr_const(right);
	if (imm->entity != NULL || imm->offset != 0)
		return;

	dbg_info *const dbgi         = get_irn_dbg_info(node);
	ir_node  *const block        = get_nodes_block(node);
	ir_graph *const irg          = get_irn_irg(node);
	ir_node  *const noreg        = ia32_new_NoReg_gp(irg);
	ir_node  *const nomem        = get_irg_no_mem(irg);
	ir_node  *const op           = get_irn_n(node, n_ia32_Cmp_left);
	int       const ins_permuted = get_ia32_attr(node)->data.ins_permuted;

	ir_mode *const ls_mode = get_ia32_ls_mode(node);
	ir_node *const test    = get_mode_size_bits(ls_mode) == 8
		? new_bd_ia32_Test_8bit(dbgi, block, noreg, noreg, nomem, op, op, ins_permuted)
		: new_bd_ia32_Test     (dbgi, block, noreg, noreg, nomem, op, op, ins_permuted);
	set_ia32_ls_mode(test, ls_mode);

	arch_register_t const *const reg = arch_get_irn_register_out(node, pn_ia32_Cmp_eflags);
	arch_set_irn_register_out(test, pn_ia32_Test_eflags, reg);

	foreach_out_edge_safe(node, edge) {
		ir_node *const user = get_edge_src_irn(edge);

		if (is_Proj(user))
			exchange(user, test);
	}

	sched_add_before(node, test);
	copy_mark(node, test);
	be_peephole_exchange(node, test);
}

/**
 * Peephole optimization for Test instructions.
 * - Remove the Test if an appropriate flag was produced and is still live
 * - Change a Test(x, c) to an 8-bit Test if 0 <= c < 128 (3 bytes shorter opcode)
 */
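/* Illustrative effects (a sketch, AT&T syntax):
 *   andl %ebx, %eax; testl %eax, %eax; jz L  ->  andl %ebx, %eax; jz L
 *   testl $0x40, %eax                        ->  testb $0x40, %al
 * The 8-bit form drops three immediate bytes; it is only safe for
 * 0 <= c < 128, because otherwise the sign flag would change its meaning. */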
static void peephole_ia32_Test(ir_node *node)
{
	ir_node *left  = get_irn_n(node, n_ia32_Test_left);
	ir_node *right = get_irn_n(node, n_ia32_Test_right);

	if (left == right) { /* we need a test for 0 */
		ir_node *block = get_nodes_block(node);
		if (get_nodes_block(left) != block)
			return;

		unsigned pn = pn_ia32_res;
		ir_node *op = left;
		if (is_Proj(op)) {
			pn = get_Proj_num(op);
			op = get_Proj_pred(op);
		}

		/* walk the schedule upwards and abort when we find left, or when some
		 * other node destroys the flags */
		ir_node *schedpoint = node;
		for (;;) {
			schedpoint = sched_prev(schedpoint);
			if (schedpoint == op)
				break;
			if (arch_irn_is(schedpoint, modify_flags))
				return;
			if (schedpoint == block)
				panic("couldn't find left");
		}

		produces_flag_t produced = check_produces_zero_sign(op, pn);
		if (produced == produces_no_flag)
			return;

		/* make sure users only look at the sign/zero flag */
		foreach_out_edge(node, edge) {
			ir_node *user = get_edge_src_irn(edge);
			if (is_ia32_CMovcc(user) || is_ia32_Jcc(user) ||
			    is_ia32_Setcc(user) || is_ia32_SetccMem(user)) {
				x86_condition_code_t  cc  = get_ia32_condcode(user);

				if (cc == x86_cc_equal || cc == x86_cc_not_equal)
					continue;
				if (produced == produces_zero_sign
					&& (cc == x86_cc_sign || cc == x86_cc_not_sign)) {
					continue;
				}
			}
			return;
		}

		ir_mode *op_mode = get_ia32_ls_mode(op);
		if (op_mode == NULL)
			op_mode = get_irn_mode(op);

		/* Make sure we operate on the same bit size */
		if (get_mode_size_bits(op_mode) != get_mode_size_bits(get_ia32_ls_mode(node)))
			return;

		if (produced == produces_zero_in_carry) {
			/* patch users to look at the carry instead of the zero flag */
			foreach_out_edge(node, edge) {
				ir_node              *user = get_edge_src_irn(edge);
				x86_condition_code_t  cc   = get_ia32_condcode(user);

				switch (cc) {
				case x86_cc_equal:     cc = x86_cc_above_equal; break;
				case x86_cc_not_equal: cc = x86_cc_below;       break;
				default: panic("unexpected pn");
				}
				set_ia32_condcode(user, cc);
			}
		}

		if (get_irn_mode(op) != mode_T) {
			set_irn_mode(op, mode_T);

			/* If there are other users, reroute them to result proj */
			if (get_irn_n_edges(op) != 2) {
				ir_node *res = new_r_Proj(op, ia32_mode_gp, pn_ia32_res);
				edges_reroute_except(op, res, res);
			}
		} else {
			if (get_irn_n_edges(left) == 2)
				kill_node(left);
		}

		ir_mode *flags_mode = ia32_reg_classes[CLASS_ia32_flags].mode;
		ir_node *flags_proj = new_r_Proj(op, flags_mode, pn_ia32_flags);
		arch_set_irn_register(flags_proj, &ia32_registers[REG_EFLAGS]);

		assert(get_irn_mode(node) != mode_T);

		be_peephole_exchange(node, flags_proj);
	} else if (is_ia32_Immediate(right)) {
		ia32_immediate_attr_t const *const imm = get_ia32_immediate_attr_const(right);

		/* A test with an entity is rather strange, but better safe than sorry */
		if (imm->entity != NULL)
			return;

		/*
		 * We have to take care that we end up with the same sign flag:
		 * testl(128, 128) -> SF=0
		 * testb(128, 128) -> SF=1
		 */
		unsigned offset = imm->offset;
		if (get_ia32_op_type(node) == ia32_AddrModeS) {
			ia32_attr_t *const attr = get_ia32_attr(node);
			ir_graph    *const irg  = get_irn_irg(node);

			if ((offset & 0xFFFFFF80) == 0) {
				/* attr->am_offs += 0; */
			} else if ((offset & 0xFFFF80FF) == 0) {
				ir_node *imm_node = ia32_create_Immediate(irg, NULL, offset >>  8);
				set_irn_n(node, n_ia32_Test_right, imm_node);
				attr->am_offs += 1;
			} else if ((offset & 0xFF80FFFF) == 0) {
				ir_node *imm_node = ia32_create_Immediate(irg, NULL, offset >> 16);
				set_irn_n(node, n_ia32_Test_right, imm_node);
				attr->am_offs += 2;
			} else if ((offset & 0x00FFFFFF) == 0) {
				ir_node *imm_node = ia32_create_Immediate(irg, NULL, offset >> 24);
				set_irn_n(node, n_ia32_Test_right, imm_node);
				attr->am_offs += 3;
			} else {
				return;
			}
		} else if ((offset & 0xFFFFFF80) == 0) {
			arch_register_t const* const reg = arch_get_irn_register(left);

			if (reg != &ia32_registers[REG_EAX] &&
					reg != &ia32_registers[REG_EBX] &&
					reg != &ia32_registers[REG_ECX] &&
					reg != &ia32_registers[REG_EDX]) {
				return;
			}
		} else {
			return;
		}

		/* Technically we should build a Test8Bit because of the register
		 * constraints, but nobody changes registers at this point anymore. */
		set_ia32_ls_mode(node, mode_Bu);
	}
}

/**
 * The AMD Athlon runs faster when RET is not the destination of a
 * conditional jump or directly preceded by another jump instruction.
 * This can be avoided by placing a Rep prefix before the return.
 */
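/* The classic form of this workaround, as recommended in AMD's optimization
 * guides (a sketch):
 *   ret   ->   rep ret
 * The prefixed return no longer triggers the branch-predictor penalty
 * described above. */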
static void peephole_ia32_Return(ir_node *node)
{
	if (!ia32_cg_config.use_pad_return)
		return;

	/* check if this return is the first on the block */
	sched_foreach_reverse_before(node, irn) {
		if (is_Phi(irn) || is_ia32_Start(irn))
			continue;
		/* arg, IncSP 0 nodes might occur, ignore these */
		if (be_is_IncSP(irn) && be_get_IncSP_offset(irn) == 0)
			continue;
		return;
	}

	/* ensure that the 3-byte return is generated */
	ia32_return_attr_t *attr = get_ia32_return_attr(node);
	attr->emit_pop = true;
}

/* only optimize up to 48 stores behind IncSPs */
#define MAXPUSH_OPTIMIZE    48

/**
 * Tries to create Pushes from IncSP, Store combinations.
 * The Stores are replaced by Pushes and the IncSP is modified
 * (possibly into IncSP 0, but not removed).
 */
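/* Illustrative effect (a sketch, AT&T syntax):
 *   subl $8, %esp
 *   movl %eax, 4(%esp)       pushl %eax
 *   movl %ebx, (%esp)    ->  pushl %ebx
 * The Pushes encode more compactly and perform the stack adjustment
 * themselves. */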
static void peephole_IncSP_Store_to_push(ir_node *irn)
{
	int inc_ofs = be_get_IncSP_offset(irn);
	if (inc_ofs < 4)
		return;

	int      maxslot                  = -1;
	ir_node *stores[MAXPUSH_OPTIMIZE];

	memset(stores, 0, sizeof(stores));

	/*
	 * We first walk the schedule after the IncSP node as long as we find
	 * suitable Stores that could be transformed into a Push.
	 * We save them into the stores array, which is sorted by the frame
	 * offset/4 attached to the node.
	 */
	sched_foreach_after(irn, node) {
		/* it has to be a Store */
		if (!is_ia32_Store(node))
			break;

		/* it has to use our sp value */
		if (get_irn_n(node, n_ia32_base) != irn)
			continue;
		/* Store has to be attached to NoMem */
		ir_node *mem = get_irn_n(node, n_ia32_mem);
		if (!is_NoMem(mem))
			continue;

		/* unfortunately we can't support the full AMs possible for push at the
		 * moment. TODO: fix this */
		if (!is_ia32_NoReg_GP(get_irn_n(node, n_ia32_index)))
			break;

		int offset = get_ia32_am_offs_int(node);
		/* we should NEVER access uninitialized stack BELOW the current SP */
		assert(offset >= 0);

		/* storing at half-slots is bad */
		if ((offset & 3) != 0)
			break;

		if (inc_ofs - 4 < offset || offset >= MAXPUSH_OPTIMIZE * 4)
			continue;

		int storeslot = offset >> 2;

		/* storing into the same slot twice is bad (and shouldn't happen...) */
		if (stores[storeslot] != NULL)
			break;

		stores[storeslot] = node;
		if (storeslot > maxslot)
			maxslot = storeslot;
	}

	ir_node *curr_sp = irn;
	int      i;
	for (i = -1; i < maxslot; ++i) {
		if (stores[i + 1] == NULL)
			break;
	}

	/* walk through the Stores and create Pushes for them */
	ir_node  *block      = get_nodes_block(irn);
	ir_mode  *spmode     = get_irn_mode(irn);
	ir_graph *irg        = get_irn_irg(irn);
	ir_node  *first_push = NULL;
	for (; i >= 0; --i) {
		ir_node               *store = stores[i];
		ir_node               *noreg = ia32_new_NoReg_gp(irg);
		ir_node               *val   = get_irn_n(store, n_ia32_unary_op);
		ir_node               *mem   = get_irn_n(store, n_ia32_mem);
		const arch_register_t *spreg = arch_get_irn_register(curr_sp);

		ir_node *push = new_bd_ia32_Push(get_irn_dbg_info(store), block, noreg, noreg,
		                                 mem, val, curr_sp);
		set_ia32_ls_mode(push, ia32_mode_gp);
		copy_mark(store, push);

		if (first_push == NULL)
			first_push = push;

		sched_add_after(skip_Proj(curr_sp), push);

		/* create stackpointer Proj */
		curr_sp = new_r_Proj(push, spmode, pn_ia32_Push_stack);
		arch_set_irn_register(curr_sp, spreg);

		/* create memory Proj */
		ir_node *mem_proj = new_r_Proj(push, mode_M, pn_ia32_Push_M);

		/* Rewire Store Proj. */
		ir_node *const proj = get_Proj_for_pn(store, pn_ia32_Store_M);
		if (proj)
			exchange(proj, mem_proj);

		/* use the memproj now */
		be_peephole_exchange(store, push);

		inc_ofs -= 4;
	}

	foreach_out_edge_safe(irn, edge) {
		ir_node *const src = get_edge_src_irn(edge);

		if (src == first_push)
			continue;

		const int pos = get_edge_src_pos(edge);
		set_irn_n(src, pos, curr_sp);
	}

	be_set_IncSP_offset(irn, inc_ofs);
}

/**
 * Tries to create Pops from Load, IncSP combinations.
 * The Loads are replaced by Pops and the IncSP is modified
 * (possibly into IncSP 0, but not removed).
 */
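/* Illustrative effect (a sketch, AT&T syntax):
 *   movl (%esp), %eax         popl %eax
 *   movl 4(%esp), %ebx   ->   popl %ebx
 *   addl $8, %esp
 */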
static void peephole_Load_IncSP_to_pop(ir_node *irn)
{

	int inc_ofs = -be_get_IncSP_offset(irn);
	if (inc_ofs < 4)
		return;

	ir_node  *loads[MAXPUSH_OPTIMIZE];
	unsigned  regmask                 = 0;
	unsigned  copymask                = ~0;

	memset(loads, 0, sizeof(loads));

	/*
	 * We first walk the schedule before the IncSP node as long as we find
	 * suitable Loads that could be transformed into a Pop.
	 * We save them into the loads array, which is sorted by the frame
	 * offset/4 attached to the node.
	 */
	int      maxslot = -1;
	ir_node *pred_sp = be_get_IncSP_pred(irn);
	sched_foreach_reverse_before(irn, node) {
		/* it has to be a Load */
		if (!is_ia32_Load(node)) {
			if (be_is_Copy(node)) {
				const arch_register_t *dreg = arch_get_irn_register(node);
				if (dreg->reg_class != &ia32_reg_classes[CLASS_ia32_gp]) {
					/* not a GP copy, ignore */
					continue;
				}
				const arch_register_t *sreg = arch_get_irn_register(be_get_Copy_op(node));
				if (regmask & copymask & (1 << sreg->index)) {
					break;
				}
				if (regmask & copymask & (1 << dreg->index)) {
					break;
				}
				/* we CAN skip Copies if neither the destination nor the
				 * source is in our regmask, i.e. none of our future Pops
				 * will overwrite it */
				continue;
			}
			break;
		}

		/* it has to use our predecessor sp value */
		if (get_irn_n(node, n_ia32_base) != pred_sp) {
			/* it would be ok if this load does not use a Pop result,
			 * but we do not check this */
			break;
		}

		/* should have NO index */
		if (!is_ia32_NoReg_GP(get_irn_n(node, n_ia32_index)))
			break;

		int offset = get_ia32_am_offs_int(node);
		/* we should NEVER access uninitialized stack BELOW the current SP */
		assert(offset >= 0);

		/* storing at half-slots is bad */
		if ((offset & 3) != 0)
			break;

		if (offset < 0 || offset >= MAXPUSH_OPTIMIZE * 4)
			continue;
		/* ignore those outside the possible window */
		if (offset > inc_ofs - 4)
			continue;
		int loadslot = offset >> 2;

		/* loading from the same slot twice is bad (and shouldn't happen...) */
		if (loads[loadslot] != NULL)
			break;

		const arch_register_t *dreg = arch_get_irn_register_out(node, pn_ia32_Load_res);
		if (regmask & (1 << dreg->index)) {
			/* this register is already used */
			break;
		}
		regmask |= 1 << dreg->index;

		loads[loadslot] = node;
		if (loadslot > maxslot)
			maxslot = loadslot;
	}

	if (maxslot < 0)
		return;

	/* find the first slot */
	int i;
	for (i = maxslot; i >= 0; --i) {
		ir_node *load = loads[i];

		if (load == NULL)
			break;
	}

	int ofs = inc_ofs - (maxslot + 1) * 4;
	inc_ofs = (i + 1) * 4;

	/* create a new IncSP if needed */
	const arch_register_t *esp = &ia32_registers[REG_ESP];
	ir_node *const block = get_nodes_block(irn);
	if (inc_ofs > 0) {
		pred_sp = ia32_new_IncSP(block, pred_sp, -inc_ofs, be_get_IncSP_align(irn));
		sched_add_before(irn, pred_sp);
	}

	/* walk through the Loads and create Pops for them */
	for (++i; i <= maxslot; ++i) {
		ir_node               *load = loads[i];
		ir_node               *mem  = get_irn_n(load, n_ia32_mem);
		const arch_register_t *reg  = arch_get_irn_register_out(load, pn_ia32_Load_res);

		ir_node *pop = new_bd_ia32_Pop(get_irn_dbg_info(load), block, mem, pred_sp);
		arch_set_irn_register_out(pop, pn_ia32_Load_res, reg);
		set_ia32_ls_mode(pop, ia32_mode_gp);

		copy_mark(load, pop);

		/* create stackpointer Proj */
		pred_sp = new_r_Proj(pop, ia32_mode_gp, pn_ia32_Pop_stack);
		arch_set_irn_register(pred_sp, esp);

		sched_add_before(irn, pop);

		/* rewire now */
		foreach_out_edge_safe(load, edge) {
			ir_node *proj = get_edge_src_irn(edge);

			set_Proj_pred(proj, pop);
		}

		/* we can remove the Load now */
		sched_remove(load);
		kill_node(load);
	}

	be_set_IncSP_offset(irn, -ofs);
	be_set_IncSP_pred(irn, pred_sp);
}


/**
 * Find a free GP register if possible, else return NULL.
 */
static const arch_register_t *get_free_gp_reg(ir_graph *irg)
{
	be_irg_t *birg = be_birg_from_irg(irg);

	for (int i = 0; i < N_ia32_gp_REGS; ++i) {
		const arch_register_t *reg = &ia32_reg_classes[CLASS_ia32_gp].regs[i];
		if (!rbitset_is_set(birg->allocatable_regs, reg->global_index))
			continue;

		if (be_peephole_get_value(reg->global_index) == NULL)
			return reg;
	}

	return NULL;
}

/**
 * Creates a Pop instruction before the given schedule point.
 *
 * @param dbgi        debug info
 * @param block       the block
 * @param stack       the previous stack value
 * @param schedpoint  the new node is added before this node
 * @param reg         the register to pop
 *
 * @return the new stack value
 */
static ir_node *create_pop(dbg_info *dbgi, ir_node *block,
                           ir_node *stack, ir_node *schedpoint,
                           const arch_register_t *reg)
{
	const arch_register_t *esp = &ia32_registers[REG_ESP];
	ir_graph              *irg = get_irn_irg(block);

	ir_node *pop = new_bd_ia32_Pop(dbgi, block, get_irg_no_mem(irg), stack);
	set_ia32_ls_mode(pop, ia32_mode_gp);

	stack = new_r_Proj(pop, ia32_mode_gp, pn_ia32_Pop_stack);
	arch_set_irn_register(stack, esp);
	ir_node *val = new_r_Proj(pop, ia32_mode_gp, pn_ia32_Pop_res);
	arch_set_irn_register(val, reg);

	sched_add_before(schedpoint, pop);

	ir_node *in[1] = { val };
	ir_node *keep  = be_new_Keep(block, 1, in);
	sched_add_before(schedpoint, keep);

	return stack;
}

/**
 * Optimize an IncSP by replacing it with Push/Pop.
 */
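/* Illustrative effect (a sketch, AT&T syntax; only applied where the CPU
 * cost model prefers it):
 *   addl $4, %esp   ->   popl %ecx    (into some free register)
 *   subl $4, %esp   ->   pushl %eax   (the stored value does not matter)
 */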
static void peephole_be_IncSP(ir_node *node)
{
	const arch_register_t *esp = &ia32_registers[REG_ESP];

	/* first optimize incsp->incsp combinations */
	node = be_peephole_IncSP_IncSP(node);

	/* transform IncSP->Store combinations to Push where possible */
	peephole_IncSP_Store_to_push(node);

	/* transform Load->IncSP combinations to Pop where possible */
	peephole_Load_IncSP_to_pop(node);

	if (arch_get_irn_register(node) != esp)
		return;

	/* replace IncSP -4 by Pop freereg when possible */
	int offset = be_get_IncSP_offset(node);
	if ((offset != -8 || ia32_cg_config.use_add_esp_8) &&
	    (offset != -4 || ia32_cg_config.use_add_esp_4) &&
	    (offset != +4 || ia32_cg_config.use_sub_esp_4) &&
	    (offset != +8 || ia32_cg_config.use_sub_esp_8))
		return;

	ir_node *stack;

	if (offset < 0) {
		/* we need a free register for pop */
		const arch_register_t *reg = get_free_gp_reg(get_irn_irg(node));
		if (reg == NULL)
			return;

		dbg_info *dbgi  = get_irn_dbg_info(node);
		ir_node  *block = get_nodes_block(node);

		stack = be_get_IncSP_pred(node);
		stack = create_pop(dbgi, block, stack, node, reg);

		if (offset == -8) {
			stack = create_pop(dbgi, block, stack, node, reg);
		}
	} else {
		dbg_info *dbgi  = get_irn_dbg_info(node);
		ir_node  *block = get_nodes_block(node);

		stack = be_get_IncSP_pred(node);
		stack = new_bd_ia32_PushEax(dbgi, block, stack);
		arch_set_irn_register(stack, esp);
		sched_add_before(node, stack);

		if (offset == +8) {
			stack = new_bd_ia32_PushEax(dbgi, block, stack);
			arch_set_irn_register(stack, esp);
			sched_add_before(node, stack);
		}
	}

	be_peephole_exchange(node, stack);
}

/**
 * Peephole optimization for ia32_Const nodes.
 */
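/* Illustrative effect (a sketch, AT&T syntax):
 *   movl $0, %eax   ->   xorl %eax, %eax
 * The xor form is shorter but clobbers the flags, hence the EFLAGS check
 * below. */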
static void peephole_ia32_Const(ir_node *node)
{
	const ia32_immediate_attr_t *attr = get_ia32_immediate_attr_const(node);

	/* try to transform a mov $0, reg into xor reg, reg */
	if (attr->offset != 0 || attr->entity != NULL)
		return;
	if (ia32_cg_config.use_mov_0)
		return;
	/* xor destroys the flags, so no one may still be using them */
	if (be_peephole_get_value(REG_EFLAGS) != NULL)
		return;

	const arch_register_t *reg = arch_get_irn_register(node);
	assert(be_peephole_get_reg_value(reg) == NULL);

	ir_node  *block = get_nodes_block(node);
	dbg_info *dbgi  = get_irn_dbg_info(node);
	ir_node  *xorn  = new_bd_ia32_Xor0(dbgi, block);
	arch_set_irn_register(xorn, reg);

	sched_add_before(node, xorn);

	copy_mark(node, xorn);
	be_peephole_exchange(node, xorn);
}

static inline int is_noreg(const ir_node *node)
{
	return is_ia32_NoReg_GP(node);
}

ir_node *ia32_immediate_from_long(ir_graph *const irg, long const val)
{
	ir_node *start_block = get_irg_start_block(irg);
	ir_node *immediate   = new_bd_ia32_Immediate(NULL, start_block, NULL, 0, val);
	arch_set_irn_register(immediate, &ia32_registers[REG_GP_NOREG]);

	return immediate;
}

static ir_node *create_immediate_from_am(const ir_node *node)
{
	ir_node           *block            = get_nodes_block(node);
	int                offset           = get_ia32_am_offs_int(node);
	const ia32_attr_t *attr             = get_ia32_attr_const(node);
	int                sc_no_pic_adjust = attr->data.am_sc_no_pic_adjust;
	ir_entity         *entity           = get_ia32_am_ent(node);

	ir_node *res = new_bd_ia32_Immediate(NULL, block, entity, sc_no_pic_adjust, offset);
	arch_set_irn_register(res, &ia32_registers[REG_GP_NOREG]);
	return res;
}

static int is_am_one(const ir_node *node)
{
	int        offset  = get_ia32_am_offs_int(node);
	ir_entity *entity  = get_ia32_am_ent(node);

	return offset == 1 && entity == NULL;
}

static int is_am_minus_one(const ir_node *node)
{
	int        offset  = get_ia32_am_offs_int(node);
	ir_entity *entity  = get_ia32_am_ent(node);

	return offset == -1 && entity == NULL;
}

/**
 * Transforms a LEA into an Add or SHL if possible.
 */
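/* Illustrative effects when the output register equals one of the inputs
 * (a sketch, AT&T syntax):
 *   leal 4(%eax), %eax      ->   addl $4, %eax
 *   leal (,%ebx,4), %ebx    ->   shll $2, %ebx
 * Both replacements clobber the flags, hence the check below. */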
static void peephole_ia32_Lea(ir_node *node)
{
	assert(is_ia32_Lea(node));

	/* we can only do this if it is allowed to clobber the flags */
	if (be_peephole_get_value(REG_EFLAGS) != NULL)
		return;

	ir_node *base  = get_irn_n(node, n_ia32_Lea_base);
	ir_node *index = get_irn_n(node, n_ia32_Lea_index);
	const arch_register_t *base_reg;
	const arch_register_t *index_reg;

	if (is_noreg(base)) {
		base     = NULL;
		base_reg = NULL;
	} else {
		base_reg = arch_get_irn_register(base);
	}
	if (is_noreg(index)) {
		index     = NULL;
		index_reg = NULL;
	} else {
		index_reg = arch_get_irn_register(index);
	}

	if (base == NULL && index == NULL) {
		/* we shouldn't construct these in the first place... */
#ifdef DEBUG_libfirm
		ir_fprintf(stderr, "Optimization warning: found immediate only lea\n");
#endif
		return;
	}

	const arch_register_t *out_reg        = arch_get_irn_register(node);
	int                    scale          = get_ia32_am_scale(node);
	int                    has_immediates;
	ir_node               *op1;
	ir_node               *op2;
	assert(!is_ia32_need_stackent(node) || get_ia32_frame_ent(node) != NULL);
	/* check if we have immediate values (frame entities should already be
	 * expressed in the offsets) */
	if (get_ia32_am_offs_int(node) != 0 || get_ia32_am_ent(node) != NULL) {
		has_immediates = 1;
	} else {
		has_immediates = 0;
	}

	/* we can transform leas where the out register is the same as either the
	 * base or index register back to an Add or Shl */
	if (out_reg == base_reg) {
		if (index == NULL) {
#ifdef DEBUG_libfirm
			if (!has_immediates) {
				ir_fprintf(stderr, "Optimization warning: found lea which is "
				           "just a copy\n");
			}
#endif
			op1 = base;
			goto make_add_immediate;
		}