/*
 * This file is part of libFirm.
 * Copyright (C) 2012 University of Karlsruhe.
 */

/**
 * @file
 * @brief       Implements several optimizations for IA32.
 * @author      Matthias Braun, Christian Wuerdig
 */

#include "irnode.h"
#include "irprog_t.h"
#include "ircons.h"
#include "irtools.h"
#include "firm_types.h"

#include "iredges.h"
#include "tv.h"
#include "irgmod.h"
#include "irgwalk.h"
#include "heights.h"
#include "irprintf.h"
#include "irdump.h"
#include "panic.h"
#include "firmstat_t.h"

#include "be_t.h"
#include "benode.h"
#include "besched.h"
#include "bepeephole.h"

#include "ia32_new_nodes.h"
#include "ia32_optimize.h"
#include "bearch_ia32_t.h"
#include "gen_ia32_regalloc_if.h"
#include "ia32_common_transform.h"
#include "ia32_transform.h"
#include "ia32_architecture.h"

DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)

static void copy_mark(const ir_node *old, ir_node *newn)
{
	if (is_ia32_is_reload(old))
		set_ia32_is_reload(newn);
	if (is_ia32_is_spill(old))
		set_ia32_is_spill(newn);
	if (is_ia32_is_remat(old))
		set_ia32_is_remat(newn);
}

typedef enum produces_flag_t {
	produces_no_flag,
	produces_zero_sign,
	produces_zero_in_carry
} produces_flag_t;

/**
 * Return which usable flag the given node produces for its result,
 * i.e. zero (ZF) and sign (SF).
 * We do not check for carry (CF) or overflow (OF).
 *
 * @param node  the node to check
 * @param pn    the projection number of the used result
 */
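/* Illustrative: x86 MUL sets CF/OF exactly when the high half of the product
 * is non-zero, so "high part == 0" is observable via the carry flag; this is
 * what produces_zero_in_carry expresses for ia32_Mul below. */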
static produces_flag_t check_produces_zero_sign(ir_node *node, unsigned pn)
{
	if (!is_ia32_irn(node))
		return produces_no_flag;

	ir_node *count;

	switch (get_ia32_irn_opcode(node)) {
		case iro_ia32_Add:
		case iro_ia32_Adc:
		case iro_ia32_And:
		case iro_ia32_Or:
		case iro_ia32_Xor:
		case iro_ia32_Sub:
		case iro_ia32_Sbb:
		case iro_ia32_Neg:
		case iro_ia32_Inc:
		case iro_ia32_Dec:
			break;

		case iro_ia32_ShlD:
		case iro_ia32_ShrD:
			assert((int)n_ia32_ShlD_count == (int)n_ia32_ShrD_count);
			count = get_irn_n(node, n_ia32_ShlD_count);
			goto check_shift_amount;

		case iro_ia32_Shl:
		case iro_ia32_Shr:
		case iro_ia32_Sar:
			assert((int)n_ia32_Shl_count == (int)n_ia32_Shr_count
					&& (int)n_ia32_Shl_count == (int)n_ia32_Sar_count);
			count = get_irn_n(node, n_ia32_Shl_count);
check_shift_amount:
			/* when the shift count is zero the flags are not affected, so we
			 * can only do this for constants != 0 */
			if (!is_ia32_Immediate(count))
				return produces_no_flag;

			const ia32_immediate_attr_t *imm_attr = get_ia32_immediate_attr_const(count);
			if (imm_attr->entity != NULL)
				return produces_no_flag;
			if ((imm_attr->offset & 0x1f) == 0)
				return produces_no_flag;
			break;

		case iro_ia32_Mul:
			return pn == pn_ia32_Mul_res_high ?
				produces_zero_in_carry : produces_no_flag;

		default:
			return produces_no_flag;
	}

	return pn == pn_ia32_res ? produces_zero_sign : produces_no_flag;
}

/**
 * Replace Cmp(x, 0) by a Test(x, x)
 */
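/* Illustrative encoding win (AT&T syntax, register operand):
 *     cmpl $0, %eax   (3 bytes)   ->   testl %eax, %eax   (2 bytes)
 * Both set ZF and SF identically for a comparison against zero. */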
static void peephole_ia32_Cmp(ir_node *const node)
{
	if (get_ia32_op_type(node) != ia32_Normal)
		return;

	ir_node *const right = get_irn_n(node, n_ia32_Cmp_right);
	if (!is_ia32_Immediate(right))
		return;

	ia32_immediate_attr_t const *const imm = get_ia32_immediate_attr_const(right);
	if (imm->entity != NULL || imm->offset != 0)
		return;

	dbg_info *const dbgi         = get_irn_dbg_info(node);
	ir_node  *const block        = get_nodes_block(node);
	ir_graph *const irg          = get_irn_irg(node);
	ir_node  *const noreg        = ia32_new_NoReg_gp(irg);
	ir_node  *const nomem        = get_irg_no_mem(irg);
	ir_node  *const op           = get_irn_n(node, n_ia32_Cmp_left);
	int       const ins_permuted = get_ia32_attr(node)->ins_permuted;

	ir_mode *const ls_mode = get_ia32_ls_mode(node);
	ir_node *const test    = get_mode_size_bits(ls_mode) == 8
		? new_bd_ia32_Test_8bit(dbgi, block, noreg, noreg, nomem, op, op, ins_permuted)
		: new_bd_ia32_Test     (dbgi, block, noreg, noreg, nomem, op, op, ins_permuted);
	set_ia32_ls_mode(test, ls_mode);

	arch_register_t const *const reg = arch_get_irn_register_out(node, pn_ia32_Cmp_eflags);
	arch_set_irn_register_out(test, pn_ia32_Test_eflags, reg);

	foreach_out_edge_safe(node, edge) {
		ir_node *const user = get_edge_src_irn(edge);

		if (is_Proj(user))
			exchange(user, test);
	}

	sched_add_before(node, test);
	copy_mark(node, test);
	be_peephole_exchange(node, test);
}

/**
 * Peephole optimization for Test instructions.
 * - Remove the Test, if an appropriate flag was produced which is still live.
 * - Change a Test(x, c) to its 8-bit variant, if 0 <= c < 128 (the opcode is
 *   3 bytes shorter).
 */
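/* Sketch of the first case (illustrative):
 *     andl %ebx, %eax          andl %ebx, %eax
 *     testl %eax, %eax    ->   jz   .L1
 *     jz   .L1
 * The and already set ZF/SF from its result, so the Test is redundant. */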
static void peephole_ia32_Test(ir_node *node)
{
	ir_node *left  = get_irn_n(node, n_ia32_Test_left);
	ir_node *right = get_irn_n(node, n_ia32_Test_right);

	if (left == right) { /* we need a test for 0 */
		ir_node *block = get_nodes_block(node);
		if (get_nodes_block(left) != block)
			return;

		unsigned pn = pn_ia32_res;
		ir_node *op = left;
		if (is_Proj(op)) {
			pn = get_Proj_num(op);
			op = get_Proj_pred(op);
		}

		/* walk the schedule upwards and abort when we find left or some
		 * other node that destroys the flags */
		ir_node *schedpoint = node;
		for (;;) {
			schedpoint = sched_prev(schedpoint);
			if (schedpoint == op)
				break;
			if (arch_irn_is(schedpoint, modify_flags))
				return;
			if (schedpoint == block)
				panic("couldn't find left");
		}

		produces_flag_t produced = check_produces_zero_sign(op, pn);
		if (produced == produces_no_flag)
			return;

		/* make sure users only look at the sign/zero flag */
		foreach_out_edge(node, edge) {
			ir_node *user = get_edge_src_irn(edge);
			if (is_ia32_CMovcc(user) || is_ia32_Jcc(user) ||
			    is_ia32_Setcc(user) || is_ia32_SetccMem(user)) {
				x86_condition_code_t  cc  = get_ia32_condcode(user);

				if (cc == x86_cc_equal || cc == x86_cc_not_equal)
					continue;
				if (produced == produces_zero_sign
					&& (cc == x86_cc_sign || cc == x86_cc_not_sign)) {
					continue;
				}
			}
			return;
		}

		ir_mode *op_mode = get_ia32_ls_mode(op);
		if (op_mode == NULL)
			op_mode = get_irn_mode(op);

		/* Make sure we operate on the same bit size */
		if (get_mode_size_bits(op_mode) != get_mode_size_bits(get_ia32_ls_mode(node)))
			return;

		if (produced == produces_zero_in_carry) {
			/* patch users to look at the carry instead of the zero flag */
			foreach_out_edge(node, edge) {
				ir_node              *user = get_edge_src_irn(edge);
				x86_condition_code_t  cc   = get_ia32_condcode(user);

				switch (cc) {
				case x86_cc_equal:     cc = x86_cc_above_equal; break;
				case x86_cc_not_equal: cc = x86_cc_below;       break;
				default: panic("unexpected pn");
				}
				set_ia32_condcode(user, cc);
			}
		}

		if (get_irn_mode(op) != mode_T) {
			set_irn_mode(op, mode_T);

			/* If there are other users, reroute them to result proj */
			if (get_irn_n_edges(op) != 2) {
				ir_node *res = new_r_Proj(op, ia32_mode_gp, pn_ia32_res);
				edges_reroute_except(op, res, res);
			}
		} else {
			if (get_irn_n_edges(left) == 2)
				kill_node(left);
		}

		ir_mode *flags_mode = ia32_reg_classes[CLASS_ia32_flags].mode;
		ir_node *flags_proj = new_r_Proj(op, flags_mode, pn_ia32_flags);
		arch_set_irn_register(flags_proj, &ia32_registers[REG_EFLAGS]);

		assert(get_irn_mode(node) != mode_T);

		be_peephole_exchange(node, flags_proj);
	} else if (is_ia32_Immediate(right)) {
		ia32_immediate_attr_t const *const imm = get_ia32_immediate_attr_const(right);

		/* A test with an entity is rather strange, but better safe than sorry */
		if (imm->entity != NULL)
			return;

		/*
		 * We have to take care that we end up with the same sign flag:
		 * testl(128, 128) -> SF=0
		 * testb(128, 128) -> SF=1
		 */
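		/* If only a single byte of the immediate is populated, we can test
		 * just that byte of the memory operand: shift the immediate down and
		 * bump the address-mode offset by the same number of bytes, e.g.
		 * (illustrative):
		 *     testl $0x00400000, 8(%eax)  ->  testb $0x40, 10(%eax) */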
		unsigned offset = imm->offset;
		if (get_ia32_op_type(node) == ia32_AddrModeS) {
			ia32_attr_t *const attr = get_ia32_attr(node);
			ir_graph    *const irg  = get_irn_irg(node);

			if ((offset & 0xFFFFFF80) == 0) {
				/* attr->am_offs += 0; */
			} else if ((offset & 0xFFFF80FF) == 0) {
				ir_node *imm_node = ia32_create_Immediate(irg, NULL, offset >>  8);
				set_irn_n(node, n_ia32_Test_right, imm_node);
				attr->am_offs += 1;
			} else if ((offset & 0xFF80FFFF) == 0) {
				ir_node *imm_node = ia32_create_Immediate(irg, NULL, offset >> 16);
				set_irn_n(node, n_ia32_Test_right, imm_node);
				attr->am_offs += 2;
			} else if ((offset & 0x00FFFFFF) == 0) {
				ir_node *imm_node = ia32_create_Immediate(irg, NULL, offset >> 24);
				set_irn_n(node, n_ia32_Test_right, imm_node);
				attr->am_offs += 3;
			} else {
				return;
			}
		} else if ((offset & 0xFFFFFF80) == 0) {
			arch_register_t const* const reg = arch_get_irn_register(left);

			if (reg != &ia32_registers[REG_EAX] &&
					reg != &ia32_registers[REG_EBX] &&
					reg != &ia32_registers[REG_ECX] &&
					reg != &ia32_registers[REG_EDX]) {
				return;
			}
		} else {
			return;
		}

		/* Technically we should build a Test8Bit because of the register
		 * constraints, but nobody changes registers at this point anymore. */
		set_ia32_ls_mode(node, mode_Bu);
	}
}

/**
 * AMD Athlon works faster when RET is not the destination of a
 * conditional jump or directly preceded by another jump instruction.
 * This can be avoided by placing a Rep prefix before the return.
 */
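/* Note (assumption about the emitter): the padding below presumably selects
 * the 3-byte "ret $0" (opcode C2 00 00) instead of the 1-byte "ret" (C3),
 * which serves the same purpose as the Rep prefix mentioned above. */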
static void peephole_ia32_Return(ir_node *node)
{
	if (!ia32_cg_config.use_pad_return)
		return;

	/* check if this return is the first on the block */
	sched_foreach_reverse_before(node, irn) {
		if (is_Phi(irn) || is_ia32_Start(irn))
			continue;
		/* arg, IncSP 0 nodes might occur, ignore these */
		if (be_is_IncSP(irn) && be_get_IncSP_offset(irn) == 0)
			continue;
		return;
	}

	/* ensure that the 3-byte return is generated */
	ia32_return_attr_t *attr = get_ia32_return_attr(node);
	attr->emit_pop = true;
}

/* only optimize up to 48 stores behind IncSPs */
#define MAXPUSH_OPTIMIZE    48

/**
 * Tries to create Pushs from IncSP, Store combinations.
 * The Stores are replaced by Pushs, the IncSP is modified
 * (possibly into IncSP 0, but not removed).
 */
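/* Illustrative transformation (the stack grows downwards):
 *     subl $8, %esp                 pushl %ebx
 *     movl %ebx, 4(%esp)      ->    pushl %eax
 *     movl %eax, (%esp)             (the IncSP becomes 0)
 */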
static void peephole_IncSP_Store_to_push(ir_node *irn)
{
	int inc_ofs = be_get_IncSP_offset(irn);
	if (inc_ofs < 4)
		return;

	int      maxslot = -1;
	ir_node *stores[MAXPUSH_OPTIMIZE];

	memset(stores, 0, sizeof(stores));

	/*
	 * We first walk the schedule after the IncSP node as long as we find
	 * suitable Stores that could be transformed to a Push.
	 * We save them into the stores array, which is indexed by the frame
	 * offset/4 attached to the node.
	 */
	sched_foreach_after(irn, node) {
		/* it has to be a Store */
		if (!is_ia32_Store(node))
			break;

		/* it has to use our sp value */
		if (get_irn_n(node, n_ia32_base) != irn)
			continue;
		/* Store has to be attached to NoMem */
		ir_node *mem = get_irn_n(node, n_ia32_mem);
		if (!is_NoMem(mem))
			continue;

		/* unfortunately we can't support the full AMs possible for push at the
		 * moment. TODO: fix this */
		if (!is_ia32_NoReg_GP(get_irn_n(node, n_ia32_index)))
			break;

		int offset = get_ia32_am_offs_int(node);
		/* we should NEVER access uninitialized stack BELOW the current SP */
		assert(offset >= 0);

		/* storing at half-slots is bad */
		if ((offset & 3) != 0)
			break;

		if (inc_ofs - 4 < offset || offset >= MAXPUSH_OPTIMIZE * 4)
			continue;

		int storeslot = offset >> 2;

		/* storing into the same slot twice is bad (and shouldn't happen...) */
		if (stores[storeslot] != NULL)
			break;

		stores[storeslot] = node;
		if (storeslot > maxslot)
			maxslot = storeslot;
	}

	ir_node *curr_sp = irn;
	int      i;
	for (i = -1; i < maxslot; ++i) {
		if (stores[i + 1] == NULL)
			break;
	}

	/* walk through the Stores and create Pushs for them */
	ir_node  *block      = get_nodes_block(irn);
	ir_mode  *spmode     = get_irn_mode(irn);
	ir_graph *irg        = get_irn_irg(irn);
	ir_node  *first_push = NULL;
	for (; i >= 0; --i) {
		ir_node               *store = stores[i];
		ir_node               *noreg = ia32_new_NoReg_gp(irg);
		ir_node               *val   = get_irn_n(store, n_ia32_unary_op);
		ir_node               *mem   = get_irn_n(store, n_ia32_mem);
		const arch_register_t *spreg = arch_get_irn_register(curr_sp);

		ir_node *push = new_bd_ia32_Push(get_irn_dbg_info(store), block, noreg, noreg,
		                                 mem, val, curr_sp);
		set_ia32_ls_mode(push, ia32_mode_gp);
		copy_mark(store, push);

		if (first_push == NULL)
			first_push = push;

		sched_add_after(skip_Proj(curr_sp), push);

		/* create stackpointer Proj */
		curr_sp = new_r_Proj(push, spmode, pn_ia32_Push_stack);
		arch_set_irn_register(curr_sp, spreg);

		/* use the memproj now */
		be_peephole_exchange(store, push);

		inc_ofs -= 4;
	}

	foreach_out_edge_safe(irn, edge) {
		ir_node *const src = get_edge_src_irn(edge);

		if (src == first_push)
			continue;

		const int pos = get_edge_src_pos(edge);
		set_irn_n(src, pos, curr_sp);
	}

	be_set_IncSP_offset(irn, inc_ofs);
}

/**
 * Tries to create Pops from Load, IncSP combinations.
 * The Loads are replaced by Pops, the IncSP is modified
 * (possibly into IncSP 0, but not removed).
 */
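/* Illustrative transformation (the mirror image of the Push case):
 *     movl (%esp), %eax             popl %eax
 *     movl 4(%esp), %ebx      ->    popl %ebx
 *     addl $8, %esp                 (the IncSP becomes 0)
 */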
static void peephole_Load_IncSP_to_pop(ir_node *irn)
{
	int inc_ofs = -be_get_IncSP_offset(irn);
	if (inc_ofs < 4)
		return;

	ir_node  *loads[MAXPUSH_OPTIMIZE];
	unsigned  regmask  = 0;
	unsigned  copymask = ~0;

	memset(loads, 0, sizeof(loads));

	/*
	 * We first walk the schedule before the IncSP node as long as we find
	 * suitable Loads that could be transformed to a Pop.
	 * We save them into the loads array, which is indexed by the frame
	 * offset/4 attached to the node.
	 */
	int      maxslot = -1;
	ir_node *pred_sp = be_get_IncSP_pred(irn);
	sched_foreach_reverse_before(irn, node) {
		/* it has to be a Load */
		if (!is_ia32_Load(node)) {
			if (be_is_Copy(node)) {
				const arch_register_t *dreg = arch_get_irn_register(node);
				if (dreg->cls != &ia32_reg_classes[CLASS_ia32_gp]) {
					/* not a GP copy, ignore */
					continue;
				}
				const arch_register_t *sreg = arch_get_irn_register(be_get_Copy_op(node));
				if (regmask & copymask & (1 << sreg->index)) {
					break;
				}
				if (regmask & copymask & (1 << dreg->index)) {
					break;
				}
				/* we CAN skip Copies if neither the destination nor the source
				 * is in our regmask, i.e. none of our future Pops will
				 * overwrite it */
				regmask |= (1 << dreg->index) | (1 << sreg->index);
				copymask &= ~((1 << dreg->index) | (1 << sreg->index));
				continue;
			}
			break;
		}

		/* it has to use our predecessor sp value */
		if (get_irn_n(node, n_ia32_base) != pred_sp) {
			/* it would be ok if this load does not use a Pop result,
			 * but we do not check this */
			break;
		}

		/* should have NO index */
		if (!is_ia32_NoReg_GP(get_irn_n(node, n_ia32_index)))
			break;

		int offset = get_ia32_am_offs_int(node);
		/* we should NEVER access uninitialized stack BELOW the current SP */
		assert(offset >= 0);

		/* storing at half-slots is bad */
		if ((offset & 3) != 0)
			break;

		if (offset < 0 || offset >= MAXPUSH_OPTIMIZE * 4)
			continue;
		/* ignore those outside the possible windows */
		if (offset > inc_ofs - 4)
			continue;
		int loadslot = offset >> 2;

		/* loading from the same slot twice is bad (and shouldn't happen...) */
		if (loads[loadslot] != NULL)
			break;

		const arch_register_t *dreg = arch_get_irn_register_out(node, pn_ia32_Load_res);
		if (regmask & (1 << dreg->index)) {
			/* this register is already used */
			break;
		}
		regmask |= 1 << dreg->index;

		loads[loadslot] = node;
		if (loadslot > maxslot)
			maxslot = loadslot;
	}

	if (maxslot < 0)
		return;

	/* find the first slot */
	int i;
	for (i = maxslot; i >= 0; --i) {
		ir_node *load = loads[i];

		if (load == NULL)
			break;
	}

	int ofs = inc_ofs - (maxslot + 1) * 4;
	inc_ofs = (i + 1) * 4;

	/* create a new IncSP if needed */
	const arch_register_t *esp   = &ia32_registers[REG_ESP];
	ir_node *const         block = get_nodes_block(irn);
	if (inc_ofs > 0) {
		pred_sp = ia32_new_IncSP(block, pred_sp, -inc_ofs, be_get_IncSP_align(irn));
		sched_add_before(irn, pred_sp);
	}

	/* walk through the Loads and create Pops for them */
	for (++i; i <= maxslot; ++i) {
		ir_node               *load = loads[i];
		ir_node               *mem  = get_irn_n(load, n_ia32_mem);
		const arch_register_t *reg  = arch_get_irn_register_out(load, pn_ia32_Load_res);

		ir_node *pop = new_bd_ia32_Pop(get_irn_dbg_info(load), block, mem, pred_sp);
		arch_set_irn_register_out(pop, pn_ia32_Load_res, reg);
		set_ia32_ls_mode(pop, ia32_mode_gp);

		copy_mark(load, pop);

		/* create stackpointer Proj */
		pred_sp = new_r_Proj(pop, ia32_mode_gp, pn_ia32_Pop_stack);
		arch_set_irn_register(pred_sp, esp);

		sched_add_before(irn, pop);
		be_peephole_exchange(load, pop);
	}

	be_set_IncSP_offset(irn, -ofs);
	be_set_IncSP_pred(irn, pred_sp);
}

/**
 * Find a free GP register if possible, else return NULL.
 */
static const arch_register_t *get_free_gp_reg(ir_graph *irg)
{
	be_irg_t *birg = be_birg_from_irg(irg);

	for (int i = 0; i < N_ia32_gp_REGS; ++i) {
		const arch_register_t *reg = &ia32_reg_classes[CLASS_ia32_gp].regs[i];
		if (!rbitset_is_set(birg->allocatable_regs, reg->global_index))
			continue;

		if (be_peephole_get_value(reg->global_index) == NULL)
			return reg;
	}

	return NULL;
}

/**
 * Creates a Pop instruction before the given schedule point.
 *
 * @param dbgi        debug info
 * @param block       the block
 * @param stack       the previous stack value
 * @param schedpoint  the new node is added before this node
 * @param reg         the register to pop
 *
 * @return the new stack value
 */
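/* Note: the popped value has no real user; the Keep created below marks it
 * as live, presumably so the Pop is not removed as dead code. */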
static ir_node *create_pop(dbg_info *dbgi, ir_node *block,
                           ir_node *stack, ir_node *schedpoint,
                           const arch_register_t *reg)
{
	const arch_register_t *esp = &ia32_registers[REG_ESP];
	ir_graph              *irg = get_irn_irg(block);

	ir_node *pop = new_bd_ia32_Pop(dbgi, block, get_irg_no_mem(irg), stack);
	set_ia32_ls_mode(pop, ia32_mode_gp);

	stack = new_r_Proj(pop, ia32_mode_gp, pn_ia32_Pop_stack);
	arch_set_irn_register(stack, esp);
	ir_node *val = new_r_Proj(pop, ia32_mode_gp, pn_ia32_Pop_res);
	arch_set_irn_register(val, reg);

	sched_add_before(schedpoint, pop);

	ir_node *in[1] = { val };
	ir_node *keep  = be_new_Keep(block, 1, in);
	sched_add_before(schedpoint, keep);

	return stack;
}

/**
 * Optimize an IncSP by replacing it with Push/Pop.
 */
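/* Illustrative: on cores where the config flags below say it pays off,
 * "addl $4, %esp" becomes "popl %reg" into a free register, and
 * "subl $4, %esp" becomes a push of a don't-care register (PushEax). */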
static void peephole_be_IncSP(ir_node *node)
{
	const arch_register_t *esp = &ia32_registers[REG_ESP];

	/* first optimize incsp->incsp combinations */
	node = be_peephole_IncSP_IncSP(node);

	/* transform IncSP->Store combinations to Push where possible */
	peephole_IncSP_Store_to_push(node);

	/* transform Load->IncSP combinations to Pop where possible */
	peephole_Load_IncSP_to_pop(node);

	if (arch_get_irn_register(node) != esp)
		return;

	/* replace IncSP -4 by Pop freereg when possible */
	int offset = be_get_IncSP_offset(node);
	if ((offset != -8 || ia32_cg_config.use_add_esp_8) &&
	    (offset != -4 || ia32_cg_config.use_add_esp_4) &&
	    (offset != +4 || ia32_cg_config.use_sub_esp_4) &&
	    (offset != +8 || ia32_cg_config.use_sub_esp_8))
		return;

	ir_node *stack;

	if (offset < 0) {
		/* we need a free register for pop */
		const arch_register_t *reg = get_free_gp_reg(get_irn_irg(node));
		if (reg == NULL)
			return;

		dbg_info *dbgi  = get_irn_dbg_info(node);
		ir_node  *block = get_nodes_block(node);

		stack = be_get_IncSP_pred(node);
		stack = create_pop(dbgi, block, stack, node, reg);

		if (offset == -8) {
			stack = create_pop(dbgi, block, stack, node, reg);
		}
	} else {
		dbg_info *dbgi  = get_irn_dbg_info(node);
		ir_node  *block = get_nodes_block(node);

		stack = be_get_IncSP_pred(node);
		stack = new_bd_ia32_PushEax(dbgi, block, stack);
		arch_set_irn_register(stack, esp);
		sched_add_before(node, stack);

		if (offset == +8) {
			stack = new_bd_ia32_PushEax(dbgi, block, stack);
			arch_set_irn_register(stack, esp);
			sched_add_before(node, stack);
		}
	}

	be_peephole_exchange(node, stack);
}

/**
 * Peephole optimization for ia32_Const nodes.
 */
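/* Illustrative encoding win: "movl $0, %eax" is 5 bytes (B8 00 00 00 00),
 * while "xorl %eax, %eax" is 2 bytes (31 C0). The xor clobbers EFLAGS,
 * hence the flags check below. */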
static void peephole_ia32_Const(ir_node *node)
{
	const ia32_immediate_attr_t *attr = get_ia32_immediate_attr_const(node);

	/* try to transform a mov 0, reg to xor reg reg */
	if (attr->offset != 0 || attr->entity != NULL)
		return;
	if (ia32_cg_config.use_mov_0)
		return;
	/* xor destroys the flags, so no-one must be using them */
	if (be_peephole_get_value(REG_EFLAGS) != NULL)
		return;

	const arch_register_t *reg = arch_get_irn_register(node);
	assert(be_peephole_get_reg_value(reg) == NULL);

	ir_node  *block = get_nodes_block(node);
	dbg_info *dbgi  = get_irn_dbg_info(node);
	ir_node  *xorn  = new_bd_ia32_Xor0(dbgi, block);
	arch_set_irn_register(xorn, reg);

	sched_add_before(node, xorn);

	copy_mark(node, xorn);
	be_peephole_exchange(node, xorn);
}

static inline int is_noreg(const ir_node *node)
{
	return is_ia32_NoReg_GP(node);
}

ir_node *ia32_immediate_from_long(ir_graph *const irg, long const val)
{
	ir_node *start_block = get_irg_start_block(irg);
	ir_node *immediate   = new_bd_ia32_Immediate(NULL, start_block, NULL, 0, val);
	arch_set_irn_register(immediate, &ia32_registers[REG_GP_NOREG]);

	return immediate;
}

static ir_node *create_immediate_from_am(const ir_node *node)
{
	ir_node           *block            = get_nodes_block(node);
	int                offset           = get_ia32_am_offs_int(node);
	const ia32_attr_t *attr             = get_ia32_attr_const(node);
	int                sc_no_pic_adjust = attr->am_sc_no_pic_adjust;
	ir_entity         *entity           = get_ia32_am_ent(node);

	ir_node *res = new_bd_ia32_Immediate(NULL, block, entity, sc_no_pic_adjust, offset);
	arch_set_irn_register(res, &ia32_registers[REG_GP_NOREG]);
	return res;
}

static int is_am_one(const ir_node *node)
{
	int        offset  = get_ia32_am_offs_int(node);
	ir_entity *entity  = get_ia32_am_ent(node);

	return offset == 1 && entity == NULL;
}

static int is_am_minus_one(const ir_node *node)
{
	int        offset  = get_ia32_am_offs_int(node);
	ir_entity *entity  = get_ia32_am_ent(node);

	return offset == -1 && entity == NULL;
}

/**
 * Transforms a LEA into an Add or SHL if possible.
 */
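/* Illustrative (assuming EFLAGS is dead at this point):
 *     lea (%eax,%ebx), %eax   ->   addl %ebx, %eax
 *     lea 4(%ebx), %ebx       ->   addl $4, %ebx
 *     lea (,%eax,4), %eax     ->   shll $2, %eax
 */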
static void peephole_ia32_Lea(ir_node *node)
{
	assert(is_ia32_Lea(node));

	/* we can only do this if it is allowed to clobber the flags */
	if (be_peephole_get_value(REG_EFLAGS) != NULL)
		return;

	ir_node *base  = get_irn_n(node, n_ia32_Lea_base);
	ir_node *index = get_irn_n(node, n_ia32_Lea_index);
	const arch_register_t *base_reg;
	const arch_register_t *index_reg;

	if (is_noreg(base)) {
		base     = NULL;
		base_reg = NULL;
	} else {
		base_reg = arch_get_irn_register(base);
	}
	if (is_noreg(index)) {
		index     = NULL;
		index_reg = NULL;
	} else {
		index_reg = arch_get_irn_register(index);
	}

	if (base == NULL && index == NULL) {
		/* we shouldn't construct these in the first place... */
#ifdef DEBUG_libfirm
		ir_fprintf(stderr, "Optimization warning: found immediate only lea\n");
#endif
		return;
	}

	const arch_register_t *out_reg        = arch_get_irn_register(node);
	int                    scale          = get_ia32_am_scale(node);
	int                    has_immediates;
	ir_node               *op1;
	ir_node               *op2;

	assert(!is_ia32_need_stackent(node) || get_ia32_frame_ent(node) != NULL);
	/* check if we have immediate values (frame entities should already be
	 * expressed in the offsets) */
	if (get_ia32_am_offs_int(node) != 0 || get_ia32_am_ent(node) != NULL) {
		has_immediates = 1;
	} else {
		has_immediates = 0;
	}

	/* we can transform leas where the out register is the same as either the
	 * base or index register back to an Add or Shl */
	if (out_reg == base_reg) {
		if (index == NULL) {
#ifdef DEBUG_libfirm
			if (!has_immediates) {
				ir_fprintf(stderr, "Optimization warning: found lea which is "
				           "just a copy\n");
			}
#endif
			op1 = base;
			goto make_add_immediate;
		}
		if (scale == 0 && !has_immediates) {
			op1 = base;
			op2 = index;
			goto make_add;
		}
		/* can't create an add */
		return;
	} else if (out_reg == index_reg) {
		if (base == NULL) {
			if (has_immediates && scale == 0) {
				op1 = index;
				goto make_add_immediate;