/*
 * This file is part of libFirm.
 * Copyright (C) 2012 University of Karlsruhe.
 */

/**
 * @file
 * @brief       Implements several optimizations for IA32.
 * @author      Matthias Braun, Christian Wuerdig
 */
#include "debug.h"
#include "irnode_t.h"
#include "irprog_t.h"
#include "ircons.h"
#include "irtools.h"
#include "firm_types.h"
#include "iredges_t.h"
#include "tv.h"
#include "irgmod.h"
#include "irgwalk.h"
#include "heights.h"
#include "irprintf.h"
#include "irdump.h"
#include "panic.h"

#include "be_t.h"
#include "bediagnostic.h"
#include "benode.h"
#include "besched.h"
#include "bepeephole.h"

#include "ia32_new_nodes.h"
#include "ia32_optimize.h"
#include "bearch_ia32_t.h"
#include "gen_ia32_regalloc_if.h"
#include "ia32_transform.h"
#include "ia32_architecture.h"
#include "util.h"

DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)

/** Copies the is_spill/is_reload/is_remat markers from @p old to its replacement @p newn. */
static void copy_mark(const ir_node *old, ir_node *newn)
{
	if (is_ia32_is_reload(old))
		set_ia32_is_reload(newn);
	if (is_ia32_is_spill(old))
		set_ia32_is_spill(newn);
	if (is_ia32_is_remat(old))
		set_ia32_is_remat(newn);
}

typedef enum produces_flag_t {
	produces_no_flag,
	produces_zero_sign,
	produces_zero_in_carry
} produces_flag_t;

/**
 * Returns which usable flag the given node produces about its result,
 * i.e. zero (ZF) and sign (SF).
 * We do not check for carry (CF) or overflow (OF).
 *
 * @param node  the node to check
 * @param pn    the projection number of the used result
 */
static produces_flag_t check_produces_zero_sign(ir_node *node, unsigned pn)
{
	if (!is_ia32_irn(node))
		return produces_no_flag;

	ir_node *count;

	switch (get_ia32_irn_opcode(node)) {
		case iro_ia32_Adc:
		case iro_ia32_Add:
		case iro_ia32_And:
		case iro_ia32_Dec:
		case iro_ia32_Inc:
		case iro_ia32_Neg:
		case iro_ia32_Or:
		case iro_ia32_Popcnt:
		case iro_ia32_Sbb:
		case iro_ia32_Sub:
		case iro_ia32_Xor:
			break;

		case iro_ia32_ShlD:
		case iro_ia32_ShrD:
			assert((int)n_ia32_ShlD_count == (int)n_ia32_ShrD_count);
			count = get_irn_n(node, n_ia32_ShlD_count);
			goto check_shift_amount;

		case iro_ia32_Sar:
		case iro_ia32_Shl:
		case iro_ia32_Shr:
			assert((int)n_ia32_Shl_count == (int)n_ia32_Shr_count
					&& (int)n_ia32_Shl_count == (int)n_ia32_Sar_count);
			count = get_irn_n(node, n_ia32_Shl_count);
check_shift_amount:
			/* when shift count is zero the flags are not affected, so we can only
			 * do this for constants != 0 */
			if (!is_ia32_Immediate(count))
				return produces_no_flag;

			const ia32_immediate_attr_t *imm_attr = get_ia32_immediate_attr_const(count);
			if (imm_attr->imm.entity != NULL)
				return produces_no_flag;
			if ((imm_attr->imm.offset & 0x1f) == 0)
				return produces_no_flag;
			break;

		case iro_ia32_Mul:
			return pn == pn_ia32_Mul_res_high ?
				produces_zero_in_carry : produces_no_flag;

		default:
			return produces_no_flag;
	}

	return pn == pn_ia32_res ? produces_zero_sign : produces_no_flag;
}

/**
 * Replace Cmp(x, 0) by a Test(x, x)
 */
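/* At the assembly level this turns e.g. "cmpl $0, %eax" into
 * "testl %eax, %eax"; both set ZF and SF according to the value, and the
 * test form is shorter because it needs no immediate. */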
static void peephole_ia32_Cmp(ir_node *const node)
{
	if (get_ia32_op_type(node) != ia32_Normal)
		return;

	ir_node *const right = get_irn_n(node, n_ia32_Cmp_right);
	if (!is_ia32_Immediate(right))
		return;

	ia32_immediate_attr_t const *const imm = get_ia32_immediate_attr_const(right);
	if (imm->imm.entity != NULL || imm->imm.offset != 0)
		return;

	dbg_info *const dbgi         = get_irn_dbg_info(node);
	ir_node  *const block        = get_nodes_block(node);
	ir_graph *const irg          = get_irn_irg(node);
	ir_node  *const noreg        = ia32_new_NoReg_gp(irg);
	ir_node  *const nomem        = get_irg_no_mem(irg);
	ir_node  *const op           = get_irn_n(node, n_ia32_Cmp_left);
	int       const ins_permuted = get_ia32_attr(node)->ins_permuted;

	ir_mode *const ls_mode = get_ia32_ls_mode(node);
	ir_node *const test    = get_mode_size_bits(ls_mode) == 8
		? new_bd_ia32_Test_8bit(dbgi, block, noreg, noreg, nomem, op, op, ins_permuted)
		: new_bd_ia32_Test     (dbgi, block, noreg, noreg, nomem, op, op, ins_permuted);
	set_ia32_ls_mode(test, ls_mode);

	arch_register_t const *const reg = arch_get_irn_register_out(node, pn_ia32_Cmp_eflags);
	arch_set_irn_register_out(test, pn_ia32_Test_eflags, reg);

	foreach_out_edge_safe(node, edge) {
		ir_node *const user = get_edge_src_irn(edge);

		if (is_Proj(user))
			exchange(user, test);
	}

	sched_add_before(node, test);
	copy_mark(node, test);
	be_peephole_exchange(node, test);
}

/**
 * Returns true for the registers that have addressable 8-bit subregisters
 * (eax, ebx, ecx, edx).
 */
static bool is_hl_register(arch_register_t const *const reg)
{
	return
		reg == &ia32_registers[REG_EAX] ||
		reg == &ia32_registers[REG_EBX] ||
		reg == &ia32_registers[REG_ECX] ||
		reg == &ia32_registers[REG_EDX];
}

/**
 * Peephole optimization for Test instructions.
 * - Remove the Test, if an appropriate flag was produced which is still live
 * - Change a Test(x, c) to an 8-bit Test, if 0 <= c < 128 (3 bytes shorter opcode)
 */
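/* For example, an "andl %ebx, %eax" followed by "testl %eax, %eax" can drop
 * the test, since the and already set ZF/SF; and "testl $0x40, %eax" can be
 * narrowed to the shorter "testb $0x40, %al" because the constant fits into
 * the low byte. */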
static void peephole_ia32_Test(ir_node *node)
{
	ir_node *left  = get_irn_n(node, n_ia32_Test_left);
	ir_node *right = get_irn_n(node, n_ia32_Test_right);

	if (left == right) { /* we need a test for 0 */
		ir_node *block = get_nodes_block(node);
		if (get_nodes_block(left) != block)
			return;

		unsigned pn = pn_ia32_res;
		ir_node *op = left;
		if (is_Proj(op)) {
			pn = get_Proj_num(op);
			op = get_Proj_pred(op);
		}

		/* walk the schedule upwards and abort when we find left or some other
		 * node that destroys the flags */
		ir_node *schedpoint = node;
		for (;;) {
			schedpoint = sched_prev(schedpoint);
			if (schedpoint == op)
				break;
			if (arch_irn_is(schedpoint, modify_flags))
				return;
			if (schedpoint == block)
				panic("couldn't find left");
		}

		produces_flag_t produced = check_produces_zero_sign(op, pn);
		if (produced == produces_no_flag)
			return;

		/* make sure users only look at the sign/zero flag */
		foreach_out_edge(node, edge) {
			ir_node *user = get_edge_src_irn(edge);
			if (is_ia32_CMovcc(user) || is_ia32_Jcc(user) ||
			    is_ia32_Setcc(user) || is_ia32_SetccMem(user)) {
				x86_condition_code_t cc = get_ia32_condcode(user);

				if (cc == x86_cc_equal || cc == x86_cc_not_equal)
					continue;
				if (produced == produces_zero_sign
					&& (cc == x86_cc_sign || cc == x86_cc_not_sign)) {
					continue;
				}
			}
			return;
		}

		ir_mode *op_mode = get_ia32_ls_mode(op);
		if (op_mode == NULL)
			op_mode = get_irn_mode(op);

		/* Make sure we operate on the same bit size */
		if (get_mode_size_bits(op_mode) != get_mode_size_bits(get_ia32_ls_mode(node)))
			return;

		if (produced == produces_zero_in_carry) {
			/* patch users to look at the carry instead of the zero flag */
			foreach_out_edge(node, edge) {
				ir_node              *user = get_edge_src_irn(edge);
				x86_condition_code_t  cc   = get_ia32_condcode(user);

				switch (cc) {
				case x86_cc_equal:     cc = x86_cc_above_equal; break;
				case x86_cc_not_equal: cc = x86_cc_below;       break;
				default: panic("unexpected pn");
				}
				set_ia32_condcode(user, cc);
			}
		}

		if (get_irn_mode(op) != mode_T) {
			/* If there are other users, reroute them to the result proj */
			if (get_irn_n_edges(op) != 2) {
				be_peephole_to_tuple(op);
			} else {
				set_irn_mode(op, mode_T);
			}
		} else {
			if (get_irn_n_edges(left) == 2)
				kill_node(left);
		}

		ir_node *const flags_proj = be_new_Proj_reg(op, pn_ia32_flags, &ia32_registers[REG_EFLAGS]);
		be_peephole_exchange(node, flags_proj);
	} else if (is_ia32_Immediate(right)) {
		ia32_immediate_attr_t const *const imm = get_ia32_immediate_attr_const(right);

		/* A test with an entity is rather strange, but better safe than sorry */
		if (imm->imm.entity != NULL)
			return;

		/*
		 * We have to take care that we end up with the same sign flag:
		 * testl(128, 128) -> SF=0
		 * testb(128, 128) -> SF=1
		 */
		unsigned offset = imm->imm.offset;
		if (get_ia32_op_type(node) == ia32_AddrModeS) {
			ia32_attr_t *const attr = get_ia32_attr(node);
			ir_graph    *const irg  = get_irn_irg(node);

			if ((offset & 0xFFFFFF80) == 0) {
				/* attr->am_imm.offset += 0; */
			} else if ((offset & 0xFFFF80FF) == 0) {
				ir_node *const imm_node = ia32_create_Immediate(irg, offset >>  8);
				set_irn_n(node, n_ia32_Test_right, imm_node);
				attr->am_imm.offset += 1;
			} else if ((offset & 0xFF80FFFF) == 0) {
				ir_node *const imm_node = ia32_create_Immediate(irg, offset >> 16);
				set_irn_n(node, n_ia32_Test_right, imm_node);
				attr->am_imm.offset += 2;
			} else if ((offset & 0x00FFFFFF) == 0) {
				ir_node *const imm_node = ia32_create_Immediate(irg, offset >> 24);
				set_irn_n(node, n_ia32_Test_right, imm_node);
				attr->am_imm.offset += 3;
			} else {
				return;
			}
		} else if ((offset & 0xFFFFFF80) == 0) {
			arch_register_t const* const reg = arch_get_irn_register(left);
			if (!is_hl_register(reg))
				return;
		} else {
			return;
		}

		/* Technically we should build a Test8Bit because of the register
		 * constraints, but nobody changes registers at this point anymore. */
		set_ia32_ls_mode(node, mode_Bu);
	}
}

/**
 * AMD Athlon works faster when RET is not the destination of a conditional
 * jump or directly preceded by another jump instruction.
 * This can be avoided by placing a Rep prefix before the return.
 */
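/* In this implementation the padding is done by emitting the longer 3-byte
 * return encoding (see emit_pop below) instead of the plain 1-byte ret. */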
static void peephole_ia32_Return(ir_node *node)
{
	if (!ia32_cg_config.use_pad_return)
		return;

	/* check if this return is the first node in the block */
	sched_foreach_non_phi_reverse_before(node, irn) {
		if (be_is_Start(irn))
			continue;
		/* arg, IncSP 0 nodes might occur, ignore these */
		if (be_is_IncSP(irn) && be_get_IncSP_offset(irn) == 0)
			continue;
		return;
	}

	/* ensure that the 3-byte return is generated */
	ia32_return_attr_t *attr = get_ia32_return_attr(node);
	attr->emit_pop = true;
}

/* only optimize up to 48 stores behind IncSPs */
#define MAXPUSH_OPTIMIZE    48

/**
 * Tries to create Pushes from IncSP, Store combinations.
 * The Stores are replaced by Pushes, the IncSP is modified
 * (possibly into IncSP 0, but not removed).
 */
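/* E.g. "subl $8, %esp; movl %eax, (%esp); movl %ebx, 4(%esp)" becomes
 * "pushl %ebx; pushl %eax", and the remaining IncSP is reduced accordingly. */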
static void peephole_IncSP_Store_to_push(ir_node *irn)
{
	int inc_ofs = be_get_IncSP_offset(irn);
	if (inc_ofs < 4)
		return;

	int      maxslot                  = -1;
	ir_node *stores[MAXPUSH_OPTIMIZE];

	memset(stores, 0, sizeof(stores));

	/*
	 * We first walk the schedule after the IncSP node as long as we find
	 * suitable Stores that could be transformed to a Push.
	 * We save them in the stores array, which is indexed by the frame
	 * offset/4 attached to the node.
	 */
	sched_foreach_after(irn, node) {
		/* it has to be a Store */
		if (!is_ia32_Store(node))
			break;

		/* it has to use our sp value */
		if (get_irn_n(node, n_ia32_base) != irn)
			continue;
		/* Store has to be attached to NoMem */
		ir_node *mem = get_irn_n(node, n_ia32_mem);
		if (!is_NoMem(mem))
			continue;

		/* unfortunately we can't support the full AMs possible for push at the
		 * moment. TODO: fix this */
		if (!is_ia32_NoReg_GP(get_irn_n(node, n_ia32_index)))
			break;

		int offset = get_ia32_am_offs_int(node);
		/* we should NEVER access uninitialized stack BELOW the current SP */
		assert(offset >= 0);

		/* storing at half-slots is bad */
		if ((offset & 3) != 0)
			break;

		if (inc_ofs - 4 < offset || offset >= MAXPUSH_OPTIMIZE * 4)
			continue;

		int storeslot = offset >> 2;

		/* storing into the same slot twice is bad (and shouldn't happen...) */
		if (stores[storeslot] != NULL)
			break;

		stores[storeslot] = node;
		if (storeslot > maxslot)
			maxslot = storeslot;
	}

	ir_node *curr_sp = irn;
	int      i;
	for (i = -1; i < maxslot; ++i) {
		if (stores[i + 1] == NULL)
			break;
	}

	/* walk through the Stores and create Pushes for them */
	ir_node  *const block      = get_nodes_block(irn);
	ir_graph *const irg        = get_irn_irg(irn);
	ir_node  *const noreg      = ia32_new_NoReg_gp(irg);
	ir_node        *first_push = NULL;
	for (; i >= 0; --i) {
		ir_node  *const store = stores[i];
		dbg_info *const dbgi  = get_irn_dbg_info(store);
		ir_node  *const mem   = get_irn_n(store, n_ia32_mem);
		ir_node  *const val   = get_irn_n(store, n_ia32_unary_op);
		ir_node  *const push  = new_bd_ia32_Push(dbgi, block, noreg, noreg, mem, val, curr_sp, ia32_mode_gp);
		copy_mark(store, push);

		if (first_push == NULL)
			first_push = push;

		sched_add_after(skip_Proj(curr_sp), push);

		/* create stackpointer Proj */
		curr_sp = be_new_Proj_reg(push, pn_ia32_Push_stack, &ia32_registers[REG_ESP]);

		/* use the memproj now */
		be_peephole_exchange(store, push);

		inc_ofs -= 4;
	}

	edges_reroute_except(irn, curr_sp, first_push);
	be_set_IncSP_offset(irn, inc_ofs);
}

/**
 * Tries to create Pops from Load, IncSP combinations.
 * The Loads are replaced by Pops, the IncSP is modified
 * (possibly into IncSP 0, but not removed).
 */
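/* E.g. "movl (%esp), %eax; movl 4(%esp), %ebx; addl $8, %esp" becomes
 * "popl %eax; popl %ebx", and the remaining IncSP is reduced accordingly. */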
static void peephole_Load_IncSP_to_pop(ir_node *irn)
{
	int inc_ofs = -be_get_IncSP_offset(irn);
	if (inc_ofs < 4)
		return;

	ir_node  *loads[MAXPUSH_OPTIMIZE];
	unsigned  regmask                 = 0;
	unsigned  copymask                = ~0;

	memset(loads, 0, sizeof(loads));

	/*
	 * We first walk the schedule before the IncSP node as long as we find
	 * suitable Loads that could be transformed to a Pop.
	 * We save them in the loads array, which is indexed by the frame
	 * offset/4 attached to the node.
	 */
	int      maxslot = -1;
	ir_node *pred_sp = be_get_IncSP_pred(irn);
	sched_foreach_reverse_before(irn, node) {
		/* it has to be a Load */
		if (!is_ia32_Load(node)) {
			if (be_is_Copy(node)) {
				const arch_register_t *dreg = arch_get_irn_register(node);
				if (dreg->cls != &ia32_reg_classes[CLASS_ia32_gp]) {
					/* not a GP copy, ignore */
					continue;
				}
				const arch_register_t *sreg = arch_get_irn_register(be_get_Copy_op(node));
				if (regmask & copymask & (1 << sreg->index)) {
					break;
				}
				if (regmask & copymask & (1 << dreg->index)) {
					break;
				}
				/* we CAN skip Copies if neither the destination nor the source
				 * is in our regmask, i.e. none of our future Pops will overwrite it */
				regmask |= (1 << dreg->index) | (1 << sreg->index);
				copymask &= ~((1 << dreg->index) | (1 << sreg->index));
				continue;
			}
			break;
		}

		/* it has to use our predecessor sp value */
		if (get_irn_n(node, n_ia32_base) != pred_sp) {
			/* it would be ok if this load does not use a Pop result,
			 * but we do not check this */
			break;
		}

		/* should have NO index */
		if (!is_ia32_NoReg_GP(get_irn_n(node, n_ia32_index)))
			break;

		int offset = get_ia32_am_offs_int(node);

		/* we should NEVER access uninitialized stack BELOW the current SP */
		assert(offset >= 0);

		/* loading from half-slots is bad */
		if ((offset & 3) != 0)
			break;

		if (offset < 0 || offset >= MAXPUSH_OPTIMIZE * 4)
			continue;
		/* ignore those outside the possible window */
		if (offset > inc_ofs - 4)
			continue;
		int loadslot = offset >> 2;

		/* loading from the same slot twice is bad (and shouldn't happen...) */
		if (loads[loadslot] != NULL)
			break;

		const arch_register_t *dreg = arch_get_irn_register_out(node, pn_ia32_Load_res);
		if (regmask & (1 << dreg->index)) {
			/* this register is already used */
			break;
		}
		regmask |= 1 << dreg->index;

		loads[loadslot] = node;
		if (loadslot > maxslot)
			maxslot = loadslot;
	}

	if (maxslot < 0)
		return;

	/* find the first slot */
	int i;
	for (i = maxslot; i >= 0; --i) {
		ir_node *load = loads[i];

		if (load == NULL)
			break;
	}

	int ofs = inc_ofs - (maxslot + 1) * 4;
	inc_ofs = (i + 1) * 4;

	/* create a new IncSP if needed */
	ir_node *const block = get_nodes_block(irn);
	if (inc_ofs > 0) {
		pred_sp = ia32_new_IncSP(block, pred_sp, -inc_ofs, be_get_IncSP_align(irn));
		sched_add_before(irn, pred_sp);
	}

	/* walk through the Loads and create Pops for them */
	for (++i; i <= maxslot; ++i) {
		ir_node               *load = loads[i];
		ir_node               *mem  = get_irn_n(load, n_ia32_mem);
		const arch_register_t *reg  = arch_get_irn_register_out(load, pn_ia32_Load_res);

		ir_node *pop = new_bd_ia32_Pop(get_irn_dbg_info(load), block, mem, pred_sp);
		arch_set_irn_register_out(pop, pn_ia32_Load_res, reg);

		copy_mark(load, pop);

		/* create stackpointer Proj */
		pred_sp = be_new_Proj_reg(pop, pn_ia32_Pop_stack, &ia32_registers[REG_ESP]);

		sched_add_before(irn, pop);
		be_peephole_exchange(load, pop);
	}

	be_set_IncSP_offset(irn, -ofs);
	be_set_IncSP_pred(irn, pred_sp);
}

/**
 * Find a free GP register if possible, else return NULL.
 */
static const arch_register_t *get_free_gp_reg(ir_graph *irg)
{
	be_irg_t *birg = be_birg_from_irg(irg);

	for (int i = 0; i < N_ia32_gp_REGS; ++i) {
		const arch_register_t *reg = &ia32_reg_classes[CLASS_ia32_gp].regs[i];
		if (!rbitset_is_set(birg->allocatable_regs, reg->global_index))
			continue;

		if (be_peephole_get_value(reg->global_index) == NULL)
			return reg;
	}

	return NULL;
}

/**
 * Creates a Pop instruction before the given schedule point.
 *
 * @param dbgi        debug info
 * @param block       the block
 * @param stack       the previous stack value
 * @param schedpoint  the new node is added before this node
 * @param reg         the register to pop
 *
 * @return the new stack value
 */
static ir_node *create_pop(dbg_info *dbgi, ir_node *block,
                           ir_node *stack, ir_node *schedpoint,
                           const arch_register_t *reg)
{
	ir_graph *const irg = get_irn_irg(block);
	ir_node  *const mem = get_irg_no_mem(irg);
	ir_node  *const pop = new_bd_ia32_Pop(dbgi, block, mem, stack);
	sched_add_before(schedpoint, pop);

	ir_node *const val  = be_new_Proj_reg(pop, pn_ia32_Pop_res, reg);
	ir_node *const keep = be_new_Keep_one(val);
	sched_add_before(schedpoint, keep);

	return be_new_Proj_reg(pop, pn_ia32_Pop_stack, &ia32_registers[REG_ESP]);
}

/**
 * Optimize an IncSP by replacing it with Push/Pop.
 */
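/* E.g. depending on ia32_cg_config, "addl $4, %esp" becomes "popl <freereg>"
 * and "subl $4, %esp" becomes "pushl %eax" (the stored value is irrelevant);
 * both replacements have a shorter encoding than the esp arithmetic. */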
static void peephole_be_IncSP(ir_node *node)
{
	/* first optimize incsp->incsp combinations */
	if (be_peephole_IncSP_IncSP(node))
		return;

	/* transform IncSP->Store combinations to Push where possible */
	peephole_IncSP_Store_to_push(node);

	/* transform Load->IncSP combinations to Pop where possible */
	peephole_Load_IncSP_to_pop(node);

	/* replace small IncSPs (-4/-8 and +4/+8) by Pop/Push where beneficial */
	int offset = be_get_IncSP_offset(node);
	if ((offset != -8 || ia32_cg_config.use_add_esp_8) &&
	    (offset != -4 || ia32_cg_config.use_add_esp_4) &&
	    (offset != +4 || ia32_cg_config.use_sub_esp_4) &&
	    (offset != +8 || ia32_cg_config.use_sub_esp_8))
		return;

	ir_node *stack;

	if (offset < 0) {
		/* we need a free register for pop */
		const arch_register_t *reg = get_free_gp_reg(get_irn_irg(node));
		if (reg == NULL)
			return;

		dbg_info *dbgi  = get_irn_dbg_info(node);
		ir_node  *block = get_nodes_block(node);

		stack = be_get_IncSP_pred(node);
		stack = create_pop(dbgi, block, stack, node, reg);

		if (offset == -8) {
			stack = create_pop(dbgi, block, stack, node, reg);
		}
	} else {
		arch_register_t const *const esp = &ia32_registers[REG_ESP];

		dbg_info *dbgi  = get_irn_dbg_info(node);
		ir_node  *block = get_nodes_block(node);

		stack = be_get_IncSP_pred(node);
		stack = new_bd_ia32_PushEax(dbgi, block, stack);
		arch_set_irn_register(stack, esp);
		sched_add_before(node, stack);

		if (offset == +8) {
			stack = new_bd_ia32_PushEax(dbgi, block, stack);
			arch_set_irn_register(stack, esp);
			sched_add_before(node, stack);
		}
	}

	be_peephole_exchange(node, stack);
}

/**
 * Peephole optimization for ia32_Const nodes.
 */
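/* E.g. "movl $0, %eax" (5 bytes) becomes "xorl %eax, %eax" (2 bytes); this is
 * only done while no other node relies on the current flags value. */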
static void peephole_ia32_Const(ir_node *node)
{
	const ia32_immediate_attr_t *attr = get_ia32_immediate_attr_const(node);

	/* try to transform a mov 0, reg to xor reg reg */
	if (attr->imm.offset != 0 || attr->imm.entity != NULL)
		return;
	if (ia32_cg_config.use_mov_0)
		return;
	/* xor destroys the flags, so no-one must be using them */
	if (be_peephole_get_value(REG_EFLAGS) != NULL)
		return;

	const arch_register_t *reg = arch_get_irn_register(node);
	assert(be_peephole_get_reg_value(reg) == NULL);

	ir_node  *block = get_nodes_block(node);
	dbg_info *dbgi  = get_irn_dbg_info(node);
	ir_node  *xorn  = new_bd_ia32_Xor0(dbgi, block);
	arch_set_irn_register(xorn, reg);

	sched_add_before(node, xorn);

	copy_mark(node, xorn);
	be_peephole_exchange(node, xorn);
}

/** Returns true if the node's address-mode immediate is the plain constant @p val (no entity). */
static bool is_disp_const(ir_node const *const node, int32_t const val)
{
	return !get_ia32_am_ent(node) && get_ia32_am_offs_int(node) == val;
}

/**
 * Transforms a Lea into an Add or Shl if possible.
 */
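/* E.g. "lea (%eax,%ebx), %eax" becomes "addl %ebx, %eax",
 * "lea 1(%eax), %eax" becomes "incl %eax" (when use_incdec is enabled) and
 * "lea (,%ebx,4), %ebx" becomes "shll $2, %ebx". All of these clobber the
 * flags, hence the EFLAGS check below. */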
static void peephole_ia32_Lea(ir_node *node)
{
	assert(is_ia32_Lea(node));

	/* We can only do this if it is allowed to clobber the flags. */
	if (be_peephole_get_value(REG_EFLAGS))
		return;

	/* Frame entities should already be expressed in the offsets. */
	assert(get_ia32_attr_const(node)->am_imm.kind != X86_IMM_FRAMEOFFSET);

	/* We can transform Leas where the out register is the same as either the
	 * base or index register back to an Add or Shl. */
	ir_node                     *op1;
	ir_node                     *op2;
	ir_node               *const base    = get_irn_n(node, n_ia32_Lea_base);
	ir_node               *const index   = get_irn_n(node, n_ia32_Lea_index);
	arch_register_t const *const out_reg = arch_get_irn_register(node);
	if (out_reg == arch_get_irn_register(base)) {
		op1 = base;
		op2 = index;
	} else if (out_reg == arch_get_irn_register(index)) {
		op1 = index;
		op2 = base;
	} else {
#ifdef DEBUG_libfirm
		/* We shouldn't construct these in the first place. */
		if (is_ia32_NoReg_GP(base) && is_ia32_NoReg_GP(index))
			be_warningf(node, "found unoptimized Lea which is a Const");
#endif
		return; /* Neither base nor index use the same register as the Lea. */
	}

	ir_node       *res;
	bool     const has_2ops = !is_ia32_NoReg_GP(op2);
	bool     const has_disp = !is_disp_const(node, 0);
	unsigned const scale    = get_ia32_am_scale(node);
	if (scale == 0) {
		if (!has_2ops) {
			/* Lea base/index + disp -> Add+Imm. */
#ifdef DEBUG_libfirm
			if (!has_disp)
				be_warningf(node, "found unoptimized Lea which is a Copy");
#endif
			if (ia32_cg_config.use_incdec) {
				if (is_disp_const(node, 1)) {
					dbg_info *const dbgi  = get_irn_dbg_info(node);
					ir_node  *const block = get_nodes_block(node);
					res = new_bd_ia32_Inc(dbgi, block, op1);
					goto exchange;
				} else if (is_disp_const(node, -1)) {
					dbg_info *const dbgi  = get_irn_dbg_info(node);
					ir_node  *const block = get_nodes_block(node);
					res = new_bd_ia32_Dec(dbgi, block, op1);
					goto exchange;
				}
			}

			ir_graph          *const irg  = get_irn_irg(node);
			ia32_attr_t const *const attr = get_ia32_attr_const(node);
			op2 = ia32_create_Immediate_full(irg, &attr->am_imm);
		} else if (has_disp) {
			return; /* Lea has base, index and displacement. */
		}
		/* Lea base + index, base/index + disp -> Add. */
		dbg_info *const dbgi  = get_irn_dbg_info(node);
		ir_node  *const block = get_nodes_block(node);
		ir_graph *const irg   = get_irn_irg(node);
		ir_node  *const noreg = ia32_new_NoReg_gp(irg);
		ir_node  *const nomem = get_irg_no_mem(irg);
		res = new_bd_ia32_Add(dbgi, block, noreg, noreg, nomem, op1, op2);
		set_ia32_commutative(res);
	} else if (!has_2ops && !has_disp) {
		/* Lea index * scale -> Shl+Imm. */
		assert(op1 == index);
		dbg_info *const dbgi  = get_irn_dbg_info(node);
		ir_node  *const block = get_nodes_block(node);
		ir_graph *const irg   = get_irn_irg(node);
		ir_node  *const amt   = ia32_create_Immediate(irg, scale);
		res = new_bd_ia32_Shl(dbgi, block, op1, amt);
	} else {
		return; /* Lea has scaled index as well as base and/or displacement. */
	}

exchange:
	/* Replace the Lea by Add/Shl. */
	arch_set_irn_register(res, out_reg);
	SET_IA32_ORIG_NODE(res, node);
	sched_add_before(node, res);
	copy_mark(node, res);
	be_peephole_exchange(node, res);
}

/**
 * Splits an Imul mem, imm into a Load mem and Imul reg, imm if possible.
 */
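/* Roughly: "imull $3, 8(%esp), %eax" becomes "movl 8(%esp), %ecx" followed by
 * "imull $3, %ecx, %eax", where %ecx stands for whatever register is free. */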
static void peephole_ia32_ImulImm_split(ir_node *imul)
{
	/* Ignore anything that is not in the memory, imm form. */
	if (get_ia32_op_type(imul) != ia32_AddrModeS)
		return;
	/* we need a free register */
	const arch_register_t *reg = get_free_gp_reg(get_irn_irg(imul));
	if (reg == NULL)
		return;

	/* fine, we can rebuild it */
	ir_node *res = ia32_turn_back_am(imul);
	arch_set_irn_register(res, reg);
}
