/*
 * This file is part of libFirm.
 * Copyright (C) 2012 University of Karlsruhe.
 */

/**
 * @file
 * @brief       Implements several optimizations for IA32.
 * @author      Matthias Braun, Christian Wuerdig
 */
#include "debug.h"
#include "irnode.h"
#include "irprog_t.h"
#include "ircons.h"
#include "irtools.h"
#include "firm_types.h"
#include "iredges.h"
#include "tv.h"
#include "irgmod.h"
#include "irgwalk.h"
#include "heights.h"
#include "irprintf.h"
#include "irdump.h"
#include "panic.h"
#include "firmstat_t.h"

#include "be_t.h"
#include "bediagnostic.h"
#include "benode.h"
#include "besched.h"
#include "bepeephole.h"

#include "ia32_new_nodes.h"
#include "ia32_optimize.h"
#include "bearch_ia32_t.h"
#include "gen_ia32_regalloc_if.h"
#include "ia32_transform.h"
#include "ia32_architecture.h"
#include "util.h"

DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)

static void copy_mark(const ir_node *old, ir_node *newn)
{
	if (is_ia32_is_reload(old))
		set_ia32_is_reload(newn);
	if (is_ia32_is_spill(old))
		set_ia32_is_spill(newn);
	if (is_ia32_is_remat(old))
		set_ia32_is_remat(newn);
}

typedef enum produces_flag_t {
	produces_no_flag,
	produces_zero_sign,
	produces_zero_in_carry
} produces_flag_t;

/**
 * Return which usable flag the given node produces about the result.
 * That is the zero flag (ZF) and the sign flag (SF).
 * We do not check for carry (CF) or overflow (OF).
 *
 * @param node  the node to check
 * @param pn    the projection number of the used result
 */
static produces_flag_t check_produces_zero_sign(ir_node *node, unsigned pn)
{
	if (!is_ia32_irn(node))
		return produces_no_flag;

	ir_node *count;

	switch (get_ia32_irn_opcode(node)) {
		case iro_ia32_Adc:
		case iro_ia32_Add:
		case iro_ia32_And:
		case iro_ia32_Dec:
		case iro_ia32_Inc:
		case iro_ia32_Neg:
		case iro_ia32_Or:
		case iro_ia32_Popcnt:
		case iro_ia32_Sbb:
		case iro_ia32_Sub:
		case iro_ia32_Xor:
			break;

		case iro_ia32_ShlD:
		case iro_ia32_ShrD:
			assert((int)n_ia32_ShlD_count == (int)n_ia32_ShrD_count);
			count = get_irn_n(node, n_ia32_ShlD_count);
			goto check_shift_amount;

		case iro_ia32_Sar:
		case iro_ia32_Shl:
		case iro_ia32_Shr:
			assert((int)n_ia32_Shl_count == (int)n_ia32_Shr_count
					&& (int)n_ia32_Shl_count == (int)n_ia32_Sar_count);
			count = get_irn_n(node, n_ia32_Shl_count);
check_shift_amount:
			/* when shift count is zero the flags are not affected, so we can only
			 * do this for constants != 0 */
			if (!is_ia32_Immediate(count))
				return produces_no_flag;

			const ia32_immediate_attr_t *imm_attr = get_ia32_immediate_attr_const(count);
			if (imm_attr->imm.entity != NULL)
				return produces_no_flag;
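			/* Note that the hardware masks the shift count to 5 bits, so an
			 * immediate that is a non-zero multiple of 32 also shifts by zero
			 * and leaves the flags untouched. */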
			if ((imm_attr->imm.offset & 0x1f) == 0)
				return produces_no_flag;
			break;

		case iro_ia32_Mul:
			return pn == pn_ia32_Mul_res_high ?
				produces_zero_in_carry : produces_no_flag;

		default:
			return produces_no_flag;
	}

	return pn == pn_ia32_res ? produces_zero_sign : produces_no_flag;
}

/**
 * Replace Cmp(x, 0) by a Test(x, x)
 */
static void peephole_ia32_Cmp(ir_node *const node)
{
	if (get_ia32_op_type(node) != ia32_Normal)
		return;

	ir_node *const right = get_irn_n(node, n_ia32_Cmp_right);
	if (!is_ia32_Immediate(right))
		return;

	ia32_immediate_attr_t const *const imm = get_ia32_immediate_attr_const(right);
	if (imm->imm.entity != NULL || imm->imm.offset != 0)
		return;

	dbg_info *const dbgi         = get_irn_dbg_info(node);
	ir_node  *const block        = get_nodes_block(node);
	ir_graph *const irg          = get_irn_irg(node);
	ir_node  *const noreg        = ia32_new_NoReg_gp(irg);
	ir_node  *const nomem        = get_irg_no_mem(irg);
	ir_node  *const op           = get_irn_n(node, n_ia32_Cmp_left);
	int       const ins_permuted = get_ia32_attr(node)->ins_permuted;

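	/* Test(x, x) sets ZF and SF (and clears CF/OF) exactly like Cmp(x, 0),
	 * but needs no immediate byte, e.g. cmp $0, %eax takes at least 3 bytes
	 * while test %eax, %eax is only 2. */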
	ir_mode *const ls_mode = get_ia32_ls_mode(node);
	ir_node *const test    = get_mode_size_bits(ls_mode) == 8
		? new_bd_ia32_Test_8bit(dbgi, block, noreg, noreg, nomem, op, op, ins_permuted)
		: new_bd_ia32_Test     (dbgi, block, noreg, noreg, nomem, op, op, ins_permuted);
	set_ia32_ls_mode(test, ls_mode);

	arch_register_t const *const reg = arch_get_irn_register_out(node, pn_ia32_Cmp_eflags);
	arch_set_irn_register_out(test, pn_ia32_Test_eflags, reg);

	foreach_out_edge_safe(node, edge) {
		ir_node *const user = get_edge_src_irn(edge);

		if (is_Proj(user))
			exchange(user, test);
	}

	sched_add_before(node, test);
	copy_mark(node, test);
	be_peephole_exchange(node, test);
}

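/**
 * Checks whether the register has an addressable 8-bit low part:
 * only %eax, %ebx, %ecx and %edx (as %al/%bl/%cl/%dl) do.
 */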
static bool is_hl_register(arch_register_t const *const reg)
{
	return
		reg == &ia32_registers[REG_EAX] ||
		reg == &ia32_registers[REG_EBX] ||
		reg == &ia32_registers[REG_ECX] ||
		reg == &ia32_registers[REG_EDX];
}

/**
 * Peephole optimization for Test instructions.
 * - Remove the Test if an appropriate flag was already produced and is still live.
 * - Change a Test(x, c) to 8 bit if 0 <= c < 128 (the encoding is 3 bytes shorter).
 */
static void peephole_ia32_Test(ir_node *node)
{
	ir_node *left  = get_irn_n(node, n_ia32_Test_left);
	ir_node *right = get_irn_n(node, n_ia32_Test_right);

	if (left == right) { /* we need a test for 0 */
		ir_node *block = get_nodes_block(node);
		if (get_nodes_block(left) != block)
			return;

		unsigned pn = pn_ia32_res;
		ir_node *op = left;
		if (is_Proj(op)) {
			pn = get_Proj_num(op);
			op = get_Proj_pred(op);
		}

		/* walk schedule up and abort when we find left or some other node
		 * that destroys the flags */
		ir_node *schedpoint = node;
		for (;;) {
			schedpoint = sched_prev(schedpoint);
			if (schedpoint == op)
				break;
			if (arch_irn_is(schedpoint, modify_flags))
				return;
			if (schedpoint == block)
				panic("couldn't find left");
		}

		produces_flag_t produced = check_produces_zero_sign(op, pn);
		if (produced == produces_no_flag)
			return;

		/* make sure users only look at the sign/zero flag */
		foreach_out_edge(node, edge) {
			ir_node *user = get_edge_src_irn(edge);
			if (is_ia32_CMovcc(user) || is_ia32_Jcc(user) ||
			    is_ia32_Setcc(user) || is_ia32_SetccMem(user)) {
				x86_condition_code_t  cc  = get_ia32_condcode(user);

				if (cc == x86_cc_equal || cc == x86_cc_not_equal)
					continue;
				if (produced == produces_zero_sign
					&& (cc == x86_cc_sign || cc == x86_cc_not_sign)) {
					continue;
				}
			}
			return;
		}

		ir_mode *op_mode = get_ia32_ls_mode(op);
		if (op_mode == NULL)
			op_mode = get_irn_mode(op);

		/* Make sure we operate on the same bit size */
		if (get_mode_size_bits(op_mode) != get_mode_size_bits(get_ia32_ls_mode(node)))
			return;

		if (produced == produces_zero_in_carry) {
			/* patch users to look at the carry instead of the zero flag */
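			/* The only producer of this flag kind is the high result of a Mul:
			 * there "value == 0" is signalled by CF=0, so equal (ZF set) maps
			 * to above_equal (CF clear) and not_equal maps to below (CF set). */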
			foreach_out_edge(node, edge) {
				ir_node              *user = get_edge_src_irn(edge);
				x86_condition_code_t  cc   = get_ia32_condcode(user);

				switch (cc) {
				case x86_cc_equal:     cc = x86_cc_above_equal; break;
				case x86_cc_not_equal: cc = x86_cc_below;       break;
				default: panic("unexpected pn");
				}
				set_ia32_condcode(user, cc);
			}
		}

		if (get_irn_mode(op) != mode_T) {
			/* If there are other users, reroute them to result proj */
			if (get_irn_n_edges(op) != 2) {
				be_peephole_to_tuple(op);
			} else {
				set_irn_mode(op, mode_T);
			}
		} else {
			if (get_irn_n_edges(left) == 2)
				kill_node(left);
		}

		ir_node *const flags_proj = be_new_Proj_reg(op, pn_ia32_flags, &ia32_registers[REG_EFLAGS]);
		be_peephole_exchange(node, flags_proj);
	} else if (is_ia32_Immediate(right)) {
		ia32_immediate_attr_t const *const imm = get_ia32_immediate_attr_const(right);

		/* A test with an entity is rather strange, but better safe than sorry */
		if (imm->imm.entity != NULL)
			return;

		/*
		 * We have to take care that we end up with the same sign flag:
		 * testl(128, 128) -> SF=0
		 * testb(128, 128) -> SF=1
		 */
		unsigned offset = imm->imm.offset;
		if (get_ia32_op_type(node) == ia32_AddrModeS) {
			ia32_attr_t *const attr = get_ia32_attr(node);
			ir_graph    *const irg  = get_irn_irg(node);

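			/* The operand is read from memory, so we can address the single
			 * byte that holds the tested bits: shift the immediate down and
			 * bump the addressing-mode offset instead,
			 * e.g. testl $0x4000, x  ->  testb $0x40, x+1. */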
			if ((offset & 0xFFFFFF80) == 0) {
				/* attr->am_imm.offset += 0; */
			} else if ((offset & 0xFFFF80FF) == 0) {
				ir_node *const imm_node = ia32_create_Immediate(irg, offset >>  8);
				set_irn_n(node, n_ia32_Test_right, imm_node);
				attr->am_imm.offset += 1;
			} else if ((offset & 0xFF80FFFF) == 0) {
				ir_node *const imm_node = ia32_create_Immediate(irg, offset >> 16);
				set_irn_n(node, n_ia32_Test_right, imm_node);
				attr->am_imm.offset += 2;
			} else if ((offset & 0x00FFFFFF) == 0) {
				ir_node *const imm_node = ia32_create_Immediate(irg, offset >> 24);
				set_irn_n(node, n_ia32_Test_right, imm_node);
				attr->am_imm.offset += 3;
			} else {
				return;
			}
		} else if ((offset & 0xFFFFFF80) == 0) {
			arch_register_t const* const reg = arch_get_irn_register(left);
			if (!is_hl_register(reg))
				return;
		} else {
			return;
		}

		/* Technically we should build a Test8Bit because of the register
		 * constraints, but nobody changes registers at this point anymore. */
		set_ia32_ls_mode(node, mode_Bu);
	}
}

/**
 * AMD Athlon works faster when RET is not the destination of a
 * conditional jump or directly preceded by another jump instruction.
 * This can be avoided by placing a Rep prefix before the return.
 */
static void peephole_ia32_Return(ir_node *node)
{
	if (!ia32_cg_config.use_pad_return)
		return;

	/* check if this return is the first on the block */
	sched_foreach_non_phi_reverse_before(node, irn) {
		if (be_is_Start(irn))
			continue;
		/* arg, IncSP 0 nodes might occur, ignore these */
		if (be_is_IncSP(irn) && be_get_IncSP_offset(irn) == 0)
			continue;
		return;
	}

	/* ensure that the 3 byte return is generated */
	ia32_return_attr_t *attr = get_ia32_return_attr(node);
	attr->emit_pop = true;
}

/* only optimize up to 48 stores behind IncSPs */
#define MAXPUSH_OPTIMIZE    48

/**
 * Tries to create Pushes from IncSP, Store combinations.
 * The Stores are replaced by Pushes, the IncSP is modified
 * (possibly into IncSP 0, but not removed).
 */
static void peephole_IncSP_Store_to_push(ir_node *irn)
{
	int inc_ofs = be_get_IncSP_offset(irn);
	if (inc_ofs < 4)
		return;

	int      maxslot                  = -1;
	ir_node *stores[MAXPUSH_OPTIMIZE];

	memset(stores, 0, sizeof(stores));

	/*
	 * We first walk the schedule after the IncSP node as long as we find
	 * suitable Stores that could be transformed to a Push.
	 * We save them in the stores array, which is indexed by the frame
	 * offset/4 attached to the node.
	 */
	sched_foreach_after(irn, node) {
		/* it has to be a Store */
		if (!is_ia32_Store(node))
			break;

		/* it has to use our sp value */
		if (get_irn_n(node, n_ia32_base) != irn)
			continue;
		/* Store has to be attached to NoMem */
		ir_node *mem = get_irn_n(node, n_ia32_mem);
		if (!is_NoMem(mem))
			continue;

		/* unfortunately we can't support the full AMs possible for push at the
		 * moment. TODO: fix this */
		if (!is_ia32_NoReg_GP(get_irn_n(node, n_ia32_index)))
			break;

		int offset = get_ia32_am_offs_int(node);
		/* we should NEVER access uninitialized stack BELOW the current SP */
		assert(offset >= 0);

		/* storing at half-slots is bad */
		if ((offset & 3) != 0)
			break;

		if (inc_ofs - 4 < offset || offset >= MAXPUSH_OPTIMIZE * 4)
			continue;

		int storeslot = offset >> 2;

		/* storing into the same slot twice is bad (and shouldn't happen...) */
		if (stores[storeslot] != NULL)
			break;

		stores[storeslot] = node;
		if (storeslot > maxslot)
			maxslot = storeslot;
	}

	ir_node *curr_sp = irn;
	int      i;
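	/* Pushes can only cover a contiguous run of slots ending at slot 0, since
	 * they grow the stack downward from the shrunk IncSP; find the highest
	 * slot of that run. */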
	for (i = -1; i < maxslot; ++i) {
		if (stores[i + 1] == NULL)
			break;
	}

	/* walk through the Stores and create Pushes for them */
	ir_node  *const block      = get_nodes_block(irn);
	ir_graph *const irg        = get_irn_irg(irn);
	ir_node  *const noreg      = ia32_new_NoReg_gp(irg);
	ir_node        *first_push = NULL;
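	/* Each Push writes the 4 bytes just below its incoming stack pointer, so
	 * emitting the run from its highest slot down to slot 0 reproduces the
	 * original store layout while the remaining IncSP only reserves the rest. */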
	for (; i >= 0; --i) {
		ir_node  *const store = stores[i];
		dbg_info *const dbgi  = get_irn_dbg_info(store);
		ir_node  *const mem   = get_irn_n(store, n_ia32_mem);
		ir_node  *const val   = get_irn_n(store, n_ia32_unary_op);
		ir_node  *const push  = new_bd_ia32_Push(dbgi, block, noreg, noreg, mem, val, curr_sp, ia32_mode_gp);
		copy_mark(store, push);

		if (first_push == NULL)
			first_push = push;

		sched_add_after(skip_Proj(curr_sp), push);

		/* create stackpointer Proj */
		curr_sp = be_new_Proj_reg(push, pn_ia32_Push_stack, &ia32_registers[REG_ESP]);

		/* use the memproj now */
		be_peephole_exchange(store, push);

		inc_ofs -= 4;
	}

	edges_reroute_except(irn, curr_sp, first_push);
	be_set_IncSP_offset(irn, inc_ofs);
}

/**
 * Tries to create Pops from Load, IncSP combinations.
 * The Loads are replaced by Pops, the IncSP is modified
 * (possibly into IncSP 0, but not removed).
 */
static void peephole_Load_IncSP_to_pop(ir_node *irn)
{
	int inc_ofs = -be_get_IncSP_offset(irn);
	if (inc_ofs < 4)
		return;

	ir_node  *loads[MAXPUSH_OPTIMIZE];
	unsigned  regmask                 = 0;
	unsigned  copymask                = ~0;

	memset(loads, 0, sizeof(loads));

	/*
	 * We first walk the schedule before the IncSP node as long as we find
	 * suitable Loads that could be transformed to a Pop.
	 * We save them in the loads array, which is indexed by the frame offset/4
	 * attached to the node.
	 */
	int      maxslot = -1;
	ir_node *pred_sp = be_get_IncSP_pred(irn);
	sched_foreach_reverse_before(irn, node) {
		/* it has to be a Load */
		if (!is_ia32_Load(node)) {
			if (be_is_Copy(node)) {
				const arch_register_t *dreg = arch_get_irn_register(node);
				if (dreg->cls != &ia32_reg_classes[CLASS_ia32_gp]) {
					/* not a GP copy, ignore */
					continue;
				}
				const arch_register_t *sreg = arch_get_irn_register(be_get_Copy_op(node));
				if (regmask & copymask & (1 << sreg->index)) {
					break;
				}
				if (regmask & copymask & (1 << dreg->index)) {
					break;
				}
				/* we CAN skip Copies if neither the destination nor the source
				 * is in our regmask, i.e. none of our future Pops will overwrite them */
				regmask |= (1 << dreg->index) | (1 << sreg->index);
				copymask &= ~((1 << dreg->index) | (1 << sreg->index));
				continue;
			}
			break;
		}

		/* it has to use our predecessor sp value */
		if (get_irn_n(node, n_ia32_base) != pred_sp) {
			/* it would be ok if this load does not use a Pop result,
			 * but we do not check this */
			break;
		}

		/* should have NO index */
		if (!is_ia32_NoReg_GP(get_irn_n(node, n_ia32_index)))
			break;

		int offset = get_ia32_am_offs_int(node);
		/* we should NEVER access uninitialized stack BELOW the current SP */
		assert(offset >= 0);

		/* loading from half-slots is bad */
		if ((offset & 3) != 0)
			break;

		if (offset < 0 || offset >= MAXPUSH_OPTIMIZE * 4)
			continue;

		/* ignore those outside the possible window */
		if (offset > inc_ofs - 4)
			continue;

		int loadslot = offset >> 2;

		/* loading from the same slot twice is bad (and shouldn't happen...) */
		if (loads[loadslot] != NULL)
			break;

		const arch_register_t *dreg = arch_get_irn_register_out(node, pn_ia32_Load_res);
		if (regmask & (1 << dreg->index)) {
			/* this register is already used */
			break;
		}
		regmask |= 1 << dreg->index;

		loads[loadslot] = node;
		if (loadslot > maxslot)
			maxslot = loadslot;
	}

	if (maxslot < 0)
		return;

	/* find the first slot of the contiguous run ending at maxslot */
	int i;
	for (i = maxslot; i >= 0; --i) {
		ir_node *load = loads[i];

		if (load == NULL)
			break;
	}

	int ofs = inc_ofs - (maxslot + 1) * 4;
	inc_ofs = (i + 1) * 4;
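
	/* Slots 0..i ((i + 1) * 4 bytes) are freed by a fresh IncSP inserted
	 * before the Pops, slots i+1..maxslot by the Pops themselves, and the
	 * remaining ofs bytes stay with the original (shrunk) IncSP. */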
	/* create a new IncSP if needed */
	ir_node *const block = get_nodes_block(irn);
	if (inc_ofs > 0) {
		pred_sp = ia32_new_IncSP(block, pred_sp, -inc_ofs, be_get_IncSP_align(irn));
		sched_add_before(irn, pred_sp);
	}

	/* walk through the Loads and create Pops for them */
	for (++i; i <= maxslot; ++i) {
		ir_node               *load = loads[i];
		ir_node               *mem  = get_irn_n(load, n_ia32_mem);
		const arch_register_t *reg  = arch_get_irn_register_out(load, pn_ia32_Load_res);

		ir_node *pop = new_bd_ia32_Pop(get_irn_dbg_info(load), block, mem, pred_sp);
		arch_set_irn_register_out(pop, pn_ia32_Load_res, reg);

		copy_mark(load, pop);

		/* create stackpointer Proj */
		pred_sp = be_new_Proj_reg(pop, pn_ia32_Pop_stack, &ia32_registers[REG_ESP]);

		sched_add_before(irn, pop);
		be_peephole_exchange(load, pop);
	}

	be_set_IncSP_offset(irn, -ofs);
	be_set_IncSP_pred(irn, pred_sp);
}


/**
 * Find a free GP register if possible, else return NULL.
 */
static const arch_register_t *get_free_gp_reg(ir_graph *irg)
{
	be_irg_t *birg = be_birg_from_irg(irg);

	for (int i = 0; i < N_ia32_gp_REGS; ++i) {
		const arch_register_t *reg = &ia32_reg_classes[CLASS_ia32_gp].regs[i];
		if (!rbitset_is_set(birg->allocatable_regs, reg->global_index))
			continue;

		if (be_peephole_get_value(reg->global_index) == NULL)
			return reg;
	}

	return NULL;
}

/**
 * Creates a Pop instruction before the given schedule point.
 *
 * @param dbgi        debug info
 * @param block       the block
 * @param stack       the previous stack value
 * @param schedpoint  the new node is added before this node
 * @param reg         the register to pop
 *
 * @return the new stack value
 */
static ir_node *create_pop(dbg_info *dbgi, ir_node *block,
                           ir_node *stack, ir_node *schedpoint,
                           const arch_register_t *reg)
{
	ir_graph *const irg = get_irn_irg(block);
	ir_node  *const mem = get_irg_no_mem(irg);
	ir_node  *const pop = new_bd_ia32_Pop(dbgi, block, mem, stack);
	sched_add_before(schedpoint, pop);

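	/* The popped value itself is not used; the Keep merely gives the result
	 * Proj a user so that it does not end up dangling. */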
	ir_node *const val  = be_new_Proj_reg(pop, pn_ia32_Pop_res, reg);
	ir_node *const keep = be_new_Keep_one(val);
	sched_add_before(schedpoint, keep);

	return be_new_Proj_reg(pop, pn_ia32_Pop_stack, &ia32_registers[REG_ESP]);
}

/**
 * Optimize an IncSP by replacing it with Push/Pop.
 */
static void peephole_be_IncSP(ir_node *node)
{
	/* first optimize incsp->incsp combinations */
	if (be_peephole_IncSP_IncSP(node))
		return;

	/* transform IncSP->Store combinations to Push where possible */
	peephole_IncSP_Store_to_push(node);

	/* transform Load->IncSP combinations to Pop where possible */
	peephole_Load_IncSP_to_pop(node);

	/* replace IncSP -4/-8 by one or two Pops into a free register and
	 * IncSP +4/+8 by Pushes, when the target prefers that over add/sub esp */
	int offset = be_get_IncSP_offset(node);
	if ((offset != -8 || ia32_cg_config.use_add_esp_8) &&
	    (offset != -4 || ia32_cg_config.use_add_esp_4) &&
	    (offset != +4 || ia32_cg_config.use_sub_esp_4) &&
	    (offset != +8 || ia32_cg_config.use_sub_esp_8))
		return;

	ir_node *stack;

	if (offset < 0) {
		/* we need a free register for pop */
		const arch_register_t *reg = get_free_gp_reg(get_irn_irg(node));
		if (reg == NULL)
			return;

		dbg_info *dbgi  = get_irn_dbg_info(node);
		ir_node  *block = get_nodes_block(node);

		stack = be_get_IncSP_pred(node);
		stack = create_pop(dbgi, block, stack, node, reg);

		if (offset == -8) {
			stack = create_pop(dbgi, block, stack, node, reg);
		}
	} else {
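		/* Allocation case: push %eax encodes in a single byte and just moves
		 * esp down by 4; the pushed value is a don't-care, the Push is only
		 * used to adjust the stack pointer. */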
		arch_register_t const *const esp = &ia32_registers[REG_ESP];

		dbg_info *dbgi  = get_irn_dbg_info(node);
		ir_node  *block = get_nodes_block(node);

		stack = be_get_IncSP_pred(node);
		stack = new_bd_ia32_PushEax(dbgi, block, stack);
		arch_set_irn_register(stack, esp);
		sched_add_before(node, stack);

		if (offset == +8) {
			stack = new_bd_ia32_PushEax(dbgi, block, stack);
			arch_set_irn_register(stack, esp);
			sched_add_before(node, stack);
		}
	}

	be_peephole_exchange(node, stack);
}

/**
 * Peephole optimization for ia32_Const nodes.
 */
static void peephole_ia32_Const(ir_node *node)
{
	const ia32_immediate_attr_t *attr = get_ia32_immediate_attr_const(node);

	/* try to transform a mov 0, reg to xor reg reg */
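	/* xor %eax, %eax is 2 bytes where mov $0, %eax needs 5, but it clobbers
	 * the flags, hence the checks below. */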
	if (attr->imm.offset != 0 || attr->imm.entity != NULL)
		return;
	if (ia32_cg_config.use_mov_0)
		return;
	/* xor destroys the flags, so no-one must be using them */
	if (be_peephole_get_value(REG_EFLAGS) != NULL)
		return;

	const arch_register_t *reg = arch_get_irn_register(node);
	assert(be_peephole_get_reg_value(reg) == NULL);

	ir_node  *block = get_nodes_block(node);
	dbg_info *dbgi  = get_irn_dbg_info(node);
	ir_node  *xorn  = new_bd_ia32_Xor0(dbgi, block);
	arch_set_irn_register(xorn, reg);

	sched_add_before(node, xorn);

	copy_mark(node, xorn);
	be_peephole_exchange(node, xorn);
}

static bool is_disp_const(ir_node const *const node, int32_t const val)
{
	return !get_ia32_am_ent(node) && get_ia32_am_offs_int(node) == val;
}

/**
 * Transforms a Lea into an Add or Shl if possible.
 */
static void peephole_ia32_Lea(ir_node *node)
{
	assert(is_ia32_Lea(node));

	/* We can only do this if it is allowed to clobber the flags. */
	if (be_peephole_get_value(REG_EFLAGS))
		return;

	/* Frame entities should already be expressed in the offsets. */
	assert(get_ia32_attr_const(node)->am_imm.kind != X86_IMM_FRAMEOFFSET);

	/* We can transform Leas where the out register is the same as either the
	 * base or index register back to an Add or Shl. */
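	/* E.g. lea 4(%eax), %eax becomes add $4, %eax, and
	 * lea (,%ebx,4), %ebx becomes shl $2, %ebx. */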
	ir_node                     *op1;
	ir_node                     *op2;
	ir_node               *const base    = get_irn_n(node, n_ia32_Lea_base);
	ir_node               *const index   = get_irn_n(node, n_ia32_Lea_index);
	arch_register_t const *const out_reg = arch_get_irn_register(node);
	if (out_reg == arch_get_irn_register(base)) {
		op1 = base;
		op2 = index;
	} else if (out_reg == arch_get_irn_register(index)) {
		op1 = index;
		op2 = base;
	} else {
#ifdef DEBUG_libfirm
		/* We shouldn't construct these in the first place. */
		if (is_ia32_NoReg_GP(base) && is_ia32_NoReg_GP(index))
			be_warningf(node, "found unoptimized Lea which is a Const");
#endif
		return; /* Neither base nor index use the same register as the Lea. */
	}

	ir_node       *res;
	bool     const has_2ops = !is_ia32_NoReg_GP(op2);
	bool     const has_disp = !is_disp_const(node, 0);
	unsigned const scale    = get_ia32_am_scale(node);
	if (scale == 0) {
		if (!has_2ops) {
			/* Lea base/index + disp -> Add+Imm. */
#ifdef DEBUG_libfirm
			if (!has_disp)
				be_warningf(node, "found unoptimized Lea which is a Copy");
#endif
			if (ia32_cg_config.use_incdec) {
				if (is_disp_const(node, 1)) {
					dbg_info *const dbgi  = get_irn_dbg_info(node);
					ir_node  *const block = get_nodes_block(node);
					res = new_bd_ia32_Inc(dbgi, block, op1);
					goto exchange;
				} else if (is_disp_const(node, -1)) {
					dbg_info *const dbgi  = get_irn_dbg_info(node);
					ir_node  *const block = get_nodes_block(node);
					res = new_bd_ia32_Dec(dbgi, block, op1);
					goto exchange;
				}
			}
			ir_graph          *const irg  = get_irn_irg(node);
			ia32_attr_t const *const attr = get_ia32_attr_const(node);
			op2 = ia32_create_Immediate_full(irg, &attr->am_imm);
		} else if (has_disp) {
			return; /* Lea has base, index and displacement. */
		}
		/* Lea base + index, base/index + disp -> Add. */
		dbg_info *const dbgi  = get_irn_dbg_info(node);
		ir_node  *const block = get_nodes_block(node);
		ir_graph *const irg   = get_irn_irg(node);
		ir_node  *const noreg = ia32_new_NoReg_gp(irg);
		ir_node  *const nomem = get_irg_no_mem(irg);
		res = new_bd_ia32_Add(dbgi, block, noreg, noreg, nomem, op1, op2);
		set_ia32_commutative(res);
	} else if (!has_2ops && !has_disp) {
		/* Lea index * scale -> Shl+Imm. */
		assert(op1 == index);
		dbg_info *const dbgi  = get_irn_dbg_info(node);
		ir_node  *const block = get_nodes_block(node);
		ir_graph *const irg   = get_irn_irg(node);
		ir_node  *const amt   = ia32_create_Immediate(irg, scale);
		res = new_bd_ia32_Shl(dbgi, block, op1, amt);
	} else {
		return; /* Lea has scaled index as well as base and/or displacement. */
	}

exchange:
	/* Replace the Lea by Add/Shl. */
	arch_set_irn_register(res, out_reg);
	SET_IA32_ORIG_NODE(res, node);
	sched_add_before(node, res);
	copy_mark(node, res);
	be_peephole_exchange(node, res);
}

/**
 * Splits an Imul mem, imm into a Load mem and Imul reg, imm if possible.
 */
static void peephole_ia32_ImulImm_split(ir_node *imul)
{
	/* Ignore it unless it has the mem, imm form. */
	if (get_ia32_op_type(imul) != ia32_AddrModeS)
		return;
	/* we need a free register */
	const arch_register_t *reg = get_free_gp_reg(get_irn_irg(imul));
	if (reg == NULL)
		return;

	/* fine, we can rebuild it */
	ir_node *res = ia32_turn_back_am(imul);
	arch_set_irn_register(res, reg);
}

/**
 * Replace xorps r,r and xorpd r,r by pxor r,r
 */
static void peephole_ia32_xZero(ir_node *xorn)