/*
 * Copyright (C) 1995-2008 University of Karlsruhe.  All rights reserved.
 *
 * This file is part of libFirm.
 *
 * This file may be distributed and/or modified under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation and appearing in the file LICENSE.GPL included in the
 * packaging of this file.
 *
 * Licensees holding valid libFirm Professional Edition licenses may use
 * this file in accordance with the libFirm Commercial License
 * Agreement provided with the Software.
 *
 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE.
 */

/**
 * @file
 * @brief       Implements several optimizations for IA32.
 * @author      Matthias Braun, Christian Wuerdig
 * @version     $Id$
 */
#include "config.h"

#include "irnode.h"
#include "irprog_t.h"
#include "ircons.h"
#include "irtools.h"
#include "firm_types.h"
#include "iredges.h"
#include "tv.h"
#include "irgmod.h"
#include "irgwalk.h"
#include "height.h"
#include "irbitset.h"
#include "irprintf.h"
#include "error.h"

#include "../be_t.h"
#include "../beabi.h"
#include "../benode_t.h"
#include "../besched.h"
#include "../bepeephole.h"

#include "ia32_new_nodes.h"
#include "ia32_optimize.h"
#include "bearch_ia32_t.h"
#include "gen_ia32_regalloc_if.h"
#include "ia32_common_transform.h"
#include "ia32_transform.h"
#include "ia32_dbg_stat.h"
#include "ia32_util.h"
#include "ia32_architecture.h"

DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)

static ia32_code_gen_t *cg;

static void copy_mark(const ir_node *old, ir_node *new)
{
	if (is_ia32_is_reload(old))
		set_ia32_is_reload(new);
	if (is_ia32_is_spill(old))
		set_ia32_is_spill(new);
	if (is_ia32_is_remat(old))
		set_ia32_is_remat(new);
}

typedef enum produces_flag_t {
	produces_no_flag,
	produces_flag_zero,
	produces_flag_carry
} produces_flag_t;

/**
 * Return which usable flag the given node produces
 *
 * @param node  the node to check
 * @param pn    the projection number of the used result
 */
static produces_flag_t produces_test_flag(ir_node *node, int pn)
{
	ir_node                     *count;
	const ia32_immediate_attr_t *imm_attr;

	if (!is_ia32_irn(node))
		return produces_no_flag;

	switch (get_ia32_irn_opcode(node)) {
		case iro_ia32_Add:
		case iro_ia32_Adc:
		case iro_ia32_And:
		case iro_ia32_Or:
		case iro_ia32_Xor:
		case iro_ia32_Sub:
		case iro_ia32_Sbb:
		case iro_ia32_Neg:
		case iro_ia32_Inc:
		case iro_ia32_Dec:
			break;

		case iro_ia32_ShlD:
		case iro_ia32_ShrD:
			assert(n_ia32_ShlD_count == n_ia32_ShrD_count);
			count = get_irn_n(node, n_ia32_ShlD_count);
			goto check_shift_amount;

		case iro_ia32_Shl:
		case iro_ia32_Shr:
		case iro_ia32_Sar:
			assert(n_ia32_Shl_count == n_ia32_Shr_count
					&& n_ia32_Shl_count == n_ia32_Sar_count);
			count = get_irn_n(node, n_ia32_Shl_count);
check_shift_amount:
			/* when shift count is zero the flags are not affected, so we can only
			 * do this for constants != 0 */
			if (!is_ia32_Immediate(count))
				return produces_no_flag;

			imm_attr = get_ia32_immediate_attr_const(count);
			if (imm_attr->symconst != NULL)
				return produces_no_flag;
			if ((imm_attr->offset & 0x1f) == 0)
				return produces_no_flag;
			break;

		case iro_ia32_Mul:
			return pn == pn_ia32_Mul_res_high ?
				produces_flag_carry : produces_no_flag;

		default:
			return produces_no_flag;
	}

	return pn == pn_ia32_res ?
		produces_flag_zero : produces_no_flag;
}

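/* Illustration: arithmetic and logic instructions such as add, sub, and, or
 * and xor already set ZF according to their result, so e.g. in
 *
 *     add  eax, ebx
 *     test eax, eax     ; redundant, ZF is already valid
 *
 * the test can be dropped. Shifts only update the flags when the shift
 * amount is non-zero, hence the immediate check above; for Mul only the
 * carry flag (set when the high half of the result is non-zero) is usable.
 */
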
/**
 * Replace Cmp(x, 0) by a Test(x, x)
 */
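/* Illustration: for a comparison against zero both instructions compute the
 * same flags, but the Test encoding is shorter, e.g.
 *
 *     cmp  eax, 0       ; 83 F8 00   (3 bytes)
 *     test eax, eax     ; 85 C0      (2 bytes)
 */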
static void peephole_ia32_Cmp(ir_node *const node)
{
	ir_node                     *right;
	ia32_immediate_attr_t const *imm;
	dbg_info                    *dbgi;
	ir_node                     *block;
	ir_node                     *noreg;
	ir_node                     *nomem;
	ir_node                     *op;
	ia32_attr_t           const *attr;
	int                          ins_permuted;
	int                          cmp_unsigned;
	ir_node                     *test;
	arch_register_t       const *reg;
	ir_edge_t             const *edge;
	ir_edge_t             const *tmp;

	if (get_ia32_op_type(node) != ia32_Normal)
		return;

	right = get_irn_n(node, n_ia32_Cmp_right);
	if (!is_ia32_Immediate(right))
		return;

	imm = get_ia32_immediate_attr_const(right);
	if (imm->symconst != NULL || imm->offset != 0)
		return;

	dbgi         = get_irn_dbg_info(node);
	block        = get_nodes_block(node);
	noreg        = ia32_new_NoReg_gp(cg);
	nomem        = get_irg_no_mem(current_ir_graph);
	op           = get_irn_n(node, n_ia32_Cmp_left);
	attr         = get_irn_generic_attr(node);
	ins_permuted = attr->data.ins_permuted;
	cmp_unsigned = attr->data.cmp_unsigned;

	if (is_ia32_Cmp(node)) {
		test = new_bd_ia32_Test(dbgi, block, noreg, noreg, nomem,
		                        op, op, ins_permuted, cmp_unsigned);
	} else {
		test = new_bd_ia32_Test8Bit(dbgi, block, noreg, noreg, nomem,
		                            op, op, ins_permuted, cmp_unsigned);
	}
	set_ia32_ls_mode(test, get_ia32_ls_mode(node));

	reg = arch_irn_get_register(node, pn_ia32_Cmp_eflags);
	arch_irn_set_register(test, pn_ia32_Test_eflags, reg);

	foreach_out_edge_safe(node, edge, tmp) {
		ir_node *const user = get_edge_src_irn(edge);

		if (is_Proj(user))
			exchange(user, test);
	}

	sched_add_before(node, test);
	copy_mark(node, test);
	be_peephole_exchange(node, test);
}

/**
 * Peephole optimization for Test instructions.
 * - Remove the Test, if an appropriate flag was produced which is still live
 * - Change a Test(x, c) to 8Bit, if 0 <= c < 256 (3 byte shorter opcode)
 */
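/* Illustration for the second point: a constant below 256 only needs the
 * 8-bit immediate form, e.g.
 *
 *     test eax, 1       ; A9 01 00 00 00  (5 bytes)
 *     test al,  1       ; A8 01           (2 bytes)
 *
 * The low byte registers only exist for EAX..EDX, hence the register check
 * further down. */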
static void peephole_ia32_Test(ir_node *node)
{
	ir_node *left  = get_irn_n(node, n_ia32_Test_left);
	ir_node *right = get_irn_n(node, n_ia32_Test_right);

	assert(n_ia32_Test_left == n_ia32_Test8Bit_left
			&& n_ia32_Test_right == n_ia32_Test8Bit_right);

	if (left == right) { /* we need a test for 0 */
		ir_node         *block = get_nodes_block(node);
		int              pn    = pn_ia32_res;
		ir_node         *flags_proj;
		ir_mode         *flags_mode;
		ir_node         *schedpoint;
		const ir_edge_t *edge;

		if (get_nodes_block(left) != block)
			return;

		if (is_Proj(left)) {
			pn   = get_Proj_proj(left);
			left = get_Proj_pred(left);
		}

		/* happens rarely, but if it does the code will panic */
		if (is_ia32_Unknown_GP(left))
			return;

		/* walk the schedule up and abort when we find left or some other node
		 * that destroys the flags */
		schedpoint = node;
		for (;;) {
			schedpoint = sched_prev(schedpoint);
			if (schedpoint == left)
				break;
			if (arch_irn_is(schedpoint, modify_flags))
				return;
			if (schedpoint == block)
				panic("couldn't find left");
		}

		/* make sure only Lg/Eq tests are used */
		foreach_out_edge(node, edge) {
			ir_node *user = get_edge_src_irn(edge);
			int      pnc  = get_ia32_condcode(user);

			if(pnc != pn_Cmp_Eq && pnc != pn_Cmp_Lg) {
				return;
			}
		}

		switch (produces_test_flag(left, pn)) {
			case produces_flag_zero:
				break;

			case produces_flag_carry:
				foreach_out_edge(node, edge) {
					ir_node *user = get_edge_src_irn(edge);
					int      pnc  = get_ia32_condcode(user);

					switch (pnc) {
						case pn_Cmp_Eq: pnc = pn_Cmp_Ge | ia32_pn_Cmp_unsigned; break;
						case pn_Cmp_Lg: pnc = pn_Cmp_Lt | ia32_pn_Cmp_unsigned; break;
						default: panic("unexpected pn");
					}
					set_ia32_condcode(user, pnc);
				}
				break;

			default:
				return;
		}

		if (get_irn_mode(left) != mode_T) {
			set_irn_mode(left, mode_T);

			/* If there are other users, reroute them to result proj */
			if (get_irn_n_edges(left) != 2) {
				ir_node *res = new_r_Proj(block, left, mode_Iu, pn_ia32_res);

				edges_reroute(left, res, current_ir_graph);
				/* Reattach the result proj to left */
				set_Proj_pred(res, left);
			}
		}

		flags_mode = ia32_reg_classes[CLASS_ia32_flags].mode;
		flags_proj = new_r_Proj(block, left, flags_mode, pn_ia32_flags);
		arch_set_irn_register(flags_proj, &ia32_flags_regs[REG_EFLAGS]);

		assert(get_irn_mode(node) != mode_T);

		be_peephole_exchange(node, flags_proj);
	} else if (is_ia32_Immediate(right)) {
		ia32_immediate_attr_t const *const imm = get_ia32_immediate_attr_const(right);
		unsigned                           offset;

		/* A test with a symconst is rather strange, but better safe than sorry */
		if (imm->symconst != NULL)
			return;

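		/* Illustration: for a memory operand, if only one byte of the 32-bit
		 * immediate is non-zero, testing just that byte at an adjusted
		 * address yields the same ZF, e.g.
		 *
		 *     test dword [addr], 0x0000ff00  ->  test byte [addr+1], 0xff
		 *
		 * which is what the offset shifting below implements. */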
		offset = imm->offset;
		if (get_ia32_op_type(node) == ia32_AddrModeS) {
			ia32_attr_t *const attr = get_irn_generic_attr(node);

			if ((offset & 0xFFFFFF00) == 0) {
				/* attr->am_offs += 0; */
			} else if ((offset & 0xFFFF00FF) == 0) {
				ir_node *imm = ia32_create_Immediate(NULL, 0, offset >>  8);
				set_irn_n(node, n_ia32_Test_right, imm);
				attr->am_offs += 1;
			} else if ((offset & 0xFF00FFFF) == 0) {
				ir_node *imm = ia32_create_Immediate(NULL, 0, offset >> 16);
				set_irn_n(node, n_ia32_Test_right, imm);
				attr->am_offs += 2;
			} else if ((offset & 0x00FFFFFF) == 0) {
				ir_node *imm = ia32_create_Immediate(NULL, 0, offset >> 24);
				set_irn_n(node, n_ia32_Test_right, imm);
				attr->am_offs += 3;
			} else {
				return;
			}
		} else if (offset < 256) {
			arch_register_t const* const reg = arch_get_irn_register(left);

			if (reg != &ia32_gp_regs[REG_EAX] &&
					reg != &ia32_gp_regs[REG_EBX] &&
					reg != &ia32_gp_regs[REG_ECX] &&
					reg != &ia32_gp_regs[REG_EDX]) {
				return;
			}
		} else {
			return;
		}

		/* Technically we should build a Test8Bit because of the register
		 * constraints, but nobody changes registers at this point anymore. */
		set_ia32_ls_mode(node, mode_Bu);
	}
}

/**
 * AMD Athlon works faster when RET is not the destination of a conditional
 * jump or directly preceded by another jump instruction.
 * This can be avoided by placing a Rep prefix before the return.
 */
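/* Illustration: a plain ret is a single byte (C3); the pop form of the
 * return pads it to the three-byte encoding,
 *
 *     ret $0            ; C2 00 00   (3 bytes)
 *
 * similar in spirit to the well-known rep-ret (F3 C3) idiom mentioned
 * above. */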
static void peephole_ia32_Return(ir_node *node) {
	ir_node *block, *irn;

	if (!ia32_cg_config.use_pad_return)
		return;

	block = get_nodes_block(node);

	/* check if this return is the first on the block */
	sched_foreach_reverse_from(node, irn) {
		switch (get_irn_opcode(irn)) {
		case beo_Return:
			/* the return node itself, ignore */
			continue;
		case iro_Start:
		case beo_RegParams:
		case beo_Barrier:
			/* ignore these, no code is generated for them */
			continue;
		case beo_IncSP:
			/* arg, IncSP 0 nodes might occur, ignore these */
			if (be_get_IncSP_offset(irn) == 0)
				continue;
			return;
		case iro_Phi:
			continue;
		default:
			return;
		}
	}

	/* ensure that the 3-byte return is generated */
	be_Return_set_emit_pop(node, 1);
}

/* only optimize up to 48 stores behind IncSPs */
#define MAXPUSH_OPTIMIZE	48

/**
 * Tries to create Pushes from IncSP, Store combinations.
 * The Stores are replaced by Pushes, the IncSP is modified
 * (possibly into IncSP 0, but not removed).
 */
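/* Illustration:
 *
 *     sub esp, 8            ; IncSP 8
 *     mov [esp+4], ebx      ; Store
 *     mov [esp],   eax      ; Store
 * becomes
 *     push ebx
 *     push eax
 *
 * The Pushes adjust esp themselves, so the IncSP offset shrinks towards 0. */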
static void peephole_IncSP_Store_to_push(ir_node *irn)
{
	int              i;
	int              maxslot;
	int              inc_ofs;
	ir_node         *node;
	ir_node         *stores[MAXPUSH_OPTIMIZE];
	ir_node         *block;
	ir_graph        *irg;
	ir_node         *curr_sp;
	ir_mode         *spmode;
	ir_node         *first_push = NULL;
	ir_edge_t const *edge;
	ir_edge_t const *next;

	memset(stores, 0, sizeof(stores));

	assert(be_is_IncSP(irn));

	inc_ofs = be_get_IncSP_offset(irn);
	if (inc_ofs < 4)
		return;

	/*
	 * We first walk the schedule after the IncSP node as long as we find
	 * suitable Stores that could be transformed to a Push.
	 * We save them in the stores array, which is indexed by the frame
	 * offset/4 of the node.
	 */
	maxslot = -1;
	for (node = sched_next(irn); !sched_is_end(node); node = sched_next(node)) {
		ir_node *mem;
		int offset;
		int storeslot;

		/* it has to be a Store */
		if (!is_ia32_Store(node))
			break;

		/* it has to use our sp value */
		if (get_irn_n(node, n_ia32_base) != irn)
			continue;
		/* Store has to be attached to NoMem */
		mem = get_irn_n(node, n_ia32_mem);
		if (!is_NoMem(mem))
			continue;

		/* unfortunately we can't support the full AMs possible for push at the
		 * moment. TODO: fix this */
		if (!is_ia32_NoReg_GP(get_irn_n(node, n_ia32_index)))
			break;

		offset = get_ia32_am_offs_int(node);
		/* we should NEVER access uninitialized stack BELOW the current SP */
		assert(offset >= 0);

		/* storing at half-slots is bad */
		if ((offset & 3) != 0)
			break;

		if (inc_ofs - 4 < offset || offset >= MAXPUSH_OPTIMIZE * 4)
			continue;
		storeslot = offset >> 2;

		/* storing into the same slot twice is bad (and shouldn't happen...) */
		if (stores[storeslot] != NULL)
			break;

		stores[storeslot] = node;
		if (storeslot > maxslot)
			maxslot = storeslot;
	}

	curr_sp = irn;

	for (i = -1; i < maxslot; ++i) {
		if (stores[i + 1] == NULL)
			break;
	}

	/* walk through the Stores and create Pushs for them */
	block  = get_nodes_block(irn);
	spmode = get_irn_mode(irn);
	irg    = cg->irg;
	for (; i >= 0; --i) {
		const arch_register_t *spreg;
		ir_node *push;
		ir_node *val, *mem, *mem_proj;
		ir_node *store = stores[i];
		ir_node *noreg = ia32_new_NoReg_gp(cg);

		val = get_irn_n(store, n_ia32_unary_op);
		mem = get_irn_n(store, n_ia32_mem);
		spreg = arch_get_irn_register(curr_sp);

		push = new_bd_ia32_Push(get_irn_dbg_info(store), block, noreg, noreg, mem, val, curr_sp);
		copy_mark(store, push);

		if (first_push == NULL)
			first_push = push;

		sched_add_after(skip_Proj(curr_sp), push);

		/* create stackpointer Proj */
		curr_sp = new_r_Proj(block, push, spmode, pn_ia32_Push_stack);
		arch_set_irn_register(curr_sp, spreg);

		/* create memory Proj */
		mem_proj = new_r_Proj(block, push, mode_M, pn_ia32_Push_M);

		/* use the memproj now */
		be_peephole_exchange(store, mem_proj);

		inc_ofs -= 4;
	}

	foreach_out_edge_safe(irn, edge, next) {
		ir_node *const src = get_edge_src_irn(edge);
		int      const pos = get_edge_src_pos(edge);

		if (src == first_push)
			continue;

		set_irn_n(src, pos, curr_sp);
	}

	be_set_IncSP_offset(irn, inc_ofs);
}

#if 0
static void peephole_store_incsp(ir_node *store)
{
	dbg_info *dbgi;
	ir_node  *node;
	ir_node  *block;
	ir_node  *noreg;
	ir_node  *mem;
	ir_node  *push;
	ir_node  *val;
	ir_node  *base;
	ir_node  *index;
	ir_node  *am_base = get_irn_n(store, n_ia32_Store_base);
	if (!be_is_IncSP(am_base)
			|| get_nodes_block(am_base) != get_nodes_block(store))
		return;
	mem = get_irn_n(store, n_ia32_Store_mem);
	if (!is_ia32_NoReg_GP(get_irn_n(store, n_ia32_Store_index))
			|| !is_NoMem(mem))
		return;

	int incsp_offset = be_get_IncSP_offset(am_base);
	if (incsp_offset <= 0)
		return;

	/* we have to be at offset 0 */
	int my_offset = get_ia32_am_offs_int(store);
	if (my_offset != 0) {
		/* TODO here: find out whether there is a store with offset 0 before
		 * us and whether we can move it down to our place */
		return;
	}
	ir_mode *ls_mode = get_ia32_ls_mode(store);
	int my_store_size = get_mode_size_bytes(ls_mode);

	if (my_offset + my_store_size > incsp_offset)
		return;

	/* correctness checking:
		- no one else must write to that stack slot
		    (because after translation incsp won't allocate it anymore)
	*/
	sched_foreach_reverse_from(store, node) {
		int i, arity;

		if (node == am_base)
			break;

		/* make sure no one else can use the space on the stack */
		arity = get_irn_arity(node);
		for (i = 0; i < arity; ++i) {
			ir_node *pred = get_irn_n(node, i);
			if (pred != am_base)
				continue;

			if (i == n_ia32_base &&
					(get_ia32_op_type(node) == ia32_AddrModeS
					 || get_ia32_op_type(node) == ia32_AddrModeD)) {
				int      node_offset  = get_ia32_am_offs_int(node);
				ir_mode *node_ls_mode = get_ia32_ls_mode(node);
				int      node_size    = get_mode_size_bytes(node_ls_mode);
				/* overlapping with our position? abort */
				if (node_offset < my_offset + my_store_size
						&& node_offset + node_size >= my_offset)
					return;
				/* otherwise it's fine */
				continue;
			}

			/* strange use of esp: abort */
			return;
		}
	}

	/* all ok, change to push */
	dbgi  = get_irn_dbg_info(store);
	block = get_nodes_block(store);
	noreg = ia32_new_NoReg_gp(cg);
	val   = get_irn_n(store, n_ia32_Store_val);

	push  = new_bd_ia32_Push(dbgi, block, noreg, noreg, mem, val, am_base);

	create_push(dbgi, current_ir_graph, block, am_base, store);
}
#endif

/**
 * Return true if a mode can be stored in the GP register set
 */
static inline int mode_needs_gp_reg(ir_mode *mode) {
	if (mode == mode_fpcw)
		return 0;
	if (get_mode_size_bits(mode) > 32)
		return 0;
	return mode_is_int(mode) || mode_is_reference(mode) || mode == mode_b;
}

/**
 * Tries to create Pops from Load, IncSP combinations.
 * The Loads are replaced by Pops, the IncSP is modified
 * (possibly into IncSP 0, but not removed).
 */
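/* Illustration, the mirror image of the Push optimization:
 *
 *     mov eax, [esp]        ; Load
 *     mov ebx, [esp+4]      ; Load
 *     add esp, 8            ; IncSP -8
 * becomes
 *     pop eax
 *     pop ebx
 */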
static void peephole_Load_IncSP_to_pop(ir_node *irn)
{
	const arch_register_t *esp = &ia32_gp_regs[REG_ESP];
	int      i, maxslot, inc_ofs, ofs;
	ir_node  *node, *pred_sp, *block;
	ir_node  *loads[MAXPUSH_OPTIMIZE];
	ir_graph *irg;
	unsigned regmask = 0;
	unsigned copymask = ~0;

	memset(loads, 0, sizeof(loads));
	assert(be_is_IncSP(irn));

	inc_ofs = -be_get_IncSP_offset(irn);
	if (inc_ofs < 4)
		return;

	/*
	 * We first walk the schedule before the IncSP node as long as we find
	 * suitable Loads that could be transformed to a Pop.
	 * We save them in the loads array, which is indexed by the frame
	 * offset/4 of the node.
	 */
	maxslot = -1;
	pred_sp = be_get_IncSP_pred(irn);
	for (node = sched_prev(irn); !sched_is_end(node); node = sched_prev(node)) {
		int offset;
		int loadslot;
		const arch_register_t *sreg, *dreg;

		/* it has to be a Load */
		if (!is_ia32_Load(node)) {
			if (be_is_Copy(node)) {
				if (!mode_needs_gp_reg(get_irn_mode(node))) {
					/* not a GP copy, ignore */
					continue;
				}
				dreg = arch_get_irn_register(node);
				sreg = arch_get_irn_register(be_get_Copy_op(node));
				if (regmask & copymask & (1 << sreg->index)) {
					break;
				}
				if (regmask & copymask & (1 << dreg->index)) {
					break;
				}
				/* we CAN skip Copies if neither the destination nor the source
				 * is in our regmask, i.e. none of our future Pops will overwrite it */
				regmask |= (1 << dreg->index) | (1 << sreg->index);
				copymask &= ~((1 << dreg->index) | (1 << sreg->index));
				continue;
			}
			break;
		}

		/* we can handle only GP loads */
686
		if (!mode_needs_gp_reg(get_ia32_ls_mode(node)))
687
688
689
			continue;

		/* it has to use our predecessor sp value */
		if (get_irn_n(node, n_ia32_base) != pred_sp) {
			/* it would be ok if this load does not use a Pop result,
			 * but we do not check this */
			break;
		}

		/* should have NO index */
		if (!is_ia32_NoReg_GP(get_irn_n(node, n_ia32_index)))
			break;

		offset = get_ia32_am_offs_int(node);
		/* we should NEVER access uninitialized stack BELOW the current SP */
		assert(offset >= 0);

		/* storing at half-slots is bad */
		if ((offset & 3) != 0)
			break;

		if (offset < 0 || offset >= MAXPUSH_OPTIMIZE * 4)
			continue;
		/* ignore those outside the possible window */
		if (offset > inc_ofs - 4)
			continue;
		loadslot = offset >> 2;

		/* loading from the same slot twice is bad (and shouldn't happen...) */
		if (loads[loadslot] != NULL)
			break;

		dreg = arch_irn_get_register(node, pn_ia32_Load_res);
		if (regmask & (1 << dreg->index)) {
			/* this register is already used */
			break;
		}
		regmask |= 1 << dreg->index;

		loads[loadslot] = node;
		if (loadslot > maxslot)
			maxslot = loadslot;
	}

	if (maxslot < 0)
		return;

	/* find the first slot */
	for (i = maxslot; i >= 0; --i) {
		ir_node *load = loads[i];

		if (load == NULL)
			break;
	}

	ofs = inc_ofs - (maxslot + 1) * 4;
	inc_ofs = (i+1) * 4;

	/* create a new IncSP if needed */
	block = get_nodes_block(irn);
	irg   = cg->irg;
	if (inc_ofs > 0) {
		pred_sp = be_new_IncSP(esp, block, pred_sp, -inc_ofs, be_get_IncSP_align(irn));
		sched_add_before(irn, pred_sp);
	}

	/* walk through the Loads and create Pops for them */
	for (++i; i <= maxslot; ++i) {
		ir_node *load = loads[i];
		ir_node *mem, *pop;
		const ir_edge_t *edge, *tmp;
		const arch_register_t *reg;

		mem = get_irn_n(load, n_ia32_mem);
		reg = arch_irn_get_register(load, pn_ia32_Load_res);

		pop = new_bd_ia32_Pop(get_irn_dbg_info(load), block, mem, pred_sp);
		arch_irn_set_register(pop, pn_ia32_Load_res, reg);

		copy_mark(load, pop);

		/* create stackpointer Proj */
		pred_sp = new_r_Proj(block, pop, mode_Iu, pn_ia32_Pop_stack);
		arch_set_irn_register(pred_sp, esp);

		sched_add_before(irn, pop);

		/* rewire now */
		foreach_out_edge_safe(load, edge, tmp) {
			ir_node *proj = get_edge_src_irn(edge);

			set_Proj_pred(proj, pop);
		}

		/* we can remove the Load now */
		sched_remove(load);
		kill_node(load);
	}

	be_set_IncSP_offset(irn, -ofs);
	be_set_IncSP_pred(irn, pred_sp);
}


/**
 * Find a free GP register if possible, else return NULL.
 */
static const arch_register_t *get_free_gp_reg(void)
{
	int i;

	for(i = 0; i < N_ia32_gp_REGS; ++i) {
		const arch_register_t *reg = &ia32_gp_regs[i];
		if(arch_register_type_is(reg, ignore))
			continue;

		if(be_peephole_get_value(CLASS_ia32_gp, i) == NULL)
			return &ia32_gp_regs[i];
	}

	return NULL;
}

/**
 * Creates a Pop instruction before the given schedule point.
 *
 * @param dbgi        debug info
 * @param block       the block
 * @param stack       the previous stack value
 * @param schedpoint  the new node is added before this node
 * @param reg         the register to pop
 *
 * @return the new stack value
 */
static ir_node *create_pop(dbg_info *dbgi, ir_node *block,
                           ir_node *stack, ir_node *schedpoint,
                           const arch_register_t *reg)
{
	const arch_register_t *esp = &ia32_gp_regs[REG_ESP];
	ir_node *pop;
	ir_node *keep;
	ir_node *val;
	ir_node *in[1];

	pop   = new_bd_ia32_Pop(dbgi, block, new_NoMem(), stack);

	stack = new_r_Proj(block, pop, mode_Iu, pn_ia32_Pop_stack);
	arch_set_irn_register(stack, esp);
	val   = new_r_Proj(block, pop, mode_Iu, pn_ia32_Pop_res);
	arch_set_irn_register(val, reg);

	sched_add_before(schedpoint, pop);

	in[0] = val;
	keep = be_new_Keep(&ia32_reg_classes[CLASS_ia32_gp], block, 1, in);
	sched_add_before(schedpoint, keep);

	return stack;
}

/**
 * Creates a Push instruction before the given schedule point.
 *
 * @param dbgi        debug info
 * @param block       the block
 * @param stack       the previous stack value
 * @param schedpoint  the new node is added before this node
 *
 * @return the new stack value
 */
static ir_node *create_push(dbg_info *dbgi, ir_node *block,
                            ir_node *stack, ir_node *schedpoint)
{
	const arch_register_t *esp = &ia32_gp_regs[REG_ESP];

	ir_node *val   = ia32_new_Unknown_gp(cg);
	ir_node *noreg = ia32_new_NoReg_gp(cg);
	ir_node *nomem = new_NoMem();
	ir_node *push  = new_bd_ia32_Push(dbgi, block, noreg, noreg, nomem, val, stack);
	sched_add_before(schedpoint, push);

	stack = new_r_Proj(block, push, mode_Iu, pn_ia32_Push_stack);
	arch_set_irn_register(stack, esp);

	return stack;
}

/**
 * Optimize an IncSP by replacing it with Push/Pop.
 */
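/* Illustration: a +/-4 or +/-8 adjustment is encoded more compactly through
 * a scratch register, e.g.
 *
 *     add esp, 4        ; 83 C4 04   (3 bytes)
 *     pop ecx           ; 59         (1 byte)
 *
 * which is why the shrinking case below needs a free GP register. */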
static void peephole_be_IncSP(ir_node *node)
{
	const arch_register_t *esp = &ia32_gp_regs[REG_ESP];
	const arch_register_t *reg;
	dbg_info              *dbgi;
	ir_node               *block;
	ir_node               *stack;
	int                    offset;

	/* first optimize incsp->incsp combinations */
	node = be_peephole_IncSP_IncSP(node);

	/* transform IncSP->Store combinations to Push where possible */
	peephole_IncSP_Store_to_push(node);

	/* transform Load->IncSP combinations to Pop where possible */
	peephole_Load_IncSP_to_pop(node);

	if (arch_get_irn_register(node) != esp)
		return;

	/* replace IncSP -4 by Pop freereg when possible */
	offset = be_get_IncSP_offset(node);
	if ((offset != -8 || ia32_cg_config.use_add_esp_8) &&
	    (offset != -4 || ia32_cg_config.use_add_esp_4) &&
	    (offset != +4 || ia32_cg_config.use_sub_esp_4) &&
	    (offset != +8 || ia32_cg_config.use_sub_esp_8))
		return;

	if (offset < 0) {
		/* we need a free register for pop */
		reg = get_free_gp_reg();
		if (reg == NULL)
			return;

		dbgi  = get_irn_dbg_info(node);
		block = get_nodes_block(node);
		stack = be_get_IncSP_pred(node);

		stack = create_pop(dbgi, block, stack, node, reg);

		if (offset == -8) {
			stack = create_pop(dbgi, block, stack, node, reg);
		}
	} else {
		dbgi  = get_irn_dbg_info(node);
		block = get_nodes_block(node);
		stack = be_get_IncSP_pred(node);
		stack = create_push(dbgi, block, stack, node);

		if (offset == +8) {
			stack = create_push(dbgi, block, stack, node);
		}
	}

	be_peephole_exchange(node, stack);
}

/**
 * Peephole optimisation for ia32_Const's
 */
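/* Illustration, the motivating encodings:
 *
 *     mov eax, 0        ; B8 00 00 00 00  (5 bytes)
 *     xor eax, eax      ; 31 C0           (2 bytes)
 *
 * xor clobbers EFLAGS, hence the flags liveness check below. */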
static void peephole_ia32_Const(ir_node *node)
{
	const ia32_immediate_attr_t *attr = get_ia32_immediate_attr_const(node);
	const arch_register_t       *reg;
	ir_node                     *block;
	dbg_info                    *dbgi;
	ir_node                     *xor;

	/* try to transform a mov 0, reg to xor reg reg */
	if (attr->offset != 0 || attr->symconst != NULL)
		return;
	if (ia32_cg_config.use_mov_0)
		return;
	/* xor destroys the flags, so no-one must be using them */
	if (be_peephole_get_value(CLASS_ia32_flags, REG_EFLAGS) != NULL)
		return;

	reg = arch_get_irn_register(node);
	assert(be_peephole_get_reg_value(reg) == NULL);

	/* create xor(reg, reg) */
	block = get_nodes_block(node);
	dbgi  = get_irn_dbg_info(node);
	xor   = new_bd_ia32_Xor0(dbgi, block);
	arch_set_irn_register(xor, reg);

	sched_add_before(node, xor);

	copy_mark(node, xor);
	be_peephole_exchange(node, xor);
}

static inline int is_noreg(ia32_code_gen_t *cg, const ir_node *node)
{
	return node == cg->noreg_gp;
}

static ir_node *create_immediate_from_int(int val)
{
	ir_graph *irg         = current_ir_graph;
	ir_node  *start_block = get_irg_start_block(irg);
	ir_node  *immediate
		= new_bd_ia32_Immediate(NULL, start_block, NULL, 0, 0, val);
	arch_set_irn_register(immediate, &ia32_gp_regs[REG_GP_NOREG]);

	return immediate;
}

static ir_node *create_immediate_from_am(const ir_node *node)
{
	ir_node   *block   = get_nodes_block(node);
	int        offset  = get_ia32_am_offs_int(node);
	int        sc_sign = is_ia32_am_sc_sign(node);
	const ia32_attr_t *attr = get_ia32_attr_const(node);
	int        sc_no_pic_adjust = attr->data.am_sc_no_pic_adjust;
	ir_entity *entity  = get_ia32_am_sc(node);
	ir_node   *res;

	res = new_bd_ia32_Immediate(NULL, block, entity, sc_sign, sc_no_pic_adjust,
	                            offset);
	arch_set_irn_register(res, &ia32_gp_regs[REG_GP_NOREG]);
	return res;
}

static int is_am_one(const ir_node *node)
{
	int        offset  = get_ia32_am_offs_int(node);
	ir_entity *entity  = get_ia32_am_sc(node);

	return offset == 1 && entity == NULL;
}

static int is_am_minus_one(const ir_node *node)
{
	int        offset  = get_ia32_am_offs_int(node);
	ir_entity *entity  = get_ia32_am_sc(node);

	return offset == -1 && entity == NULL;
}

/**
 * Transforms a LEA into an Add or SHL if possible.
 */
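/* Illustration: when the LEA writes one of its own source registers and the
 * flags are dead, plain arithmetic is shorter, e.g.
 *
 *     lea eax, [eax+ebx]    ->  add eax, ebx
 *     lea eax, [eax*4]      ->  shl eax, 2
 *     lea eax, [eax+1]      ->  inc eax      (with use_incdec)
 */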
static void peephole_ia32_Lea(ir_node *node)
{
	ir_node               *base;
	ir_node               *index;
	const arch_register_t *base_reg;
	const arch_register_t *index_reg;
	const arch_register_t *out_reg;
	int                    scale;
	int                    has_immediates;
	ir_node               *op1;
	ir_node               *op2;
	dbg_info              *dbgi;
	ir_node               *block;
	ir_node               *res;
	ir_node               *noreg;
	ir_node               *nomem;

	assert(is_ia32_Lea(node));

	/* we can only do this if it is allowed to clobber the flags */
	if(be_peephole_get_value(CLASS_ia32_flags, REG_EFLAGS) != NULL)
		return;

	base  = get_irn_n(node, n_ia32_Lea_base);
	index = get_irn_n(node, n_ia32_Lea_index);

	if(is_noreg(cg, base)) {
		base     = NULL;
		base_reg = NULL;
	} else {
		base_reg = arch_get_irn_register(base);
	}
	if(is_noreg(cg, index)) {
		index     = NULL;
		index_reg = NULL;
	} else {
		index_reg = arch_get_irn_register(index);
	}

	if(base == NULL && index == NULL) {
		/* we shouldn't construct these in the first place... */
#ifdef DEBUG_libfirm
		ir_fprintf(stderr, "Optimisation warning: found immediate only lea\n");
#endif
		return;
	}

	out_reg = arch_get_irn_register(node);
	scale   = get_ia32_am_scale(node);
	assert(!is_ia32_need_stackent(node) || get_ia32_frame_ent(node) != NULL);
	/* check if we have immediates values (frame entities should already be
	 * expressed in the offsets) */
	if(get_ia32_am_offs_int(node) != 0 || get_ia32_am_sc(node) != NULL) {
		has_immediates = 1;
	} else {
		has_immediates = 0;
	}

	/* we can transform leas where the out register is the same as either the
	 * base or index register back to an Add or Shl */
	if(out_reg == base_reg) {
		if(index == NULL) {
#ifdef DEBUG_libfirm
			if(!has_immediates) {
				ir_fprintf(stderr, "Optimisation warning: found lea which is "
				           "just a copy\n");
			}
#endif
			op1 = base;
			goto make_add_immediate;
		}
		if(scale == 0 && !has_immediates) {
			op1 = base;
			op2 = index;
			goto make_add;
		}
		/* can't create an add */
		return;
	} else if(out_reg == index_reg) {
		if(base == NULL) {
			if(has_immediates && scale == 0) {
				op1 = index;
				goto make_add_immediate;
			} else if(!has_immediates && scale > 0) {
				op1 = index;
				op2 = create_immediate_from_int(scale);
				goto make_shl;
			} else if(!has_immediates) {
#ifdef DEBUG_libfirm
				ir_fprintf(stderr, "Optimisation warning: found lea which is "
				           "just a copy\n");
#endif
			}
		} else if(scale == 0 && !has_immediates) {
			op1 = index;
			op2 = base;
			goto make_add;
		}
		/* can't create an add */
		return;
	} else {
		/* can't create an add */
		return;
	}

make_add_immediate:
	if(ia32_cg_config.use_incdec) {
		if(is_am_one(node)) {
			dbgi  = get_irn_dbg_info(node);
			block = get_nodes_block(node);
			res   = new_bd_ia32_Inc(dbgi, block, op1);
			arch_set_irn_register(res, out_reg);
			goto exchange;
		}
		if(is_am_minus_one(node)) {
			dbgi  = get_irn_dbg_info(node);
			block = get_nodes_block(node);
			res   = new_bd_ia32_Dec(dbgi, block, op1);
			arch_set_irn_register(res, out_reg);
			goto exchange;
		}
	}
	op2 = create_immediate_from_am(node);

make_add:
	dbgi  = get_irn_dbg_info(node);
	block = get_nodes_block(node);
	noreg = ia32_new_NoReg_gp(cg);
	nomem = new_NoMem();
	res   = new_bd_ia32_Add(dbgi, block, noreg, noreg, nomem, op1, op2);
	arch_set_irn_register(res, out_reg);
	set_ia32_commutative(res);
	goto exchange;

make_shl:
	dbgi  = get_irn_dbg_info(node);
	block = get_nodes_block(node);
	noreg = ia32_new_NoReg_gp(cg);
	nomem = new_NoMem();
	res   = new_bd_ia32_Shl(dbgi, block, op1, op2);
	arch_set_irn_register(res, out_reg);
	goto exchange;

exchange:
	SET_IA32_ORIG_NODE(res, node);

	/* add new ADD/SHL to schedule */
	DBG_OPT_LEA2ADD(node, res);

	/* exchange the Add and the LEA */
	sched_add_before(