/*
 * This file is part of libFirm.
 * Copyright (C) 2012 University of Karlsruhe.
 */

/**
 * @file
 * @brief       Performs lowering of perm nodes. Inserts copies to assure
 *              register constraints.
 * @author      Christian Wuerdig
 * @date        14.12.2005
 */
#include <stdlib.h>

#include "ircons.h"
#include "debug.h"
#include "xmalloc.h"
#include "irnodeset.h"
#include "irnodehashmap.h"
#include "irgmod.h"
#include "iredges_t.h"
#include "irgwalk.h"
#include "array.h"
#include "raw_bitset.h"
#include "adt/obstack.h"
#include "util.h"

#include "bearch.h"
#include "beirg.h"
#include "belower.h"
#include "benode.h"
#include "besched.h"
#include "bestat.h"
#include "bessaconstr.h"
#include "belive.h"

#undef KEEP_ALIVE_COPYKEEP_HACK

DEBUG_ONLY(static firm_dbg_module_t *dbg;)
DEBUG_ONLY(static firm_dbg_module_t *dbg_constr;)
DEBUG_ONLY(static firm_dbg_module_t *dbg_permmove;)

/** Associates an ir_node with its copy and CopyKeep. */
typedef struct {
	ir_nodeset_t copies; /**< all non-spillable copies of this irn */
	const arch_register_class_t *cls;
} op_copy_assoc_t;

/** Environment for constraints. */
typedef struct {
	ir_graph        *irg;
	ir_nodehashmap_t op_set;
	struct obstack   obst;
} constraint_env_t;

/** Lowering walker environment. */
typedef struct lower_env_t {
	bool             use_copies;
	struct obstack   obst;
	ir_nodehashmap_t live_regs; /**< maps Perm nodes to a raw bitset that
	                                 maps register indices to register state
	                                 at end of Perm's block (used=0, free=1) */
} lower_env_t;

/** Holds a Perm register pair. */
typedef struct reg_pair_t {
	const arch_register_t *in_reg;    /**< a perm IN register */
	ir_node               *in_node;   /**< the in node to which the register belongs */

	const arch_register_t *out_reg;   /**< a perm OUT register */
	ir_node               *out_node;  /**< the out node to which the register belongs */
} reg_pair_t;

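/** Marks the register assigned to @p irn as free or used in the raw bitset
 *  @p free_regs (indexed by global register index); non-data nodes are ignored. */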
static void set_reg_free(unsigned *free_regs, ir_node const *irn, bool const reg_is_free)
{
	if (!mode_is_data(get_irn_mode(irn)))
		return;
	arch_register_t const *reg = arch_get_irn_register(irn);
	if (reg_is_free) {
		rbitset_set(free_regs, reg->global_index);
	} else {
		rbitset_clear(free_regs, reg->global_index);
	}
}

/* Save the register situation at the end of the Perm's block, i.e. mark all
 * registers holding live values as used.
 * As we do this before we modify the graph, we might mark more registers
 * as used than really necessary.
 */
static void mark_live_nodes_registers(const ir_node *irn, lower_env_t *env)
{
	ir_node                     *block     = get_nodes_block(irn);
	ir_graph                    *irg       = get_irn_irg(irn);
	arch_register_class_t const *reg_class = arch_get_irn_register(get_irn_n(irn, 0))->reg_class;
	arch_env_t const            *arch_env  = be_get_irg_arch_env(irg);
	be_irg_t                    *birg      = be_birg_from_irg(irg);
	unsigned                     n_regs    = arch_env->n_registers;
	unsigned                    *free_regs = rbitset_duplicate_obstack_alloc(&env->obst, birg->allocatable_regs, n_regs);

	be_lv_t *lv = be_get_irg_liveness(irg);
	assert(lv->sets_valid && "Live sets are invalid");
	be_lv_foreach_cls(lv, block, be_lv_state_end, reg_class, live) {
		set_reg_free(free_regs, live, false);
	}

	ir_nodehashmap_insert(&env->live_regs, (ir_node*)irn, free_regs);
}

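/** Walker: saves the end-of-block register usage for every Perm node found. */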
static void live_nodes_registers_walker(ir_node *irn, void *env)
{
	if (!be_is_Perm(irn))
		return;

	mark_live_nodes_registers(irn, (lower_env_t*)env);
}

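/** Tries to find a register of the Perm's register class that holds no value
 *  still needed between the Perm and the end of its block, so it can serve as
 *  a temporary for copy-based lowering. Returns NULL if copies are disabled
 *  or no such register exists. */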
static arch_register_t const *get_free_register(ir_node *const perm, lower_env_t *env)
{
	if (!env->use_copies)
		return NULL;

	ir_node                     *block     = get_nodes_block(perm);
	ir_graph                    *irg       = get_irn_irg(perm);
	arch_register_class_t const *reg_class = arch_get_irn_register(get_irn_n(perm, 0))->reg_class;
	arch_env_t const            *arch_env  = be_get_irg_arch_env(irg);
	unsigned                     n_regs    = arch_env->n_registers;
	unsigned                    *free_regs = (unsigned*)ir_nodehashmap_get(arch_register_t const, &env->live_regs, perm);

	sched_foreach_reverse(block, node) {
		if (is_Phi(node))
			break;

		/* If we later implement first the chains and then the cycles
		   of the Perm, we *cannot* regard the Perm's own outputs as
		   free registers. */
		bool const reg_is_free = perm != node;
		if (get_irn_mode(node) == mode_T) {
			foreach_out_edge(node, edge) {
				ir_node *proj = get_edge_src_irn(edge);
				set_reg_free(free_regs, proj, reg_is_free);
			}
		} else {
			set_reg_free(free_regs, node, reg_is_free);
		}

		foreach_irn_in(node, i, in) {
			set_reg_free(free_regs, in, false);
		}

		if (perm == node)
			break;
	}

	rbitset_foreach(free_regs, n_regs, free_idx) {
		arch_register_t const *free_reg = &arch_env->registers[free_idx];
		if (free_reg->reg_class != reg_class)
			continue;

		return free_reg;
	}

	return NULL;
}

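/** Returns true if @p a and @p b carry the same value, i.e. one is a Copy or
 *  CopyKeep of the other. */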
static bool is_same_value(const ir_node *a, const ir_node *b)
{
	return (be_is_Copy(a) && be_get_Copy_op(a) == b)
	    || (be_is_CopyKeep(a) && be_get_CopyKeep_op(a) == b)
	    || (be_is_Copy(b) && be_get_Copy_op(b) == a)
	    || (be_is_CopyKeep(b) && be_get_CopyKeep_op(b) == a);
}

/**
 * Lowers a perm node.  Resolves cycles and creates a bunch of
 * copy and swap operations to permute registers.
 *
 * @param perm        The perm node
 * @param env         The lowering environment, e.g. whether to use copies
 *                    if a free register is available
 */
static void lower_perm_node(ir_node *const perm, lower_env_t *env)
{
	DBG((dbg, LEVEL_1, "lowering %+F\n", perm));

	bool const                         use_copies = env->use_copies;
	arch_register_class_t const *const cls        = arch_get_irn_register_req_out(perm, 0)->cls;
	unsigned                     const n_regs     = cls->n_regs;
	/* Registers used as inputs of the Perm. */
	unsigned                    *const inregs     = rbitset_alloca(n_regs);
	/* Map from register index to pair with this register as output. */
	reg_pair_t                 **const oregmap    = ALLOCANZ(reg_pair_t*, n_regs);
	size_t                       const arity      = get_irn_arity(perm);
	reg_pair_t                  *const pairs      = ALLOCAN(reg_pair_t, arity);
	reg_pair_t                        *pair       = pairs;
	arch_register_t const             *free_reg   = NULL;

	/* Collect all input-output pairs of the Perm. */
	foreach_out_edge_safe(perm, edge) {
		ir_node               *const out  = get_edge_src_irn(edge);
		unsigned               const pos  = get_Proj_proj(out);
		ir_node               *const in   = get_irn_n(perm, pos);
		arch_register_t const *const ireg = arch_get_irn_register(in);
		arch_register_t const *const oreg = arch_get_irn_register_out(perm, pos);

		if (ireg == oreg) {
			DBG((dbg, LEVEL_2, "%+F: removing equal perm register pair (%+F, %+F, %s)\n", perm, in, out, oreg->name));
			exchange(out, in);
			continue;
		}

		pair->in_reg   = ireg;
		pair->in_node  = in;
		pair->out_reg  = oreg;
		pair->out_node = out;

		oregmap[oreg->index] = pair++;
		rbitset_set(inregs, ireg->index);
	}

	if (pair == pairs) {
		DBG((dbg, LEVEL_1, "%+F is identity\n", perm));
		goto done;
	}

	DBG((dbg, LEVEL_1, "%+F has %d unresolved constraints\n", perm, (int)(pair - pairs)));

	ir_node *const block = get_nodes_block(perm);
	/* Build Copy chains. */
	for (unsigned i = 0; i != n_regs; ++i) {
		if (rbitset_is_set(inregs, i))
			continue;
		/* Found end of chain, i.e. register which is written to, but not read.
		 * Generate copies by following the chain backwards. */
		unsigned k = i;
		for (reg_pair_t const *p; (p = oregmap[k]);) {
			oregmap[k] = NULL;
			ir_node *const copy = be_new_Copy(block, p->in_node);
			DBG((dbg, LEVEL_2, "%+F: inserting %+F for %+F from %s to %s\n", perm, copy, p->in_node, p->in_reg->name, p->out_reg->name));
			arch_set_irn_register(copy, p->out_reg);
			exchange(p->out_node, copy);
			sched_add_before(perm, copy);

			const unsigned new_k = p->in_reg->index;
			if (oregmap[new_k] == NULL) {
				/* The invariant of Perm nodes allows us to overwrite
				 * the first register in a chain with an arbitrary value. */
				free_reg = p->in_reg;
			}
			k = new_k;

			rbitset_clear(inregs, k);
		}
	}

	if (arity == 2 && !rbitset_is_empty(inregs, n_regs)) {
		DBG((dbg, LEVEL_1, "%+F is transposition\n", perm));
		return;
	}

	if (use_copies && free_reg == NULL) {
		free_reg = get_free_register(perm, env);
	}

	if (use_copies && free_reg != NULL) {
		/* Implement cycles using copies and the free register. */
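		/* Example: the cycle a->b, b->c, c->a (value in a goes to b, etc.)
		 * with free register t is implemented roughly as
		 *   t := c; c := b; b := a; a := t;
		 * i.e. with one extra copy per cycle instead of a chain of swaps. */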
		for (unsigned i = 0; i != n_regs; /* empty */) {
			if (!rbitset_is_set(inregs, i)) {
				++i;
				continue;
			}
			reg_pair_t *start = oregmap[i];

			ir_node *save_copy = be_new_Copy(block, start->in_node);
			arch_set_irn_register(save_copy, free_reg);
			sched_add_before(perm, save_copy);

			reg_pair_t *p = oregmap[start->in_reg->index];
			do {
				ir_node *copy = be_new_Copy(block, p->in_node);
				arch_set_irn_register(copy, p->out_reg);
				exchange(p->out_node, copy);
				sched_add_before(perm, copy);
				unsigned const in_idx = p->in_reg->index;
				rbitset_clear(inregs, in_idx);
				p = oregmap[in_idx];
			} while (p != start);

			rbitset_clear(inregs, start->in_reg->index);
			ir_node *restore_copy = be_new_Copy(block, save_copy);
			arch_set_irn_register(restore_copy, start->out_reg);
			exchange(start->out_node, restore_copy);
			sched_add_before(perm, restore_copy);
		}
	} else {
		/* Decompose cycles into transpositions.
		 *
		 * Use as many independent transpositions as possible and do not thread
		 * one value through all transpositions.
		 * I.e., for the first level of decomposition of a n-Perm do floor(n/2)
		 * transpositions. This puts floor(n/2) values into the right registers.
		 * Repeat this for all remaining values until all have the right
		 * register.
		 * This way no value is threaded through more than ceil(ld(n/2))
		 * transpositions (compared to one value being threaded through all
		 * transpositions using a naive decomposition).
		 *
		 * good:            bad:
		 * r1 r2 r3 r4 r5   r1 r2 r3 r4 r5
		 * +---+ +---+      +---+
		 *    +------+         +---+
		 *          +---+         +---+
		 * r2 r3 r4 r5 r1            +---+
		 *                  r2 r3 r4 r5 r1
		 */
		for (unsigned i = 0; i != n_regs;) {
			if (!rbitset_is_set(inregs, i)) {
				++i;
				continue;
			}
			reg_pair_t             *p     = oregmap[i];
			reg_pair_t const *const start = p;
			ir_mode          *const mode  = get_irn_mode(p->out_node);
			for (;;) {
				reg_pair_t const *const q = oregmap[p->in_reg->index];
				if (q == start)
					break;

				rbitset_clear(inregs, q->out_reg->index);
				p->in_reg = q->in_reg;

				ir_node *new_p;
				ir_node *new_q;
				if (is_same_value(p->in_node, q->in_node)) {
					new_p = q->in_node;
					new_q = p->in_node;
				} else {
					ir_node *const in[]  = { p->in_node, q->in_node };
					ir_node *const xchg  = be_new_Perm(cls, block, ARRAY_SIZE(in), in);
					DBG((dbg, LEVEL_2, "%+F: inserting %+F for %+F (%s) and %+F (%s)\n", perm, xchg, in[0], arch_get_irn_register(in[0])->name, in[1], arch_get_irn_register(in[1])->name));
					new_p = new_r_Proj(xchg, mode, 0);
					new_q = new_r_Proj(xchg, mode, 1);
					arch_set_irn_register_out(xchg, 0, q->in_reg);
					arch_set_irn_register_out(xchg, 1, q->out_reg);
					sched_add_before(perm, xchg);
				}
				p->in_node = new_p;
				exchange(q->out_node, new_q);

				p = oregmap[q->in_reg->index];
				if (p == start) {
					if (start->in_reg == start->out_reg) {
						rbitset_clear(inregs, q->in_reg->index);
						exchange(start->out_node, start->in_node);
					}
					break;
				}
			}
		}
	}

done:
	sched_remove(perm);
	kill_node(perm);
}

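/** Returns non-zero if @p irn has at least one user, i.e. a normal out edge. */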
static int has_irn_users(const ir_node *irn)
{
	return get_irn_out_edge_first_kind(irn, EDGE_KIND_NORMAL) != 0;
}

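/** Searches backwards in the schedule, starting right before @p irn, for a
 *  non-spillable Copy of @p op; returns NULL as soon as a non-Copy node is hit. */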
static ir_node *find_copy(ir_node *irn, ir_node *op)
{
	ir_node *cur_node;

	for (cur_node = irn;;) {
		cur_node = sched_prev(cur_node);
		if (! be_is_Copy(cur_node))
			return NULL;
		if (be_get_Copy_op(cur_node) == op && arch_irn_is(cur_node, dont_spill))
			return cur_node;
	}
}

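/** Inserts a non-spillable Copy of @p other_different plus a Keep/CopyKeep
 *  after @p irn so that the register allocator can fulfil the
 *  must_be_different constraint between @p irn and @p other_different. */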
static void gen_assure_different_pattern(ir_node *irn, ir_node *other_different, constraint_env_t *env)
{
	ir_nodehashmap_t            *op_set;
	ir_node                     *block;
	const arch_register_class_t *cls;
	ir_node                     *keep, *cpy;
	op_copy_assoc_t             *entry;

	arch_register_req_t const *const req = arch_get_irn_register_req(other_different);
	if (arch_register_req_is(req, ignore) ||
			!mode_is_data(get_irn_mode(other_different))) {
		DB((dbg_constr, LEVEL_1, "ignore constraint for %+F because other_irn is ignore or not a data node\n", irn));
		return;
	}

	op_set = &env->op_set;
	block  = get_nodes_block(irn);
	cls    = req->cls;

	/* Make a non-spillable copy of the different node; this is needed
	 * because the different irn could be in a block far, far away.
	 * The copy is optimized away later if it is not needed. */

	/* check if already exists such a copy in the schedule immediately before */
	cpy = find_copy(skip_Proj(irn), other_different);
	if (! cpy) {
		cpy = be_new_Copy(block, other_different);
		arch_set_irn_flags(cpy, arch_irn_flag_dont_spill);
		DB((dbg_constr, LEVEL_1, "created non-spillable %+F for value %+F\n", cpy, other_different));
	} else {
		DB((dbg_constr, LEVEL_1, "using already existing %+F for value %+F\n", cpy, other_different));
	}

	/* Add the Keep or CopyKeep and, in the CopyKeep case, reroute the
	 * users of the other_different irn. */
	if (has_irn_users(other_different)) {
		keep = be_new_CopyKeep_single(block, cpy, irn);
		be_node_set_reg_class_in(keep, 1, cls);
	} else {
		ir_node *in[2];

		in[0] = irn;
		in[1] = cpy;
		keep = be_new_Keep(block, 2, in);
	}

	DB((dbg_constr, LEVEL_1, "created %+F(%+F, %+F)\n\n", keep, irn, cpy));

	/* insert copy and keep into schedule */
	assert(sched_is_scheduled(irn) && "need schedule to assure constraints");
	if (! sched_is_scheduled(cpy))
		sched_add_before(skip_Proj(irn), cpy);
	sched_add_after(skip_Proj(irn), keep);

	/* insert the other different and its copies into the map */
	entry = ir_nodehashmap_get(op_copy_assoc_t, op_set, other_different);
	if (! entry) {
		entry      = OALLOC(&env->obst, op_copy_assoc_t);
		entry->cls = cls;
		ir_nodeset_init(&entry->copies);

		ir_nodehashmap_insert(op_set, other_different, entry);
	}

	/* insert copy */
	ir_nodeset_insert(&entry->copies, cpy);

	/* insert keep in case of CopyKeep */
	if (be_is_CopyKeep(keep))
		ir_nodeset_insert(&entry->copies, keep);
}

/**
 * Checks if the node has a must_be_different constraint on one of its outputs
 * and, if so, adds a Keep to assure the constraint.
 *
 * @param irn          the node to check
 * @param skipped_irn  if irn is a Proj node, its predecessor, else irn
 * @param env          the constraint environment
 */
static void assure_different_constraints(ir_node *irn, ir_node *skipped_irn, constraint_env_t *env)
{
	const arch_register_req_t *req = arch_get_irn_register_req(irn);

	if (arch_register_req_is(req, must_be_different)) {
		const unsigned other = req->other_different;
		int i;

		if (arch_register_req_is(req, should_be_same)) {
			const unsigned same = req->other_same;

			if (is_po2(other) && is_po2(same)) {
				int idx_other = ntz(other);
				int idx_same  = ntz(same);

				/*
				 * We can safely ignore a should_be_same x must_be_different y
				 * IFF both inputs are equal!
				 */
				if (get_irn_n(skipped_irn, idx_other) == get_irn_n(skipped_irn, idx_same)) {
					return;
				}
			}
		}
		for (i = 0; 1U << i <= other; ++i) {
			if (other & (1U << i)) {
				ir_node *different_from = get_irn_n(skipped_irn, i);
				gen_assure_different_pattern(irn, different_from, env);
			}
		}
	}
}

/**
 * Calls the functions to assure register constraints.
 *
 * @param block    The block to be checked
 * @param walk_env The walker environment
 */
static void assure_constraints_walker(ir_node *block, void *walk_env)
{
	constraint_env_t *env = (constraint_env_t*)walk_env;

	sched_foreach_reverse(block, irn) {
		be_foreach_value(irn, value,
			if (mode_is_data(get_irn_mode(value)))
				assure_different_constraints(value, irn, env);
		);
	}
}

/**
 * Melt all copykeeps pointing to the same node
 * (or Projs of the same node), copying the same operand.
 */
static void melt_copykeeps(constraint_env_t *cenv)
{
	ir_nodehashmap_iterator_t map_iter;
	ir_nodehashmap_entry_t    map_entry;

	/* for all */
	foreach_ir_nodehashmap(&cenv->op_set, map_entry, map_iter) {
		op_copy_assoc_t *entry = (op_copy_assoc_t*)map_entry.data;
		int     idx, num_ck;
		struct obstack obst;
		ir_node **ck_arr, **melt_arr;

		obstack_init(&obst);

		/* collect all copykeeps */
		num_ck = idx = 0;
		foreach_ir_nodeset(&entry->copies, cp, iter) {
			if (be_is_CopyKeep(cp)) {
				obstack_grow(&obst, &cp, sizeof(cp));
				++num_ck;
#ifdef KEEP_ALIVE_COPYKEEP_HACK
			} else {
				set_irn_mode(cp, mode_ANY);
				keep_alive(cp);
#endif
			}
		}

		/* compare each copykeep with all other copykeeps */
		ck_arr = (ir_node **)obstack_finish(&obst);
		for (idx = 0; idx < num_ck; ++idx) {
			ir_node *ref, *ref_mode_T;

			if (ck_arr[idx]) {
				int j, n_melt;
				ir_node *sched_pt = NULL;

				n_melt     = 1;
				ref        = ck_arr[idx];
				ref_mode_T = skip_Proj(get_irn_n(ref, 1));
				obstack_grow(&obst, &ref, sizeof(ref));

				DB((dbg_constr, LEVEL_1, "Trying to melt %+F:\n", ref));

				/* check for copykeeps pointing to the same mode_T node as the reference copykeep */
				for (j = 0; j < num_ck; ++j) {
					ir_node *cur_ck = ck_arr[j];

					if (j != idx && cur_ck && skip_Proj(get_irn_n(cur_ck, 1)) == ref_mode_T) {
						obstack_grow(&obst, &cur_ck, sizeof(cur_ck));
						ir_nodeset_remove(&entry->copies, cur_ck);
						DB((dbg_constr, LEVEL_1, "\t%+F\n", cur_ck));
						ck_arr[j] = NULL;
						++n_melt;
						sched_remove(cur_ck);
					}
				}
				ck_arr[idx] = NULL;

				/* check, if we found some candidates for melting */
				if (n_melt == 1) {
					DB((dbg_constr, LEVEL_1, "\tno candidate found\n"));
					continue;
				}

				ir_nodeset_remove(&entry->copies, ref);
				sched_remove(ref);

				melt_arr = (ir_node **)obstack_finish(&obst);
				/* melt all found copykeeps */
				ir_node **new_ck_in = ALLOCAN(ir_node*,n_melt);
				for (j = 0; j < n_melt; ++j) {
					new_ck_in[j] = get_irn_n(melt_arr[j], 1);

					/* now, we can kill the melted keep, except the */
					/* ref one, we still need some information      */
					if (melt_arr[j] != ref)
						kill_node(melt_arr[j]);
				}

				ir_node *const new_ck = be_new_CopyKeep(get_nodes_block(ref), be_get_CopyKeep_op(ref), n_melt, new_ck_in);
#ifdef KEEP_ALIVE_COPYKEEP_HACK
				keep_alive(new_ck);
#endif /* KEEP_ALIVE_COPYKEEP_HACK */

				/* set register class for all kept inputs */
				for (j = 1; j <= n_melt; ++j)
					be_node_set_reg_class_in(new_ck, j, entry->cls);

				ir_nodeset_insert(&entry->copies, new_ck);

				/* find scheduling point */
				sched_pt = ref_mode_T;
				do {
					/* just walk along the schedule until a non-Keep/CopyKeep node is found */
					sched_pt = sched_next(sched_pt);
				} while (be_is_Keep(sched_pt) || be_is_CopyKeep(sched_pt));

				sched_add_before(sched_pt, new_ck);
				DB((dbg_constr, LEVEL_1, "created %+F, scheduled before %+F\n", new_ck, sched_pt));

				/* finally: kill the reference copykeep */
				kill_node(ref);
			}
		}

		obstack_free(&obst, NULL);
	}
}

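/** Inserts Copy/Keep patterns for all must_be_different constraints in @p irg,
 *  melts CopyKeeps of the same mode_T node and rebuilds SSA form for the
 *  copied values. */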
void assure_constraints(ir_graph *irg)
{
	constraint_env_t          cenv;
	ir_nodehashmap_iterator_t map_iter;
	ir_nodehashmap_entry_t    map_entry;

	FIRM_DBG_REGISTER(dbg_constr, "firm.be.lower.constr");

	cenv.irg = irg;
	ir_nodehashmap_init(&cenv.op_set);
	obstack_init(&cenv.obst);

	irg_block_walk_graph(irg, NULL, assure_constraints_walker, &cenv);

	/* Melt copykeeps pointing to Projs of the same mode_T node
	 * and keeping the same operand. */
	melt_copykeeps(&cenv);

	/* for all */
	foreach_ir_nodehashmap(&cenv.op_set, map_entry, map_iter) {
		op_copy_assoc_t          *entry = (op_copy_assoc_t*)map_entry.data;
		size_t                    n     = ir_nodeset_size(&entry->copies);
		ir_node                 **nodes = ALLOCAN(ir_node*, n);
		be_ssa_construction_env_t senv;

		/* put the node in an array */
		DBG((dbg_constr, LEVEL_1, "introduce copies for %+F ", map_entry.node));

		/* collect all copies */
		n = 0;
		foreach_ir_nodeset(&entry->copies, cp, iter) {
			nodes[n++] = cp;
			DB((dbg_constr, LEVEL_1, ", %+F ", cp));
		}

		DB((dbg_constr, LEVEL_1, "\n"));

		/* introduce the copies for the operand and its copies */
		be_ssa_construction_init(&senv, irg);
		be_ssa_construction_add_copy(&senv, map_entry.node);
		be_ssa_construction_add_copies(&senv, nodes, n);
		be_ssa_construction_fix_users(&senv, map_entry.node);
		be_ssa_construction_destroy(&senv);

		/* Could be that not all CopyKeeps are really needed, */
		/* so we transform unnecessary ones into Keeps.       */
		foreach_ir_nodeset(&entry->copies, cp, iter) {
			if (be_is_CopyKeep(cp) && get_irn_n_edges(cp) < 1) {
				int      n   = get_irn_arity(cp);
				ir_node *keep;

				keep = be_new_Keep(get_nodes_block(cp), n, get_irn_in(cp) + 1);
				sched_replace(cp, keep);

				/* Set all ins (including the block) of the CopyKeep BAD to keep the verifier happy. */
				kill_node(cp);
			}
		}

		ir_nodeset_destroy(&entry->copies);
	}

	ir_nodehashmap_destroy(&cenv.op_set);
	obstack_free(&cenv.obst, NULL);
	be_invalidate_live_sets(irg);
}

/**
 * Push nodes that do not need to be permed through the Perm.
 * This is commonly a reload cascade at block ends.
 * @note This routine needs interference.
 * @note Probably, we can implement it a little more efficiently.
 *       Especially searching the frontier lazily might be better.
 *
 * @param perm The perm
 * @param env  The lowerer environment
 *
 * @return     1, if there is something left to perm over.
 *             0, if the Perm was removed completely.
 */
static int push_through_perm(ir_node *perm)
{
	ir_graph *irg     = get_irn_irg(perm);
	ir_node *bl       = get_nodes_block(perm);
	int  arity        = get_irn_arity(perm);
	int *map;
	int *proj_map;
	bitset_t *moved   = bitset_alloca(arity);
	int n_moved;
	int new_size;
	ir_node *frontier = bl;
	int i, n;

	/* get some Proj and find out the register class of that Proj. */
	ir_node                     *one_proj = get_edge_src_irn(get_irn_out_edge_first_kind(perm, EDGE_KIND_NORMAL));
	const arch_register_class_t *cls      = arch_get_irn_reg_class(one_proj);
	assert(is_Proj(one_proj));

	(void)irg;
	DB((dbg_permmove, LEVEL_1, "perm move %+F irg %+F\n", perm, irg));

	/* Find the point in the schedule after which the
	 * potentially movable nodes must be defined.
	 * A Perm will only be pushed up to first instruction
	 * which lets an operand of itself die.
	 * If we would allow to move the Perm above this instruction,
	 * the former dead operand would be live now at the point of
	 * the Perm, increasing the register pressure by one.
	 */
	sched_foreach_reverse_before(perm, irn) {
		if (is_Phi(irn)) {
			frontier = irn;
			goto found_front;
		}
		be_foreach_use(irn, cls, in_req_, op, op_req_,
			if (!be_value_live_after(op, perm)) {
				frontier = irn;
				goto found_front;
			}
		);
	}
found_front:

	DB((dbg_permmove, LEVEL_2, "\tfrontier: %+F\n", frontier));

	n_moved = 0;
	for (;;) {
		ir_node *const node = sched_prev(perm);
		if (node == frontier)
			break;

		be_foreach_use(node, cls, in_req, value, value_req,
			goto done;
		);

		be_foreach_definition(node, cls, value, req,
			if (req->type != arch_register_req_type_normal &&
			    req->type != arch_register_req_type_should_be_same)
				goto done;
		);

		DBG((dbg_permmove, LEVEL_2, "\tmoving %+F after %+F\n", node, perm));

		/* move the movable node in front of the Perm */
		sched_remove(node);
		sched_add_after(perm, node);

		/* Rewire Perm results to pushed through instruction. */
		foreach_out_edge_safe(perm, edge) {
			ir_node *const proj = get_edge_src_irn(edge);
			int      const pn   = get_Proj_proj(proj);
			ir_node *const in   = get_irn_n(perm, pn);
			if (in == node || (is_Proj(in) && get_Proj_pred(in) == node)) {
				/* Give it the proj's register. */
				arch_set_irn_register(in, arch_get_irn_register_out(perm, pn));
				/* Reroute all users of the proj to the moved node. */
				exchange(proj, in);
				bitset_set(moved, pn);
				++n_moved;
			}
		}
	}
done:

	/* well, we could not push anything through the perm */
	if (n_moved == 0)
		return 1;

	new_size = arity - n_moved;
	if (new_size == 0) {
		sched_remove(perm);
		kill_node(perm);
		return 0;
	}

	map      = ALLOCAN(int, new_size);
	proj_map = ALLOCAN(int, arity);
	memset(proj_map, -1, sizeof(proj_map[0]) * arity);
	n   = 0;
	for (i = 0; i < arity; ++i) {
		if (bitset_is_set(moved, i))
			continue;
		map[n]      = i;
		proj_map[i] = n;
		n++;
	}
	assert(n == new_size);
	foreach_out_edge(perm, edge) {
		ir_node *proj = get_edge_src_irn(edge);
		int      pn   = get_Proj_proj(proj);
		pn = proj_map[pn];
		assert(pn >= 0);
		set_Proj_proj(proj, pn);
	}

	be_Perm_reduce(perm, new_size, map);
	return 1;
}

/**
 * Calls the corresponding lowering function for the node.
 *
 * @param irn      The node to be checked for lowering
 * @param walk_env The walker environment
 */
static void lower_nodes_after_ra_walker(ir_node *irn, void *walk_env)
{
	if (!be_is_Perm(irn))
		return;

	const bool perm_stayed = push_through_perm(irn);
	if (perm_stayed) {
		lower_env_t *env = (lower_env_t*)walk_env;
		lower_perm_node(irn, env);
	}
}

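/** Lowers all Perm nodes of @p irg after register allocation; if @p use_copies
 *  is set, Perm cycles are implemented with copies whenever a free register
 *  is available. */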
void lower_nodes_after_ra(ir_graph *irg, bool use_copies)
{
	FIRM_DBG_REGISTER(dbg, "firm.be.lower");
	FIRM_DBG_REGISTER(dbg_permmove, "firm.be.lower.permmove");

	/* we will need interference */
	be_assure_live_chk(irg);

	lower_env_t env;
	env.use_copies = use_copies;

	if (use_copies) {
		ir_nodehashmap_init(&env.live_regs);
		obstack_init(&env.obst);
		be_assure_live_sets(irg);
		irg_walk_graph(irg, NULL, live_nodes_registers_walker, &env);
	}

	irg_walk_graph(irg, NULL, lower_nodes_after_ra_walker, &env);

	if (use_copies) {
		ir_nodehashmap_destroy(&env.live_regs);
		obstack_free(&env.obst, NULL);
		be_invalidate_live_sets(irg);
	}
}