benewalloc.c 39.6 KB
Newer Older
Matthias Braun's avatar
Matthias Braun committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
/*
 * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
 *
 * This file is part of libFirm.
 *
 * This file may be distributed and/or modified under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation and appearing in the file LICENSE.GPL included in the
 * packaging of this file.
 *
 * Licensees holding valid libFirm Professional Edition licenses may use
 * this file in accordance with the libFirm Commercial License.
 * Agreement provided with the Software.
 *
 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE.
 */

/**
 * @file
 * @brief       New approach to allocation and copy coalescing
 * @author      Matthias Braun
 * @date        14.2.2009
 * @version     $Id$
 *
 * ... WE NEED A NAME FOR THIS ...
 *
 * Only a proof of concept at this moment...
 *
 * The idea is to allocate registers in 2 passes:
Michael Beck's avatar
Michael Beck committed
32
 * 1. A first pass to determine "preferred" registers for live-ranges. This
Matthias Braun's avatar
Matthias Braun committed
33
 *    calculates for each register and each live-range a value indicating
Michael Beck's avatar
Michael Beck committed
34
 *    the usefulness. (You can roughly think of the value as the negative
Matthias Braun's avatar
Matthias Braun committed
35
36
 *    costs needed for copies when the value is in the specific registers...)
 *
37
38
39
 * 2. Walk blocks and assigns registers in a greedy fashion. Preferring
 *    registers with high preferences. When register constraints are not met,
 *    add copies and split live-ranges.
Matthias Braun's avatar
Matthias Braun committed
40
41
 *
 * TODO:
42
43
44
 *  - make use of free registers in the permutate_values code
 *  - We have to pessimistically construct Phi_0s when not all predecessors
 *    of a block are known.
Matthias Braun's avatar
Matthias Braun committed
45
46
47
48
49
 *  - Phi color assignment should give bonus points towards registers already
 *    assigned at predecessors.
 *  - think about a smarter sequence of visiting the blocks. Sorted by
 *    execfreq might be good, or looptree from inner to outermost loops going
 *    over blocks in a reverse postorder
50
 *  - propagate preferences through Phis
Matthias Braun's avatar
Matthias Braun committed
51
52
53
54
 */
#include "config.h"

#include <float.h>
55
#include <stdbool.h>
Matthias Braun's avatar
Matthias Braun committed
56

57
58
#include "error.h"
#include "execfreq.h"
Matthias Braun's avatar
Matthias Braun committed
59
#include "ircons.h"
60
#include "irdom.h"
61
62
63
64
65
#include "iredges_t.h"
#include "irgraph_t.h"
#include "irgwalk.h"
#include "irnode_t.h"
#include "obst.h"
66
#include "raw_bitset.h"
Matthias Braun's avatar
Matthias Braun committed
67

68
69
#include "beabi.h"
#include "bechordal_t.h"
Matthias Braun's avatar
Matthias Braun committed
70
#include "be.h"
71
#include "beirg.h"
Matthias Braun's avatar
Matthias Braun committed
72
73
74
#include "belive_t.h"
#include "bemodule.h"
#include "benode_t.h"
75
76
#include "bera.h"
#include "besched.h"
77
#include "bespill.h"
78
#include "bespillutil.h"
Matthias Braun's avatar
Matthias Braun committed
79
80
81
82
#include "beverify.h"

#include "hungarian.h"

83
84
85
86
87
#define USE_FACTOR         1.0f
#define DEF_FACTOR         1.0f
#define NEIGHBOR_FACTOR    0.2f
#define AFF_SHOULD_BE_SAME 1.0f
#define AFF_PHI            1.0f
Matthias Braun's avatar
Matthias Braun committed
88
89
90
91
92
93
94

DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)

static struct obstack               obst;
static be_irg_t                    *birg;
static ir_graph                    *irg;
static const arch_register_class_t *cls;
95
static const arch_register_req_t   *default_cls_req;
Matthias Braun's avatar
Matthias Braun committed
96
97
98
static be_lv_t                     *lv;
static const ir_exec_freq          *execfreqs;
static unsigned                     n_regs;
99
static unsigned                    *normal_regs;
Matthias Braun's avatar
Matthias Braun committed
100

101
/** info about the current assignment for a register */
Matthias Braun's avatar
Matthias Braun committed
102
103
104
struct assignment_t {
	ir_node *value;            /**< currently assigned value */
};
105
typedef struct assignment_t assignment_t;
Matthias Braun's avatar
Matthias Braun committed
106

107
/** currently active assignments (while processing a basic block) */
Matthias Braun's avatar
Matthias Braun committed
108
109
static assignment_t *assignments;

110
111
112
113
/**
 * allocation information: last_uses, register preferences
 * the information is per firm-node.
 */
Matthias Braun's avatar
Matthias Braun committed
114
struct allocation_info_t {
115
116
117
118
	unsigned  last_uses;      /**< bitset indicating last uses (input pos) */
	ir_node  *current_value;  /**< copy of the value that should be used */
	ir_node  *original_value; /**< for copies point to original value */
	float     prefs[0];       /**< register preferences */
Matthias Braun's avatar
Matthias Braun committed
119
};
120
typedef struct allocation_info_t allocation_info_t;
Matthias Braun's avatar
Matthias Braun committed
121

122
/** helper datastructure used when sorting register preferences */
Matthias Braun's avatar
Matthias Braun committed
123
124
125
126
struct reg_pref_t {
	unsigned num;
	float    pref;
};
127
128
129
130
typedef struct reg_pref_t reg_pref_t;

/** per basic-block information */
struct block_info_t {
131
	bool         processed;       /**< indicate wether block is processed */
132
133
134
	assignment_t assignments[0];  /**< register assignments at end of block */
};
typedef struct block_info_t block_info_t;
Matthias Braun's avatar
Matthias Braun committed
135

136
137
138
139
/**
 * Get the allocation info for a node.
 * The info is allocated on the first visit of a node.
 */
Matthias Braun's avatar
Matthias Braun committed
140
141
142
static allocation_info_t *get_allocation_info(ir_node *node)
{
	allocation_info_t *info;
Christoph Mallon's avatar
Christoph Mallon committed
143
	if (!irn_visited_else_mark(node)) {
144
		size_t size = sizeof(info[0]) + n_regs * sizeof(info->prefs[0]);
Matthias Braun's avatar
Matthias Braun committed
145
146
		info = obstack_alloc(&obst, size);
		memset(info, 0, size);
147
148
		info->current_value  = node;
		info->original_value = node;
Matthias Braun's avatar
Matthias Braun committed
149
150
151
152
153
154
155
156
		set_irn_link(node, info);
	} else {
		info = get_irn_link(node);
	}

	return info;
}

157
158
159
160
161
162
163
164
/**
 * Get allocation information for a basic block
 */
static block_info_t *get_block_info(ir_node *block)
{
	block_info_t *info;

	assert(is_Block(block));
Christoph Mallon's avatar
Christoph Mallon committed
165
	if (!irn_visited_else_mark(block)) {
166
167
168
169
170
171
172
173
174
175
176
		size_t size = sizeof(info[0]) + n_regs * sizeof(info->assignments[0]);
		info = obstack_alloc(&obst, size);
		memset(info, 0, size);
		set_irn_link(block, info);
	} else {
		info = get_irn_link(block);
	}

	return info;
}

177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
/**
 * Get default register requirement for the current register class
 */
static const arch_register_req_t *get_default_req_current_cls(void)
{
	if (default_cls_req == NULL) {
		struct obstack      *obst = get_irg_obstack(irg);
		arch_register_req_t *req  = obstack_alloc(obst, sizeof(*req));
		memset(req, 0, sizeof(*req));

		req->type = arch_register_req_type_normal;
		req->cls  = cls;

		default_cls_req = req;
	}
	return default_cls_req;
}

195
196
197
198
199
/**
 * Link the allocation info of a node to a copy.
 * Afterwards, both nodes uses the same allocation info.
 * Copy must not have an allocation info assigned yet.
 *
200
 * @param copy   the node that gets the allocation info assigned
201
202
 * @param value  the original node
 */
203
static void mark_as_copy_of(ir_node *copy, ir_node *value)
Matthias Braun's avatar
Matthias Braun committed
204
{
205
	ir_node           *original;
206
207
208
	allocation_info_t *info      = get_allocation_info(value);
	allocation_info_t *copy_info = get_allocation_info(copy);

209
210
211
212
213
214
215
	/* find original value */
	original = info->original_value;
	if (original != value) {
		info = get_allocation_info(original);
	}

	assert(info->original_value == original);
216
217
218
219
	info->current_value = copy;

	/* the copy should not be linked to something else yet */
	assert(copy_info->original_value == copy);
220
221
222
	/* copy over allocation preferences */
	memcpy(copy_info->prefs, info->prefs, n_regs * sizeof(copy_info->prefs[0]));
	copy_info->original_value = original;
Matthias Braun's avatar
Matthias Braun committed
223
224
}

225
226
227
/**
 * Calculate the penalties for every register on a node and its live neighbors.
 *
228
229
230
231
 * @param live_nodes  the set of live nodes at the current position, may be NULL
 * @param penalty     the penalty to subtract from
 * @param limited     a raw bitset containing the limited set for the node
 * @param node        the node
232
 */
Matthias Braun's avatar
Matthias Braun committed
233
234
static void give_penalties_for_limits(const ir_nodeset_t *live_nodes,
                                      float penalty, const unsigned* limited,
235
                                      ir_node *node)
Matthias Braun's avatar
Matthias Braun committed
236
{
Michael Beck's avatar
Michael Beck committed
237
238
	ir_nodeset_iterator_t iter;
	unsigned              r;
Matthias Braun's avatar
Matthias Braun committed
239
	allocation_info_t     *info = get_allocation_info(node);
Michael Beck's avatar
Michael Beck committed
240
	ir_node               *neighbor;
Matthias Braun's avatar
Matthias Braun committed
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263

	/* give penalty for all forbidden regs */
	for (r = 0; r < n_regs; ++r) {
		if (rbitset_is_set(limited, r))
			continue;

		info->prefs[r] -= penalty;
	}

	/* all other live values should get a penalty for allowed regs */
	if (live_nodes == NULL)
		return;

	/* TODO: reduce penalty if there are multiple allowed registers... */
	penalty *= NEIGHBOR_FACTOR;
	foreach_ir_nodeset(live_nodes, neighbor, iter) {
		allocation_info_t *neighbor_info;

		/* TODO: if op is used on multiple inputs we might not do a
		 * continue here */
		if (neighbor == node)
			continue;

Christoph Mallon's avatar
Christoph Mallon committed
264
		neighbor_info = get_allocation_info(neighbor);
Matthias Braun's avatar
Matthias Braun committed
265
266
267
268
269
270
271
272
273
		for (r = 0; r < n_regs; ++r) {
			if (!rbitset_is_set(limited, r))
				continue;

			neighbor_info->prefs[r] -= penalty;
		}
	}
}

274
275
276
277
278
279
280
281
282
/**
 * Calculate the preferences of a definition for the current register class.
 * If the definition uses a limited set of registers, reduce the preferences
 * for the limited register on the node and its neighbors.
 *
 * @param live_nodes  the set of live nodes at the current node
 * @param weight      the weight
 * @param node        the current node
 */
Matthias Braun's avatar
Matthias Braun committed
283
284
285
static void check_defs(const ir_nodeset_t *live_nodes, float weight,
                       ir_node *node)
{
Michael Beck's avatar
Michael Beck committed
286
287
	const arch_register_req_t *req;

Matthias Braun's avatar
Matthias Braun committed
288
289
290
291
292
293
294
295
296
297
298
299
	if (get_irn_mode(node) == mode_T) {
		const ir_edge_t *edge;
		foreach_out_edge(node, edge) {
			ir_node *proj = get_edge_src_irn(edge);
			check_defs(live_nodes, weight, proj);
		}
		return;
	}

	if (!arch_irn_consider_in_reg_alloc(cls, node))
		return;

Michael Beck's avatar
Michael Beck committed
300
	req = arch_get_register_req_out(node);
Matthias Braun's avatar
Matthias Braun committed
301
302
303
304
305
306
307
308
309
310
311
312
313
314
	if (req->type & arch_register_req_type_limited) {
		const unsigned *limited = req->limited;
		float           penalty = weight * DEF_FACTOR;
		give_penalties_for_limits(live_nodes, penalty, limited, node);
	}

	if (req->type & arch_register_req_type_should_be_same) {
		ir_node           *insn  = skip_Proj(node);
		allocation_info_t *info  = get_allocation_info(node);
		int                arity = get_irn_arity(insn);
		int                i;

		float factor = 1.0f / rbitset_popcnt(&req->other_same, arity);
		for (i = 0; i < arity; ++i) {
Michael Beck's avatar
Michael Beck committed
315
			ir_node           *op;
Christoph Mallon's avatar
Christoph Mallon committed
316
			unsigned           r;
Michael Beck's avatar
Michael Beck committed
317
318
			allocation_info_t *op_info;

Matthias Braun's avatar
Matthias Braun committed
319
320
321
			if (!rbitset_is_set(&req->other_same, i))
				continue;

Michael Beck's avatar
Michael Beck committed
322
323
			op      = get_irn_n(insn, i);
			op_info = get_allocation_info(op);
Matthias Braun's avatar
Matthias Braun committed
324
325
326
327
328
329
330
			for (r = 0; r < n_regs; ++r) {
				op_info->prefs[r] += info->prefs[r] * factor;
			}
		}
	}
}

331
332
333
334
/**
 * Walker: Runs an a block calculates the preferences for any
 * node and every register from the considered register class.
 */
Matthias Braun's avatar
Matthias Braun committed
335
336
337
338
339
340
341
342
343
344
345
static void analyze_block(ir_node *block, void *data)
{
	float         weight = get_block_execfreq(execfreqs, block);
	ir_nodeset_t  live_nodes;
	ir_node      *node;
	(void) data;

	ir_nodeset_init(&live_nodes);
	be_liveness_end_of_block(lv, cls, block, &live_nodes);

	sched_foreach_reverse(block, node) {
Michael Beck's avatar
Michael Beck committed
346
		allocation_info_t *info;
347
348
		int                i;
		int                arity;
Matthias Braun's avatar
Matthias Braun committed
349

350
		if (is_Phi(node))
Matthias Braun's avatar
Matthias Braun committed
351
352
353
354
355
356
357
			break;

		/* TODO give/take penalties for should_be_same/different) */
		check_defs(&live_nodes, weight, node);

		/* mark last uses */
		arity = get_irn_arity(node);
358
359
360
361
362
363
364
365

		/* the allocation info node currently only uses 1 unsigned value
		   to mark last used inputs. So we will fail for a node with more than
		   32 inputs. */
		if (arity >= (int) sizeof(unsigned) * 8) {
			panic("Node with more than %d inputs not supported yet",
					(int) sizeof(unsigned) * 8);
		}
Matthias Braun's avatar
Matthias Braun committed
366

Michael Beck's avatar
Michael Beck committed
367
		info = get_allocation_info(node);
Matthias Braun's avatar
Matthias Braun committed
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
		for (i = 0; i < arity; ++i) {
			ir_node *op = get_irn_n(node, i);
			if (!arch_irn_consider_in_reg_alloc(cls, op))
				continue;

			/* last usage of a value? */
			if (!ir_nodeset_contains(&live_nodes, op)) {
				rbitset_set(&info->last_uses, i);
			}
		}

		be_liveness_transfer(cls, node, &live_nodes);

		/* update weights based on usage constraints */
		for (i = 0; i < arity; ++i) {
Michael Beck's avatar
Michael Beck committed
383
384
385
386
			const arch_register_req_t *req;
			const unsigned            *limited;
			ir_node                   *op = get_irn_n(node, i);

Matthias Braun's avatar
Matthias Braun committed
387
388
389
			if (!arch_irn_consider_in_reg_alloc(cls, op))
				continue;

Michael Beck's avatar
Michael Beck committed
390
			req = arch_get_register_req(node, i);
Christoph Mallon's avatar
Christoph Mallon committed
391
			if (!(req->type & arch_register_req_type_limited))
Matthias Braun's avatar
Matthias Braun committed
392
393
394
395
				continue;

			/* TODO: give penalties to neighbors for precolored nodes! */

Michael Beck's avatar
Michael Beck committed
396
			limited = req->limited;
Matthias Braun's avatar
Matthias Braun committed
397
398
399
400
401
402
403
404
			give_penalties_for_limits(&live_nodes, weight * USE_FACTOR, limited,
			                          op);
		}
	}

	ir_nodeset_destroy(&live_nodes);
}

405
406
407
408
409
410
/**
 * Assign register reg to the given node.
 *
 * @param node  the node
 * @param reg   the register
 */
Matthias Braun's avatar
Matthias Braun committed
411
412
static void use_reg(ir_node *node, const arch_register_t *reg)
{
413
414
	unsigned r = arch_register_get_index(reg);
	assignments[r].value = node;
Matthias Braun's avatar
Matthias Braun committed
415
416
417
	arch_set_irn_register(node, reg);
}

418
419
420
/**
 * Compare two register preferences in decreasing order.
 */
Matthias Braun's avatar
Matthias Braun committed
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
static int compare_reg_pref(const void *e1, const void *e2)
{
	const reg_pref_t *rp1 = (const reg_pref_t*) e1;
	const reg_pref_t *rp2 = (const reg_pref_t*) e2;
	if (rp1->pref < rp2->pref)
		return 1;
	if (rp1->pref > rp2->pref)
		return -1;
	return 0;
}

static void fill_sort_candidates(reg_pref_t *regprefs,
                                 const allocation_info_t *info)
{
	unsigned r;

	for (r = 0; r < n_regs; ++r) {
		float pref = info->prefs[r];
		regprefs[r].num  = r;
		regprefs[r].pref = pref;
	}
	/* TODO: use a stable sort here to avoid unnecessary register jumping */
	qsort(regprefs, n_regs, sizeof(regprefs[0]), compare_reg_pref);
}

446
447
448
/**
 * Determine and assign a register for node @p node
 */
Matthias Braun's avatar
Matthias Braun committed
449
450
static void assign_reg(const ir_node *block, ir_node *node)
{
Michael Beck's avatar
Michael Beck committed
451
452
453
454
	const arch_register_t     *reg;
	allocation_info_t         *info;
	const arch_register_req_t *req;
	reg_pref_t                *reg_prefs;
Michael Beck's avatar
Michael Beck committed
455
	ir_node                   *in_node;
456
457
	unsigned                   i;
	const unsigned            *allowed_regs;
Michael Beck's avatar
Michael Beck committed
458

Matthias Braun's avatar
Matthias Braun committed
459
460
461
	assert(arch_irn_consider_in_reg_alloc(cls, node));

	/* preassigned register? */
Michael Beck's avatar
Michael Beck committed
462
	reg = arch_get_irn_register(node);
Matthias Braun's avatar
Matthias Braun committed
463
464
465
466
467
468
469
	if (reg != NULL) {
		DB((dbg, LEVEL_2, "Preassignment %+F -> %s\n", node, reg->name));
		use_reg(node, reg);
		return;
	}

	/* give should_be_same boni */
Michael Beck's avatar
Michael Beck committed
470
471
	info = get_allocation_info(node);
	req  = arch_get_register_req_out(node);
472

Michael Beck's avatar
Michael Beck committed
473
	in_node = skip_Proj(node);
Matthias Braun's avatar
Matthias Braun committed
474
475
	if (req->type & arch_register_req_type_should_be_same) {
		float weight = get_block_execfreq(execfreqs, block);
476
		int   arity  = get_irn_arity(in_node);
Matthias Braun's avatar
Matthias Braun committed
477
		int   i;
Michael Beck's avatar
Michael Beck committed
478

Matthias Braun's avatar
Matthias Braun committed
479
480
481
482
483
484
485
486
		assert(arity <= (int) sizeof(req->other_same) * 8);
		for (i = 0; i < arity; ++i) {
			ir_node               *in;
			const arch_register_t *reg;
			unsigned               r;
			if (!rbitset_is_set(&req->other_same, i))
				continue;

487
			in  = get_irn_n(in_node, i);
Matthias Braun's avatar
Matthias Braun committed
488
489
490
			reg = arch_get_irn_register(in);
			assert(reg != NULL);
			r = arch_register_get_index(reg);
491
			info->prefs[r] += weight * AFF_SHOULD_BE_SAME;
Matthias Braun's avatar
Matthias Braun committed
492
493
494
495
		}
	}

	DB((dbg, LEVEL_2, "Candidates for %+F:", node));
Michael Beck's avatar
Michael Beck committed
496
	reg_prefs = alloca(n_regs * sizeof(reg_prefs[0]));
Matthias Braun's avatar
Matthias Braun committed
497
498
499
500
501
502
503
504
	fill_sort_candidates(reg_prefs, info);
	for (i = 0; i < n_regs; ++i) {
		unsigned               num = reg_prefs[i].num;
		const arch_register_t *reg = arch_register_for_index(cls, num);
		DB((dbg, LEVEL_2, " %s(%f)", reg->name, reg_prefs[i].pref));
	}
	DB((dbg, LEVEL_2, "\n"));

505
506
507
508
509
	allowed_regs = normal_regs;
	if (req->type & arch_register_req_type_limited) {
		allowed_regs = req->limited;
	}

Matthias Braun's avatar
Matthias Braun committed
510
511
	for (i = 0; i < n_regs; ++i) {
		unsigned r = reg_prefs[i].num;
512
		/* already used?
513
514
		   TODO: It might be better to copy the value occupying the register
		   around here instead of trying the next one, find out when... */
Matthias Braun's avatar
Matthias Braun committed
515
516
		if (assignments[r].value != NULL)
			continue;
517
518
		if (!rbitset_is_set(allowed_regs, r))
			continue;
519
520
521
		reg = arch_register_for_index(cls, r);
		DB((dbg, LEVEL_2, "Assign %+F -> %s\n", node, reg->name));
		use_reg(node, reg);
Matthias Braun's avatar
Matthias Braun committed
522
523
524
525
526
527
		break;
	}
}

static void free_reg_of_value(ir_node *node)
{
528
529
530
	assignment_t          *assignment;
	const arch_register_t *reg;
	unsigned               r;
Michael Beck's avatar
Michael Beck committed
531

Matthias Braun's avatar
Matthias Braun committed
532
533
534
	if (!arch_irn_consider_in_reg_alloc(cls, node))
		return;

535
536
537
	reg               = arch_get_irn_register(node);
	r                 = arch_register_get_index(reg);
	assignment        = &assignments[r];
538
539
540
	/* assignment->value may be NULL if a value is used at 2 inputs
	   so it gets freed twice. */
	assert(assignment->value == node || assignment->value == NULL);
541
	assignment->value = NULL;
Matthias Braun's avatar
Matthias Braun committed
542
543
}

544
545
546
547
/**
 * Add an permutation in front of a node and change the assignments
 * due to this permutation.
 *
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
 * To understand this imagine a permutation like this:
 *
 * 1 -> 2
 * 2 -> 3
 * 3 -> 1, 5
 * 4 -> 6
 * 5
 * 6
 * 7 -> 7
 *
 * First we count how many destinations a single value has. At the same time
 * we can be sure that each destination register has at most 1 source register
 * (it can have 0 which means we don't care what value is in it).
 * We ignore all fullfilled permuations (like 7->7)
 * In a first pass we create as much copy instructions as possible as they
 * are generally cheaper than exchanges. We do this by counting into how many
 * destinations a register has to be copied (in the example it's 2 for register
 * 3, or 1 for the registers 1,2,4 and 7).
 * We can then create a copy into every destination register when the usecount
 * of that register is 0 (= noone else needs the value in the register).
 *
 * After this step we should have cycles left. We implement a cyclic permutation
 * of n registers with n-1 transpositions.
 *
572
573
 * @param live_nodes   the set of live nodes, updated due to live range split
 * @param before       the node before we add the permutation
574
575
576
 * @param permutation  the permutation array indices are the destination
 *                     registers, the values in the array are the source
 *                     registers.
577
 */
Matthias Braun's avatar
Matthias Braun committed
578
579
580
static void permutate_values(ir_nodeset_t *live_nodes, ir_node *before,
                             unsigned *permutation)
{
581
	unsigned  *n_used = ALLOCANZ(unsigned, n_regs);
582
	ir_node   *block;
583
	unsigned   r;
Matthias Braun's avatar
Matthias Braun committed
584

585
	/* determine how often each source register needs to be read */
Matthias Braun's avatar
Matthias Braun committed
586
	for (r = 0; r < n_regs; ++r) {
587
588
		unsigned  old_reg = permutation[r];
		ir_node  *value;
Matthias Braun's avatar
Matthias Braun committed
589

590
		value = assignments[old_reg].value;
Matthias Braun's avatar
Matthias Braun committed
591
		if (value == NULL) {
592
593
			/* nothing to do here, reg is not live. Mark it as fixpoint
			 * so we ignore it in the next steps */
Matthias Braun's avatar
Matthias Braun committed
594
595
596
597
			permutation[r] = r;
			continue;
		}

598
		++n_used[old_reg];
Matthias Braun's avatar
Matthias Braun committed
599
600
	}

Michael Beck's avatar
Michael Beck committed
601
	block = get_nodes_block(before);
Matthias Braun's avatar
Matthias Braun committed
602

603
604
605
	/* step1: create copies where immediately possible */
	for (r = 0; r < n_regs; /* empty */) {
		ir_node *copy;
606
		ir_node *src;
Michael Beck's avatar
Michael Beck committed
607
		const arch_register_t *reg;
608
		unsigned               old_r = permutation[r];
Michael Beck's avatar
Michael Beck committed
609

610
611
612
		/* - no need to do anything for fixed points.
		   - we can't copy if the value in the dest reg is still needed */
		if (old_r == r || n_used[r] > 0) {
613
			++r;
Matthias Braun's avatar
Matthias Braun committed
614
			continue;
615
		}
Matthias Braun's avatar
Matthias Braun committed
616

617
		/* create a copy */
618
		src  = assignments[old_r].value;
619
		copy = be_new_Copy(cls, block, src);
620
		sched_add_before(before, copy);
621
		reg = arch_register_for_index(cls, r);
622
623
624
		DB((dbg, LEVEL_2, "Copy %+F (from %+F, before %+F) -> %s\n",
		    copy, src, before, reg->name));
		mark_as_copy_of(copy, src);
625
		use_reg(copy, reg);
626
627
628
629

		if (live_nodes != NULL) {
			ir_nodeset_insert(live_nodes, copy);
		}
630

631
632
		/* old register has 1 user less, permutation is resolved */
		assert(arch_register_get_index(arch_get_irn_register(src)) == old_r);
633
634
		permutation[r] = r;

635
636
		assert(n_used[old_r] > 0);
		--n_used[old_r];
637
		if (n_used[old_r] == 0) {
638
639
640
			if (live_nodes != NULL) {
				ir_nodeset_remove(live_nodes, src);
			}
641
642
			free_reg_of_value(src);
		}
643

644
		/* advance or jump back (if this copy enabled another copy) */
645
		if (old_r < r && n_used[old_r] == 0) {
646
			r = old_r;
647
		} else {
648
			++r;
649
		}
650
	}
Matthias Braun's avatar
Matthias Braun committed
651

652
653
654
655
656
	/* at this point we only have "cycles" left which we have to resolve with
	 * perm instructions
	 * TODO: if we have free registers left, then we should really use copy
	 * instructions for any cycle longer than 2 registers...
	 * (this is probably architecture dependent, there might be archs where
657
	 *  copies are preferable even for 2-cycles) */
658

659
	/* create perms with the rest */
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
	for (r = 0; r < n_regs; /* empty */) {
		const arch_register_t *reg;
		unsigned  old_r = permutation[r];
		unsigned  r2;
		ir_node  *in[2];
		ir_node  *perm;
		ir_node  *proj0;
		ir_node  *proj1;

		if (old_r == r) {
			++r;
			continue;
		}

		/* we shouldn't have copies from 1 value to multiple destinations left*/
		assert(n_used[old_r] == 1);

		/* exchange old_r and r2; after that old_r is a fixed point */
		r2 = permutation[old_r];

680
681
		in[0] = assignments[r2].value;
		in[1] = assignments[old_r].value;
682
		perm = be_new_Perm(cls, block, 2, in);
683
684
685
		sched_add_before(before, perm);
		DB((dbg, LEVEL_2, "Perm %+F (perm %+F,%+F, before %+F)\n",
		    perm, in[0], in[1], before));
686
687

		proj0 = new_r_Proj(block, perm, get_irn_mode(in[0]), 0);
688
		mark_as_copy_of(proj0, in[0]);
689
690
691
692
		reg = arch_register_for_index(cls, old_r);
		use_reg(proj0, reg);

		proj1 = new_r_Proj(block, perm, get_irn_mode(in[1]), 1);
693
694
695
		mark_as_copy_of(proj1, in[1]);
		reg = arch_register_for_index(cls, r2);
		use_reg(proj1, reg);
696
697
698
699
700

		/* 1 value is now in the correct register */
		permutation[old_r] = old_r;
		/* the source of r changed to r2 */
		permutation[r] = r2;
701
702

		/* if we have reached a fixpoint update data structures */
703
704
705
706
		if (live_nodes != NULL) {
			ir_nodeset_remove(live_nodes, in[0]);
			ir_nodeset_remove(live_nodes, in[1]);
			ir_nodeset_remove(live_nodes, proj0);
707
			ir_nodeset_insert(live_nodes, proj1);
708
		}
Matthias Braun's avatar
Matthias Braun committed
709
	}
710
711
712
713
714
715
716

#ifdef DEBUG_libfirm
	/* now we should only have fixpoints left */
	for (r = 0; r < n_regs; ++r) {
		assert(permutation[r] == r);
	}
#endif
Matthias Braun's avatar
Matthias Braun committed
717
718
}

719
720
721
722
723
724
/**
 * Free regs for values last used.
 *
 * @param live_nodes   set of live nodes, will be updated
 * @param node         the node to consider
 */
Matthias Braun's avatar
Matthias Braun committed
725
726
static void free_last_uses(ir_nodeset_t *live_nodes, ir_node *node)
{
727
728
729
	allocation_info_t *info      = get_allocation_info(node);
	const unsigned    *last_uses = &info->last_uses;
	int                arity     = get_irn_arity(node);
Matthias Braun's avatar
Matthias Braun committed
730
731
	int                i;
	for (i = 0; i < arity; ++i) {
Michael Beck's avatar
Michael Beck committed
732
733
		ir_node *op;

734
		/* check if one operand is the last use */
735
		if (!rbitset_is_set(last_uses, i))
Matthias Braun's avatar
Matthias Braun committed
736
737
			continue;

Michael Beck's avatar
Michael Beck committed
738
		op = get_irn_n(node, i);
Matthias Braun's avatar
Matthias Braun committed
739
740
741
742
743
		free_reg_of_value(op);
		ir_nodeset_remove(live_nodes, op);
	}
}

744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
/**
 * change inputs of a node to the current value (copies/perms)
 */
static void rewire_inputs(ir_node *node)
{
	int i;
	int arity = get_irn_arity(node);

	for (i = 0; i < arity; ++i) {
		ir_node           *op = get_irn_n(node, i);
		allocation_info_t *info;

		if (!arch_irn_consider_in_reg_alloc(cls, op))
			continue;

		info = get_allocation_info(op);
760
		info = get_allocation_info(info->original_value);
761
762
763
764
765
766
		if (info->current_value != op) {
			set_irn_n(node, i, info->current_value);
		}
	}
}

767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
/**
 * Create a bitset of registers occupied with value living through an
 * instruction
 */
static void determine_live_through_regs(unsigned *bitset, ir_node *node)
{
	const allocation_info_t *info = get_allocation_info(node);
	unsigned r;
	int i;
	int arity;

	/* mark all used registers as potentially live-through */
	for (r = 0; r < n_regs; ++r) {
		const assignment_t *assignment = &assignments[r];
		if (assignment->value == NULL)
			continue;
783
784
		if (!rbitset_is_set(normal_regs, r))
			continue;
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803

		rbitset_set(bitset, r);
	}

	/* remove registers of value dying at the instruction */
	arity = get_irn_arity(node);
	for (i = 0; i < arity; ++i) {
		ir_node               *op;
		const arch_register_t *reg;

		if (!rbitset_is_set(&info->last_uses, i))
			continue;

		op  = get_irn_n(node, i);
		reg = arch_get_irn_register(op);
		rbitset_clear(bitset, arch_register_get_index(reg));
	}
}

804
805
806
/**
 * Enforce constraints at a node by live range splits.
 *
807
808
 * @param  live_nodes  the set of live nodes, might be changed
 * @param  node        the current node
809
 */
Matthias Braun's avatar
Matthias Braun committed
810
811
812
static void enforce_constraints(ir_nodeset_t *live_nodes, ir_node *node)
{
	int arity = get_irn_arity(node);
Michael Beck's avatar
Michael Beck committed
813
814
	int i, dummy, res;
	hungarian_problem_t *bp;
815
	unsigned l, r;
Michael Beck's avatar
Michael Beck committed
816
	unsigned *assignment;
Matthias Braun's avatar
Matthias Braun committed
817

Michael Beck's avatar
Michael Beck committed
818
819
820
821
	/* construct a list of register occupied by live-through values */
	unsigned *live_through_regs = NULL;
	unsigned *output_regs       = NULL;

Matthias Braun's avatar
Matthias Braun committed
822
823
824
	/* see if any use constraints are not met */
	bool good = true;
	for (i = 0; i < arity; ++i) {
Michael Beck's avatar
Michael Beck committed
825
		ir_node                   *op = get_irn_n(node, i);
826
		const arch_register_t     *reg;
Michael Beck's avatar
Michael Beck committed
827
828
829
830
		const arch_register_req_t *req;
		const unsigned            *limited;
		unsigned                  r;

Matthias Braun's avatar
Matthias Braun committed
831
832
833
		if (!arch_irn_consider_in_reg_alloc(cls, op))
			continue;

834
		/* are there any limitations for the i'th operand? */
Michael Beck's avatar
Michael Beck committed
835
		req = arch_get_register_req(node, i);
Christoph Mallon's avatar
Christoph Mallon committed
836
		if (!(req->type & arch_register_req_type_limited))
Matthias Braun's avatar
Matthias Braun committed
837
838
			continue;

Michael Beck's avatar
Michael Beck committed
839
		limited = req->limited;
840
841
		reg     = arch_get_irn_register(op);
		r       = arch_register_get_index(reg);
Matthias Braun's avatar
Matthias Braun committed
842
		if (!rbitset_is_set(limited, r)) {
Michael Beck's avatar
Michael Beck committed
843
			/* found an assignment outside the limited set */
Matthias Braun's avatar
Matthias Braun committed
844
845
846
847
848
			good = false;
			break;
		}
	}

Michael Beck's avatar
Michael Beck committed
849
	/* is any of the live-throughs using a constrained output register? */
850
851
852
853
854
855
856
857
858
859
860
	if (get_irn_mode(node) == mode_T) {
		const ir_edge_t *edge;

		foreach_out_edge(node, edge) {
			ir_node *proj = get_edge_src_irn(edge);
			const arch_register_req_t *req;

			if (!arch_irn_consider_in_reg_alloc(cls, proj))
				continue;

			req = arch_get_register_req_out(proj);
Christoph Mallon's avatar
Christoph Mallon committed
861
			if (!(req->type & arch_register_req_type_limited))
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
				continue;

			if (live_through_regs == NULL) {
				rbitset_alloca(live_through_regs, n_regs);
				determine_live_through_regs(live_through_regs, node);

				rbitset_alloca(output_regs, n_regs);
			}

			rbitset_or(output_regs, req->limited, n_regs);
			if (rbitsets_have_common(req->limited, live_through_regs, n_regs)) {
				good = false;
			}
		}
	} else {
		if (arch_irn_consider_in_reg_alloc(cls, node)) {
			const arch_register_req_t *req = arch_get_register_req_out(node);
			if (req->type & arch_register_req_type_limited) {
				rbitset_alloca(live_through_regs, n_regs);
				determine_live_through_regs(live_through_regs, node);
				if (rbitsets_have_common(req->limited, live_through_regs, n_regs)) {
					good = false;

					rbitset_alloca(output_regs, n_regs);
					rbitset_or(output_regs, req->limited, n_regs);
				}
			}
		}
	}

Matthias Braun's avatar
Matthias Braun committed
892
893
894
	if (good)
		return;

895
	/* create these arrays if we haven't yet */
896
897
898
899
	if (output_regs == NULL) {
		if (live_through_regs == NULL) {
			rbitset_alloca(live_through_regs, n_regs);
		}
900
901
902
		rbitset_alloca(output_regs, n_regs);
	}

903
	/* at this point we have to construct a bipartite matching problem to see
904
905
906
907
908
	 * which values should go to which registers
	 * Note: We're building the matrix in "reverse" - source registers are
	 *       right, destinations at l because this will produce the solution
	 *       in the format required for permutate_values.
	 */
Michael Beck's avatar
Michael Beck committed
909
	bp = hungarian_new(n_regs, n_regs, HUNGARIAN_MATCH_PERFECT);
Matthias Braun's avatar
Matthias Braun committed
910
911
912

	/* add all combinations, then remove not allowed ones */
	for (l = 0; l < n_regs; ++l) {
913
		if (!rbitset_is_set(normal_regs, l)) {
914
			hungarian_add(bp, l, l, 1);
Matthias Braun's avatar
Matthias Braun committed
915
916
917
918
			continue;
		}

		for (r = 0; r < n_regs; ++r) {
919
			if (!rbitset_is_set(normal_regs, r))
Matthias Braun's avatar
Matthias Braun committed
920
				continue;
921
922
923
924
			/* livethrough values may not use constrainted output registers */
			if (rbitset_is_set(live_through_regs, l)
					&& rbitset_is_set(output_regs, r))
				continue;
Matthias Braun's avatar
Matthias Braun committed
925

926
			hungarian_add(bp, r, l, l == r ? 9 : 8);
Matthias Braun's avatar
Matthias Braun committed
927
928
929
930
		}
	}

	for (i = 0; i < arity; ++i) {
Michael Beck's avatar
Michael Beck committed
931
		ir_node                   *op = get_irn_n(node, i);
932
		const arch_register_t     *reg;
Michael Beck's avatar
Michael Beck committed
933
934
		const arch_register_req_t *req;
		const unsigned            *limited;
935
		unsigned                   current_reg;
Michael Beck's avatar
Michael Beck committed
936

Matthias Braun's avatar
Matthias Braun committed
937
938
939
		if (!arch_irn_consider_in_reg_alloc(cls, op))
			continue;

Michael Beck's avatar
Michael Beck committed
940
		req = arch_get_register_req(node, i);
Christoph Mallon's avatar
Christoph Mallon committed
941
		if (!(req->type & arch_register_req_type_limited))
Matthias Braun's avatar
Matthias Braun committed
942
943
			continue;

Michael Beck's avatar
Michael Beck committed
944
		limited     = req->limited;
945
946
		reg         = arch_get_irn_register(op);
		current_reg = arch_register_get_index(reg);
Matthias Braun's avatar
Matthias Braun committed
947
948
949
		for (r = 0; r < n_regs; ++r) {
			if (rbitset_is_set(limited, r))
				continue;
950
			hungarian_remv(bp, r, current_reg);
Matthias Braun's avatar
Matthias Braun committed
951
952
953
		}
	}

954
	//hungarian_print_costmatrix(bp, 1);
Matthias Braun's avatar
Matthias Braun committed
955
956
	hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL);

Michael Beck's avatar
Michael Beck committed
957
958
	assignment = ALLOCAN(unsigned, n_regs);
	res = hungarian_solve(bp, (int*) assignment, &dummy, 0);
Matthias Braun's avatar
Matthias Braun committed
959
960
	assert(res == 0);

961
#if 0
962
	fprintf(stderr, "Swap result:");
963
	for (i = 0; i < (int) n_regs; ++i) {
964
		fprintf(stderr, " %d", assignment[i]);
Matthias Braun's avatar
Matthias Braun committed
965
	}
966
	fprintf(stderr, "\n");
967
#endif
Matthias Braun's avatar
Matthias Braun committed
968
969
970
971
972
973

	hungarian_free(bp);

	permutate_values(live_nodes, node, assignment);
}

974
/** test wether a node @p n is a copy of the value of node @p of */
975
static bool is_copy_of(ir_node *value, ir_node *test_value)
976
{
977
	allocation_info_t *test_info;
978
	allocation_info_t *info;
979

980
	if (value == test_value)
981
		return true;
982

983
	info      = get_allocation_info(value);
984
	test_info = get_allocation_info(test_value);
985
	return test_info->original_value == info->original_value;
986
987
}

988
989
/**
 * find a value in the end-assignment of a basic block
990
991
992
993
994
995
996
997
998
 * @returns the index into the assignment array if found
 *          -1 if not found
 */
static int find_value_in_block_info(block_info_t *info, ir_node *value)
{
	unsigned      r;
	assignment_t *assignments = info->assignments;
	for (r = 0; r < n_regs; ++r) {
		const assignment_t *assignment = &assignments[r];
999
1000
		ir_node            *a_value    = assignment->value;