benewalloc.c 38.7 KB
Newer Older
Matthias Braun's avatar
Matthias Braun committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
/*
 * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
 *
 * This file is part of libFirm.
 *
 * This file may be distributed and/or modified under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation and appearing in the file LICENSE.GPL included in the
 * packaging of this file.
 *
 * Licensees holding valid libFirm Professional Edition licenses may use
 * this file in accordance with the libFirm Commercial License.
 * Agreement provided with the Software.
 *
 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE.
 */

/**
 * @file
 * @brief       New approach to allocation and copy coalescing
 * @author      Matthias Braun
 * @date        14.2.2009
 * @version     $Id$
 *
 * ... WE NEED A NAME FOR THIS ...
 *
 * Only a proof of concept at this moment...
 *
 * The idea is to allocate registers in 2 passes:
Michael Beck's avatar
Michael Beck committed
32
 * 1. A first pass to determine "preferred" registers for live-ranges. This
Matthias Braun's avatar
Matthias Braun committed
33
 *    calculates for each register and each live-range a value indicating
Michael Beck's avatar
Michael Beck committed
34
 *    the usefulness. (You can roughly think of the value as the negative
Matthias Braun's avatar
Matthias Braun committed
35
36
 *    costs needed for copies when the value is in the specific registers...)
 *
37
38
39
 * 2. Walk blocks and assigns registers in a greedy fashion. Preferring
 *    registers with high preferences. When register constraints are not met,
 *    add copies and split live-ranges.
Matthias Braun's avatar
Matthias Braun committed
40
41
 *
 * TODO:
42
43
44
 *  - make use of free registers in the permutate_values code
 *  - We have to pessimistically construct Phi_0s when not all predecessors
 *    of a block are known.
Matthias Braun's avatar
Matthias Braun committed
45
46
47
48
49
50
51
52
53
 *  - Phi color assignment should give bonus points towards registers already
 *    assigned at predecessors.
 *  - think about a smarter sequence of visiting the blocks. Sorted by
 *    execfreq might be good, or looptree from inner to outermost loops going
 *    over blocks in a reverse postorder
 */
#include "config.h"

#include <float.h>
54
#include <stdbool.h>
Matthias Braun's avatar
Matthias Braun committed
55
56
57
58
59
60
61

#include "obst.h"
#include "irnode_t.h"
#include "irgraph_t.h"
#include "iredges_t.h"
#include "ircons.h"
#include "irgwalk.h"
62
#include "irdom.h"
Matthias Braun's avatar
Matthias Braun committed
63
#include "execfreq.h"
64
#include "error.h"
Matthias Braun's avatar
Matthias Braun committed
65
66
67
68
69
70

#include "be.h"
#include "bera.h"
#include "belive_t.h"
#include "bemodule.h"
#include "bechordal_t.h"
71
72
#include "besched.h"
#include "beirg.h"
Matthias Braun's avatar
Matthias Braun committed
73
#include "benode_t.h"
74
#include "bespill.h"
75
#include "bespillutil.h"
Matthias Braun's avatar
Matthias Braun committed
76
77
78
79
80
#include "beverify.h"

#include "bipartite.h"
#include "hungarian.h"

81
82
83
84
85
#define USE_FACTOR         1.0f
#define DEF_FACTOR         1.0f
#define NEIGHBOR_FACTOR    0.2f
#define AFF_SHOULD_BE_SAME 1.0f
#define AFF_PHI            1.0f
Matthias Braun's avatar
Matthias Braun committed
86
87
88
89
90
91
92

DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)

static struct obstack               obst;
static be_irg_t                    *birg;
static ir_graph                    *irg;
static const arch_register_class_t *cls;
93
static const arch_register_req_t   *default_cls_req;
Matthias Braun's avatar
Matthias Braun committed
94
95
96
97
98
static be_lv_t                     *lv;
static const ir_exec_freq          *execfreqs;
static unsigned                     n_regs;
static bitset_t                    *ignore_regs;

99
/** info about the current assignment for a register */
Matthias Braun's avatar
Matthias Braun committed
100
101
102
struct assignment_t {
	ir_node *value;            /**< currently assigned value */
};
103
typedef struct assignment_t assignment_t;
Matthias Braun's avatar
Matthias Braun committed
104

105
/** currently active assignments (while processing a basic block) */
Matthias Braun's avatar
Matthias Braun committed
106
107
static assignment_t *assignments;

108
109
110
111
/**
 * allocation information: last_uses, register preferences
 * the information is per firm-node.
 */
Matthias Braun's avatar
Matthias Braun committed
112
struct allocation_info_t {
113
114
115
116
	unsigned  last_uses;      /**< bitset indicating last uses (input pos) */
	ir_node  *current_value;  /**< copy of the value that should be used */
	ir_node  *original_value; /**< for copies point to original value */
	float     prefs[0];       /**< register preferences */
Matthias Braun's avatar
Matthias Braun committed
117
};
118
typedef struct allocation_info_t allocation_info_t;
Matthias Braun's avatar
Matthias Braun committed
119

120
/** helper datastructure used when sorting register preferences */
Matthias Braun's avatar
Matthias Braun committed
121
122
123
124
struct reg_pref_t {
	unsigned num;
	float    pref;
};
125
126
127
128
typedef struct reg_pref_t reg_pref_t;

/** per basic-block information */
struct block_info_t {
129
	bool         processed;       /**< indicate wether block is processed */
130
131
132
	assignment_t assignments[0];  /**< register assignments at end of block */
};
typedef struct block_info_t block_info_t;
Matthias Braun's avatar
Matthias Braun committed
133

134
135
136
137
/**
 * Get the allocation info for a node.
 * The info is allocated on the first visit of a node.
 */
Matthias Braun's avatar
Matthias Braun committed
138
139
140
static allocation_info_t *get_allocation_info(ir_node *node)
{
	allocation_info_t *info;
Christoph Mallon's avatar
Christoph Mallon committed
141
	if (!irn_visited_else_mark(node)) {
142
		size_t size = sizeof(info[0]) + n_regs * sizeof(info->prefs[0]);
Matthias Braun's avatar
Matthias Braun committed
143
144
		info = obstack_alloc(&obst, size);
		memset(info, 0, size);
145
146
		info->current_value  = node;
		info->original_value = node;
Matthias Braun's avatar
Matthias Braun committed
147
148
149
150
151
152
153
154
		set_irn_link(node, info);
	} else {
		info = get_irn_link(node);
	}

	return info;
}

155
156
157
158
159
160
161
162
/**
 * Get allocation information for a basic block
 */
static block_info_t *get_block_info(ir_node *block)
{
	block_info_t *info;

	assert(is_Block(block));
Christoph Mallon's avatar
Christoph Mallon committed
163
	if (!irn_visited_else_mark(block)) {
164
165
166
167
168
169
170
171
172
173
174
		size_t size = sizeof(info[0]) + n_regs * sizeof(info->assignments[0]);
		info = obstack_alloc(&obst, size);
		memset(info, 0, size);
		set_irn_link(block, info);
	} else {
		info = get_irn_link(block);
	}

	return info;
}

175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
/**
 * Get default register requirement for the current register class
 */
static const arch_register_req_t *get_default_req_current_cls(void)
{
	if (default_cls_req == NULL) {
		struct obstack      *obst = get_irg_obstack(irg);
		arch_register_req_t *req  = obstack_alloc(obst, sizeof(*req));
		memset(req, 0, sizeof(*req));

		req->type = arch_register_req_type_normal;
		req->cls  = cls;

		default_cls_req = req;
	}
	return default_cls_req;
}

193
194
195
196
197
/**
 * Link the allocation info of a node to a copy.
 * Afterwards, both nodes uses the same allocation info.
 * Copy must not have an allocation info assigned yet.
 *
198
 * @param copy   the node that gets the allocation info assigned
199
200
 * @param value  the original node
 */
201
static void mark_as_copy_of(ir_node *copy, ir_node *value)
Matthias Braun's avatar
Matthias Braun committed
202
{
203
	ir_node           *original;
204
205
206
	allocation_info_t *info      = get_allocation_info(value);
	allocation_info_t *copy_info = get_allocation_info(copy);

207
208
209
210
211
212
213
	/* find original value */
	original = info->original_value;
	if (original != value) {
		info = get_allocation_info(original);
	}

	assert(info->original_value == original);
214
215
216
217
	info->current_value = copy;

	/* the copy should not be linked to something else yet */
	assert(copy_info->original_value == copy);
218
219
220
	/* copy over allocation preferences */
	memcpy(copy_info->prefs, info->prefs, n_regs * sizeof(copy_info->prefs[0]));
	copy_info->original_value = original;
Matthias Braun's avatar
Matthias Braun committed
221
222
}

223
224
225
/**
 * Calculate the penalties for every register on a node and its live neighbors.
 *
226
227
228
229
 * @param live_nodes  the set of live nodes at the current position, may be NULL
 * @param penalty     the penalty to subtract from
 * @param limited     a raw bitset containing the limited set for the node
 * @param node        the node
230
 */
Matthias Braun's avatar
Matthias Braun committed
231
232
static void give_penalties_for_limits(const ir_nodeset_t *live_nodes,
                                      float penalty, const unsigned* limited,
233
                                      ir_node *node)
Matthias Braun's avatar
Matthias Braun committed
234
{
Michael Beck's avatar
Michael Beck committed
235
236
	ir_nodeset_iterator_t iter;
	unsigned              r;
Matthias Braun's avatar
Matthias Braun committed
237
	allocation_info_t     *info = get_allocation_info(node);
Michael Beck's avatar
Michael Beck committed
238
	ir_node               *neighbor;
Matthias Braun's avatar
Matthias Braun committed
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261

	/* give penalty for all forbidden regs */
	for (r = 0; r < n_regs; ++r) {
		if (rbitset_is_set(limited, r))
			continue;

		info->prefs[r] -= penalty;
	}

	/* all other live values should get a penalty for allowed regs */
	if (live_nodes == NULL)
		return;

	/* TODO: reduce penalty if there are multiple allowed registers... */
	penalty *= NEIGHBOR_FACTOR;
	foreach_ir_nodeset(live_nodes, neighbor, iter) {
		allocation_info_t *neighbor_info;

		/* TODO: if op is used on multiple inputs we might not do a
		 * continue here */
		if (neighbor == node)
			continue;

Christoph Mallon's avatar
Christoph Mallon committed
262
		neighbor_info = get_allocation_info(neighbor);
Matthias Braun's avatar
Matthias Braun committed
263
264
265
266
267
268
269
270
271
		for (r = 0; r < n_regs; ++r) {
			if (!rbitset_is_set(limited, r))
				continue;

			neighbor_info->prefs[r] -= penalty;
		}
	}
}

272
273
274
275
276
277
278
279
280
/**
 * Calculate the preferences of a definition for the current register class.
 * If the definition uses a limited set of registers, reduce the preferences
 * for the limited register on the node and its neighbors.
 *
 * @param live_nodes  the set of live nodes at the current node
 * @param weight      the weight
 * @param node        the current node
 */
Matthias Braun's avatar
Matthias Braun committed
281
282
283
static void check_defs(const ir_nodeset_t *live_nodes, float weight,
                       ir_node *node)
{
Michael Beck's avatar
Michael Beck committed
284
285
	const arch_register_req_t *req;

Matthias Braun's avatar
Matthias Braun committed
286
287
288
289
290
291
292
293
294
295
296
297
	if (get_irn_mode(node) == mode_T) {
		const ir_edge_t *edge;
		foreach_out_edge(node, edge) {
			ir_node *proj = get_edge_src_irn(edge);
			check_defs(live_nodes, weight, proj);
		}
		return;
	}

	if (!arch_irn_consider_in_reg_alloc(cls, node))
		return;

Michael Beck's avatar
Michael Beck committed
298
	req = arch_get_register_req_out(node);
Matthias Braun's avatar
Matthias Braun committed
299
300
301
302
303
304
305
306
307
308
309
310
311
312
	if (req->type & arch_register_req_type_limited) {
		const unsigned *limited = req->limited;
		float           penalty = weight * DEF_FACTOR;
		give_penalties_for_limits(live_nodes, penalty, limited, node);
	}

	if (req->type & arch_register_req_type_should_be_same) {
		ir_node           *insn  = skip_Proj(node);
		allocation_info_t *info  = get_allocation_info(node);
		int                arity = get_irn_arity(insn);
		int                i;

		float factor = 1.0f / rbitset_popcnt(&req->other_same, arity);
		for (i = 0; i < arity; ++i) {
Michael Beck's avatar
Michael Beck committed
313
			ir_node           *op;
Christoph Mallon's avatar
Christoph Mallon committed
314
			unsigned           r;
Michael Beck's avatar
Michael Beck committed
315
316
			allocation_info_t *op_info;

Matthias Braun's avatar
Matthias Braun committed
317
318
319
			if (!rbitset_is_set(&req->other_same, i))
				continue;

Michael Beck's avatar
Michael Beck committed
320
321
			op      = get_irn_n(insn, i);
			op_info = get_allocation_info(op);
Matthias Braun's avatar
Matthias Braun committed
322
323
324
325
326
327
328
329
330
			for (r = 0; r < n_regs; ++r) {
				if (bitset_is_set(ignore_regs, r))
					continue;
				op_info->prefs[r] += info->prefs[r] * factor;
			}
		}
	}
}

331
332
333
334
/**
 * Walker: Runs an a block calculates the preferences for any
 * node and every register from the considered register class.
 */
Matthias Braun's avatar
Matthias Braun committed
335
336
337
338
339
340
341
342
343
344
345
static void analyze_block(ir_node *block, void *data)
{
	float         weight = get_block_execfreq(execfreqs, block);
	ir_nodeset_t  live_nodes;
	ir_node      *node;
	(void) data;

	ir_nodeset_init(&live_nodes);
	be_liveness_end_of_block(lv, cls, block, &live_nodes);

	sched_foreach_reverse(block, node) {
Michael Beck's avatar
Michael Beck committed
346
		allocation_info_t *info;
347
348
		int                i;
		int                arity;
Matthias Braun's avatar
Matthias Braun committed
349

350
		if (is_Phi(node))
Matthias Braun's avatar
Matthias Braun committed
351
352
353
354
355
356
357
			break;

		/* TODO give/take penalties for should_be_same/different) */
		check_defs(&live_nodes, weight, node);

		/* mark last uses */
		arity = get_irn_arity(node);
358
359
360
361
362
363
364
365

		/* the allocation info node currently only uses 1 unsigned value
		   to mark last used inputs. So we will fail for a node with more than
		   32 inputs. */
		if (arity >= (int) sizeof(unsigned) * 8) {
			panic("Node with more than %d inputs not supported yet",
					(int) sizeof(unsigned) * 8);
		}
Matthias Braun's avatar
Matthias Braun committed
366

Michael Beck's avatar
Michael Beck committed
367
		info = get_allocation_info(node);
Matthias Braun's avatar
Matthias Braun committed
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
		for (i = 0; i < arity; ++i) {
			ir_node *op = get_irn_n(node, i);
			if (!arch_irn_consider_in_reg_alloc(cls, op))
				continue;

			/* last usage of a value? */
			if (!ir_nodeset_contains(&live_nodes, op)) {
				rbitset_set(&info->last_uses, i);
			}
		}

		be_liveness_transfer(cls, node, &live_nodes);

		/* update weights based on usage constraints */
		for (i = 0; i < arity; ++i) {
Michael Beck's avatar
Michael Beck committed
383
384
385
386
			const arch_register_req_t *req;
			const unsigned            *limited;
			ir_node                   *op = get_irn_n(node, i);

Matthias Braun's avatar
Matthias Braun committed
387
388
389
			if (!arch_irn_consider_in_reg_alloc(cls, op))
				continue;

Michael Beck's avatar
Michael Beck committed
390
			req = arch_get_register_req(node, i);
Christoph Mallon's avatar
Christoph Mallon committed
391
			if (!(req->type & arch_register_req_type_limited))
Matthias Braun's avatar
Matthias Braun committed
392
393
394
395
				continue;

			/* TODO: give penalties to neighbors for precolored nodes! */

Michael Beck's avatar
Michael Beck committed
396
			limited = req->limited;
Matthias Braun's avatar
Matthias Braun committed
397
398
399
400
401
402
403
404
			give_penalties_for_limits(&live_nodes, weight * USE_FACTOR, limited,
			                          op);
		}
	}

	ir_nodeset_destroy(&live_nodes);
}

405
406
407
408
409
410
/**
 * Assign register reg to the given node.
 *
 * @param node  the node
 * @param reg   the register
 */
Matthias Braun's avatar
Matthias Braun committed
411
412
static void use_reg(ir_node *node, const arch_register_t *reg)
{
413
414
	unsigned      r          = arch_register_get_index(reg);
	assignment_t *assignment = &assignments[r];
Matthias Braun's avatar
Matthias Braun committed
415
416
417
418
419
420
421

	assert(assignment->value == NULL);
	assignment->value = node;

	arch_set_irn_register(node, reg);
}

422
423
424
/**
 * Compare two register preferences in decreasing order.
 */
Matthias Braun's avatar
Matthias Braun committed
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
static int compare_reg_pref(const void *e1, const void *e2)
{
	const reg_pref_t *rp1 = (const reg_pref_t*) e1;
	const reg_pref_t *rp2 = (const reg_pref_t*) e2;
	if (rp1->pref < rp2->pref)
		return 1;
	if (rp1->pref > rp2->pref)
		return -1;
	return 0;
}

static void fill_sort_candidates(reg_pref_t *regprefs,
                                 const allocation_info_t *info)
{
	unsigned r;

	for (r = 0; r < n_regs; ++r) {
		float pref = info->prefs[r];
		if (bitset_is_set(ignore_regs, r)) {
			pref = -10000;
		}
		regprefs[r].num  = r;
		regprefs[r].pref = pref;
	}
	/* TODO: use a stable sort here to avoid unnecessary register jumping */
	qsort(regprefs, n_regs, sizeof(regprefs[0]), compare_reg_pref);
}

453
454
455
/**
 * Determine and assign a register for node @p node
 */
Matthias Braun's avatar
Matthias Braun committed
456
457
static void assign_reg(const ir_node *block, ir_node *node)
{
Michael Beck's avatar
Michael Beck committed
458
459
460
461
	const arch_register_t     *reg;
	allocation_info_t         *info;
	const arch_register_req_t *req;
	reg_pref_t                *reg_prefs;
Michael Beck's avatar
Michael Beck committed
462
	ir_node                   *in_node;
Michael Beck's avatar
Michael Beck committed
463
464
	unsigned                  i;

Matthias Braun's avatar
Matthias Braun committed
465
466
467
	assert(arch_irn_consider_in_reg_alloc(cls, node));

	/* preassigned register? */
Michael Beck's avatar
Michael Beck committed
468
	reg = arch_get_irn_register(node);
Matthias Braun's avatar
Matthias Braun committed
469
470
471
472
473
474
475
	if (reg != NULL) {
		DB((dbg, LEVEL_2, "Preassignment %+F -> %s\n", node, reg->name));
		use_reg(node, reg);
		return;
	}

	/* give should_be_same boni */
Michael Beck's avatar
Michael Beck committed
476
477
	info = get_allocation_info(node);
	req  = arch_get_register_req_out(node);
478

Michael Beck's avatar
Michael Beck committed
479
	in_node = skip_Proj(node);
Matthias Braun's avatar
Matthias Braun committed
480
481
	if (req->type & arch_register_req_type_should_be_same) {
		float weight = get_block_execfreq(execfreqs, block);
482
		int   arity  = get_irn_arity(in_node);
Matthias Braun's avatar
Matthias Braun committed
483
		int   i;
Michael Beck's avatar
Michael Beck committed
484

Matthias Braun's avatar
Matthias Braun committed
485
486
487
488
489
490
491
492
		assert(arity <= (int) sizeof(req->other_same) * 8);
		for (i = 0; i < arity; ++i) {
			ir_node               *in;
			const arch_register_t *reg;
			unsigned               r;
			if (!rbitset_is_set(&req->other_same, i))
				continue;

493
			in  = get_irn_n(in_node, i);
Matthias Braun's avatar
Matthias Braun committed
494
495
496
497
498
			reg = arch_get_irn_register(in);
			assert(reg != NULL);
			r = arch_register_get_index(reg);
			if (bitset_is_set(ignore_regs, r))
				continue;
499
			info->prefs[r] += weight * AFF_SHOULD_BE_SAME;
Matthias Braun's avatar
Matthias Braun committed
500
501
502
503
		}
	}

	DB((dbg, LEVEL_2, "Candidates for %+F:", node));
Michael Beck's avatar
Michael Beck committed
504
	reg_prefs = alloca(n_regs * sizeof(reg_prefs[0]));
Matthias Braun's avatar
Matthias Braun committed
505
506
507
508
509
510
511
512
513
514
	fill_sort_candidates(reg_prefs, info);
	for (i = 0; i < n_regs; ++i) {
		unsigned               num = reg_prefs[i].num;
		const arch_register_t *reg = arch_register_for_index(cls, num);
		DB((dbg, LEVEL_2, " %s(%f)", reg->name, reg_prefs[i].pref));
	}
	DB((dbg, LEVEL_2, "\n"));

	for (i = 0; i < n_regs; ++i) {
		unsigned r = reg_prefs[i].num;
515
		/* ignores are last and we should have at least 1 non-ignore left */
Matthias Braun's avatar
Matthias Braun committed
516
		assert(!bitset_is_set(ignore_regs, r));
517
		/* already used?
518
519
		   TODO: It might be better to copy the value occupying the register
		   around here instead of trying the next one, find out when... */
Matthias Braun's avatar
Matthias Braun committed
520
521
		if (assignments[r].value != NULL)
			continue;
522
523
524
		reg = arch_register_for_index(cls, r);
		DB((dbg, LEVEL_2, "Assign %+F -> %s\n", node, reg->name));
		use_reg(node, reg);
Matthias Braun's avatar
Matthias Braun committed
525
526
527
528
529
530
		break;
	}
}

static void free_reg_of_value(ir_node *node)
{
531
532
533
	assignment_t          *assignment;
	const arch_register_t *reg;
	unsigned               r;
Michael Beck's avatar
Michael Beck committed
534

Matthias Braun's avatar
Matthias Braun committed
535
536
537
	if (!arch_irn_consider_in_reg_alloc(cls, node))
		return;

538
539
540
	reg               = arch_get_irn_register(node);
	r                 = arch_register_get_index(reg);
	assignment        = &assignments[r];
541
	assert(assignment->value == node);
542
	assignment->value = NULL;
Matthias Braun's avatar
Matthias Braun committed
543
544
}

545
546
547
548
/**
 * Add an permutation in front of a node and change the assignments
 * due to this permutation.
 *
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
 * To understand this imagine a permutation like this:
 *
 * 1 -> 2
 * 2 -> 3
 * 3 -> 1, 5
 * 4 -> 6
 * 5
 * 6
 * 7 -> 7
 *
 * First we count how many destinations a single value has. At the same time
 * we can be sure that each destination register has at most 1 source register
 * (it can have 0 which means we don't care what value is in it).
 * We ignore all fullfilled permuations (like 7->7)
 * In a first pass we create as much copy instructions as possible as they
 * are generally cheaper than exchanges. We do this by counting into how many
 * destinations a register has to be copied (in the example it's 2 for register
 * 3, or 1 for the registers 1,2,4 and 7).
 * We can then create a copy into every destination register when the usecount
 * of that register is 0 (= noone else needs the value in the register).
 *
 * After this step we should have cycles left. We implement a cyclic permutation
 * of n registers with n-1 transpositions.
 *
573
574
 * @param live_nodes   the set of live nodes, updated due to live range split
 * @param before       the node before we add the permutation
575
576
577
 * @param permutation  the permutation array indices are the destination
 *                     registers, the values in the array are the source
 *                     registers.
578
 */
Matthias Braun's avatar
Matthias Braun committed
579
580
581
static void permutate_values(ir_nodeset_t *live_nodes, ir_node *before,
                             unsigned *permutation)
{
582
583
584
585
	ir_node   *block;
	ir_node  **ins    = ALLOCANZ(ir_node*, n_regs);
	unsigned  *n_used = ALLOCANZ(unsigned, n_regs);
	unsigned   r;
Matthias Braun's avatar
Matthias Braun committed
586

587
	/* create a list of permutations. Leave out fix points. */
Matthias Braun's avatar
Matthias Braun committed
588
	for (r = 0; r < n_regs; ++r) {
589
		unsigned      old_reg = permutation[r];
Michael Beck's avatar
Michael Beck committed
590
591
592
		assignment_t *assignment;
		ir_node      *value;

593
594
		/* no need to do anything for a fixpoint */
		if (old_reg == r)
Matthias Braun's avatar
Matthias Braun committed
595
596
			continue;

597
		assignment = &assignments[old_reg];
Michael Beck's avatar
Michael Beck committed
598
		value      = assignment->value;
Matthias Braun's avatar
Matthias Braun committed
599
		if (value == NULL) {
600
601
			/* nothing to do here, reg is not live. Mark it as fixpoint
			 * so we ignore it in the next steps */
Matthias Braun's avatar
Matthias Braun committed
602
603
604
605
			permutation[r] = r;
			continue;
		}

606
607
		ins[old_reg] = value;
		++n_used[old_reg];
608
		free_reg_of_value(value);
Matthias Braun's avatar
Matthias Braun committed
609

610
		/* free occupation infos, we'll add the values back later */
611
612
613
		if (live_nodes != NULL) {
			ir_nodeset_remove(live_nodes, value);
		}
Matthias Braun's avatar
Matthias Braun committed
614
615
	}

Michael Beck's avatar
Michael Beck committed
616
	block = get_nodes_block(before);
Matthias Braun's avatar
Matthias Braun committed
617

618
619
620
	/* step1: create copies where immediately possible */
	for (r = 0; r < n_regs; /* empty */) {
		ir_node *copy;
621
		ir_node *src;
Michael Beck's avatar
Michael Beck committed
622
		const arch_register_t *reg;
623
		unsigned               old_r = permutation[r];
Michael Beck's avatar
Michael Beck committed
624

625
626
627
		/* - no need to do anything for fixed points.
		   - we can't copy if the value in the dest reg is still needed */
		if (old_r == r || n_used[r] > 0) {
628
			++r;
Matthias Braun's avatar
Matthias Braun committed
629
			continue;
630
		}
Matthias Braun's avatar
Matthias Braun committed
631

632
		/* create a copy */
633
		src = ins[old_r];
634
		copy = be_new_Copy(cls, block, src);
635
		sched_add_before(before, copy);
636
		reg = arch_register_for_index(cls, r);
637
638
639
		DB((dbg, LEVEL_2, "Copy %+F (from %+F, before %+F) -> %s\n",
		    copy, src, before, reg->name));
		mark_as_copy_of(copy, src);
640
		use_reg(copy, reg);
641
642
643
644

		if (live_nodes != NULL) {
			ir_nodeset_insert(live_nodes, copy);
		}
645

646
647
		/* old register has 1 user less, permutation is resolved */
		assert(arch_register_get_index(arch_get_irn_register(src)) == old_r);
648
649
		assert(n_used[old_r] > 0);
		--n_used[old_r];
650
		permutation[r] = r;
651

652
		/* advance or jump back (if this copy enabled another copy) */
653
		if (old_r < r && n_used[old_r] == 0) {
654
			r = old_r;
655
		} else {
656
			++r;
657
		}
658
	}
Matthias Braun's avatar
Matthias Braun committed
659

660
661
662
663
664
	/* at this point we only have "cycles" left which we have to resolve with
	 * perm instructions
	 * TODO: if we have free registers left, then we should really use copy
	 * instructions for any cycle longer than 2 registers...
	 * (this is probably architecture dependent, there might be archs where
665
	 *  copies are preferable even for 2-cycles) */
666

667
	/* create perms with the rest */
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
	for (r = 0; r < n_regs; /* empty */) {
		const arch_register_t *reg;
		unsigned  old_r = permutation[r];
		unsigned  r2;
		ir_node  *in[2];
		ir_node  *perm;
		ir_node  *proj0;
		ir_node  *proj1;

		if (old_r == r) {
			++r;
			continue;
		}

		/* we shouldn't have copies from 1 value to multiple destinations left*/
		assert(n_used[old_r] == 1);

		/* exchange old_r and r2; after that old_r is a fixed point */
		r2 = permutation[old_r];

		in[0] = ins[r2];
		in[1] = ins[old_r];
		perm = be_new_Perm(cls, block, 2, in);
691
692
693
		sched_add_before(before, perm);
		DB((dbg, LEVEL_2, "Perm %+F (perm %+F,%+F, before %+F)\n",
		    perm, in[0], in[1], before));
694
695

		proj0 = new_r_Proj(block, perm, get_irn_mode(in[0]), 0);
696
		mark_as_copy_of(proj0, in[0]);
697
698
		reg = arch_register_for_index(cls, old_r);
		use_reg(proj0, reg);
699
700
701
		if (live_nodes != NULL) {
			ir_nodeset_insert(live_nodes, proj0);
		}
702
703
704
705
706
707
708
709
710
711
712

		proj1 = new_r_Proj(block, perm, get_irn_mode(in[1]), 1);

		/* 1 value is now in the correct register */
		permutation[old_r] = old_r;
		/* the source of r changed to r2 */
		permutation[r] = r2;
		ins[r2] = in[1];
		reg = arch_register_for_index(cls, r2);
		if (r == r2) {
			/* if we have reached a fixpoint update data structures */
713
			mark_as_copy_of(proj1, in[1]);
714
			use_reg(proj1, reg);
715
716
717
			if (live_nodes != NULL) {
				ir_nodeset_insert(live_nodes, proj1);
			}
718
719
		} else {
			arch_set_irn_register(proj1, reg);
720
		}
Matthias Braun's avatar
Matthias Braun committed
721
	}
722
723
724
725
726
727
728

#ifdef DEBUG_libfirm
	/* now we should only have fixpoints left */
	for (r = 0; r < n_regs; ++r) {
		assert(permutation[r] == r);
	}
#endif
Matthias Braun's avatar
Matthias Braun committed
729
730
}

731
732
733
734
735
736
/**
 * Free regs for values last used.
 *
 * @param live_nodes   set of live nodes, will be updated
 * @param node         the node to consider
 */
Matthias Braun's avatar
Matthias Braun committed
737
738
static void free_last_uses(ir_nodeset_t *live_nodes, ir_node *node)
{
739
740
741
	allocation_info_t *info      = get_allocation_info(node);
	const unsigned    *last_uses = &info->last_uses;
	int                arity     = get_irn_arity(node);
Matthias Braun's avatar
Matthias Braun committed
742
743
	int                i;
	for (i = 0; i < arity; ++i) {
Michael Beck's avatar
Michael Beck committed
744
745
		ir_node *op;

746
		/* check if one operand is the last use */
747
		if (!rbitset_is_set(last_uses, i))
Matthias Braun's avatar
Matthias Braun committed
748
749
			continue;

Michael Beck's avatar
Michael Beck committed
750
		op = get_irn_n(node, i);
Matthias Braun's avatar
Matthias Braun committed
751
752
753
754
755
		free_reg_of_value(op);
		ir_nodeset_remove(live_nodes, op);
	}
}

756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
/**
 * Create a bitset of registers occupied with value living through an
 * instruction
 */
static void determine_live_through_regs(unsigned *bitset, ir_node *node)
{
	const allocation_info_t *info = get_allocation_info(node);
	unsigned r;
	int i;
	int arity;

	/* mark all used registers as potentially live-through */
	for (r = 0; r < n_regs; ++r) {
		const assignment_t *assignment = &assignments[r];
		if (assignment->value == NULL)
			continue;

		rbitset_set(bitset, r);
	}

	/* remove registers of value dying at the instruction */
	arity = get_irn_arity(node);
	for (i = 0; i < arity; ++i) {
		ir_node               *op;
		const arch_register_t *reg;

		if (!rbitset_is_set(&info->last_uses, i))
			continue;

		op  = get_irn_n(node, i);
		reg = arch_get_irn_register(op);
		rbitset_clear(bitset, arch_register_get_index(reg));
	}
}

791
792
793
794
795
796
/**
 * Enforce constraints at a node by live range splits.
 *
 * @param live_nodes  the set of live nodes, might be changed
 * @param node        the current node
 */
Matthias Braun's avatar
Matthias Braun committed
797
798
799
static void enforce_constraints(ir_nodeset_t *live_nodes, ir_node *node)
{
	int arity = get_irn_arity(node);
Michael Beck's avatar
Michael Beck committed
800
801
	int i, dummy, res;
	hungarian_problem_t *bp;
802
	unsigned l, r;
Michael Beck's avatar
Michael Beck committed
803
	unsigned *assignment;
Matthias Braun's avatar
Matthias Braun committed
804
805
806
807

	/* see if any use constraints are not met */
	bool good = true;
	for (i = 0; i < arity; ++i) {
Michael Beck's avatar
Michael Beck committed
808
		ir_node                   *op = get_irn_n(node, i);
809
		const arch_register_t     *reg;
Michael Beck's avatar
Michael Beck committed
810
811
812
813
		const arch_register_req_t *req;
		const unsigned            *limited;
		unsigned                  r;

Matthias Braun's avatar
Matthias Braun committed
814
815
816
		if (!arch_irn_consider_in_reg_alloc(cls, op))
			continue;

817
		/* are there any limitations for the i'th operand? */
Michael Beck's avatar
Michael Beck committed
818
		req = arch_get_register_req(node, i);
Christoph Mallon's avatar
Christoph Mallon committed
819
		if (!(req->type & arch_register_req_type_limited))
Matthias Braun's avatar
Matthias Braun committed
820
821
			continue;

Michael Beck's avatar
Michael Beck committed
822
		limited = req->limited;
823
824
		reg     = arch_get_irn_register(op);
		r       = arch_register_get_index(reg);
Matthias Braun's avatar
Matthias Braun committed
825
		if (!rbitset_is_set(limited, r)) {
826
			/* found an assignement outside the limited set */
Matthias Braun's avatar
Matthias Braun committed
827
828
829
830
831
			good = false;
			break;
		}
	}

832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
	/* construct a list of register occupied by live-through values */
	unsigned *live_through_regs = NULL;
	unsigned *output_regs       = NULL;

	/* is any of the live-throughs using a constrainted output register? */
	if (get_irn_mode(node) == mode_T) {
		const ir_edge_t *edge;

		foreach_out_edge(node, edge) {
			ir_node *proj = get_edge_src_irn(edge);
			const arch_register_req_t *req;

			if (!arch_irn_consider_in_reg_alloc(cls, proj))
				continue;

			req = arch_get_register_req_out(proj);
Christoph Mallon's avatar
Christoph Mallon committed
848
			if (!(req->type & arch_register_req_type_limited))
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
				continue;

			if (live_through_regs == NULL) {
				rbitset_alloca(live_through_regs, n_regs);
				determine_live_through_regs(live_through_regs, node);

				rbitset_alloca(output_regs, n_regs);
			}

			rbitset_or(output_regs, req->limited, n_regs);
			if (rbitsets_have_common(req->limited, live_through_regs, n_regs)) {
				good = false;
			}
		}
	} else {
		if (arch_irn_consider_in_reg_alloc(cls, node)) {
			const arch_register_req_t *req = arch_get_register_req_out(node);
			if (req->type & arch_register_req_type_limited) {
				rbitset_alloca(live_through_regs, n_regs);
				determine_live_through_regs(live_through_regs, node);
				if (rbitsets_have_common(req->limited, live_through_regs, n_regs)) {
					good = false;

					rbitset_alloca(output_regs, n_regs);
					rbitset_or(output_regs, req->limited, n_regs);
				}
			}
		}
	}

Matthias Braun's avatar
Matthias Braun committed
879
880
881
	if (good)
		return;

882
	/* create these arrays if we haven't yet */
883
884
885
886
	if (output_regs == NULL) {
		if (live_through_regs == NULL) {
			rbitset_alloca(live_through_regs, n_regs);
		}
887
888
889
		rbitset_alloca(output_regs, n_regs);
	}

890
891
	/* at this point we have to construct a bipartite matching problem to see
	   which values should go to which registers */
Michael Beck's avatar
Michael Beck committed
892
	bp = hungarian_new(n_regs, n_regs, HUNGARIAN_MATCH_PERFECT);
Matthias Braun's avatar
Matthias Braun committed
893
894
895
896

	/* add all combinations, then remove not allowed ones */
	for (l = 0; l < n_regs; ++l) {
		if (bitset_is_set(ignore_regs, l)) {
897
			hungarian_add(bp, l, l, 1);
Matthias Braun's avatar
Matthias Braun committed
898
899
900
901
902
903
			continue;
		}

		for (r = 0; r < n_regs; ++r) {
			if (bitset_is_set(ignore_regs, r))
				continue;
904
905
906
907
			/* livethrough values may not use constrainted output registers */
			if (rbitset_is_set(live_through_regs, l)
					&& rbitset_is_set(output_regs, r))
				continue;
Matthias Braun's avatar
Matthias Braun committed
908

909
			hungarian_add(bp, l, r, l == r ? 9 : 8);
Matthias Braun's avatar
Matthias Braun committed
910
911
912
913
		}
	}

	for (i = 0; i < arity; ++i) {
Michael Beck's avatar
Michael Beck committed
914
		ir_node                   *op = get_irn_n(node, i);
915
		const arch_register_t     *reg;
Michael Beck's avatar
Michael Beck committed
916
917
		const arch_register_req_t *req;
		const unsigned            *limited;
918
		unsigned                   current_reg;
Michael Beck's avatar
Michael Beck committed
919

Matthias Braun's avatar
Matthias Braun committed
920
921
922
		if (!arch_irn_consider_in_reg_alloc(cls, op))
			continue;

Michael Beck's avatar
Michael Beck committed
923
		req = arch_get_register_req(node, i);
Christoph Mallon's avatar
Christoph Mallon committed
924
		if (!(req->type & arch_register_req_type_limited))
Matthias Braun's avatar
Matthias Braun committed
925
926
			continue;

Michael Beck's avatar
Michael Beck committed
927
		limited     = req->limited;
928
929
		reg         = arch_get_irn_register(op);
		current_reg = arch_register_get_index(reg);
Matthias Braun's avatar
Matthias Braun committed
930
931
932
933
934
935
936
		for (r = 0; r < n_regs; ++r) {
			if (rbitset_is_set(limited, r))
				continue;
			hungarian_remv(bp, current_reg, r);
		}
	}

937
	hungarian_print_costmatrix(bp, 1);
Matthias Braun's avatar
Matthias Braun committed
938
939
	hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL);

Michael Beck's avatar
Michael Beck committed
940
941
	assignment = ALLOCAN(unsigned, n_regs);
	res = hungarian_solve(bp, (int*) assignment, &dummy, 0);
Matthias Braun's avatar
Matthias Braun committed
942
943
	assert(res == 0);

944
#if 1
Matthias Braun's avatar
Matthias Braun committed
945
	printf("Swap result:");
946
	for (i = 0; i < (int) n_regs; ++i) {
947
		printf(" %d", assignment[i]);
Matthias Braun's avatar
Matthias Braun committed
948
949
	}
	printf("\n");
950
#endif
Matthias Braun's avatar
Matthias Braun committed
951
952
953
954
955
956

	hungarian_free(bp);

	permutate_values(live_nodes, node, assignment);
}

957
/** test wether a node @p n is a copy of the value of node @p of */
958
static bool is_copy_of(ir_node *value, ir_node *test_value)
959
{
960
	allocation_info_t *test_info;
961

962
	if (value == NULL)
963
		return false;
964
	if (value == test_value)
965
		return true;
966

967
968
	test_info = get_allocation_info(test_value);
	return test_info->original_value == value;
969
970
}

971
972
/**
 * find a value in the end-assignment of a basic block
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
 * @returns the index into the assignment array if found
 *          -1 if not found
 */
static int find_value_in_block_info(block_info_t *info, ir_node *value)
{
	unsigned      r;
	assignment_t *assignments = info->assignments;
	for (r = 0; r < n_regs; ++r) {
		const assignment_t *assignment = &assignments[r];
		if (is_copy_of(assignment->value, value))
			return (int) r;
	}

	return -1;
}

/**
 * Create the necessary permutations at the end of a basic block to fullfill
 * the register assignment for phi-nodes in the next block
 */
static void add_phi_permutations(ir_node *block, int p)
{
	unsigned  r;
	unsigned *permutation;
	assignment_t *old_assignments;
998
	bool      need_permutation;
999
1000
	ir_node  *node;
	ir_node  *pred = get_Block_cfgpred_block(block, p);
For faster browsing, not all history is shown. View entire blame