benewalloc.c 39.5 KB
Newer Older
Matthias Braun's avatar
Matthias Braun committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
/*
 * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
 *
 * This file is part of libFirm.
 *
 * This file may be distributed and/or modified under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation and appearing in the file LICENSE.GPL included in the
 * packaging of this file.
 *
 * Licensees holding valid libFirm Professional Edition licenses may use
 * this file in accordance with the libFirm Commercial License.
 * Agreement provided with the Software.
 *
 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE.
 */

/**
 * @file
 * @brief       New approach to allocation and copy coalescing
 * @author      Matthias Braun
 * @date        14.2.2009
 * @version     $Id$
 *
 * ... WE NEED A NAME FOR THIS ...
 *
 * Only a proof of concept at this moment...
 *
 * The idea is to allocate registers in 2 passes:
Michael Beck's avatar
Michael Beck committed
32
 * 1. A first pass to determine "preferred" registers for live-ranges. This
Matthias Braun's avatar
Matthias Braun committed
33
 *    calculates for each register and each live-range a value indicating
Michael Beck's avatar
Michael Beck committed
34
 *    the usefulness. (You can roughly think of the value as the negative
Matthias Braun's avatar
Matthias Braun committed
35
36
 *    costs needed for copies when the value is in the specific registers...)
 *
37
38
39
 * 2. Walk blocks and assigns registers in a greedy fashion. Preferring
 *    registers with high preferences. When register constraints are not met,
 *    add copies and split live-ranges.
Matthias Braun's avatar
Matthias Braun committed
40
41
 *
 * TODO:
42
43
44
 *  - make use of free registers in the permutate_values code
 *  - We have to pessimistically construct Phi_0s when not all predecessors
 *    of a block are known.
Matthias Braun's avatar
Matthias Braun committed
45
46
47
48
49
 *  - Phi color assignment should give bonus points towards registers already
 *    assigned at predecessors.
 *  - think about a smarter sequence of visiting the blocks. Sorted by
 *    execfreq might be good, or looptree from inner to outermost loops going
 *    over blocks in a reverse postorder
50
 *  - propagate preferences through Phis
Matthias Braun's avatar
Matthias Braun committed
51
52
53
54
 */
#include "config.h"

#include <float.h>
55
#include <stdbool.h>
Matthias Braun's avatar
Matthias Braun committed
56

57
58
#include "error.h"
#include "execfreq.h"
Matthias Braun's avatar
Matthias Braun committed
59
#include "ircons.h"
60
#include "irdom.h"
61
62
63
64
65
#include "iredges_t.h"
#include "irgraph_t.h"
#include "irgwalk.h"
#include "irnode_t.h"
#include "obst.h"
Matthias Braun's avatar
Matthias Braun committed
66

67
68
#include "beabi.h"
#include "bechordal_t.h"
Matthias Braun's avatar
Matthias Braun committed
69
#include "be.h"
70
#include "beirg.h"
Matthias Braun's avatar
Matthias Braun committed
71
72
73
#include "belive_t.h"
#include "bemodule.h"
#include "benode_t.h"
74
75
#include "bera.h"
#include "besched.h"
76
#include "bespill.h"
77
#include "bespillutil.h"
Matthias Braun's avatar
Matthias Braun committed
78
79
80
81
82
#include "beverify.h"

#include "bipartite.h"
#include "hungarian.h"

83
84
85
86
87
#define USE_FACTOR         1.0f
#define DEF_FACTOR         1.0f
#define NEIGHBOR_FACTOR    0.2f
#define AFF_SHOULD_BE_SAME 1.0f
#define AFF_PHI            1.0f
Matthias Braun's avatar
Matthias Braun committed
88
89
90
91
92
93
94

DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)

static struct obstack               obst;
static be_irg_t                    *birg;
static ir_graph                    *irg;
static const arch_register_class_t *cls;
95
static const arch_register_req_t   *default_cls_req;
Matthias Braun's avatar
Matthias Braun committed
96
97
98
99
100
static be_lv_t                     *lv;
static const ir_exec_freq          *execfreqs;
static unsigned                     n_regs;
static bitset_t                    *ignore_regs;

101
/** info about the current assignment for a register */
Matthias Braun's avatar
Matthias Braun committed
102
103
104
struct assignment_t {
	ir_node *value;            /**< currently assigned value */
};
105
typedef struct assignment_t assignment_t;
Matthias Braun's avatar
Matthias Braun committed
106

107
/** currently active assignments (while processing a basic block) */
Matthias Braun's avatar
Matthias Braun committed
108
109
static assignment_t *assignments;

110
111
112
113
/**
 * allocation information: last_uses, register preferences
 * the information is per firm-node.
 */
Matthias Braun's avatar
Matthias Braun committed
114
struct allocation_info_t {
115
116
117
118
	unsigned  last_uses;      /**< bitset indicating last uses (input pos) */
	ir_node  *current_value;  /**< copy of the value that should be used */
	ir_node  *original_value; /**< for copies point to original value */
	float     prefs[0];       /**< register preferences */
Matthias Braun's avatar
Matthias Braun committed
119
};
120
typedef struct allocation_info_t allocation_info_t;
Matthias Braun's avatar
Matthias Braun committed
121

122
/** helper datastructure used when sorting register preferences */
Matthias Braun's avatar
Matthias Braun committed
123
124
125
126
struct reg_pref_t {
	unsigned num;
	float    pref;
};
127
128
129
130
typedef struct reg_pref_t reg_pref_t;

/** per basic-block information */
struct block_info_t {
131
	bool         processed;       /**< indicate wether block is processed */
132
133
134
	assignment_t assignments[0];  /**< register assignments at end of block */
};
typedef struct block_info_t block_info_t;
Matthias Braun's avatar
Matthias Braun committed
135

136
137
138
139
/**
 * Get the allocation info for a node.
 * The info is allocated on the first visit of a node.
 */
Matthias Braun's avatar
Matthias Braun committed
140
141
142
static allocation_info_t *get_allocation_info(ir_node *node)
{
	allocation_info_t *info;
Christoph Mallon's avatar
Christoph Mallon committed
143
	if (!irn_visited_else_mark(node)) {
144
		size_t size = sizeof(info[0]) + n_regs * sizeof(info->prefs[0]);
Matthias Braun's avatar
Matthias Braun committed
145
146
		info = obstack_alloc(&obst, size);
		memset(info, 0, size);
147
148
		info->current_value  = node;
		info->original_value = node;
Matthias Braun's avatar
Matthias Braun committed
149
150
151
152
153
154
155
156
		set_irn_link(node, info);
	} else {
		info = get_irn_link(node);
	}

	return info;
}

157
158
159
160
161
162
163
164
/**
 * Get allocation information for a basic block
 */
static block_info_t *get_block_info(ir_node *block)
{
	block_info_t *info;

	assert(is_Block(block));
Christoph Mallon's avatar
Christoph Mallon committed
165
	if (!irn_visited_else_mark(block)) {
166
167
168
169
170
171
172
173
174
175
176
		size_t size = sizeof(info[0]) + n_regs * sizeof(info->assignments[0]);
		info = obstack_alloc(&obst, size);
		memset(info, 0, size);
		set_irn_link(block, info);
	} else {
		info = get_irn_link(block);
	}

	return info;
}

177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
/**
 * Get default register requirement for the current register class
 */
static const arch_register_req_t *get_default_req_current_cls(void)
{
	if (default_cls_req == NULL) {
		struct obstack      *obst = get_irg_obstack(irg);
		arch_register_req_t *req  = obstack_alloc(obst, sizeof(*req));
		memset(req, 0, sizeof(*req));

		req->type = arch_register_req_type_normal;
		req->cls  = cls;

		default_cls_req = req;
	}
	return default_cls_req;
}

195
196
197
198
199
/**
 * Link the allocation info of a node to a copy.
 * Afterwards, both nodes uses the same allocation info.
 * Copy must not have an allocation info assigned yet.
 *
200
 * @param copy   the node that gets the allocation info assigned
201
202
 * @param value  the original node
 */
203
static void mark_as_copy_of(ir_node *copy, ir_node *value)
Matthias Braun's avatar
Matthias Braun committed
204
{
205
	ir_node           *original;
206
207
208
	allocation_info_t *info      = get_allocation_info(value);
	allocation_info_t *copy_info = get_allocation_info(copy);

209
210
211
212
213
214
215
	/* find original value */
	original = info->original_value;
	if (original != value) {
		info = get_allocation_info(original);
	}

	assert(info->original_value == original);
216
217
218
219
	info->current_value = copy;

	/* the copy should not be linked to something else yet */
	assert(copy_info->original_value == copy);
220
221
222
	/* copy over allocation preferences */
	memcpy(copy_info->prefs, info->prefs, n_regs * sizeof(copy_info->prefs[0]));
	copy_info->original_value = original;
Matthias Braun's avatar
Matthias Braun committed
223
224
}

225
226
227
/**
 * Calculate the penalties for every register on a node and its live neighbors.
 *
228
229
230
231
 * @param live_nodes  the set of live nodes at the current position, may be NULL
 * @param penalty     the penalty to subtract from
 * @param limited     a raw bitset containing the limited set for the node
 * @param node        the node
232
 */
Matthias Braun's avatar
Matthias Braun committed
233
234
static void give_penalties_for_limits(const ir_nodeset_t *live_nodes,
                                      float penalty, const unsigned* limited,
235
                                      ir_node *node)
Matthias Braun's avatar
Matthias Braun committed
236
{
Michael Beck's avatar
Michael Beck committed
237
238
	ir_nodeset_iterator_t iter;
	unsigned              r;
Matthias Braun's avatar
Matthias Braun committed
239
	allocation_info_t     *info = get_allocation_info(node);
Michael Beck's avatar
Michael Beck committed
240
	ir_node               *neighbor;
Matthias Braun's avatar
Matthias Braun committed
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263

	/* give penalty for all forbidden regs */
	for (r = 0; r < n_regs; ++r) {
		if (rbitset_is_set(limited, r))
			continue;

		info->prefs[r] -= penalty;
	}

	/* all other live values should get a penalty for allowed regs */
	if (live_nodes == NULL)
		return;

	/* TODO: reduce penalty if there are multiple allowed registers... */
	penalty *= NEIGHBOR_FACTOR;
	foreach_ir_nodeset(live_nodes, neighbor, iter) {
		allocation_info_t *neighbor_info;

		/* TODO: if op is used on multiple inputs we might not do a
		 * continue here */
		if (neighbor == node)
			continue;

Christoph Mallon's avatar
Christoph Mallon committed
264
		neighbor_info = get_allocation_info(neighbor);
Matthias Braun's avatar
Matthias Braun committed
265
266
267
268
269
270
271
272
273
		for (r = 0; r < n_regs; ++r) {
			if (!rbitset_is_set(limited, r))
				continue;

			neighbor_info->prefs[r] -= penalty;
		}
	}
}

274
275
276
277
278
279
280
281
282
/**
 * Calculate the preferences of a definition for the current register class.
 * If the definition uses a limited set of registers, reduce the preferences
 * for the limited register on the node and its neighbors.
 *
 * @param live_nodes  the set of live nodes at the current node
 * @param weight      the weight
 * @param node        the current node
 */
Matthias Braun's avatar
Matthias Braun committed
283
284
285
static void check_defs(const ir_nodeset_t *live_nodes, float weight,
                       ir_node *node)
{
Michael Beck's avatar
Michael Beck committed
286
287
	const arch_register_req_t *req;

Matthias Braun's avatar
Matthias Braun committed
288
289
290
291
292
293
294
295
296
297
298
299
	if (get_irn_mode(node) == mode_T) {
		const ir_edge_t *edge;
		foreach_out_edge(node, edge) {
			ir_node *proj = get_edge_src_irn(edge);
			check_defs(live_nodes, weight, proj);
		}
		return;
	}

	if (!arch_irn_consider_in_reg_alloc(cls, node))
		return;

Michael Beck's avatar
Michael Beck committed
300
	req = arch_get_register_req_out(node);
Matthias Braun's avatar
Matthias Braun committed
301
302
303
304
305
306
307
308
309
310
311
312
313
314
	if (req->type & arch_register_req_type_limited) {
		const unsigned *limited = req->limited;
		float           penalty = weight * DEF_FACTOR;
		give_penalties_for_limits(live_nodes, penalty, limited, node);
	}

	if (req->type & arch_register_req_type_should_be_same) {
		ir_node           *insn  = skip_Proj(node);
		allocation_info_t *info  = get_allocation_info(node);
		int                arity = get_irn_arity(insn);
		int                i;

		float factor = 1.0f / rbitset_popcnt(&req->other_same, arity);
		for (i = 0; i < arity; ++i) {
Michael Beck's avatar
Michael Beck committed
315
			ir_node           *op;
Christoph Mallon's avatar
Christoph Mallon committed
316
			unsigned           r;
Michael Beck's avatar
Michael Beck committed
317
318
			allocation_info_t *op_info;

Matthias Braun's avatar
Matthias Braun committed
319
320
321
			if (!rbitset_is_set(&req->other_same, i))
				continue;

Michael Beck's avatar
Michael Beck committed
322
323
			op      = get_irn_n(insn, i);
			op_info = get_allocation_info(op);
Matthias Braun's avatar
Matthias Braun committed
324
325
326
327
328
329
330
331
332
			for (r = 0; r < n_regs; ++r) {
				if (bitset_is_set(ignore_regs, r))
					continue;
				op_info->prefs[r] += info->prefs[r] * factor;
			}
		}
	}
}

333
334
335
336
/**
 * Walker: Runs an a block calculates the preferences for any
 * node and every register from the considered register class.
 */
Matthias Braun's avatar
Matthias Braun committed
337
338
339
340
341
342
343
344
345
346
347
static void analyze_block(ir_node *block, void *data)
{
	float         weight = get_block_execfreq(execfreqs, block);
	ir_nodeset_t  live_nodes;
	ir_node      *node;
	(void) data;

	ir_nodeset_init(&live_nodes);
	be_liveness_end_of_block(lv, cls, block, &live_nodes);

	sched_foreach_reverse(block, node) {
Michael Beck's avatar
Michael Beck committed
348
		allocation_info_t *info;
349
350
		int                i;
		int                arity;
Matthias Braun's avatar
Matthias Braun committed
351

352
		if (is_Phi(node))
Matthias Braun's avatar
Matthias Braun committed
353
354
355
356
357
358
359
			break;

		/* TODO give/take penalties for should_be_same/different) */
		check_defs(&live_nodes, weight, node);

		/* mark last uses */
		arity = get_irn_arity(node);
360
361
362
363
364
365
366
367

		/* the allocation info node currently only uses 1 unsigned value
		   to mark last used inputs. So we will fail for a node with more than
		   32 inputs. */
		if (arity >= (int) sizeof(unsigned) * 8) {
			panic("Node with more than %d inputs not supported yet",
					(int) sizeof(unsigned) * 8);
		}
Matthias Braun's avatar
Matthias Braun committed
368

Michael Beck's avatar
Michael Beck committed
369
		info = get_allocation_info(node);
Matthias Braun's avatar
Matthias Braun committed
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
		for (i = 0; i < arity; ++i) {
			ir_node *op = get_irn_n(node, i);
			if (!arch_irn_consider_in_reg_alloc(cls, op))
				continue;

			/* last usage of a value? */
			if (!ir_nodeset_contains(&live_nodes, op)) {
				rbitset_set(&info->last_uses, i);
			}
		}

		be_liveness_transfer(cls, node, &live_nodes);

		/* update weights based on usage constraints */
		for (i = 0; i < arity; ++i) {
Michael Beck's avatar
Michael Beck committed
385
386
387
388
			const arch_register_req_t *req;
			const unsigned            *limited;
			ir_node                   *op = get_irn_n(node, i);

Matthias Braun's avatar
Matthias Braun committed
389
390
391
			if (!arch_irn_consider_in_reg_alloc(cls, op))
				continue;

Michael Beck's avatar
Michael Beck committed
392
			req = arch_get_register_req(node, i);
Christoph Mallon's avatar
Christoph Mallon committed
393
			if (!(req->type & arch_register_req_type_limited))
Matthias Braun's avatar
Matthias Braun committed
394
395
396
397
				continue;

			/* TODO: give penalties to neighbors for precolored nodes! */

Michael Beck's avatar
Michael Beck committed
398
			limited = req->limited;
Matthias Braun's avatar
Matthias Braun committed
399
400
401
402
403
404
405
406
			give_penalties_for_limits(&live_nodes, weight * USE_FACTOR, limited,
			                          op);
		}
	}

	ir_nodeset_destroy(&live_nodes);
}

407
408
409
410
411
412
/**
 * Assign register reg to the given node.
 *
 * @param node  the node
 * @param reg   the register
 */
Matthias Braun's avatar
Matthias Braun committed
413
414
static void use_reg(ir_node *node, const arch_register_t *reg)
{
415
416
	unsigned      r          = arch_register_get_index(reg);
	assignment_t *assignment = &assignments[r];
Matthias Braun's avatar
Matthias Braun committed
417

418
	//assert(assignment->value == NULL);
Matthias Braun's avatar
Matthias Braun committed
419
420
421
422
423
	assignment->value = node;

	arch_set_irn_register(node, reg);
}

424
425
426
/**
 * Compare two register preferences in decreasing order.
 */
Matthias Braun's avatar
Matthias Braun committed
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
static int compare_reg_pref(const void *e1, const void *e2)
{
	const reg_pref_t *rp1 = (const reg_pref_t*) e1;
	const reg_pref_t *rp2 = (const reg_pref_t*) e2;
	if (rp1->pref < rp2->pref)
		return 1;
	if (rp1->pref > rp2->pref)
		return -1;
	return 0;
}

static void fill_sort_candidates(reg_pref_t *regprefs,
                                 const allocation_info_t *info)
{
	unsigned r;

	for (r = 0; r < n_regs; ++r) {
		float pref = info->prefs[r];
		if (bitset_is_set(ignore_regs, r)) {
			pref = -10000;
		}
		regprefs[r].num  = r;
		regprefs[r].pref = pref;
	}
	/* TODO: use a stable sort here to avoid unnecessary register jumping */
	qsort(regprefs, n_regs, sizeof(regprefs[0]), compare_reg_pref);
}

455
456
457
/**
 * Determine and assign a register for node @p node
 */
Matthias Braun's avatar
Matthias Braun committed
458
459
static void assign_reg(const ir_node *block, ir_node *node)
{
Michael Beck's avatar
Michael Beck committed
460
461
462
463
	const arch_register_t     *reg;
	allocation_info_t         *info;
	const arch_register_req_t *req;
	reg_pref_t                *reg_prefs;
Michael Beck's avatar
Michael Beck committed
464
	ir_node                   *in_node;
Michael Beck's avatar
Michael Beck committed
465
466
	unsigned                  i;

Matthias Braun's avatar
Matthias Braun committed
467
468
469
	assert(arch_irn_consider_in_reg_alloc(cls, node));

	/* preassigned register? */
Michael Beck's avatar
Michael Beck committed
470
	reg = arch_get_irn_register(node);
Matthias Braun's avatar
Matthias Braun committed
471
472
473
474
475
476
477
	if (reg != NULL) {
		DB((dbg, LEVEL_2, "Preassignment %+F -> %s\n", node, reg->name));
		use_reg(node, reg);
		return;
	}

	/* give should_be_same boni */
Michael Beck's avatar
Michael Beck committed
478
479
	info = get_allocation_info(node);
	req  = arch_get_register_req_out(node);
480

Michael Beck's avatar
Michael Beck committed
481
	in_node = skip_Proj(node);
Matthias Braun's avatar
Matthias Braun committed
482
483
	if (req->type & arch_register_req_type_should_be_same) {
		float weight = get_block_execfreq(execfreqs, block);
484
		int   arity  = get_irn_arity(in_node);
Matthias Braun's avatar
Matthias Braun committed
485
		int   i;
Michael Beck's avatar
Michael Beck committed
486

Matthias Braun's avatar
Matthias Braun committed
487
488
489
490
491
492
493
494
		assert(arity <= (int) sizeof(req->other_same) * 8);
		for (i = 0; i < arity; ++i) {
			ir_node               *in;
			const arch_register_t *reg;
			unsigned               r;
			if (!rbitset_is_set(&req->other_same, i))
				continue;

495
			in  = get_irn_n(in_node, i);
Matthias Braun's avatar
Matthias Braun committed
496
497
498
499
500
			reg = arch_get_irn_register(in);
			assert(reg != NULL);
			r = arch_register_get_index(reg);
			if (bitset_is_set(ignore_regs, r))
				continue;
501
			info->prefs[r] += weight * AFF_SHOULD_BE_SAME;
Matthias Braun's avatar
Matthias Braun committed
502
503
504
505
		}
	}

	DB((dbg, LEVEL_2, "Candidates for %+F:", node));
Michael Beck's avatar
Michael Beck committed
506
	reg_prefs = alloca(n_regs * sizeof(reg_prefs[0]));
Matthias Braun's avatar
Matthias Braun committed
507
508
509
510
511
512
513
514
515
516
	fill_sort_candidates(reg_prefs, info);
	for (i = 0; i < n_regs; ++i) {
		unsigned               num = reg_prefs[i].num;
		const arch_register_t *reg = arch_register_for_index(cls, num);
		DB((dbg, LEVEL_2, " %s(%f)", reg->name, reg_prefs[i].pref));
	}
	DB((dbg, LEVEL_2, "\n"));

	for (i = 0; i < n_regs; ++i) {
		unsigned r = reg_prefs[i].num;
517
		/* ignores are last and we should have at least 1 non-ignore left */
Matthias Braun's avatar
Matthias Braun committed
518
		assert(!bitset_is_set(ignore_regs, r));
519
		/* already used?
520
521
		   TODO: It might be better to copy the value occupying the register
		   around here instead of trying the next one, find out when... */
Matthias Braun's avatar
Matthias Braun committed
522
523
		if (assignments[r].value != NULL)
			continue;
524
525
526
		reg = arch_register_for_index(cls, r);
		DB((dbg, LEVEL_2, "Assign %+F -> %s\n", node, reg->name));
		use_reg(node, reg);
Matthias Braun's avatar
Matthias Braun committed
527
528
529
530
531
532
		break;
	}
}

static void free_reg_of_value(ir_node *node)
{
533
534
535
	assignment_t          *assignment;
	const arch_register_t *reg;
	unsigned               r;
Michael Beck's avatar
Michael Beck committed
536

Matthias Braun's avatar
Matthias Braun committed
537
538
539
	if (!arch_irn_consider_in_reg_alloc(cls, node))
		return;

540
541
542
	reg               = arch_get_irn_register(node);
	r                 = arch_register_get_index(reg);
	assignment        = &assignments[r];
543
	assert(assignment->value == node);
544
	assignment->value = NULL;
Matthias Braun's avatar
Matthias Braun committed
545
546
}

547
548
549
550
/**
 * Add an permutation in front of a node and change the assignments
 * due to this permutation.
 *
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
 * To understand this imagine a permutation like this:
 *
 * 1 -> 2
 * 2 -> 3
 * 3 -> 1, 5
 * 4 -> 6
 * 5
 * 6
 * 7 -> 7
 *
 * First we count how many destinations a single value has. At the same time
 * we can be sure that each destination register has at most 1 source register
 * (it can have 0 which means we don't care what value is in it).
 * We ignore all fullfilled permuations (like 7->7)
 * In a first pass we create as much copy instructions as possible as they
 * are generally cheaper than exchanges. We do this by counting into how many
 * destinations a register has to be copied (in the example it's 2 for register
 * 3, or 1 for the registers 1,2,4 and 7).
 * We can then create a copy into every destination register when the usecount
 * of that register is 0 (= noone else needs the value in the register).
 *
 * After this step we should have cycles left. We implement a cyclic permutation
 * of n registers with n-1 transpositions.
 *
575
576
 * @param live_nodes   the set of live nodes, updated due to live range split
 * @param before       the node before we add the permutation
577
578
579
 * @param permutation  the permutation array indices are the destination
 *                     registers, the values in the array are the source
 *                     registers.
580
 */
Matthias Braun's avatar
Matthias Braun committed
581
582
583
static void permutate_values(ir_nodeset_t *live_nodes, ir_node *before,
                             unsigned *permutation)
{
584
585
	ir_node  **ins    = ALLOCANZ(ir_node*, n_regs);
	unsigned  *n_used = ALLOCANZ(unsigned, n_regs);
586
	ir_node   *block;
587
	unsigned   r;
Matthias Braun's avatar
Matthias Braun committed
588

589
	/* create a list of permutations. Leave out fix points. */
Matthias Braun's avatar
Matthias Braun committed
590
	for (r = 0; r < n_regs; ++r) {
591
		unsigned      old_reg = permutation[r];
Michael Beck's avatar
Michael Beck committed
592
593
594
		assignment_t *assignment;
		ir_node      *value;

595
596
		/* no need to do anything for a fixpoint */
		if (old_reg == r)
Matthias Braun's avatar
Matthias Braun committed
597
598
			continue;

599
		assignment = &assignments[old_reg];
Michael Beck's avatar
Michael Beck committed
600
		value      = assignment->value;
Matthias Braun's avatar
Matthias Braun committed
601
		if (value == NULL) {
602
603
			/* nothing to do here, reg is not live. Mark it as fixpoint
			 * so we ignore it in the next steps */
Matthias Braun's avatar
Matthias Braun committed
604
605
606
607
			permutation[r] = r;
			continue;
		}

608
609
		ins[old_reg] = value;
		++n_used[old_reg];
Matthias Braun's avatar
Matthias Braun committed
610

611
		/* free occupation infos, we'll add the values back later */
612
613
614
		if (live_nodes != NULL) {
			ir_nodeset_remove(live_nodes, value);
		}
Matthias Braun's avatar
Matthias Braun committed
615
616
	}

Michael Beck's avatar
Michael Beck committed
617
	block = get_nodes_block(before);
Matthias Braun's avatar
Matthias Braun committed
618

619
620
621
	/* step1: create copies where immediately possible */
	for (r = 0; r < n_regs; /* empty */) {
		ir_node *copy;
622
		ir_node *src;
Michael Beck's avatar
Michael Beck committed
623
		const arch_register_t *reg;
624
		unsigned               old_r = permutation[r];
Michael Beck's avatar
Michael Beck committed
625

626
627
628
		/* - no need to do anything for fixed points.
		   - we can't copy if the value in the dest reg is still needed */
		if (old_r == r || n_used[r] > 0) {
629
			++r;
Matthias Braun's avatar
Matthias Braun committed
630
			continue;
631
		}
Matthias Braun's avatar
Matthias Braun committed
632

633
		/* create a copy */
634
		src = ins[old_r];
635
		copy = be_new_Copy(cls, block, src);
636
		sched_add_before(before, copy);
637
		reg = arch_register_for_index(cls, r);
638
639
640
		DB((dbg, LEVEL_2, "Copy %+F (from %+F, before %+F) -> %s\n",
		    copy, src, before, reg->name));
		mark_as_copy_of(copy, src);
641
		use_reg(copy, reg);
642
643
644
645

		if (live_nodes != NULL) {
			ir_nodeset_insert(live_nodes, copy);
		}
646

647
648
		/* old register has 1 user less, permutation is resolved */
		assert(arch_register_get_index(arch_get_irn_register(src)) == old_r);
649
650
		permutation[r] = r;

651
652
		assert(n_used[old_r] > 0);
		--n_used[old_r];
653
654
655
		if (n_used[old_r] == 0) {
			free_reg_of_value(src);
		}
656

657
		/* advance or jump back (if this copy enabled another copy) */
658
		if (old_r < r && n_used[old_r] == 0) {
659
			r = old_r;
660
		} else {
661
			++r;
662
		}
663
	}
Matthias Braun's avatar
Matthias Braun committed
664

665
666
667
668
669
	/* at this point we only have "cycles" left which we have to resolve with
	 * perm instructions
	 * TODO: if we have free registers left, then we should really use copy
	 * instructions for any cycle longer than 2 registers...
	 * (this is probably architecture dependent, there might be archs where
670
	 *  copies are preferable even for 2-cycles) */
671

672
	/* create perms with the rest */
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
	for (r = 0; r < n_regs; /* empty */) {
		const arch_register_t *reg;
		unsigned  old_r = permutation[r];
		unsigned  r2;
		ir_node  *in[2];
		ir_node  *perm;
		ir_node  *proj0;
		ir_node  *proj1;

		if (old_r == r) {
			++r;
			continue;
		}

		/* we shouldn't have copies from 1 value to multiple destinations left*/
		assert(n_used[old_r] == 1);

		/* exchange old_r and r2; after that old_r is a fixed point */
		r2 = permutation[old_r];

		in[0] = ins[r2];
		in[1] = ins[old_r];
		perm = be_new_Perm(cls, block, 2, in);
696
697
698
		sched_add_before(before, perm);
		DB((dbg, LEVEL_2, "Perm %+F (perm %+F,%+F, before %+F)\n",
		    perm, in[0], in[1], before));
699
700

		proj0 = new_r_Proj(block, perm, get_irn_mode(in[0]), 0);
701
		mark_as_copy_of(proj0, in[0]);
702
703
		reg = arch_register_for_index(cls, old_r);
		use_reg(proj0, reg);
704
705
706
		if (live_nodes != NULL) {
			ir_nodeset_insert(live_nodes, proj0);
		}
707
708
709
710
711
712
713
714
715
716
717

		proj1 = new_r_Proj(block, perm, get_irn_mode(in[1]), 1);

		/* 1 value is now in the correct register */
		permutation[old_r] = old_r;
		/* the source of r changed to r2 */
		permutation[r] = r2;
		ins[r2] = in[1];
		reg = arch_register_for_index(cls, r2);
		if (r == r2) {
			/* if we have reached a fixpoint update data structures */
718
			mark_as_copy_of(proj1, in[1]);
719
			use_reg(proj1, reg);
720
721
722
			if (live_nodes != NULL) {
				ir_nodeset_insert(live_nodes, proj1);
			}
723
724
		} else {
			arch_set_irn_register(proj1, reg);
725
		}
Matthias Braun's avatar
Matthias Braun committed
726
	}
727
728
729
730
731
732
733

#ifdef DEBUG_libfirm
	/* now we should only have fixpoints left */
	for (r = 0; r < n_regs; ++r) {
		assert(permutation[r] == r);
	}
#endif
Matthias Braun's avatar
Matthias Braun committed
734
735
}

736
737
738
739
740
741
/**
 * Free regs for values last used.
 *
 * @param live_nodes   set of live nodes, will be updated
 * @param node         the node to consider
 */
Matthias Braun's avatar
Matthias Braun committed
742
743
static void free_last_uses(ir_nodeset_t *live_nodes, ir_node *node)
{
744
745
746
	allocation_info_t *info      = get_allocation_info(node);
	const unsigned    *last_uses = &info->last_uses;
	int                arity     = get_irn_arity(node);
Matthias Braun's avatar
Matthias Braun committed
747
748
	int                i;
	for (i = 0; i < arity; ++i) {
Michael Beck's avatar
Michael Beck committed
749
750
		ir_node *op;

751
		/* check if one operand is the last use */
752
		if (!rbitset_is_set(last_uses, i))
Matthias Braun's avatar
Matthias Braun committed
753
754
			continue;

Michael Beck's avatar
Michael Beck committed
755
		op = get_irn_n(node, i);
Matthias Braun's avatar
Matthias Braun committed
756
757
758
759
760
		free_reg_of_value(op);
		ir_nodeset_remove(live_nodes, op);
	}
}

761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
/**
 * Create a bitset of registers occupied with value living through an
 * instruction
 */
static void determine_live_through_regs(unsigned *bitset, ir_node *node)
{
	const allocation_info_t *info = get_allocation_info(node);
	unsigned r;
	int i;
	int arity;

	/* mark all used registers as potentially live-through */
	for (r = 0; r < n_regs; ++r) {
		const assignment_t *assignment = &assignments[r];
		if (assignment->value == NULL)
			continue;

		rbitset_set(bitset, r);
	}

	/* remove registers of value dying at the instruction */
	arity = get_irn_arity(node);
	for (i = 0; i < arity; ++i) {
		ir_node               *op;
		const arch_register_t *reg;

		if (!rbitset_is_set(&info->last_uses, i))
			continue;

		op  = get_irn_n(node, i);
		reg = arch_get_irn_register(op);
		rbitset_clear(bitset, arch_register_get_index(reg));
	}
}

796
797
798
/**
 * Enforce constraints at a node by live range splits.
 *
799
800
 * @param  live_nodes  the set of live nodes, might be changed
 * @param  node        the current node
801
 */
Matthias Braun's avatar
Matthias Braun committed
802
803
804
static void enforce_constraints(ir_nodeset_t *live_nodes, ir_node *node)
{
	int arity = get_irn_arity(node);
Michael Beck's avatar
Michael Beck committed
805
806
	int i, dummy, res;
	hungarian_problem_t *bp;
807
	unsigned l, r;
Michael Beck's avatar
Michael Beck committed
808
	unsigned *assignment;
Matthias Braun's avatar
Matthias Braun committed
809

Michael Beck's avatar
Michael Beck committed
810
811
812
813
	/* construct a list of register occupied by live-through values */
	unsigned *live_through_regs = NULL;
	unsigned *output_regs       = NULL;

Matthias Braun's avatar
Matthias Braun committed
814
815
816
	/* see if any use constraints are not met */
	bool good = true;
	for (i = 0; i < arity; ++i) {
Michael Beck's avatar
Michael Beck committed
817
		ir_node                   *op = get_irn_n(node, i);
818
		const arch_register_t     *reg;
Michael Beck's avatar
Michael Beck committed
819
820
821
822
		const arch_register_req_t *req;
		const unsigned            *limited;
		unsigned                  r;

Matthias Braun's avatar
Matthias Braun committed
823
824
825
		if (!arch_irn_consider_in_reg_alloc(cls, op))
			continue;

826
		/* are there any limitations for the i'th operand? */
Michael Beck's avatar
Michael Beck committed
827
		req = arch_get_register_req(node, i);
Christoph Mallon's avatar
Christoph Mallon committed
828
		if (!(req->type & arch_register_req_type_limited))
Matthias Braun's avatar
Matthias Braun committed
829
830
			continue;

Michael Beck's avatar
Michael Beck committed
831
		limited = req->limited;
832
833
		reg     = arch_get_irn_register(op);
		r       = arch_register_get_index(reg);
Matthias Braun's avatar
Matthias Braun committed
834
		if (!rbitset_is_set(limited, r)) {
Michael Beck's avatar
Michael Beck committed
835
			/* found an assignment outside the limited set */
Matthias Braun's avatar
Matthias Braun committed
836
837
838
839
840
			good = false;
			break;
		}
	}

Michael Beck's avatar
Michael Beck committed
841
	/* is any of the live-throughs using a constrained output register? */
842
843
844
845
846
847
848
849
850
851
852
	if (get_irn_mode(node) == mode_T) {
		const ir_edge_t *edge;

		foreach_out_edge(node, edge) {
			ir_node *proj = get_edge_src_irn(edge);
			const arch_register_req_t *req;

			if (!arch_irn_consider_in_reg_alloc(cls, proj))
				continue;

			req = arch_get_register_req_out(proj);
Christoph Mallon's avatar
Christoph Mallon committed
853
			if (!(req->type & arch_register_req_type_limited))
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
				continue;

			if (live_through_regs == NULL) {
				rbitset_alloca(live_through_regs, n_regs);
				determine_live_through_regs(live_through_regs, node);

				rbitset_alloca(output_regs, n_regs);
			}

			rbitset_or(output_regs, req->limited, n_regs);
			if (rbitsets_have_common(req->limited, live_through_regs, n_regs)) {
				good = false;
			}
		}
	} else {
		if (arch_irn_consider_in_reg_alloc(cls, node)) {
			const arch_register_req_t *req = arch_get_register_req_out(node);
			if (req->type & arch_register_req_type_limited) {
				rbitset_alloca(live_through_regs, n_regs);
				determine_live_through_regs(live_through_regs, node);
				if (rbitsets_have_common(req->limited, live_through_regs, n_regs)) {
					good = false;

					rbitset_alloca(output_regs, n_regs);
					rbitset_or(output_regs, req->limited, n_regs);
				}
			}
		}
	}

Matthias Braun's avatar
Matthias Braun committed
884
885
886
	if (good)
		return;

887
	/* create these arrays if we haven't yet */
888
889
890
891
	if (output_regs == NULL) {
		if (live_through_regs == NULL) {
			rbitset_alloca(live_through_regs, n_regs);
		}
892
893
894
		rbitset_alloca(output_regs, n_regs);
	}

895
896
	/* at this point we have to construct a bipartite matching problem to see
	   which values should go to which registers */
Michael Beck's avatar
Michael Beck committed
897
	bp = hungarian_new(n_regs, n_regs, HUNGARIAN_MATCH_PERFECT);
Matthias Braun's avatar
Matthias Braun committed
898
899
900
901

	/* add all combinations, then remove not allowed ones */
	for (l = 0; l < n_regs; ++l) {
		if (bitset_is_set(ignore_regs, l)) {
902
			hungarian_add(bp, l, l, 1);
Matthias Braun's avatar
Matthias Braun committed
903
904
905
906
907
908
			continue;
		}

		for (r = 0; r < n_regs; ++r) {
			if (bitset_is_set(ignore_regs, r))
				continue;
909
910
911
912
			/* livethrough values may not use constrainted output registers */
			if (rbitset_is_set(live_through_regs, l)
					&& rbitset_is_set(output_regs, r))
				continue;
Matthias Braun's avatar
Matthias Braun committed
913

914
			hungarian_add(bp, r, l, l == r ? 9 : 8);
Matthias Braun's avatar
Matthias Braun committed
915
916
917
918
		}
	}

	for (i = 0; i < arity; ++i) {
Michael Beck's avatar
Michael Beck committed
919
		ir_node                   *op = get_irn_n(node, i);
920
		const arch_register_t     *reg;
Michael Beck's avatar
Michael Beck committed
921
922
		const arch_register_req_t *req;
		const unsigned            *limited;
923
		unsigned                   current_reg;
Michael Beck's avatar
Michael Beck committed
924

Matthias Braun's avatar
Matthias Braun committed
925
926
927
		if (!arch_irn_consider_in_reg_alloc(cls, op))
			continue;

Michael Beck's avatar
Michael Beck committed
928
		req = arch_get_register_req(node, i);
Christoph Mallon's avatar
Christoph Mallon committed
929
		if (!(req->type & arch_register_req_type_limited))
Matthias Braun's avatar
Matthias Braun committed
930
931
			continue;

Michael Beck's avatar
Michael Beck committed
932
		limited     = req->limited;
933
934
		reg         = arch_get_irn_register(op);
		current_reg = arch_register_get_index(reg);
Matthias Braun's avatar
Matthias Braun committed
935
936
937
		for (r = 0; r < n_regs; ++r) {
			if (rbitset_is_set(limited, r))
				continue;
938
			hungarian_remv(bp, r, current_reg);
Matthias Braun's avatar
Matthias Braun committed
939
940
941
		}
	}

942
	hungarian_print_costmatrix(bp, 1);
Matthias Braun's avatar
Matthias Braun committed
943
944
	hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL);

Michael Beck's avatar
Michael Beck committed
945
946
	assignment = ALLOCAN(unsigned, n_regs);
	res = hungarian_solve(bp, (int*) assignment, &dummy, 0);
Matthias Braun's avatar
Matthias Braun committed
947
948
	assert(res == 0);

949
#if 1
Matthias Braun's avatar
Matthias Braun committed
950
	printf("Swap result:");
951
	for (i = 0; i < (int) n_regs; ++i) {
952
		printf(" %d", assignment[i]);
Matthias Braun's avatar
Matthias Braun committed
953
954
	}
	printf("\n");
955
#endif
Matthias Braun's avatar
Matthias Braun committed
956
957
958
959
960
961

	hungarian_free(bp);

	permutate_values(live_nodes, node, assignment);
}

962
/** test wether a node @p n is a copy of the value of node @p of */
963
static bool is_copy_of(ir_node *value, ir_node *test_value)
964
{
965
	allocation_info_t *test_info;
966
	allocation_info_t *info;
967

968
	if (value == test_value)
969
		return true;
970

971
	info      = get_allocation_info(value);
972
	test_info = get_allocation_info(test_value);
973
	return test_info->original_value == info->original_value;
974
975
}

976
977
/**
 * find a value in the end-assignment of a basic block
978
979
980
981
982
983
984
985
986
 * @returns the index into the assignment array if found
 *          -1 if not found
 */
static int find_value_in_block_info(block_info_t *info, ir_node *value)
{
	unsigned      r;
	assignment_t *assignments = info->assignments;
	for (r = 0; r < n_regs; ++r) {
		const assignment_t *assignment = &assignments[r];
987
988
989
990
991
		ir_node            *a_value    = assignment->value;

		if (a_value == NULL)
			continue;
		if (is_copy_of(a_value, value))
992
993
994
995
996
997
998
999
1000
			return (int) r;
	}

	return -1;
}

/**
 * Create the necessary permutations at the end of a basic block to fullfill
 * the register assignment for phi-nodes in the next block
For faster browsing, not all history is shown. View entire blame