/*
 * Copyright (C) 1995-2011 University of Karlsruhe.  All rights reserved.
 *
 * This file is part of libFirm.
 *
 * This file may be distributed and/or modified under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation and appearing in the file LICENSE.GPL included in the
 * packaging of this file.
 *
 * Licensees holding valid libFirm Professional Edition licenses may use
 * this file in accordance with the libFirm Commercial License
 * Agreement provided with the Software.
 *
 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE.
 */

/**
 * @file
 * @brief       Preference Guided Register Assignment
 * @author      Matthias Braun
 * @date        14.2.2009
 * @version     $Id$
 *
 * The idea is to allocate registers in 2 passes:
 * 1. A first pass to determine "preferred" registers for live-ranges. This
 *    calculates, for each register and each live-range, a value indicating
 *    its usefulness. (You can roughly think of the value as the negative of
 *    the copy costs incurred when the value sits in that specific register.)
 *
 * 2. Walk blocks and assign registers in a greedy fashion, preferring
 *    registers with high preferences. When register constraints are not met,
 *    add copies and split live-ranges.
 *
 * TODO:
 *  - make use of free registers in the permute_values code
 */
#include "config.h"

#include <float.h>
#include <stdbool.h>
#include <math.h>

#include "error.h"
#include "execfreq.h"
#include "ircons.h"
#include "irdom.h"
#include "iredges_t.h"
#include "irgraph_t.h"
#include "irgwalk.h"
#include "irnode_t.h"
#include "irprintf.h"
#include "irdump.h"
#include "obst.h"
#include "raw_bitset.h"
#include "unionfind.h"
#include "pdeq.h"
#include "hungarian.h"

#include "beabi.h"
#include "bechordal_t.h"
#include "be.h"
#include "beirg.h"
#include "belive_t.h"
#include "bemodule.h"
#include "benode.h"
#include "bera.h"
#include "besched.h"
#include "bespill.h"
#include "bespillutil.h"
#include "beverify.h"
#include "beutil.h"
#include "bestack.h"

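/* Relative weights used when computing register preferences; they are
 * additionally scaled by the execution frequency of the block:
 *  - USE_FACTOR/DEF_FACTOR weight limited-register constraints at uses and
 *    definitions,
 *  - NEIGHBOR_FACTOR dampens the penalty propagated to live neighbors,
 *  - AFF_SHOULD_BE_SAME weights should_be_same affinities (AFF_PHI is
 *    applied to phi-related affinities elsewhere in the allocator),
 *  - SPLIT_DELTA scales the minimum gain required for an optimistic
 *    live-range split,
 *  - MAX_OPTIMISTIC_SPLIT_RECURSION bounds the recursion depth of such
 *    splits. */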
#define USE_FACTOR                     1.0f
#define DEF_FACTOR                     1.0f
#define NEIGHBOR_FACTOR                0.2f
#define AFF_SHOULD_BE_SAME             0.5f
#define AFF_PHI                        1.0f
#define SPLIT_DELTA                    1.0f
#define MAX_OPTIMISTIC_SPLIT_RECURSION 0

DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)

static struct obstack               obst;
static ir_graph                    *irg;
static const arch_register_class_t *cls;
static be_lv_t                     *lv;
static const ir_exec_freq          *execfreqs;
static unsigned                     n_regs;
static unsigned                    *normal_regs;
static int                         *congruence_classes;
static ir_node                    **block_order;
static int                          n_block_order;
static int                          create_preferences        = true;
static int                          create_congruence_classes = true;
static int                          propagate_phi_registers   = true;

static const lc_opt_table_entry_t options[] = {
	LC_OPT_ENT_BOOL("prefs", "use preference based coloring", &create_preferences),
	LC_OPT_ENT_BOOL("congruences", "create congruence classes", &create_congruence_classes),
	LC_OPT_ENT_BOOL("prop_phi", "propagate phi registers", &propagate_phi_registers),
	LC_OPT_LAST
};

/** currently active assignments (while processing a basic block)
 * maps registers to values (their current copies) */
static ir_node **assignments;

/**
 * allocation information: last_uses, register preferences
 * the information is per firm-node.
 */
struct allocation_info_t {
	unsigned  last_uses[2];   /**< bitset indicating last uses (input pos) */
	ir_node  *current_value;  /**< copy of the value that should be used */
	ir_node  *original_value; /**< for copies point to original value */
	float     prefs[0];       /**< register preferences */
};
typedef struct allocation_info_t allocation_info_t;

/** helper data structure used when sorting register preferences */
struct reg_pref_t {
	unsigned num;
	float    pref;
};
typedef struct reg_pref_t reg_pref_t;

/** per basic-block information */
struct block_info_t {
	bool     processed;       /**< indicate whether block is processed */
	ir_node *assignments[0];  /**< register assignments at end of block */
};
typedef struct block_info_t block_info_t;

/**
 * Get the allocation info for a node.
 * The info is allocated on the first visit of a node.
 */
static allocation_info_t *get_allocation_info(ir_node *node)
{
	allocation_info_t *info = (allocation_info_t*)get_irn_link(node);
	if (info == NULL) {
		info = OALLOCFZ(&obst, allocation_info_t, prefs, n_regs);
		info->current_value  = node;
		info->original_value = node;
		set_irn_link(node, info);
	}

	return info;
}

static allocation_info_t *try_get_allocation_info(const ir_node *node)
{
	return (allocation_info_t*) get_irn_link(node);
}

/**
 * Get allocation information for a basic block
 */
static block_info_t *get_block_info(ir_node *block)
{
	block_info_t *info = (block_info_t*)get_irn_link(block);

	assert(is_Block(block));
	if (info == NULL) {
		info = OALLOCFZ(&obst, block_info_t, assignments, n_regs);
		set_irn_link(block, info);
	}

	return info;
}

/**
 * Link the allocation info of a node to a copy.
 * Afterwards, both nodes use the same allocation info.
 * Copy must not have an allocation info assigned yet.
 *
 * @param copy   the node that gets the allocation info assigned
 * @param value  the original node
 */
static void mark_as_copy_of(ir_node *copy, ir_node *value)
{
	ir_node           *original;
	allocation_info_t *info      = get_allocation_info(value);
	allocation_info_t *copy_info = get_allocation_info(copy);

	/* find original value */
	original = info->original_value;
	if (original != value) {
		info = get_allocation_info(original);
	}

	assert(info->original_value == original);
	info->current_value = copy;

	/* the copy should not be linked to something else yet */
	assert(copy_info->original_value == copy);
	copy_info->original_value = original;

	/* copy over allocation preferences */
	memcpy(copy_info->prefs, info->prefs, n_regs * sizeof(copy_info->prefs[0]));
}

/**
 * Calculate the penalties for every register on a node and its live neighbors.
 *
 * @param live_nodes  the set of live nodes at the current position, may be NULL
 * @param penalty     the penalty to subtract from
 * @param limited     a raw bitset containing the limited set for the node
 * @param node        the node
 */
static void give_penalties_for_limits(const ir_nodeset_t *live_nodes,
                                      float penalty, const unsigned* limited,
                                      ir_node *node)
{
	ir_nodeset_iterator_t iter;
	unsigned              r;
	size_t                n_allowed;
	allocation_info_t     *info = get_allocation_info(node);
	ir_node               *neighbor;

	/* give penalty for all forbidden regs */
	for (r = 0; r < n_regs; ++r) {
		if (rbitset_is_set(limited, r))
			continue;

		info->prefs[r] -= penalty;
	}

	/* all other live values should get a penalty for allowed regs */
	if (live_nodes == NULL)
		return;

	penalty   *= NEIGHBOR_FACTOR;
	n_allowed  = rbitset_popcount(limited, n_regs);
	if (n_allowed > 1) {
		/* only create a very weak penalty if multiple regs are allowed */
		penalty = (penalty * 0.8f) / n_allowed;
	}
	foreach_ir_nodeset(live_nodes, neighbor, iter) {
		allocation_info_t *neighbor_info;

		/* TODO: if op is used on multiple inputs we might not do a
		 * continue here */
		if (neighbor == node)
			continue;

		neighbor_info = get_allocation_info(neighbor);
		for (r = 0; r < n_regs; ++r) {
			if (!rbitset_is_set(limited, r))
				continue;

			neighbor_info->prefs[r] -= penalty;
		}
	}
}

/**
 * Calculate the preferences of a definition for the current register class.
 * If the definition uses a limited set of registers, reduce the preferences
 * for the limited register on the node and its neighbors.
 *
 * @param live_nodes  the set of live nodes at the current node
 * @param weight      the weight
 * @param node        the current node
 */
static void check_defs(const ir_nodeset_t *live_nodes, float weight,
                       ir_node *node)
{
	const arch_register_req_t *req = arch_get_register_req_out(node);
	if (req->type & arch_register_req_type_limited) {
		const unsigned *limited = req->limited;
		float           penalty = weight * DEF_FACTOR;
		give_penalties_for_limits(live_nodes, penalty, limited, node);
	}

	if (req->type & arch_register_req_type_should_be_same) {
		ir_node           *insn  = skip_Proj(node);
		allocation_info_t *info  = get_allocation_info(node);
		int                arity = get_irn_arity(insn);
		int                i;

		float factor = 1.0f / rbitset_popcount(&req->other_same, arity);
		for (i = 0; i < arity; ++i) {
			ir_node           *op;
			unsigned           r;
			allocation_info_t *op_info;

			if (!rbitset_is_set(&req->other_same, i))
				continue;

			op = get_irn_n(insn, i);

			/* if the value at the should_be_same input doesn't die at the
			 * node, then it is no use to propagate the constraints (since a
			 * copy will emerge anyway) */
			if (ir_nodeset_contains(live_nodes, op))
				continue;

			op_info = get_allocation_info(op);
			for (r = 0; r < n_regs; ++r) {
				op_info->prefs[r] += info->prefs[r] * factor;
			}
		}
	}
}

/**
 * Walker: runs on a block and calculates the preferences for every
 * node and every register from the considered register class.
 */
static void analyze_block(ir_node *block, void *data)
{
	float         weight = (float)get_block_execfreq(execfreqs, block);
	ir_nodeset_t  live_nodes;
	ir_node      *node;
	(void) data;

	ir_nodeset_init(&live_nodes);
	be_liveness_end_of_block(lv, cls, block, &live_nodes);

	sched_foreach_reverse(block, node) {
		allocation_info_t *info;
		int                i;
		int                arity;

		if (is_Phi(node))
			break;

		if (create_preferences) {
			ir_node *value;
			be_foreach_definition(node, cls, value,
				check_defs(&live_nodes, weight, value);
			);
		}

		/* mark last uses */
		arity = get_irn_arity(node);

		/* the allocation info currently uses a fixed-size bitset to mark the
		   last-used inputs, so we fail for nodes with more inputs than fit
		   into it. */
		if (arity >= (int) sizeof(info->last_uses) * 8) {
			panic("Node with more than %d inputs not supported yet",
					(int) sizeof(info->last_uses) * 8);
		}

		info = get_allocation_info(node);
		for (i = 0; i < arity; ++i) {
			ir_node                   *op  = get_irn_n(node, i);
			const arch_register_req_t *req = arch_get_register_req_out(op);
			if (req->cls != cls)
				continue;

			/* last usage of a value? */
			if (!ir_nodeset_contains(&live_nodes, op)) {
				rbitset_set(info->last_uses, i);
			}
		}

		be_liveness_transfer(cls, node, &live_nodes);

		if (create_preferences) {
			/* update weights based on usage constraints */
			for (i = 0; i < arity; ++i) {
				const arch_register_req_t *req;
				const unsigned            *limited;
				ir_node                   *op = get_irn_n(node, i);

				if (!arch_irn_consider_in_reg_alloc(cls, op))
					continue;

				req = arch_get_register_req(node, i);
				if (!(req->type & arch_register_req_type_limited))
					continue;

				limited = req->limited;
				give_penalties_for_limits(&live_nodes, weight * USE_FACTOR,
				                          limited, op);
			}
		}
	}

	ir_nodeset_destroy(&live_nodes);
}

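/**
 * Put the definition of a node into the same congruence class as its
 * should_be_same operands, as long as the operand's class does not
 * interfere with any value live at the node.
 */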
static void congruence_def(ir_nodeset_t *live_nodes, const ir_node *node)
{
	const arch_register_req_t *req = arch_get_register_req_out(node);

	/* should be same constraint? */
	if (req->type & arch_register_req_type_should_be_same) {
		const ir_node *insn  = skip_Proj_const(node);
		int      arity = get_irn_arity(insn);
		int      i;
		unsigned node_idx = get_irn_idx(node);
		node_idx          = uf_find(congruence_classes, node_idx);

		for (i = 0; i < arity; ++i) {
			ir_node               *live;
			ir_node               *op;
			int                    op_idx;
			ir_nodeset_iterator_t  iter;
			bool                   interferes = false;

			if (!rbitset_is_set(&req->other_same, i))
				continue;

			op     = get_irn_n(insn, i);
			op_idx = get_irn_idx(op);
			op_idx = uf_find(congruence_classes, op_idx);

			/* do we interfere with the value */
			foreach_ir_nodeset(live_nodes, live, iter) {
				int lv_idx = get_irn_idx(live);
				lv_idx     = uf_find(congruence_classes, lv_idx);
				if (lv_idx == op_idx) {
					interferes = true;
					break;
				}
			}
			/* don't put in same affinity class if we interfere */
			if (interferes)
				continue;

			node_idx = uf_union(congruence_classes, node_idx, op_idx);
			DB((dbg, LEVEL_3, "Merge %+F and %+F congruence classes\n",
			    node, op));
			/* one should_be_same is enough... */
			break;
		}
	}
}

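/**
 * Block walker: computes congruence classes for a block. Walks the schedule
 * backwards uniting should_be_same operands with their definitions, then
 * merges non-interfering phi arguments with their phis and sums up the
 * register preferences of merged classes.
 */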
static void create_congruence_class(ir_node *block, void *data)
{
	ir_nodeset_t  live_nodes;
	ir_node      *node;

	(void) data;
	ir_nodeset_init(&live_nodes);
	be_liveness_end_of_block(lv, cls, block, &live_nodes);

	/* check should be same constraints */
	sched_foreach_reverse(block, node) {
		ir_node *value;
		if (is_Phi(node))
			break;

		be_foreach_definition(node, cls, value,
			congruence_def(&live_nodes, value);
		);
		be_liveness_transfer(cls, node, &live_nodes);
	}

	/* check phi congruence classes */
	sched_foreach_reverse_from(node, node) {
		int i;
		int arity;
		int node_idx;
		assert(is_Phi(node));

		if (!arch_irn_consider_in_reg_alloc(cls, node))
			continue;

		node_idx = get_irn_idx(node);
		node_idx = uf_find(congruence_classes, node_idx);

		arity = get_irn_arity(node);
		for (i = 0; i < arity; ++i) {
			bool                  interferes = false;
			ir_nodeset_iterator_t iter;
			unsigned              r;
			int                   old_node_idx;
			ir_node              *live;
			ir_node              *phi;
			allocation_info_t    *head_info;
			allocation_info_t    *other_info;
			ir_node              *op     = get_Phi_pred(node, i);
			int                   op_idx = get_irn_idx(op);
			op_idx = uf_find(congruence_classes, op_idx);

			/* do we interfere with the value */
			foreach_ir_nodeset(&live_nodes, live, iter) {
				int lv_idx = get_irn_idx(live);
				lv_idx     = uf_find(congruence_classes, lv_idx);
				if (lv_idx == op_idx) {
					interferes = true;
					break;
				}
			}
			/* don't put in same affinity class if we interfere */
			if (interferes)
				continue;
			/* any other phi has the same input? */
			sched_foreach(block, phi) {
				ir_node *oop;
				int      oop_idx;
				if (!is_Phi(phi))
					break;
				if (!arch_irn_consider_in_reg_alloc(cls, phi))
					continue;
				oop = get_Phi_pred(phi, i);
				if (oop == op)
					continue;
				oop_idx = get_irn_idx(oop);
				oop_idx = uf_find(congruence_classes, oop_idx);
				if (oop_idx == op_idx) {
					interferes = true;
					break;
				}
			}
			if (interferes)
				continue;

			/* merge the 2 congruence classes and sum up their preferences */
			old_node_idx = node_idx;
			node_idx = uf_union(congruence_classes, node_idx, op_idx);
			DB((dbg, LEVEL_3, "Merge %+F and %+F congruence classes\n",
			    node, op));

			old_node_idx = node_idx == old_node_idx ? op_idx : old_node_idx;
			head_info  = get_allocation_info(get_idx_irn(irg, node_idx));
			other_info = get_allocation_info(get_idx_irn(irg, old_node_idx));
			for (r = 0; r < n_regs; ++r) {
				head_info->prefs[r] += other_info->prefs[r];
			}
		}
	}
}

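/**
 * Graph walker: copies the accumulated register preferences of a congruence
 * class from its representative to the given member node.
 */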
static void set_congruence_prefs(ir_node *node, void *data)
{
	allocation_info_t *info;
	allocation_info_t *head_info;
	unsigned node_idx = get_irn_idx(node);
	unsigned node_set = uf_find(congruence_classes, node_idx);

	(void) data;

	/* head of congruence class or not in any class */
	if (node_set == node_idx)
		return;

	if (!arch_irn_consider_in_reg_alloc(cls, node))
		return;

	head_info = get_allocation_info(get_idx_irn(irg, node_set));
	info      = get_allocation_info(node);

	memcpy(info->prefs, head_info->prefs, n_regs * sizeof(info->prefs[0]));
}

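/**
 * Builds congruence classes over all node indices with a union-find
 * structure and then distributes the merged register preferences to all
 * class members.
 */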
static void combine_congruence_classes(void)
{
	size_t n = get_irg_last_idx(irg);
	congruence_classes = XMALLOCN(int, n);
	uf_init(congruence_classes, n);

	/* create congruence classes */
	irg_block_walk_graph(irg, create_congruence_class, NULL, NULL);
	/* merge preferences */
	irg_walk_graph(irg, set_congruence_prefs, NULL, NULL);
	free(congruence_classes);
}



/**
 * Assign register reg to the given node.
 *
 * @param node  the node
 * @param reg   the register
 */
static void use_reg(ir_node *node, const arch_register_t *reg)
{
	unsigned r = arch_register_get_index(reg);
	assignments[r] = node;
	arch_set_irn_register(node, reg);
}

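/**
 * Release the register currently assigned to the given value (no-op for
 * values outside the current register class).
 */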
static void free_reg_of_value(ir_node *node)
{
	const arch_register_t *reg;
	unsigned               r;

	if (!arch_irn_consider_in_reg_alloc(cls, node))
		return;

	reg        = arch_get_irn_register(node);
	r          = arch_register_get_index(reg);
	/* assignments[r] may be NULL if a value is used at 2 inputs
	   so it gets freed twice. */
	assert(assignments[r] == node || assignments[r] == NULL);
	assignments[r] = NULL;
}

/**
 * Compare two register preferences in decreasing order.
 */
static int compare_reg_pref(const void *e1, const void *e2)
{
	const reg_pref_t *rp1 = (const reg_pref_t*) e1;
	const reg_pref_t *rp2 = (const reg_pref_t*) e2;
	if (rp1->pref < rp2->pref)
		return 1;
	if (rp1->pref > rp2->pref)
		return -1;
	return 0;
}

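/**
 * Fill an array of (register, preference) pairs for the given allocation
 * info and sort it by decreasing preference.
 */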
static void fill_sort_candidates(reg_pref_t *regprefs,
                                 const allocation_info_t *info)
{
	unsigned r;

	for (r = 0; r < n_regs; ++r) {
		float pref = info->prefs[r];
		regprefs[r].num  = r;
		regprefs[r].pref = pref;
	}
	/* TODO: use a stable sort here to avoid unnecessary register jumping */
	qsort(regprefs, n_regs, sizeof(regprefs[0]), compare_reg_pref);
}

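/**
 * Try to free the register of @p to_split by copying the value to another
 * admissible register (possibly splitting recursively) before @p before.
 * The split is only done when the expected gain exceeds the split threshold
 * derived from the block's execution frequency.
 *
 * @return true if the value was moved, false otherwise
 */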
static bool try_optimistic_split(ir_node *to_split, ir_node *before,
                                 float pref, float pref_delta,
                                 unsigned *forbidden_regs, int recursion)
{
	const arch_register_t *from_reg;
	const arch_register_t *reg;
	ir_node               *original_insn;
	ir_node               *block;
	ir_node               *copy;
	unsigned               r;
	unsigned               from_r;
	unsigned               i;
	allocation_info_t     *info = get_allocation_info(to_split);
	reg_pref_t            *prefs;
	float                  delta;
	float                  split_threshold;

	(void) pref;

	/* stupid hack: don't optimistically split don't-spill nodes...
	 * (so we don't split away the values produced because of
	 *  must_be_different constraints) */
	original_insn = skip_Proj(info->original_value);
	if (arch_irn_get_flags(original_insn) & arch_irn_flags_dont_spill)
		return false;

	from_reg        = arch_get_irn_register(to_split);
	from_r          = arch_register_get_index(from_reg);
	block           = get_nodes_block(before);
	split_threshold = (float)get_block_execfreq(execfreqs, block) * SPLIT_DELTA;

	if (pref_delta < split_threshold*0.5)
		return false;

	/* find the best free position where we could move to */
	prefs = ALLOCAN(reg_pref_t, n_regs);
	fill_sort_candidates(prefs, info);
	for (i = 0; i < n_regs; ++i) {
		float apref;
		float apref_delta;
		bool  res;
		bool  old_source_state;

		/* we need a normal register which is not an output register
		   and different from the current register of to_split */
		r = prefs[i].num;
		if (!rbitset_is_set(normal_regs, r))
			continue;
		if (rbitset_is_set(forbidden_regs, r))
			continue;
		if (r == from_r)
			continue;

		/* is the split worth it? */
		delta = pref_delta + prefs[i].pref;
		if (delta < split_threshold) {
			DB((dbg, LEVEL_3, "Not doing optimistical split of %+F (depth %d), win %f too low\n",
				to_split, recursion, delta));
			return false;
		}

		/* if the register is free then we can do the split */
		if (assignments[r] == NULL)
			break;

		/* otherwise we might try recursively calling optimistic_split */
		if (recursion+1 > MAX_OPTIMISTIC_SPLIT_RECURSION)
			continue;

		apref        = prefs[i].pref;
		apref_delta  = i+1 < n_regs ? apref - prefs[i+1].pref : 0;
		apref_delta += pref_delta - split_threshold;

		/* our source register isn't a useful destination for recursive
		   splits */
		old_source_state = rbitset_is_set(forbidden_regs, from_r);
		rbitset_set(forbidden_regs, from_r);
		/* try recursive split */
		res = try_optimistic_split(assignments[r], before, apref,
		                           apref_delta, forbidden_regs, recursion+1);
		/* restore our destination */
		if (old_source_state) {
			rbitset_set(forbidden_regs, from_r);
		} else {
			rbitset_clear(forbidden_regs, from_r);
		}

		if (res)
			break;
	}
	if (i >= n_regs)
		return false;

	reg  = arch_register_for_index(cls, r);
	copy = be_new_Copy(cls, block, to_split);
	mark_as_copy_of(copy, to_split);
	/* hacky, but correct here */
	if (assignments[arch_register_get_index(from_reg)] == to_split)
		free_reg_of_value(to_split);
	use_reg(copy, reg);
	sched_add_before(before, copy);

	DB((dbg, LEVEL_3,
	    "Optimistic live-range split %+F move %+F(%s) -> %s before %+F (win %f, depth %d)\n",
	    copy, to_split, from_reg->name, reg->name, before, delta, recursion));
	return true;
}

/**
 * Determine and assign a register for node @p node
 */
static void assign_reg(const ir_node *block, ir_node *node,
                       unsigned *forbidden_regs)
{
	const arch_register_t     *reg;
	allocation_info_t         *info;
	const arch_register_req_t *req;
	reg_pref_t                *reg_prefs;
	ir_node                   *in_node;
	unsigned                   i;
	const unsigned            *allowed_regs;
	unsigned                   r;

	assert(!is_Phi(node));
	/* preassigned register? */
	reg = arch_get_irn_register(node);
	if (reg != NULL) {
		DB((dbg, LEVEL_2, "Preassignment %+F -> %s\n", node, reg->name));
		use_reg(node, reg);
		return;
	}

	req = arch_get_register_req_out(node);
	/* ignore reqs must be preassigned */
	assert (! (req->type & arch_register_req_type_ignore));

	/* give should_be_same bonuses */
	info    = get_allocation_info(node);
	in_node = skip_Proj(node);
	if (req->type & arch_register_req_type_should_be_same) {
		float weight = (float)get_block_execfreq(execfreqs, block);
		int   arity  = get_irn_arity(in_node);
		int   i;

		assert(arity <= (int) sizeof(req->other_same) * 8);
		for (i = 0; i < arity; ++i) {
			ir_node               *in;
			const arch_register_t *reg;
			unsigned               r;
			if (!rbitset_is_set(&req->other_same, i))
				continue;

			in  = get_irn_n(in_node, i);
			reg = arch_get_irn_register(in);
			assert(reg != NULL);
			r = arch_register_get_index(reg);

			/* if the value didn't die here then we should not propagate the
			 * should_be_same info */
			if (assignments[r] == in)
				continue;

			info->prefs[r] += weight * AFF_SHOULD_BE_SAME;
		}
	}

	/* create list of register candidates and sort by their preference */
	DB((dbg, LEVEL_2, "Candidates for %+F:", node));
	reg_prefs = ALLOCAN(reg_pref_t, n_regs);
	fill_sort_candidates(reg_prefs, info);
	for (i = 0; i < n_regs; ++i) {
		unsigned num = reg_prefs[i].num;
		const arch_register_t *reg;

		if (!rbitset_is_set(normal_regs, num))
			continue;

		reg = arch_register_for_index(cls, num);
		DB((dbg, LEVEL_2, " %s(%f)", reg->name, reg_prefs[i].pref));
	}
	DB((dbg, LEVEL_2, "\n"));

	allowed_regs = normal_regs;
	if (req->type & arch_register_req_type_limited) {
		allowed_regs = req->limited;
	}

	for (i = 0; i < n_regs; ++i) {
		float   pref, delta;
		ir_node *before;
		bool    res;

		r = reg_prefs[i].num;
		if (!rbitset_is_set(allowed_regs, r))
			continue;
		if (assignments[r] == NULL)
			break;
		pref   = reg_prefs[i].pref;
		delta  = i+1 < n_regs ? pref - reg_prefs[i+1].pref : 0;
		before = skip_Proj(node);
		res    = try_optimistic_split(assignments[r], before,
		                              pref, delta, forbidden_regs, 0);
		if (res)
			break;
	}
	if (i >= n_regs) {
		/* the common reason to hit this panic is when 1 of your nodes is not
		 * register pressure faithful */
		panic("No register left for %+F\n", node);
	}

	reg = arch_register_for_index(cls, r);
	DB((dbg, LEVEL_2, "Assign %+F -> %s\n", node, reg->name));
	use_reg(node, reg);
}

/**
 * Add a permutation in front of a node and change the assignments
 * due to this permutation.
 *
 * To understand this imagine a permutation like this:
 *
 * 1 -> 2
 * 2 -> 3
 * 3 -> 1, 5
 * 4 -> 6
 * 5
 * 6
 * 7 -> 7
 *
 * First we count how many destinations a single value has. At the same time
 * we can be sure that each destination register has at most 1 source register
 * (it can have 0 which means we don't care what value is in it).
 * We ignore all fulfilled permutations (like 7->7).
 * In a first pass we create as many copy instructions as possible as they
 * are generally cheaper than exchanges. We do this by counting into how many
 * destinations a register has to be copied (in the example it's 2 for register
 * 3, or 1 for the registers 1,2,4 and 7).
 * We can then create a copy into every destination register when the usecount
 * of that register is 0 (= no one else needs the value in the register).
 *
 * After this step we should only have cycles left. We implement a cyclic
 * permutation of n registers with n-1 transpositions.
 *
 * @param live_nodes   the set of live nodes, updated due to live range split
 * @param before       the node before we add the permutation
 * @param permutation  the permutation; array indices are the destination
 *                     registers, the values in the array are the source
 *                     registers.
 */
static void permute_values(ir_nodeset_t *live_nodes, ir_node *before,
                             unsigned *permutation)
{
	unsigned  *n_used = ALLOCANZ(unsigned, n_regs);
	ir_node   *block;
	unsigned   r;

	/* determine how often each source register needs to be read */
	for (r = 0; r < n_regs; ++r) {
		unsigned  old_reg = permutation[r];
		ir_node  *value;

		value = assignments[old_reg];
		if (value == NULL) {
			/* nothing to do here, reg is not live. Mark it as fixpoint
			 * so we ignore it in the next steps */
			permutation[r] = r;
			continue;
		}

		++n_used[old_reg];
	}

	block = get_nodes_block(before);

	/* step1: create copies where immediately possible */
	for (r = 0; r < n_regs; /* empty */) {
		ir_node *copy;
		ir_node *src;
		const arch_register_t *reg;
		unsigned               old_r = permutation[r];

		/* - no need to do anything for fixed points.
		   - we can't copy if the value in the dest reg is still needed */
		if (old_r == r || n_used[r] > 0) {
			++r;
			continue;
		}

		/* create a copy */
		src  = assignments[old_r];
		copy = be_new_Copy(cls, block, src);
		sched_add_before(before, copy);
		reg = arch_register_for_index(cls, r);
		DB((dbg, LEVEL_2, "Copy %+F (from %+F, before %+F) -> %s\n",
		    copy, src, before, reg->name));
		mark_as_copy_of(copy, src);
		use_reg(copy, reg);

		if (live_nodes != NULL) {
			ir_nodeset_insert(live_nodes, copy);
		}

		/* old register has 1 user less, permutation is resolved */
		assert(arch_register_get_index(arch_get_irn_register(src)) == old_r);
		permutation[r] = r;

		assert(n_used[old_r] > 0);
		--n_used[old_r];
		if (n_used[old_r] == 0) {
			if (live_nodes != NULL) {
				ir_nodeset_remove(live_nodes, src);
			}
			free_reg_of_value(src);
		}

		/* advance or jump back (if this copy enabled another copy) */
		if (old_r < r && n_used[old_r] == 0) {
			r = old_r;
		} else {
			++r;
		}
	}

	/* at this point we only have "cycles" left which we have to resolve with
	 * perm instructions
	 * TODO: if we have free registers left, then we should really use copy
	 * instructions for any cycle longer than 2 registers...
	 * (this is probably architecture dependent, there might be archs where
	 *  copies are preferable even for 2-cycles) */

	/* create perms with the rest */
	for (r = 0; r < n_regs; /* empty */) {
		const arch_register_t *reg;
		unsigned  old_r = permutation[r];
		unsigned  r2;
		ir_node  *in[2];
		ir_node  *perm;
		ir_node  *proj0;
		ir_node  *proj1;

		if (old_r == r) {
			++r;
			continue;
		}

		/* we shouldn't have copies from 1 value to multiple destinations left */
		assert(n_used[old_r] == 1);

		/* exchange old_r and r2; after that old_r is a fixed point */
		r2 = permutation[old_r];

		in[0] = assignments[r2];
		in[1] = assignments[old_r];
		perm = be_new_Perm(cls, block, 2, in);
		sched_add_before(before, perm);
		DB((dbg, LEVEL_2, "Perm %+F (perm %+F,%+F, before %+F)\n",
		    perm, in[0], in[1], before));

		proj0 = new_r_Proj(perm, get_irn_mode(in[0]), 0);
		mark_as_copy_of(proj0, in[0]);
		reg = arch_register_for_index(cls, old_r);
		use_reg(proj0, reg);

		proj1 = new_r_Proj(perm, get_irn_mode(in[1]), 1);
		mark_as_copy_of(proj1, in[1]);
		reg = arch_register_for_index(cls, r2);
		use_reg(proj1, reg);

		/* 1 value is now in the correct register */
		permutation[old_r] = old_r;
		/* the source of r changed to r2 */
		permutation[r] = r2;

		/* if we have reached a fixpoint update data structures */
		if (live_nodes != NULL) {
			ir_nodeset_remove(live_nodes, in[0]);
			ir_nodeset_remove(live_nodes, in[1]);
			ir_nodeset_remove(live_nodes, proj0);
			ir_nodeset_insert(live_nodes, proj1);
		}
	}

#ifdef DEBUG_libfirm
	/* now we should only have fixpoints left */
	for (r = 0; r < n_regs; ++r) {
		assert(permutation[r] == r);
	}
#endif
}

/**
 * Free regs for values last used.
 *
 * @param live_nodes   set of live nodes, will be updated
 * @param node         the node to consider
 */
static void free_last_uses(ir_nodeset_t *live_nodes, ir_node *node)
{
	allocation_info_t *info      = get_allocation_info(node);
	const unsigned    *last_uses = info->last_uses;
	int                arity     = get_irn_arity(node);
	int                i;

	for (i = 0; i < arity; ++i) {
		ir_node *op;

		/* check if one operand is the last use */
		if (!rbitset_is_set(last_uses, i))
			continue;

		op = get_irn_n(node, i);
		free_reg_of_value(op);
		ir_nodeset_remove(live_nodes, op);
	}
}

/**
 * change inputs of a node to the current value (copies/perms)
 */
static void rewire_inputs(ir_node *node)
{
	int i;
	int arity = get_irn_arity(node);

	for (i = 0; i < arity; ++i) {
		ir_node           *op = get_irn_n(node, i);
		allocation_info_t *info = try_get_allocation_info(op);

		if (info == NULL)
			continue;

		info = get_allocation_info(info->original_value);
		if (info->current_value != op) {
			set_irn_n(node, i, info->current_value);
		}
	}
}

/**
 * Create a bitset of registers occupied by values living through an
 * instruction
 */
static void determine_live_through_regs(unsigned *bitset, ir_node *node)
{
	const allocation_info_t *info = get_allocation_info(node);
	unsigned r;
	int i;
	int arity;

	/* mark all used registers as potentially live-through */
	for (r = 0; r < n_regs; ++r) {
		if (assignments[r] == NULL)
			continue;
		if (!rbitset_is_set(normal_regs, r))
			continue;

		rbitset_set(bitset, r);
	}

	/* remove registers of values dying at the instruction */
	arity = get_irn_arity(node);
	for (i = 0; i < arity; ++i) {
		ir_node               *op;
		const arch_register_t *reg;

		if (!rbitset_is_set(info->last_uses, i))
			continue;

		op  = get_irn_n(node, i);
		reg = arch_get_irn_register(op);
		rbitset_clear(bitset, arch_register_get_index(reg));
	}
}

/**
 * Enforce constraints at a node by live range splits.
 *
 * @param  live_nodes  the set of live nodes, might be changed
 * @param  node        the current node
 */
static void enforce_constraints(ir_nodeset_t *live_nodes, ir_node *node,
                                unsigned *forbidden_regs)
{
	int arity = get_irn_arity(node);
	int i, res;
	hungarian_problem_t *bp;
	unsigned l, r;
	unsigned *assignment;
	ir_node  *value;

	/* construct a list of registers occupied by live-through values */
	unsigned *live_through_regs = NULL;

	/* see if any use constraints are not met */
	bool good = true;
	for (i = 0; i < arity; ++i) {
		ir_node                   *op = get_irn_n(node, i);
		const arch_register_t     *reg;
		const arch_register_req_t *req;
		const unsigned            *limited;
		unsigned                  r;

		if (!arch_irn_consider_in_reg_alloc(cls, op))
			continue;

		/* are there any limitations for the i'th operand? */
		req = arch_get_register_req(node, i);
		if (!(req->type & arch_register_req_type_limited))
			continue;

		limited = req->limited;