/*
 * This file is part of libFirm.
 * Copyright (C) 2012 University of Karlsruhe.
 */

/**
 * @file
 * @brief       Preference Guided Register Assignment
 * @author      Matthias Braun
 * @date        14.2.2009
 *
 * The idea is to allocate registers in 2 passes:
 * 1. A first pass to determine "preferred" registers for live-ranges. This
 *    calculates for each register and each live-range a value indicating
 *    the usefulness. (You can roughly think of the value as the negative
 *    costs needed for copies when the value is in the specific registers...)
 *
 * 2. Walk the blocks and assign registers in a greedy fashion, preferring
 *    registers with high preferences. When register constraints are not met,
 *    add copies and split live-ranges.
 *
 * TODO:
 *  - make use of free registers in the permute_values code
 */
#include <float.h>
#include <stdbool.h>
#include <math.h>
#include "lpp.h"

#include "panic.h"
#include "execfreq.h"
#include "ircons.h"
#include "irdom.h"
#include "iredges_t.h"
#include "irgraph_t.h"
#include "irgwalk.h"
#include "irnode_t.h"
#include "irdump.h"
#include "util.h"
#include "obst.h"
#include "raw_bitset.h"
#include "unionfind.h"
#include "pdeq.h"
#include "hungarian.h"
#include "statev.h"
#include "beabi.h"
#include "bechordal_t.h"
#include "be.h"
#include "beirg.h"
#include "belive_t.h"
#include "bemodule.h"
#include "benode.h"
#include "bera.h"
#include "besched.h"
#include "bespill.h"
#include "bespillutil.h"
#include "beverify.h"
#include "beutil.h"
#include "bestack.h"

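/* weighting factors used when computing register preferences, plus the
 * threshold and recursion limit used by optimistic live-range splitting */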
#define USE_FACTOR                     1.0f
#define DEF_FACTOR                     1.0f
#define NEIGHBOR_FACTOR                0.2f
#define AFF_SHOULD_BE_SAME             0.5f
#define AFF_PHI                        1.0f
#define SPLIT_DELTA                    1.0f
#define MAX_OPTIMISTIC_SPLIT_RECURSION 0

DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)

static struct obstack               obst;
static ir_graph                    *irg;
static const arch_register_class_t *cls;
static be_lv_t                     *lv;
static unsigned                     n_regs;
static unsigned                    *normal_regs;        /**< bitset of the registers usable by the allocator */
static int                         *congruence_classes; /**< union-find forest of value congruence classes */
static ir_node                    **block_order;        /**< blocks in the order they are processed */
static size_t                       n_block_order;      /**< number of entries in block_order */

/** currently active assignments (while processing a basic block)
 * maps registers to values (their current copies) */
static ir_node **assignments;

/**
 * allocation information: last_uses, register preferences
 * the information is per firm-node.
 */
struct allocation_info_t {
	unsigned  last_uses[2];   /**< bitset indicating last uses (input pos) */
	ir_node  *current_value;  /**< copy of the value that should be used */
	ir_node  *original_value; /**< for copies point to original value */
	float     prefs[];        /**< register preferences */
};
typedef struct allocation_info_t allocation_info_t;

/** helper data structure used when sorting register preferences */
struct reg_pref_t {
	unsigned num;
	float    pref;
};
typedef struct reg_pref_t reg_pref_t;

/** per basic-block information */
struct block_info_t {
	bool     processed;       /**< indicates whether block is processed */
	ir_node *assignments[];   /**< register assignments at end of block */
};
typedef struct block_info_t block_info_t;

/**
 * Get the allocation info for a node.
 * The info is allocated on the first visit of a node.
 */
static allocation_info_t *get_allocation_info(ir_node *node)
{
	allocation_info_t *info = (allocation_info_t*)get_irn_link(node);
	if (info == NULL) {
		info = OALLOCFZ(&obst, allocation_info_t, prefs, n_regs);
		info->current_value  = node;
		info->original_value = node;
		set_irn_link(node, info);
	}

	return info;
}

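/**
 * Get the allocation info of a node if it already exists, NULL otherwise.
 * (In contrast to get_allocation_info() no info is created on demand.)
 */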
static allocation_info_t *try_get_allocation_info(const ir_node *node)
{
	return (allocation_info_t*) get_irn_link(node);
}

/**
 * Get allocation information for a basic block
 */
static block_info_t *get_block_info(ir_node *block)
{
	block_info_t *info = (block_info_t*)get_irn_link(block);

	assert(is_Block(block));
	if (info == NULL) {
		info = OALLOCFZ(&obst, block_info_t, assignments, n_regs);
		set_irn_link(block, info);
	}

	return info;
}

/**
 * Link the allocation info of a node to a copy.
 * Afterwards, both nodes use the same allocation info.
 * Copy must not have an allocation info assigned yet.
 *
 * @param copy   the node that gets the allocation info assigned
 * @param value  the original node
 */
static void mark_as_copy_of(ir_node *copy, ir_node *value)
{
	allocation_info_t *info      = get_allocation_info(value);
	allocation_info_t *copy_info = get_allocation_info(copy);

	/* find original value */
	ir_node *original = info->original_value;
	if (original != value) {
		info = get_allocation_info(original);
	}

	assert(info->original_value == original);
	info->current_value = copy;

	/* the copy should not be linked to something else yet */
	assert(copy_info->original_value == copy);
	copy_info->original_value = original;

	/* copy over allocation preferences */
	memcpy(copy_info->prefs, info->prefs, n_regs * sizeof(copy_info->prefs[0]));
}

/**
 * Calculate the penalties for every register on a node and its live neighbors.
 *
 * @param live_nodes  the set of live nodes at the current position, may be NULL
 * @param penalty     the penalty to subtract from
 * @param limited     a raw bitset containing the limited set for the node
 * @param node        the node
 */
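/* Illustration of the code below: for a node limited to a single register the
 * full penalty is subtracted from all of its other registers, while each live
 * neighbor gets penalty * NEIGHBOR_FACTOR subtracted on that one allowed
 * register; with several allowed registers the neighbor penalty is scaled
 * down further. */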
static void give_penalties_for_limits(const ir_nodeset_t *live_nodes,
                                      float penalty, const unsigned* limited,
                                      ir_node *node)
{
	allocation_info_t *info = get_allocation_info(node);

	/* give penalty for all forbidden regs */
	for (unsigned r = 0; r < n_regs; ++r) {
		if (rbitset_is_set(limited, r))
			continue;

		info->prefs[r] -= penalty;
	}

	/* all other live values should get a penalty for allowed regs */
	if (live_nodes == NULL)
		return;

	penalty   *= NEIGHBOR_FACTOR;
	size_t n_allowed = rbitset_popcount(limited, n_regs);
	if (n_allowed > 1) {
		/* only create a very weak penalty if multiple regs are allowed */
		penalty = (penalty * 0.8f) / n_allowed;
	}
	foreach_ir_nodeset(live_nodes, neighbor, iter) {
		allocation_info_t *neighbor_info;

		/* TODO: if op is used at multiple inputs we might not want to
		 * 'continue' here */
		if (neighbor == node)
			continue;

		neighbor_info = get_allocation_info(neighbor);
		for (unsigned r = 0; r < n_regs; ++r) {
			if (!rbitset_is_set(limited, r))
				continue;

			neighbor_info->prefs[r] -= penalty;
		}
	}
}

/**
 * Calculate the preferences of a definition for the current register class.
 * If the definition uses a limited set of registers, reduce the preferences
 * for the limited register on the node and its neighbors.
 *
 * @param live_nodes  the set of live nodes at the current node
 * @param weight      the weight
 * @param node        the current node
 */
static void check_defs(ir_nodeset_t const *const live_nodes, float const weight, ir_node *const node, arch_register_req_t const *const req)
{
	if (arch_register_req_is(req, limited)) {
		const unsigned *limited = req->limited;
		float           penalty = weight * DEF_FACTOR;
		give_penalties_for_limits(live_nodes, penalty, limited, node);
	}

	if (arch_register_req_is(req, should_be_same)) {
		ir_node           *insn  = skip_Proj(node);
		allocation_info_t *info  = get_allocation_info(node);
		int                arity = get_irn_arity(insn);

		float factor = 1.0f / rbitset_popcount(&req->other_same, arity);
		foreach_irn_in(insn, i, op) {
			if (!rbitset_is_set(&req->other_same, i))
				continue;

			/* if the value at the should_be_same input doesn't die at the
			 * node, then it is of no use to propagate the constraints (since
			 * a copy will emerge anyway) */
			if (ir_nodeset_contains(live_nodes, op))
				continue;

			allocation_info_t *op_info = get_allocation_info(op);
			for (unsigned r = 0; r < n_regs; ++r) {
				op_info->prefs[r] += info->prefs[r] * factor;
			}
		}
	}
}

/**
 * Walker: Runs on a block and calculates the preferences for every node
 * and every register of the considered register class.
 */
static void analyze_block(ir_node *block, void *data)
{
	float        weight = (float)get_block_execfreq(block);
	ir_nodeset_t live_nodes;
	(void) data;

	ir_nodeset_init(&live_nodes);
	be_liveness_end_of_block(lv, cls, block, &live_nodes);

	sched_foreach_reverse(block, node) {
		if (is_Phi(node))
			break;

		be_foreach_definition(node, cls, value, req,
			check_defs(&live_nodes, weight, value, req);
		);

		/* the allocation info currently only uses a small fixed-size bitset
		   to mark last used inputs, so we will fail for nodes with more
		   inputs than bits in last_uses. */
		allocation_info_t *info = get_allocation_info(node);
		if (get_irn_arity(node) >= (int)sizeof(info->last_uses) * 8) {
			panic("Node with more than %d inputs not supported yet",
					(int) sizeof(info->last_uses) * 8);
		}

		/* mark last uses */
		foreach_irn_in(node, i, op) {
			const arch_register_req_t *req = arch_get_irn_register_req(op);
			if (req->cls != cls)
				continue;

			/* last usage of a value? */
			if (!ir_nodeset_contains(&live_nodes, op)) {
				rbitset_set(info->last_uses, i);
			}
		}

		be_liveness_transfer(cls, node, &live_nodes);

		/* update weights based on usage constraints */
		be_foreach_use(node, cls, req, op, op_req,
			if (!arch_register_req_is(req, limited))
				continue;

			give_penalties_for_limits(&live_nodes, weight * USE_FACTOR, req->limited, op);
		);
	}

	ir_nodeset_destroy(&live_nodes);
}

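/**
 * Put a node into the congruence class of its should_be_same operands,
 * as long as the operand does not interfere with the node's class.
 */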
static void congruence_def(ir_nodeset_t *const live_nodes, ir_node const *const node, arch_register_req_t const *const req)
{
	/* should be same constraint? */
	if (arch_register_req_is(req, should_be_same)) {
		const ir_node *insn     = skip_Proj_const(node);
		unsigned       node_idx = get_irn_idx(node);
		node_idx = uf_find(congruence_classes, node_idx);

		foreach_irn_in(insn, i, op) {
			if (!rbitset_is_set(&req->other_same, i))
				continue;

			int op_idx = get_irn_idx(op);
			op_idx = uf_find(congruence_classes, op_idx);

			/* do we interfere with the value */
			bool interferes = false;
			foreach_ir_nodeset(live_nodes, live, iter) {
				int lv_idx = get_irn_idx(live);
				lv_idx     = uf_find(congruence_classes, lv_idx);
				if (lv_idx == op_idx) {
					interferes = true;
					break;
				}
			}
			/* don't put in same affinity class if we interfere */
			if (interferes)
				continue;

			uf_union(congruence_classes, node_idx, op_idx);
			DB((dbg, LEVEL_3, "Merge %+F and %+F congruence classes\n",
			    node, op));
			/* one should_be_same is enough... */
			break;
		}
	}
}

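/**
 * Walker: Build congruence classes for a block by uniting the classes of
 * non-interfering should_be_same operands and phi inputs.
 */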
static void create_congruence_class(ir_node *block, void *data)
{
	ir_nodeset_t live_nodes;

	(void) data;
	ir_nodeset_init(&live_nodes);
	be_liveness_end_of_block(lv, cls, block, &live_nodes);

	/* check should be same constraints */
	sched_foreach_reverse(block, node) {
		if (is_Phi(node))
			break;

		be_foreach_definition(node, cls, value, req,
			congruence_def(&live_nodes, value, req);
		);
		be_liveness_transfer(cls, node, &live_nodes);
	}

	/* check phi congruence classes */
	sched_foreach(block, phi) {
		if (!is_Phi(phi))
			break;

		if (!arch_irn_consider_in_reg_alloc(cls, phi))
			continue;

		int node_idx = get_irn_idx(phi);
		node_idx = uf_find(congruence_classes, node_idx);

		int arity = get_irn_arity(phi);
		for (int i = 0; i < arity; ++i) {
			ir_node *op     = get_Phi_pred(phi, i);
			int      op_idx = get_irn_idx(op);
			op_idx = uf_find(congruence_classes, op_idx);

			/* do we interfere with the value */
			bool interferes = false;
			foreach_ir_nodeset(&live_nodes, live, iter) {
				int lv_idx = get_irn_idx(live);
				lv_idx     = uf_find(congruence_classes, lv_idx);
				if (lv_idx == op_idx) {
					interferes = true;
					break;
				}
			}
			/* don't put in same affinity class if we interfere */
			if (interferes)
				continue;
			/* does any other phi have the same input? */
			sched_foreach(block, phi) {
				ir_node *oop;
				int      oop_idx;
				if (!is_Phi(phi))
					break;
				if (!arch_irn_consider_in_reg_alloc(cls, phi))
					continue;
				oop = get_Phi_pred(phi, i);
				if (oop == op)
					continue;
				oop_idx = get_irn_idx(oop);
				oop_idx = uf_find(congruence_classes, oop_idx);
				if (oop_idx == op_idx) {
					interferes = true;
					break;
				}
			}
			if (interferes)
				continue;

			/* merge the 2 congruence classes and sum up their preferences */
			int old_node_idx = node_idx;
			node_idx = uf_union(congruence_classes, node_idx, op_idx);
			DB((dbg, LEVEL_3, "Merge %+F and %+F congruence classes\n",
			    phi, op));

			old_node_idx = node_idx == old_node_idx ? op_idx : old_node_idx;
			allocation_info_t *head_info
				= get_allocation_info(get_idx_irn(irg, node_idx));
			allocation_info_t *other_info
				= get_allocation_info(get_idx_irn(irg, old_node_idx));
			for (unsigned r = 0; r < n_regs; ++r) {
				head_info->prefs[r] += other_info->prefs[r];
			}
		}
	}
	ir_nodeset_destroy(&live_nodes);
}

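/**
 * Walker: Copy the register preferences of a congruence class head to the
 * class member @p node.
 */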
static void set_congruence_prefs(ir_node *node, void *data)
{
	(void) data;
	unsigned node_idx = get_irn_idx(node);
	unsigned node_set = uf_find(congruence_classes, node_idx);

	/* head of congruence class or not in any class */
	if (node_set == node_idx)
		return;

	if (!arch_irn_consider_in_reg_alloc(cls, node))
		return;

	ir_node *head = get_idx_irn(irg, node_set);
	allocation_info_t *head_info = get_allocation_info(head);
	allocation_info_t *info      = get_allocation_info(node);

	memcpy(info->prefs, head_info->prefs, n_regs * sizeof(info->prefs[0]));
}

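/**
 * Build the congruence classes for the whole graph and combine the register
 * preferences within each class.
 */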
static void combine_congruence_classes(void)
{
	size_t n = get_irg_last_idx(irg);
	congruence_classes = XMALLOCN(int, n);
	uf_init(congruence_classes, n);

	/* create congruence classes */
	irg_block_walk_graph(irg, create_congruence_class, NULL, NULL);
	/* merge preferences */
	irg_walk_graph(irg, set_congruence_prefs, NULL, NULL);
	free(congruence_classes);
}



/**
 * Assign register reg to the given node.
 *
 * @param node   the node
 * @param reg    the register
 * @param width  the number of consecutive registers occupied by the value
 */
static void use_reg(ir_node *node, const arch_register_t *reg, unsigned width)
{
	unsigned r = reg->index;
	for (unsigned r0 = r; r0 < r + width; ++r0)
		assignments[r0] = node;
	arch_set_irn_register(node, reg);
}

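/**
 * Mark the register(s) occupied by @p node as free again.
 */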
static void free_reg_of_value(ir_node *node)
{
	if (!arch_irn_consider_in_reg_alloc(cls, node))
		return;

	const arch_register_t     *reg = arch_get_irn_register(node);
	const arch_register_req_t *req = arch_get_irn_register_req(node);
	unsigned                   r   = reg->index;
	/* assignments[r0] may be NULL if a value is used at 2 inputs
	 * so it gets freed twice. */
	for (unsigned r0 = r; r0 < r + req->width; ++r0) {
		assert(assignments[r0] == node || assignments[r0] == NULL);
		assignments[r0] = NULL;
	}
}

/**
 * Compare two register preferences in decreasing order.
 */
static int compare_reg_pref(const void *e1, const void *e2)
{
	const reg_pref_t *rp1 = (const reg_pref_t*) e1;
	const reg_pref_t *rp2 = (const reg_pref_t*) e2;
	if (rp1->pref < rp2->pref)
		return 1;
	if (rp1->pref > rp2->pref)
		return -1;
	return 0;
}

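/**
 * Fill an array with (register number, preference) pairs for @p info and
 * sort it by decreasing preference.
 */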
static void fill_sort_candidates(reg_pref_t *regprefs,
                                 const allocation_info_t *info)
{
	for (unsigned r = 0; r < n_regs; ++r) {
		float pref = info->prefs[r];
		regprefs[r].num  = r;
		regprefs[r].pref = pref;
	}
	/* TODO: use a stable sort here to avoid unnecessary register jumping */
	QSORT(regprefs, n_regs, compare_reg_pref);
}

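/**
 * Try to free up a register by moving its current value into another free
 * register, inserting a copy before @p before. Returns true on success.
 * (Parameter description inferred from the code below.)
 *
 * @param to_split        the value whose live-range is split away
 * @param before          the position where the copy is scheduled
 * @param pref            preference for the register we want to free
 * @param pref_delta      gain of giving the freed register to the
 *                        requesting value
 * @param forbidden_regs  registers that must not be used as split targets
 * @param recursion       current recursion depth
 */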
static bool try_optimistic_split(ir_node *to_split, ir_node *before,
                                 float pref, float pref_delta,
                                 unsigned *forbidden_regs, int recursion)
{
	(void) pref;
	unsigned           r = 0;
	allocation_info_t *info = get_allocation_info(to_split);
	float              delta = 0;

	/* stupid hack: don't optimistically split dont_spill nodes...
	 * (so we don't split away the values produced because of
	 *  must_be_different constraints) */
	ir_node *original_insn = skip_Proj(info->original_value);
	if (arch_get_irn_flags(original_insn) & arch_irn_flag_dont_spill)
		return false;

	const arch_register_t *from_reg        = arch_get_irn_register(to_split);
	unsigned               from_r          = from_reg->index;
	ir_node               *block           = get_nodes_block(before);
	float split_threshold = (float)get_block_execfreq(block) * SPLIT_DELTA;

	if (pref_delta < split_threshold*0.5)
		return false;

	/* find the best free position where we could move to */
	reg_pref_t *prefs = ALLOCAN(reg_pref_t, n_regs);
	fill_sort_candidates(prefs, info);
	unsigned i;
	for (i = 0; i < n_regs; ++i) {
		/* we need a normal register which is not an output register
		   and different from the current register of to_split */
		r = prefs[i].num;
		if (!rbitset_is_set(normal_regs, r))
			continue;
		if (rbitset_is_set(forbidden_regs, r))
			continue;
		if (r == from_r)
			continue;

		/* is the split worth it? */
		delta = pref_delta + prefs[i].pref;
		if (delta < split_threshold) {
			DB((dbg, LEVEL_3, "Not doing optimistic split of %+F (depth %d), win %f too low\n",
				to_split, recursion, delta));
			return false;
		}

		/* if the register is free then we can do the split */
		if (assignments[r] == NULL)
			break;

		/* otherwise we might try recursively calling optimistic_split */
		if (recursion+1 > MAX_OPTIMISTIC_SPLIT_RECURSION)
			continue;

		float apref        = prefs[i].pref;
		float apref_delta  = i+1 < n_regs ? apref - prefs[i+1].pref : 0;
		apref_delta += pref_delta - split_threshold;

		/* our source register isn't a useful destination for recursive
		   splits */
		bool old_source_state = rbitset_is_set(forbidden_regs, from_r);
		rbitset_set(forbidden_regs, from_r);
		/* try recursive split */
		bool res = try_optimistic_split(assignments[r], before, apref,
		                              apref_delta, forbidden_regs, recursion+1);
		/* restore our destination */
		if (old_source_state) {
			rbitset_set(forbidden_regs, from_r);
		} else {
			rbitset_clear(forbidden_regs, from_r);
		}

		if (res)
			break;
	}
	if (i >= n_regs)
		return false;

	const arch_register_t *reg   = arch_register_for_index(cls, r);
	ir_node               *copy  = be_new_Copy(block, to_split);
	unsigned               width = 1;
	mark_as_copy_of(copy, to_split);
	/* hacky, but correct here */
	if (assignments[from_reg->index] == to_split)
		free_reg_of_value(to_split);
	use_reg(copy, reg, width);
	sched_add_before(before, copy);

	DB((dbg, LEVEL_3,
	    "Optimistic live-range split %+F move %+F(%s) -> %s before %+F (win %f, depth %d)\n",
	    copy, to_split, from_reg->name, reg->name, before, delta, recursion));
	return true;
}

/**
 * Determine and assign a register for node @p node
 */
static void assign_reg(ir_node const *const block, ir_node *const node, arch_register_req_t const *const req, unsigned *const forbidden_regs)
{
	assert(!is_Phi(node));
	/* preassigned register? */
	arch_register_t const *final_reg = arch_get_irn_register(node);
	unsigned         const width     = req->width;
	if (final_reg != NULL) {
		DB((dbg, LEVEL_2, "Preassignment %+F -> %s\n", node, final_reg->name));
		use_reg(node, final_reg, width);
		return;
	}

	/* ignore reqs must be preassigned */
	assert(!arch_register_req_is(req, ignore));

	/* give should_be_same bonuses */
	allocation_info_t *info    = get_allocation_info(node);
	ir_node           *in_node = skip_Proj(node);
	if (arch_register_req_is(req, should_be_same)) {
		float weight = (float)get_block_execfreq(block);

		assert(get_irn_arity(in_node) <= (int)sizeof(req->other_same) * 8);
		foreach_irn_in(in_node, i, in) {
			if (!rbitset_is_set(&req->other_same, i))
				continue;

			const arch_register_t *reg       = arch_get_irn_register(in);
			unsigned               reg_index = reg->index;

			/* if the value didn't die here then we should not propagate the
			 * should_be_same info */
			if (assignments[reg_index] == in)
				continue;

			info->prefs[reg_index] += weight * AFF_SHOULD_BE_SAME;
		}
	}

	/* create list of register candidates and sort by their preference */
	DB((dbg, LEVEL_2, "Candidates for %+F:", node));
	reg_pref_t *reg_prefs = ALLOCAN(reg_pref_t, n_regs);
	fill_sort_candidates(reg_prefs, info);
	for (unsigned r = 0; r < n_regs; ++r) {
		unsigned num = reg_prefs[r].num;
		if (!rbitset_is_set(normal_regs, num))
			continue;
		DEBUG_ONLY(const arch_register_t *reg = arch_register_for_index(cls, num);)
		DB((dbg, LEVEL_2, " %s(%f)", reg->name, reg_prefs[r].pref));
	}
	DB((dbg, LEVEL_2, "\n"));

	const unsigned *allowed_regs = normal_regs;
	if (arch_register_req_is(req, limited)) {
		allowed_regs = req->limited;
	}

	unsigned final_reg_index = 0;
	unsigned r;
	for (r = 0; r < n_regs; ++r) {
		final_reg_index = reg_prefs[r].num;
		if (!rbitset_is_set(allowed_regs, final_reg_index))
			continue;
		/* alignment constraint? */
		if (width > 1) {
			if (arch_register_req_is(req, aligned) && (final_reg_index % width) != 0)
				continue;
			bool fine = true;
			for (unsigned r0 = r+1; r0 < r+width; ++r0) {
				if (assignments[r0] != NULL)
					fine = false;
			}
			/* TODO: attempt optimistic split here */
			if (!fine)
				continue;
		}

		if (assignments[final_reg_index] == NULL)
			break;
		float    pref   = reg_prefs[r].pref;
		float    delta  = r+1 < n_regs ? pref - reg_prefs[r+1].pref : 0;
		ir_node *before = skip_Proj(node);
		bool     res
			= try_optimistic_split(assignments[final_reg_index], before, pref,
			                       delta, forbidden_regs, 0);
		if (res)
			break;
	}
	if (r >= n_regs) {
		/* the common reason to hit this panic is when 1 of your nodes is not
		 * register pressure faithful */
		panic("No register left for %+F\n", node);
	}

	final_reg = arch_register_for_index(cls, final_reg_index);
	DB((dbg, LEVEL_2, "Assign %+F -> %s\n", node, final_reg->name));
	use_reg(node, final_reg, width);
}

/**
 * Add a permutation in front of a node and change the assignments
 * due to this permutation.
 *
 * To understand this imagine a permutation like this:
 *
 * 1 -> 2
 * 2 -> 3
 * 3 -> 1, 5
 * 4 -> 6
 * 5
 * 6
 * 7 -> 7
 *
 * First we count how many destinations a single value has. At the same time
 * we can be sure that each destination register has at most 1 source register
 * (it can have 0 which means we don't care what value is in it).
 * We ignore all fulfilled permutations (like 7->7)
 * In a first pass we create as many copy instructions as possible, as they
 * are generally cheaper than exchanges. We do this by counting into how many
 * destinations a register has to be copied (in the example it's 2 for register
 * 3, or 1 for the registers 1,2,4 and 7).
 * We can then create a copy into every destination register when the usecount
 * of that register is 0 (= no one else needs the value in the register).
 *
 * After this step we should only have cycles left. We implement a cyclic
 * permutation of n registers with n-1 transpositions.
 *
 * @param live_nodes   the set of live nodes, updated due to live range split
 * @param before       the node before we add the permutation
 * @param permutation  the permutation array: indices are the destination
 *                     registers, the values in the array are the source
 *                     registers.
 */
static void permute_values(ir_nodeset_t *live_nodes, ir_node *before,
                           unsigned *permutation)
{
	unsigned *n_used = ALLOCANZ(unsigned, n_regs);

	/* determine how often each source register needs to be read */
	for (unsigned r = 0; r < n_regs; ++r) {
		unsigned  old_reg = permutation[r];
		ir_node  *value;

		value = assignments[old_reg];
		if (value == NULL) {
			/* nothing to do here, reg is not live. Mark it as fixpoint
			 * so we ignore it in the next steps */
			permutation[r] = r;
			continue;
		}

		++n_used[old_reg];
	}

	ir_node *block = get_nodes_block(before);

	/* step1: create copies where immediately possible */
	for (unsigned r = 0; r < n_regs; /* empty */) {
		unsigned old_r = permutation[r];

		/* - no need to do anything for fixed points.
		   - we can't copy if the value in the dest reg is still needed */
		if (old_r == r || n_used[r] > 0) {
			++r;
			continue;
		}

		/* create a copy */
		ir_node *src  = assignments[old_r];
		ir_node *copy = be_new_Copy(block, src);
		sched_add_before(before, copy);
		const arch_register_t *reg = arch_register_for_index(cls, r);
		DB((dbg, LEVEL_2, "Copy %+F (from %+F, before %+F) -> %s\n",
		    copy, src, before, reg->name));
		mark_as_copy_of(copy, src);
		unsigned width = 1; /* TODO */
		use_reg(copy, reg, width);

		if (live_nodes != NULL) {
			ir_nodeset_insert(live_nodes, copy);
		}

		/* old register has 1 user less, permutation is resolved */
		assert(arch_get_irn_register(src)->index == old_r);
		permutation[r] = r;

		assert(n_used[old_r] > 0);
		--n_used[old_r];
		if (n_used[old_r] == 0) {
			if (live_nodes != NULL) {
				ir_nodeset_remove(live_nodes, src);
			}
			free_reg_of_value(src);
		}

		/* advance or jump back (if this copy enabled another copy) */
		if (old_r < r && n_used[old_r] == 0) {
			r = old_r;
		} else {
			++r;
		}
	}

	/* at this point we only have "cycles" left which we have to resolve with
	 * perm instructions
	 * TODO: if we have free registers left, then we should really use copy
	 * instructions for any cycle longer than 2 registers...
	 * (this is probably architecture dependent, there might be archs where
	 *  copies are preferable even for 2-cycles) */

	/* create perms with the rest */
	for (unsigned r = 0; r < n_regs; /* empty */) {
		unsigned old_r = permutation[r];

		if (old_r == r) {
			++r;
			continue;
		}

		/* we shouldn't have copies from 1 value to multiple destinations left */
		assert(n_used[old_r] == 1);

		/* exchange old_r and r2; after that old_r is a fixed point */
		unsigned r2 = permutation[old_r];

		ir_node *in[2] = { assignments[r2], assignments[old_r] };
		ir_node *perm = be_new_Perm(cls, block, 2, in);
		sched_add_before(before, perm);
		DB((dbg, LEVEL_2, "Perm %+F (perm %+F,%+F, before %+F)\n",
		    perm, in[0], in[1], before));

		unsigned width = 1; /* TODO */

		ir_node *proj0 = new_r_Proj(perm, get_irn_mode(in[0]), 0);
		mark_as_copy_of(proj0, in[0]);
		const arch_register_t *reg0 = arch_register_for_index(cls, old_r);
		use_reg(proj0, reg0, width);

		ir_node *proj1 = new_r_Proj(perm, get_irn_mode(in[1]), 1);
		mark_as_copy_of(proj1, in[1]);
		const arch_register_t *reg1 = arch_register_for_index(cls, r2);
		use_reg(proj1, reg1, width);

		/* 1 value is now in the correct register */
		permutation[old_r] = old_r;
		/* the source of r changed to r2 */
		permutation[r] = r2;

		/* if we have reached a fixpoint update data structures */
		if (live_nodes != NULL) {
			ir_nodeset_remove(live_nodes, in[0]);
			ir_nodeset_remove(live_nodes, in[1]);
			ir_nodeset_remove(live_nodes, proj0);
			ir_nodeset_insert(live_nodes, proj1);
		}
	}

#ifndef NDEBUG
	/* now we should only have fixpoints left */
	for (unsigned r = 0; r < n_regs; ++r) {
		assert(permutation[r] == r);
	}
#endif
}

/**
 * Free regs for values last used.
 *
 * @param live_nodes   set of live nodes, will be updated
 * @param node         the node to consider
 */
static void free_last_uses(ir_nodeset_t *live_nodes, ir_node *node)
{
	allocation_info_t *info      = get_allocation_info(node);
	const unsigned    *last_uses = info->last_uses;

	foreach_irn_in(node, i, op) {
		/* check if one operand is the last use */
		if (!rbitset_is_set(last_uses, i))
			continue;

		free_reg_of_value(op);
		ir_nodeset_remove(live_nodes, op);
	}
}

/**
 * change inputs of a node to the current value (copies/perms)
 */
static void rewire_inputs(ir_node *node)
{
	foreach_irn_in(node, i, op) {
		allocation_info_t *info = try_get_allocation_info(op);

		if (info == NULL)
			continue;

		info = get_allocation_info(info->original_value);
		if (info->current_value != op) {
			set_irn_n(node, i, info->current_value);
		}
	}
}

/**
 * Create a bitset of the registers occupied by values living through an
 * instruction
 */
static void determine_live_through_regs(unsigned *bitset, ir_node *node)
{
	const allocation_info_t *info = get_allocation_info(node);

	/* mark all used registers as potentially live-through */
	for (unsigned r = 0; r < n_regs; ++r) {
		if (assignments[r] == NULL)
			continue;
		if (!rbitset_is_set(normal_regs, r))
			continue;

		rbitset_set(bitset, r);
	}

	/* remove registers of values dying at the instruction */
	foreach_irn_in(node, i, op) {
		if (!rbitset_is_set(info->last_uses, i))
			continue;

		const arch_register_t *reg = arch_get_irn_register(op);
		rbitset_clear(bitset, reg->index);