/*
 * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
 *
 * This file is part of libFirm.
 *
 * This file may be distributed and/or modified under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation and appearing in the file LICENSE.GPL included in the
 * packaging of this file.
 *
 * Licensees holding valid libFirm Professional Edition licenses may use
 * this file in accordance with the libFirm Commercial License.
 * Agreement provided with the Software.
 *
 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE.
 */

/**
 * @file
 * @brief       Preference Guided Register Assignment
 * @author      Matthias Braun
 * @date        14.2.2009
 * @version     $Id$
 *
 * The idea is to allocate registers in 2 passes:
 * 1. A first pass to determine "preferred" registers for live-ranges. This
 *    calculates for each register and each live-range a value indicating
 *    the usefulness. (You can roughly think of the value as the negative
 *    costs needed for copies when the value is in the specific registers...)
 *
 * 2. Walk blocks and assigns registers in a greedy fashion. Preferring
 *    registers with high preferences. When register constraints are not met,
 *    add copies and split live-ranges.
 *
 * TODO:
 *  - make use of free registers in the permute_values code
 */
#include "config.h"

#include <float.h>
#include <stdbool.h>
#include <math.h>

#include "error.h"
#include "execfreq.h"
#include "ircons.h"
#include "irdom.h"
#include "iredges_t.h"
#include "irgraph_t.h"
#include "irgwalk.h"
#include "irnode_t.h"
#include "irprintf.h"
#include "obst.h"
#include "raw_bitset.h"
#include "unionfind.h"
#include "pdeq.h"
#include "hungarian.h"

#include "beabi.h"
#include "bechordal_t.h"
#include "be.h"
#include "beirg.h"
#include "belive_t.h"
#include "bemodule.h"
#include "benode.h"
#include "bera.h"
#include "besched.h"
#include "bespill.h"
#include "bespillutil.h"
#include "beverify.h"
#include "beutil.h"
Matthias Braun's avatar
Matthias Braun committed
75
76
77
78
79
80
/* weighting factors used when computing register preferences */
#define USE_FACTOR                     1.0f
#define DEF_FACTOR                     1.0f
#define NEIGHBOR_FACTOR                0.2f
#define AFF_SHOULD_BE_SAME             0.5f
#define AFF_PHI                        1.0f
#define SPLIT_DELTA                    1.0f
/* maximum recursion depth for recursive optimistic live-range splits */
#define MAX_OPTIMISTIC_SPLIT_RECURSION 0
Matthias Braun's avatar
Matthias Braun committed
82
83
84
85
86
87
88

DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)

static struct obstack               obst;
static be_irg_t                    *birg;
static ir_graph                    *irg;
static const arch_register_class_t *cls;
89
static const arch_register_req_t   *default_cls_req;
Matthias Braun's avatar
Matthias Braun committed
90
91
92
static be_lv_t                     *lv;
static const ir_exec_freq          *execfreqs;
static unsigned                     n_regs;
93
static unsigned                    *normal_regs;
94
static int                         *congruence_classes;
95
96
static ir_node                    **block_order;
static int                          n_block_order;
97
98
99
static int                          create_preferences        = true;
static int                          create_congruence_classes = true;
static int                          propagate_phi_registers   = true;
100
101
102
103
104
105
106

static const lc_opt_table_entry_t options[] = {
	LC_OPT_ENT_BOOL("prefs", "use preference based coloring", &create_preferences),
	LC_OPT_ENT_BOOL("congruences", "create congruence classes", &create_congruence_classes),
	LC_OPT_ENT_BOOL("prop_phi", "propagate phi registers", &propagate_phi_registers),
	LC_OPT_LAST
};
Matthias Braun's avatar
Matthias Braun committed
107

108
109
110
/** currently active assignments (while processing a basic block)
 * maps registers to values(their current copies) */
static ir_node **assignments;
Matthias Braun's avatar
Matthias Braun committed
111

112
113
114
115
/**
 * allocation information: last_uses, register preferences
 * the information is per firm-node.
 */
Matthias Braun's avatar
Matthias Braun committed
116
struct allocation_info_t {
117
118
119
120
	unsigned  last_uses;      /**< bitset indicating last uses (input pos) */
	ir_node  *current_value;  /**< copy of the value that should be used */
	ir_node  *original_value; /**< for copies point to original value */
	float     prefs[0];       /**< register preferences */
Matthias Braun's avatar
Matthias Braun committed
121
};
122
typedef struct allocation_info_t allocation_info_t;
Matthias Braun's avatar
Matthias Braun committed
123

124
/** helper datastructure used when sorting register preferences */
struct reg_pref_t {
	unsigned num;  /**< register index */
	float    pref; /**< preference value for this register */
};
typedef struct reg_pref_t reg_pref_t;

/** per basic-block information */
struct block_info_t {
Michael Beck's avatar
Michael Beck committed
133
	bool     processed;       /**< indicate whether block is processed */
134
	ir_node *assignments[0];  /**< register assignments at end of block */
135
136
};
typedef struct block_info_t block_info_t;
Matthias Braun's avatar
Matthias Braun committed
137

138
139
140
141
/**
 * Get the allocation info for a node.
 * The info is allocated on the first visit of a node.
 */
Matthias Braun's avatar
Matthias Braun committed
142
143
static allocation_info_t *get_allocation_info(ir_node *node)
{
144
145
	allocation_info_t *info = get_irn_link(node);
	if (info == NULL) {
146
		info = OALLOCFZ(&obst, allocation_info_t, prefs, n_regs);
147
148
		info->current_value  = node;
		info->original_value = node;
Matthias Braun's avatar
Matthias Braun committed
149
150
151
152
153
154
		set_irn_link(node, info);
	}

	return info;
}

155
156
157
158
159
/**
 * Get the allocation info of a node without creating it.
 *
 * @return the info attached to the node's link field; NULL if
 *         get_allocation_info() was not called for the node yet
 */
static allocation_info_t *try_get_allocation_info(const ir_node *node)
{
	return (allocation_info_t*) get_irn_link(node);
}

160
161
162
163
164
/**
 * Get allocation information for a basic block
 * (lazily created on first access, stored in the block's link field).
 */
static block_info_t *get_block_info(ir_node *block)
{
	block_info_t *info = get_irn_link(block);

	assert(is_Block(block));
	if (info == NULL) {
		info = OALLOCFZ(&obst, block_info_t, assignments, n_regs);
		set_irn_link(block, info);
	}

	return info;
}

176
177
178
179
180
181
182
/**
 * Get default register requirement for the current register class
 */
static const arch_register_req_t *get_default_req_current_cls(void)
{
	if (default_cls_req == NULL) {
		struct obstack      *obst = get_irg_obstack(irg);
183
		arch_register_req_t *req  = OALLOCZ(obst, arch_register_req_t);
184
185
186
187
188
189
190
191
192

		req->type = arch_register_req_type_normal;
		req->cls  = cls;

		default_cls_req = req;
	}
	return default_cls_req;
}

193
194
195
196
197
/**
 * Link the allocation info of a node to a copy.
 * Afterwards, both nodes uses the same allocation info.
 * Copy must not have an allocation info assigned yet.
 *
198
 * @param copy   the node that gets the allocation info assigned
199
200
 * @param value  the original node
 */
201
static void mark_as_copy_of(ir_node *copy, ir_node *value)
Matthias Braun's avatar
Matthias Braun committed
202
{
203
	ir_node           *original;
204
205
206
	allocation_info_t *info      = get_allocation_info(value);
	allocation_info_t *copy_info = get_allocation_info(copy);

207
208
209
210
211
212
213
	/* find original value */
	original = info->original_value;
	if (original != value) {
		info = get_allocation_info(original);
	}

	assert(info->original_value == original);
214
215
216
217
	info->current_value = copy;

	/* the copy should not be linked to something else yet */
	assert(copy_info->original_value == copy);
218
219
	copy_info->original_value = original;

220
221
	/* copy over allocation preferences */
	memcpy(copy_info->prefs, info->prefs, n_regs * sizeof(copy_info->prefs[0]));
Matthias Braun's avatar
Matthias Braun committed
222
223
}

224
225
226
/**
 * Calculate the penalties for every register on a node and its live neighbors.
 *
227
228
229
230
 * @param live_nodes  the set of live nodes at the current position, may be NULL
 * @param penalty     the penalty to subtract from
 * @param limited     a raw bitset containing the limited set for the node
 * @param node        the node
231
 */
Matthias Braun's avatar
Matthias Braun committed
232
233
static void give_penalties_for_limits(const ir_nodeset_t *live_nodes,
                                      float penalty, const unsigned* limited,
234
                                      ir_node *node)
Matthias Braun's avatar
Matthias Braun committed
235
{
Michael Beck's avatar
Michael Beck committed
236
237
	ir_nodeset_iterator_t iter;
	unsigned              r;
238
	unsigned              n_allowed;
Matthias Braun's avatar
Matthias Braun committed
239
	allocation_info_t     *info = get_allocation_info(node);
Michael Beck's avatar
Michael Beck committed
240
	ir_node               *neighbor;
Matthias Braun's avatar
Matthias Braun committed
241
242
243
244
245
246
247
248
249
250
251
252
253

	/* give penalty for all forbidden regs */
	for (r = 0; r < n_regs; ++r) {
		if (rbitset_is_set(limited, r))
			continue;

		info->prefs[r] -= penalty;
	}

	/* all other live values should get a penalty for allowed regs */
	if (live_nodes == NULL)
		return;

254
	penalty   *= NEIGHBOR_FACTOR;
255
	n_allowed  = rbitset_popcount(limited, n_regs);
256
257
	if (n_allowed > 1) {
		/* only create a very weak penalty if multiple regs are allowed */
Michael Beck's avatar
Michael Beck committed
258
		penalty = (penalty * 0.8f) / n_allowed;
259
	}
Matthias Braun's avatar
Matthias Braun committed
260
261
262
263
264
265
266
267
	foreach_ir_nodeset(live_nodes, neighbor, iter) {
		allocation_info_t *neighbor_info;

		/* TODO: if op is used on multiple inputs we might not do a
		 * continue here */
		if (neighbor == node)
			continue;

Christoph Mallon's avatar
Christoph Mallon committed
268
		neighbor_info = get_allocation_info(neighbor);
Matthias Braun's avatar
Matthias Braun committed
269
270
271
272
273
274
275
276
277
		for (r = 0; r < n_regs; ++r) {
			if (!rbitset_is_set(limited, r))
				continue;

			neighbor_info->prefs[r] -= penalty;
		}
	}
}

278
279
280
281
282
283
284
285
286
/**
 * Calculate the preferences of a definition for the current register class.
 * If the definition uses a limited set of registers, reduce the preferences
 * for the limited register on the node and its neighbors.
 *
 * @param live_nodes  the set of live nodes at the current node
 * @param weight      the weight
 * @param node        the current node
 */
Matthias Braun's avatar
Matthias Braun committed
287
288
289
static void check_defs(const ir_nodeset_t *live_nodes, float weight,
                       ir_node *node)
{
Michael Beck's avatar
Michael Beck committed
290
291
	const arch_register_req_t *req;

Matthias Braun's avatar
Matthias Braun committed
292
293
294
295
296
297
298
299
300
301
302
303
	if (get_irn_mode(node) == mode_T) {
		const ir_edge_t *edge;
		foreach_out_edge(node, edge) {
			ir_node *proj = get_edge_src_irn(edge);
			check_defs(live_nodes, weight, proj);
		}
		return;
	}

	if (!arch_irn_consider_in_reg_alloc(cls, node))
		return;

Michael Beck's avatar
Michael Beck committed
304
	req = arch_get_register_req_out(node);
Matthias Braun's avatar
Matthias Braun committed
305
306
307
308
309
310
311
312
313
314
315
316
	if (req->type & arch_register_req_type_limited) {
		const unsigned *limited = req->limited;
		float           penalty = weight * DEF_FACTOR;
		give_penalties_for_limits(live_nodes, penalty, limited, node);
	}

	if (req->type & arch_register_req_type_should_be_same) {
		ir_node           *insn  = skip_Proj(node);
		allocation_info_t *info  = get_allocation_info(node);
		int                arity = get_irn_arity(insn);
		int                i;

317
		float factor = 1.0f / rbitset_popcount(&req->other_same, arity);
Matthias Braun's avatar
Matthias Braun committed
318
		for (i = 0; i < arity; ++i) {
Michael Beck's avatar
Michael Beck committed
319
			ir_node           *op;
Christoph Mallon's avatar
Christoph Mallon committed
320
			unsigned           r;
Michael Beck's avatar
Michael Beck committed
321
322
			allocation_info_t *op_info;

Matthias Braun's avatar
Matthias Braun committed
323
324
325
			if (!rbitset_is_set(&req->other_same, i))
				continue;

326
327
328
329
330
			op = get_irn_n(insn, i);

			/* if we the value at the should_be_same input doesn't die at the
			 * node, then it is no use to propagate the constraints (since a
			 * copy will emerge anyway) */
331
			if (ir_nodeset_contains(live_nodes, op))
332
333
				continue;

Michael Beck's avatar
Michael Beck committed
334
			op_info = get_allocation_info(op);
Matthias Braun's avatar
Matthias Braun committed
335
336
337
338
339
340
341
			for (r = 0; r < n_regs; ++r) {
				op_info->prefs[r] += info->prefs[r] * factor;
			}
		}
	}
}

342
343
344
345
/**
 * Walker: Runs an a block calculates the preferences for any
 * node and every register from the considered register class.
 */
Matthias Braun's avatar
Matthias Braun committed
346
347
static void analyze_block(ir_node *block, void *data)
{
348
	float         weight = (float)get_block_execfreq(execfreqs, block);
Matthias Braun's avatar
Matthias Braun committed
349
350
351
352
353
354
355
356
	ir_nodeset_t  live_nodes;
	ir_node      *node;
	(void) data;

	ir_nodeset_init(&live_nodes);
	be_liveness_end_of_block(lv, cls, block, &live_nodes);

	sched_foreach_reverse(block, node) {
Michael Beck's avatar
Michael Beck committed
357
		allocation_info_t *info;
358
359
		int                i;
		int                arity;
Matthias Braun's avatar
Matthias Braun committed
360

361
		if (is_Phi(node))
Matthias Braun's avatar
Matthias Braun committed
362
363
			break;

364
365
		if (create_preferences)
			check_defs(&live_nodes, weight, node);
Matthias Braun's avatar
Matthias Braun committed
366
367
368

		/* mark last uses */
		arity = get_irn_arity(node);
369
370
371
372
373
374
375
376

		/* the allocation info node currently only uses 1 unsigned value
		   to mark last used inputs. So we will fail for a node with more than
		   32 inputs. */
		if (arity >= (int) sizeof(unsigned) * 8) {
			panic("Node with more than %d inputs not supported yet",
					(int) sizeof(unsigned) * 8);
		}
Matthias Braun's avatar
Matthias Braun committed
377

Michael Beck's avatar
Michael Beck committed
378
		info = get_allocation_info(node);
Matthias Braun's avatar
Matthias Braun committed
379
380
381
382
383
384
385
386
387
388
389
390
391
		for (i = 0; i < arity; ++i) {
			ir_node *op = get_irn_n(node, i);
			if (!arch_irn_consider_in_reg_alloc(cls, op))
				continue;

			/* last usage of a value? */
			if (!ir_nodeset_contains(&live_nodes, op)) {
				rbitset_set(&info->last_uses, i);
			}
		}

		be_liveness_transfer(cls, node, &live_nodes);

392
393
394
395
396
397
		if (create_preferences) {
			/* update weights based on usage constraints */
			for (i = 0; i < arity; ++i) {
				const arch_register_req_t *req;
				const unsigned            *limited;
				ir_node                   *op = get_irn_n(node, i);
Michael Beck's avatar
Michael Beck committed
398

399
400
				if (!arch_irn_consider_in_reg_alloc(cls, op))
					continue;
Matthias Braun's avatar
Matthias Braun committed
401

402
403
404
				req = arch_get_register_req(node, i);
				if (!(req->type & arch_register_req_type_limited))
					continue;
Matthias Braun's avatar
Matthias Braun committed
405

406
				limited = req->limited;
407
408
				give_penalties_for_limits(&live_nodes, weight * USE_FACTOR,
				                          limited, op);
409
			}
Matthias Braun's avatar
Matthias Braun committed
410
411
412
413
414
415
		}
	}

	ir_nodeset_destroy(&live_nodes);
}

416
/**
 * Merge the congruence class of a definition with those of its
 * should_be_same operands, provided they do not interfere.
 */
static void congruence_def(ir_nodeset_t *live_nodes, ir_node *node)
{
	const arch_register_req_t *req;

	/* recurse into the Proj values of a mode_T node */
	if (get_irn_mode(node) == mode_T) {
		const ir_edge_t *edge;
		foreach_out_edge(node, edge) {
			ir_node *def = get_edge_src_irn(edge);
			congruence_def(live_nodes, def);
		}
		return;
	}

	if (!arch_irn_consider_in_reg_alloc(cls, node))
		return;

	/* should be same constraint? */
	req = arch_get_register_req_out(node);
	if (req->type & arch_register_req_type_should_be_same) {
		ir_node *insn  = skip_Proj(node);
		int      arity = get_irn_arity(insn);
		int      i;
		unsigned node_idx = get_irn_idx(node);
		node_idx          = uf_find(congruence_classes, node_idx);

		for (i = 0; i < arity; ++i) {
			ir_node               *live;
			ir_node               *op;
			int                    op_idx;
			ir_nodeset_iterator_t  iter;
			bool                   interferes = false;

			if (!rbitset_is_set(&req->other_same, i))
				continue;

			op     = get_irn_n(insn, i);
			op_idx = get_irn_idx(op);
			op_idx = uf_find(congruence_classes, op_idx);

			/* do we interfere with the value */
			foreach_ir_nodeset(live_nodes, live, iter) {
				int lv_idx = get_irn_idx(live);
				lv_idx     = uf_find(congruence_classes, lv_idx);
				if (lv_idx == op_idx) {
					interferes = true;
					break;
				}
			}
			/* don't put in same affinity class if we interfere */
			if (interferes)
				continue;

			node_idx = uf_union(congruence_classes, node_idx, op_idx);
			DB((dbg, LEVEL_3, "Merge %+F and %+F congruence classes\n",
			    node, op));
			/* one should_be_same is enough... */
			break;
		}
	}
}

477
/**
 * Block walker: build congruence classes for the block.
 *
 * First merges should_be_same-constrained definitions with their operands
 * (via congruence_def), then tries to put each Phi into the same congruence
 * class as its inputs, as long as input and Phi class do not interfere and
 * no other Phi in the block uses a value from the same class at the same
 * predecessor position. When two classes are merged their register
 * preferences are summed up on the class head.
 */
static void create_congruence_class(ir_node *block, void *data)
{
	ir_nodeset_t  live_nodes;
	ir_node      *node;

	(void) data;
	ir_nodeset_init(&live_nodes);
	be_liveness_end_of_block(lv, cls, block, &live_nodes);

	/* check should be same constraints */
	sched_foreach_reverse(block, node) {
		if (is_Phi(node))
			break;

		congruence_def(&live_nodes, node);
		be_liveness_transfer(cls, node, &live_nodes);
	}

	/* check phi congruence classes */
	sched_foreach_reverse_from(node, node) {
		int i;
		int arity;
		int node_idx;
		assert(is_Phi(node));

		if (!arch_irn_consider_in_reg_alloc(cls, node))
			continue;

		node_idx = get_irn_idx(node);
		node_idx = uf_find(congruence_classes, node_idx);

		arity = get_irn_arity(node);
		for (i = 0; i < arity; ++i) {
			bool                  interferes = false;
			ir_nodeset_iterator_t iter;
			unsigned              r;
			int                   old_node_idx;
			ir_node              *live;
			ir_node              *phi;
			allocation_info_t    *head_info;
			allocation_info_t    *other_info;
			ir_node              *op     = get_Phi_pred(node, i);
			int                   op_idx = get_irn_idx(op);
			op_idx = uf_find(congruence_classes, op_idx);

			/* do we interfere with the value */
			foreach_ir_nodeset(&live_nodes, live, iter) {
				int lv_idx = get_irn_idx(live);
				lv_idx     = uf_find(congruence_classes, lv_idx);
				if (lv_idx == op_idx) {
					interferes = true;
					break;
				}
			}
			/* don't put in same affinity class if we interfere */
			if (interferes)
				continue;
			/* any other phi has the same input? */
			sched_foreach(block, phi) {
				ir_node *oop;
				int      oop_idx;
				if (!is_Phi(phi))
					break;
				if (!arch_irn_consider_in_reg_alloc(cls, phi))
					continue;
				oop = get_Phi_pred(phi, i);
				if (oop == op)
					continue;
				oop_idx = get_irn_idx(oop);
				oop_idx = uf_find(congruence_classes, oop_idx);
				if (oop_idx == op_idx) {
					interferes = true;
					break;
				}
			}
			if (interferes)
				continue;

			/* merge the 2 congruence classes and sum up their preferences */
			old_node_idx = node_idx;
			node_idx = uf_union(congruence_classes, node_idx, op_idx);
			DB((dbg, LEVEL_3, "Merge %+F and %+F congruence classes\n",
			    node, op));

			old_node_idx = node_idx == old_node_idx ? op_idx : old_node_idx;
			head_info  = get_allocation_info(get_idx_irn(irg, node_idx));
			other_info = get_allocation_info(get_idx_irn(irg, old_node_idx));
			for (r = 0; r < n_regs; ++r) {
				head_info->prefs[r] += other_info->prefs[r];
			}
		}
	}

	/* fix: release the liveness set; it was initialized above but never
	 * destroyed, leaking its backing storage for every block walked
	 * (analyze_block, the parallel walker, already does this) */
	ir_nodeset_destroy(&live_nodes);
}

/**
 * Walker: make all members of a congruence class share the register
 * preferences of their class head.
 */
static void set_congruence_prefs(ir_node *node, void *data)
{
	allocation_info_t *info;
	allocation_info_t *head_info;
	unsigned node_idx = get_irn_idx(node);
	unsigned node_set = uf_find(congruence_classes, node_idx);

	(void) data;

	/* head of congruence class or not in any class */
	if (node_set == node_idx)
		return;

	if (!arch_irn_consider_in_reg_alloc(cls, node))
		return;

	head_info = get_allocation_info(get_idx_irn(irg, node_set));
	info      = get_allocation_info(node);

	/* overwrite this node's preferences with those of the class head */
	memcpy(info->prefs, head_info->prefs, n_regs * sizeof(info->prefs[0]));
}

static void combine_congruence_classes(void)
{
	size_t n = get_irg_last_idx(irg);
	congruence_classes = XMALLOCN(int, n);
	uf_init(congruence_classes, n);

	/* create congruence classes */
600
	irg_block_walk_graph(irg, create_congruence_class, NULL, NULL);
601
602
	/* merge preferences */
	irg_walk_graph(irg, set_congruence_prefs, NULL, NULL);
603
	free(congruence_classes);
604
605
606
607
608
609
}





610
611
612
613
614
615
/**
 * Assign register reg to the given node.
 *
 * @param node  the node
 * @param reg   the register
 */
Matthias Braun's avatar
Matthias Braun committed
616
617
static void use_reg(ir_node *node, const arch_register_t *reg)
{
618
	unsigned r = arch_register_get_index(reg);
619
	assignments[r] = node;
Matthias Braun's avatar
Matthias Braun committed
620
621
622
	arch_set_irn_register(node, reg);
}

623
624
625
626
627
628
629
630
631
632
633
634
/**
 * Release the register currently assigned to @p node (if the node is
 * relevant for the current register class).
 */
static void free_reg_of_value(ir_node *node)
{
	const arch_register_t *reg;
	unsigned               r;

	if (!arch_irn_consider_in_reg_alloc(cls, node))
		return;

	reg        = arch_get_irn_register(node);
	r          = arch_register_get_index(reg);
	/* assignment->value may be NULL if a value is used at 2 inputs
	   so it gets freed twice. */
	assert(assignments[r] == node || assignments[r] == NULL);
	assignments[r] = NULL;
}

639
640
641
/**
 * Compare two register preferences in decreasing order.
 */
Matthias Braun's avatar
Matthias Braun committed
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
static int compare_reg_pref(const void *e1, const void *e2)
{
	const reg_pref_t *rp1 = (const reg_pref_t*) e1;
	const reg_pref_t *rp2 = (const reg_pref_t*) e2;
	if (rp1->pref < rp2->pref)
		return 1;
	if (rp1->pref > rp2->pref)
		return -1;
	return 0;
}

/**
 * Fill @p regprefs with one (register index, preference) entry per register
 * taken from @p info and sort the array by decreasing preference.
 */
static void fill_sort_candidates(reg_pref_t *regprefs,
                                 const allocation_info_t *info)
{
	unsigned r;

	for (r = 0; r < n_regs; ++r) {
		float pref = info->prefs[r];
		regprefs[r].num  = r;
		regprefs[r].pref = pref;
	}
	/* TODO: use a stable sort here to avoid unnecessary register jumping */
	qsort(regprefs, n_regs, sizeof(regprefs[0]), compare_reg_pref);
}

667
/**
 * Try to move the live-range of @p to_split to another register by inserting
 * a Copy before @p before, possibly recursively displacing other values.
 *
 * @param to_split        the value whose live-range should be split
 * @param before          insertion point for the copy
 * @param pref            (unused) preference of the desired register
 * @param pref_delta      expected gain for the caller if the split succeeds
 * @param forbidden_regs  registers that must not be used as destination
 * @param recursion       current recursion depth
 * @return true if a split was performed
 */
static bool try_optimistic_split(ir_node *to_split, ir_node *before,
                                 float pref, float pref_delta,
                                 unsigned *forbidden_regs, int recursion)
{
	const arch_register_t *from_reg;
	const arch_register_t *reg;
	ir_node               *original_insn;
	ir_node               *block;
	ir_node               *copy;
	unsigned               r;
	unsigned               from_r;
	unsigned               i;
	allocation_info_t     *info = get_allocation_info(to_split);
	reg_pref_t            *prefs;
	float                  delta;
	float                  split_threshold;

	(void) pref;

	/* stupid hack: don't optimistically split don't spill nodes...
	 * (so we don't split away the values produced because of
	 *  must_be_different constraints) */
	original_insn = skip_Proj(info->original_value);
	if (arch_irn_get_flags(original_insn) & arch_irn_flags_dont_spill)
		return false;

	from_reg        = arch_get_irn_register(to_split);
	from_r          = arch_register_get_index(from_reg);
	block           = get_nodes_block(before);
	split_threshold = (float)get_block_execfreq(execfreqs, block) * SPLIT_DELTA;

	if (pref_delta < split_threshold*0.5)
		return false;

	/* find the best free position where we could move to */
	prefs = ALLOCAN(reg_pref_t, n_regs);
	fill_sort_candidates(prefs, info);
	for (i = 0; i < n_regs; ++i) {
		float apref;
		float apref_delta;
		bool  res;
		bool  old_source_state;

		/* we need a normal register which is not an output register
		   and different from the current register of to_split */
		r = prefs[i].num;
		if (!rbitset_is_set(normal_regs, r))
			continue;
		if (rbitset_is_set(forbidden_regs, r))
			continue;
		if (r == from_r)
			continue;

		/* is the split worth it? */
		delta = pref_delta + prefs[i].pref;
		if (delta < split_threshold) {
			DB((dbg, LEVEL_3, "Not doing optimistical split of %+F (depth %d), win %f too low\n",
				to_split, recursion, delta));
			return false;
		}

		/* if the register is free then we can do the split */
		if (assignments[r] == NULL)
			break;

		/* otherwise we might try recursively calling optimistic_split */
		if (recursion+1 > MAX_OPTIMISTIC_SPLIT_RECURSION)
			continue;

		apref        = prefs[i].pref;
		apref_delta  = i+1 < n_regs ? apref - prefs[i+1].pref : 0;
		apref_delta += pref_delta - split_threshold;

		/* our source register isn't a useful destination for recursive
		   splits */
		old_source_state = rbitset_is_set(forbidden_regs, from_r);
		rbitset_set(forbidden_regs, from_r);
		/* try recursive split */
		res = try_optimistic_split(assignments[r], before, apref,
		                           apref_delta, forbidden_regs, recursion+1);
		/* restore our destination */
		if (old_source_state) {
			rbitset_set(forbidden_regs, from_r);
		} else {
			rbitset_clear(forbidden_regs, from_r);
		}

		if (res)
			break;
	}
	if (i >= n_regs)
		return false;

	reg  = arch_register_for_index(cls, r);
	copy = be_new_Copy(cls, block, to_split);
	mark_as_copy_of(copy, to_split);
	/* hacky, but correct here */
	if (assignments[arch_register_get_index(from_reg)] == to_split)
		free_reg_of_value(to_split);
	use_reg(copy, reg);
	sched_add_before(before, copy);

	DB((dbg, LEVEL_3,
	    "Optimistic live-range split %+F move %+F(%s) -> %s before %+F (win %f, depth %d)\n",
	    copy, to_split, from_reg->name, reg->name, before, delta, recursion));
	return true;
}

775
776
777
/**
 * Determine and assign a register for node @p node
 */
Matthias Braun's avatar
Matthias Braun committed
778
static void assign_reg(const ir_node *block, ir_node *node,
Matthias Braun's avatar
Matthias Braun committed
779
                       unsigned *forbidden_regs)
Matthias Braun's avatar
Matthias Braun committed
780
{
Michael Beck's avatar
Michael Beck committed
781
782
783
784
	const arch_register_t     *reg;
	allocation_info_t         *info;
	const arch_register_req_t *req;
	reg_pref_t                *reg_prefs;
Michael Beck's avatar
Michael Beck committed
785
	ir_node                   *in_node;
786
787
	unsigned                   i;
	const unsigned            *allowed_regs;
Michael Beck's avatar
Michael Beck committed
788
	unsigned                   r;
Michael Beck's avatar
Michael Beck committed
789

Matthias Braun's avatar
Matthias Braun committed
790
	assert(!is_Phi(node));
Matthias Braun's avatar
Matthias Braun committed
791
792
793
	assert(arch_irn_consider_in_reg_alloc(cls, node));

	/* preassigned register? */
Michael Beck's avatar
Michael Beck committed
794
	reg = arch_get_irn_register(node);
Matthias Braun's avatar
Matthias Braun committed
795
796
797
798
799
800
801
	if (reg != NULL) {
		DB((dbg, LEVEL_2, "Preassignment %+F -> %s\n", node, reg->name));
		use_reg(node, reg);
		return;
	}

	/* give should_be_same boni */
Michael Beck's avatar
Michael Beck committed
802
803
	info = get_allocation_info(node);
	req  = arch_get_register_req_out(node);
804

Michael Beck's avatar
Michael Beck committed
805
	in_node = skip_Proj(node);
Matthias Braun's avatar
Matthias Braun committed
806
	if (req->type & arch_register_req_type_should_be_same) {
807
		float weight = (float)get_block_execfreq(execfreqs, block);
808
		int   arity  = get_irn_arity(in_node);
Matthias Braun's avatar
Matthias Braun committed
809
		int   i;
Michael Beck's avatar
Michael Beck committed
810

Matthias Braun's avatar
Matthias Braun committed
811
812
813
814
815
816
817
818
		assert(arity <= (int) sizeof(req->other_same) * 8);
		for (i = 0; i < arity; ++i) {
			ir_node               *in;
			const arch_register_t *reg;
			unsigned               r;
			if (!rbitset_is_set(&req->other_same, i))
				continue;

819
			in  = get_irn_n(in_node, i);
Matthias Braun's avatar
Matthias Braun committed
820
821
822
			reg = arch_get_irn_register(in);
			assert(reg != NULL);
			r = arch_register_get_index(reg);
823
824
825

			/* if the value didn't die here then we should not propagate the
			 * should_be_same info */
826
			if (assignments[r] == in)
827
828
				continue;

829
			info->prefs[r] += weight * AFF_SHOULD_BE_SAME;
Matthias Braun's avatar
Matthias Braun committed
830
831
832
		}
	}

Matthias Braun's avatar
Matthias Braun committed
833
	/* create list of register candidates and sort by their preference */
Matthias Braun's avatar
Matthias Braun committed
834
	DB((dbg, LEVEL_2, "Candidates for %+F:", node));
Michael Beck's avatar
Michael Beck committed
835
	reg_prefs = alloca(n_regs * sizeof(reg_prefs[0]));
Matthias Braun's avatar
Matthias Braun committed
836
837
	fill_sort_candidates(reg_prefs, info);
	for (i = 0; i < n_regs; ++i) {
Matthias Braun's avatar
Matthias Braun committed
838
		unsigned num = reg_prefs[i].num;
Michael Beck's avatar
Michael Beck committed
839
840
		const arch_register_t *reg;

841
842
843
		if (!rbitset_is_set(normal_regs, num))
			continue;

Michael Beck's avatar
Michael Beck committed
844
		reg = arch_register_for_index(cls, num);
Matthias Braun's avatar
Matthias Braun committed
845
846
847
848
		DB((dbg, LEVEL_2, " %s(%f)", reg->name, reg_prefs[i].pref));
	}
	DB((dbg, LEVEL_2, "\n"));

849
850
851
852
853
	allowed_regs = normal_regs;
	if (req->type & arch_register_req_type_limited) {
		allowed_regs = req->limited;
	}

Matthias Braun's avatar
Matthias Braun committed
854
	for (i = 0; i < n_regs; ++i) {
Michael Beck's avatar
Michael Beck committed
855
856
857
858
		float   pref, delta;
		ir_node *before;
		bool    res;

859
860
861
		r = reg_prefs[i].num;
		if (!rbitset_is_set(allowed_regs, r))
			continue;
862
		if (assignments[r] == NULL)
863
			break;
Michael Beck's avatar
Michael Beck committed
864
865
866
867
868
		pref   = reg_prefs[i].pref;
		delta  = i+1 < n_regs ? pref - reg_prefs[i+1].pref : 0;
		before = skip_Proj(node);
		res    = try_optimistic_split(assignments[r], before,
		                              pref, delta, forbidden_regs, 0);
Matthias Braun's avatar
Matthias Braun committed
869
870
		if (res)
			break;
871
872
	}
	if (i >= n_regs) {
Matthias Braun's avatar
Matthias Braun committed
873
		panic("No register left for %+F\n", node);
Matthias Braun's avatar
Matthias Braun committed
874
875
	}

876
877
878
	reg = arch_register_for_index(cls, r);
	DB((dbg, LEVEL_2, "Assign %+F -> %s\n", node, reg->name));
	use_reg(node, reg);
Matthias Braun's avatar
Matthias Braun committed
879
880
}

881
882
883
884
/**
 * Add an permutation in front of a node and change the assignments
 * due to this permutation.
 *
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
 * To understand this imagine a permutation like this:
 *
 * 1 -> 2
 * 2 -> 3
 * 3 -> 1, 5
 * 4 -> 6
 * 5
 * 6
 * 7 -> 7
 *
 * First we count how many destinations a single value has. At the same time
 * we can be sure that each destination register has at most 1 source register
 * (it can have 0 which means we don't care what value is in it).
 * We ignore all fulfilled permutations (like 7->7)
 * In a first pass we create as much copy instructions as possible as they
 * are generally cheaper than exchanges. We do this by counting into how many
 * destinations a register has to be copied (in the example it's 2 for register
 * 3, or 1 for the registers 1,2,4 and 7).
 * We can then create a copy into every destination register when the usecount
 * of that register is 0 (= no one else needs the value in the register).
 *
 * After this step we should have cycles left. We implement a cyclic permutation
 * of n registers with n-1 transpositions.
 *
909
910
 * @param live_nodes   the set of live nodes, updated due to live range split
 * @param before       the node before we add the permutation
911
912
913
 * @param permutation  the permutation array indices are the destination
 *                     registers, the values in the array are the source
 *                     registers.
914
 */
yb9976's avatar
typos    
yb9976 committed
915
static void permute_values(ir_nodeset_t *live_nodes, ir_node *before,
Matthias Braun's avatar
Matthias Braun committed
916
917
                             unsigned *permutation)
{
918
	unsigned  *n_used = ALLOCANZ(unsigned, n_regs);
919
	ir_node   *block;
920
	unsigned   r;
Matthias Braun's avatar
Matthias Braun committed
921

922
	/* determine how often each source register needs to be read */
Matthias Braun's avatar
Matthias Braun committed
923
	for (r = 0; r < n_regs; ++r) {
924
925
		unsigned  old_reg = permutation[r];
		ir_node  *value;
Matthias Braun's avatar
Matthias Braun committed
926

927
		value = assignments[old_reg];
Matthias Braun's avatar
Matthias Braun committed
928
		if (value == NULL) {
929
930
			/* nothing to do here, reg is not live. Mark it as fixpoint
			 * so we ignore it in the next steps */
Matthias Braun's avatar
Matthias Braun committed
931
932
933
934
			permutation[r] = r;
			continue;
		}

935
		++n_used[old_reg];
Matthias Braun's avatar
Matthias Braun committed
936
937
	}

Michael Beck's avatar
Michael Beck committed
938
	block = get_nodes_block(before);
Matthias Braun's avatar
Matthias Braun committed
939

940
941
942
	/* step1: create copies where immediately possible */
	for (r = 0; r < n_regs; /* empty */) {
		ir_node *copy;
943
		ir_node *src;
Michael Beck's avatar
Michael Beck committed
944
		const arch_register_t *reg;
945
		unsigned               old_r = permutation[r];
Michael Beck's avatar
Michael Beck committed
946

947
948
949
		/* - no need to do anything for fixed points.
		   - we can't copy if the value in the dest reg is still needed */
		if (old_r == r || n_used[r] > 0) {
950
			++r;
Matthias Braun's avatar
Matthias Braun committed
951
			continue;
952
		}
Matthias Braun's avatar
Matthias Braun committed
953

954
		/* create a copy */
955
		src  = assignments[old_r];
956
		copy = be_new_Copy(cls, block, src);
957
		sched_add_before(before, copy);
958
		reg = arch_register_for_index(cls, r);
959
960
961
		DB((dbg, LEVEL_2, "Copy %+F (from %+F, before %+F) -> %s\n",
		    copy, src, before, reg->name));
		mark_as_copy_of(copy, src);
962
		use_reg(copy, reg);
963
964
965
966

		if (live_nodes != NULL) {
			ir_nodeset_insert(live_nodes, copy);
		}
967

968
969
		/* old register has 1 user less, permutation is resolved */
		assert(arch_register_get_index(arch_get_irn_register(src)) == old_r);
970
971
		permutation[r] = r;

972
973
		assert(n_used[old_r] > 0);
		--n_used[old_r];
974
		if (n_used[old_r] == 0) {
975
976
977
			if (live_nodes != NULL) {
				ir_nodeset_remove(live_nodes, src);
			}
978
979
			free_reg_of_value(src);
		}
980

981
		/* advance or jump back (if this copy enabled another copy) */
982
		if (old_r < r && n_used[old_r] == 0) {
983
			r = old_r;
984
		} else {
985
			++r;
986
		}
987
	}
Matthias Braun's avatar
Matthias Braun committed
988

989
990
991
992
993
	/* at this point we only have "cycles" left which we have to resolve with
	 * perm instructions
	 * TODO: if we have free registers left, then we should really use copy
	 * instructions for any cycle longer than 2 registers...
	 * (this is probably architecture dependent, there might be archs where
994
	 *  copies are preferable even for 2-cycles) */
995

996
	/* create perms with the rest */
997
998
999
1000
	for (r = 0; r < n_regs; /* empty */) {
		const arch_register_t *reg;
		unsigned  old_r = permutation[r];
		unsigned  r2;
For faster browsing, not all history is shown. View entire blame