/*
 * This file is part of libFirm.
 * Copyright (C) 2012 University of Karlsruhe.
 */

/**
 * @file
 * @brief       Block-scheduling strategies.
 * @author      Matthias Braun, Christoph Mallon
 * @date        27.09.2006
 *
 * The greedy (and ILP) algorithms here work by assuming that we want to turn
 * as many jumps into fallthroughs as possible (executed jumps actually, so we
 * have to look at the execution frequencies). The algorithms do this by
 * collecting the execution frequencies of all branches (which is easily
 * possible when all critical edges are split), then remove critical edges
 * where possible as we don't need and want them anymore. The algorithms then
 * try to change as many edges to fallthroughs as possible; this is done by
 * setting next and prev pointers on blocks. The greedy algorithm sorts the
 * edges by execution frequency and tries to transform them to fallthroughs
 * in this order.
 */
#include "beblocksched.h"

#include <stdlib.h>

#include "util.h"
#include "array.h"
#include "pdeq.h"
#include "beirg.h"
#include "iredges.h"
#include "irgwalk.h"
#include "irnode_t.h"
#include "irgraph_t.h"
#include "irgmod.h"
#include "irloop.h"
#include "execfreq.h"
#include "irdump_t.h"
#include "irtools.h"
#include "debug.h"
#include "bearch.h"
#include "bemodule.h"
#include "besched.h"
#include "beutil.h"
#include "be.h"
#include "panic.h"

DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)

static bool blocks_removed;

/**
 * Post-block-walker: Find blocks containing only one jump and
 * remove them.
 */
static void remove_empty_block(ir_node *block)
{
	if (irn_visited_else_mark(block))
		return;

	if (get_Block_n_cfgpreds(block) != 1)
		goto check_preds;

	ir_node *jump = NULL;
	sched_foreach(block, node) {
		if (!is_Jmp(node)
		    && !(arch_get_irn_flags(node) & arch_irn_flag_simple_jump))
			goto check_preds;
		if (jump != NULL) {
			/* we should never have 2 jumps in a block */
			panic("found 2 jumps in a block");
		}
		jump = node;
	}
	if (jump == NULL)
		goto check_preds;

	ir_entity *entity     = get_Block_entity(block);
	ir_node   *pred       = get_Block_cfgpred(block, 0);
	ir_node   *succ_block = NULL;
	foreach_out_edge_safe(jump, edge) {
		int pos = get_edge_src_pos(edge);

		assert(succ_block == NULL);
		succ_block = get_edge_src_irn(edge);
		if (get_Block_entity(succ_block) != NULL && entity != NULL) {
			/* Currently we can add only one label for a block. Therefore we
			 * cannot combine them if both block already have one. :-( */
			goto check_preds;
		}

		set_irn_n(succ_block, pos, pred);
	}

	/* move the label to the successor block */
	set_Block_entity(succ_block, entity);

	/* there can be some non-scheduled Pin nodes left in the block, move them
	 * to the succ block (Pin) or pred block (Sync) */
	foreach_out_edge_safe(block, edge) {
		ir_node *const node = get_edge_src_irn(edge);

		if (node == jump)
			continue;
		/* we simply kill Pins, because there are some strange interactions
		 * between jump threading, which produce PhiMs with Pins, we simply
		 * kill the pins here, everything is scheduled anyway */
		if (is_Pin(node)) {
			exchange(node, get_Pin_op(node));
			continue;
		}
		if (is_Sync(node)) {
			set_nodes_block(node, get_nodes_block(pred));
			continue;
		}
		if (is_End(node)) { /* End-keep, reroute it to the successor */
			int pos = get_edge_src_pos(edge);
			set_irn_n(node, pos, succ_block);
			continue;
		}
120
		panic("unexpected node %+F in block %+F with empty schedule", node, block);
121
122
	}

123
	ir_graph *irg = get_irn_irg(block);
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
	set_Block_cfgpred(block, 0, new_r_Bad(irg, mode_X));
	kill_node(jump);
	blocks_removed = true;

	/* check predecessor */
	remove_empty_block(get_nodes_block(pred));
	return;

check_preds:
	for (int i = 0, arity = get_Block_n_cfgpreds(block); i < arity; ++i) {
		ir_node *pred = get_Block_cfgpred_block(block, i);
		remove_empty_block(pred);
	}
}

/* removes basic blocks that just contain a jump instruction */
static void remove_empty_blocks(ir_graph *irg)
{
	blocks_removed = false;

	ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED);
	inc_irg_visited(irg);
	remove_empty_block(get_irg_end_block(irg));
	foreach_irn_in(get_irg_end(irg), i, pred) {
		if (!is_Block(pred))
			continue;
		remove_empty_block(pred);
	}
	ir_free_resources(irg, IR_RESOURCE_IRN_VISITED);

	if (blocks_removed) {
		/* invalidate analysis info */
		clear_irg_properties(irg, IR_GRAPH_PROPERTY_CONSISTENT_DOMINANCE);
	}
}

typedef struct blocksched_entry_t blocksched_entry_t;
struct blocksched_entry_t {
	ir_node            *block;
	blocksched_entry_t *next;
	blocksched_entry_t *prev;
};
typedef struct edge_t edge_t;
struct edge_t {
169
170
171
	ir_node *block;             /**< source block */
	int     pos;                /**< number of cfg predecessor (target) */
	double  execfreq;           /**< the frequency */
172
173
174
175
176
177
178
	double  outedge_penalty_freq; /**< for edges leaving the loop this is the
	                                   penality when we make them a
	                                   fallthrough. */
	int     highest_execfreq;   /**< flag that indicates whether this edge is
	                                 the edge with the highest execfreq pointing
	                                 away from this block */
};
typedef struct blocksched_env_t blocksched_env_t;
struct blocksched_env_t {
182
	ir_graph       *irg;
183
	struct obstack  obst;
184
185
186
	edge_t         *edges;
	pdeq           *worklist;
	int            blockcount;
187
};
static blocksched_entry_t* get_blocksched_entry(const ir_node *block)
{
	return (blocksched_entry_t*)get_irn_link(block);
}

/**
 * Collect cfg frequencies of all edges between blocks.
 * Also determines edge with highest frequency.
 */
198
199
static void collect_egde_frequency(ir_node *block, void *data)
{
200
	blocksched_env_t   *env = (blocksched_env_t*)data;
201
202
	int                arity;
	edge_t             edge;
203
	blocksched_entry_t *entry;
204
	ir_loop            *loop;
205

206
207
	memset(&edge, 0, sizeof(edge));

208
	entry = OALLOCZ(&env->obst, blocksched_entry_t);
209
210
211
	entry->block = block;
	set_irn_link(block, entry);

212
213
	loop = get_irn_loop(block);

214
	arity = get_Block_n_cfgpreds(block);
215

216
	if (arity == 0) {
217
218
		/* must be the start block (or end-block for endless loops),
		 * everything else is dead code and should be removed by now */
219
220
		assert(block == get_irg_start_block(env->irg)
				|| block == get_irg_end_block(env->irg));
221
		/* nothing to do here */
222
223
		return;
	} else if (arity == 1) {
224
225
		ir_node *pred_block = get_Block_cfgpred_block(block, 0);
		ir_loop *pred_loop  = get_irn_loop(pred_block);
226
		float    freq       = (float)get_block_execfreq(block);
227
228
229

		/* is it an edge leaving a loop */
		if (get_loop_depth(pred_loop) > get_loop_depth(loop)) {
230
			float pred_freq = (float)get_block_execfreq(pred_block);
231
232
233
			edge.outedge_penalty_freq = -(pred_freq - freq);
		}

234
235
		edge.block            = block;
		edge.pos              = 0;
236
		edge.execfreq         = freq;
237
238
239
		edge.highest_execfreq = 1;
		ARR_APP1(edge_t, env->edges, edge);
	} else {
240
		int    i;
241
		double highest_execfreq = -1.0;
242
		int    highest_edge_num = -1;
243
244

		edge.block = block;
245
246
		for (i = 0; i < arity; ++i) {
			double  execfreq;
247
			ir_node *pred_block = get_Block_cfgpred_block(block, i);
248

249
			execfreq = get_block_execfreq(pred_block);
250

251
252
			edge.pos              = i;
			edge.execfreq         = execfreq;
253
254
			edge.highest_execfreq = 0;
			ARR_APP1(edge_t, env->edges, edge);
255
256

			if (execfreq > highest_execfreq) {
257
258
259
260
261
				highest_execfreq = execfreq;
				highest_edge_num = ARR_LEN(env->edges) - 1;
			}
		}

262
		if (highest_edge_num >= 0)
263
			env->edges[highest_edge_num].highest_execfreq = 1;
264
265
266
	}
}

static int cmp_edges_base(const edge_t *e1, const edge_t *e2)
{
	long nr1 = get_irn_node_nr(e1->block);
	long nr2 = get_irn_node_nr(e2->block);
	if (nr1 < nr2) {
		return 1;
	} else if (nr1 > nr2) {
		return -1;
	} else {
		if (e1->pos < e2->pos) {
			return 1;
		} else if (e1->pos > e2->pos) {
			return -1;
		} else {
			return 0;
		}
	}
}

static int cmp_edges(const void *d1, const void *d2)
{
288
289
	const edge_t *e1 = (const edge_t*)d1;
	const edge_t *e2 = (const edge_t*)d2;
290
291
292
293
294
295
296
297
298
	double        freq1 = e1->execfreq;
	double        freq2 = e2->execfreq;
	if (freq1 < freq2) {
		return 1;
	} else if (freq1 > freq2) {
		return -1;
	} else {
		return cmp_edges_base(e1, e2);
	}
299
300
}

static int cmp_edges_outedge_penalty(const void *d1, const void *d2)
{
303
304
305
306
307
308
309
310
311
312
313
	const edge_t *e1   = (const edge_t*)d1;
	const edge_t *e2   = (const edge_t*)d2;
	double        pen1 = e1->outedge_penalty_freq;
	double        pen2 = e2->outedge_penalty_freq;
	if (pen1 > pen2) {
		return 1;
	} else if (pen1 < pen2) {
		return -1;
	} else {
		return cmp_edges_base(e1, e2);
	}
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
}

static void clear_loop_links(ir_loop *loop)
{
	int i, n;

	set_loop_link(loop, NULL);
	n = get_loop_n_elements(loop);
	for (i = 0; i < n; ++i) {
		loop_element elem = get_loop_element(loop, i);
		if (*elem.kind == k_ir_loop) {
			clear_loop_links(elem.son);
		}
	}
}

static void coalesce_blocks(blocksched_env_t *env)
{
	int i;
	int edge_count = ARR_LEN(env->edges);
334
335
336
	edge_t *edges = env->edges;

	/* sort interblock edges by execution frequency */
337
	QSORT_ARR(edges, cmp_edges);
338

339
340
	/* run1: only look at jumps */
	for (i = 0; i < edge_count; ++i) {
341
		const edge_t *edge  = &edges[i];
342
		ir_node      *block = edge->block;
343
		int           pos   = edge->pos;
344
		ir_node      *pred_block;
345
346
		blocksched_entry_t *entry, *pred_entry;

347
348
		/* only check edge with highest frequency */
		if (! edge->highest_execfreq)
349
350
			continue;

351
352
353
354
355
		/* the block might have been removed already... */
		if (is_Bad(get_Block_cfgpred(block, 0)))
			continue;

		pred_block = get_Block_cfgpred_block(block, pos);
Christoph Mallon's avatar
Christoph Mallon committed
356
357
		entry      = get_blocksched_entry(block);
		pred_entry = get_blocksched_entry(pred_block);
358

359
		if (pred_entry->next != NULL || entry->prev != NULL)
360
			continue;
361
362
363

		/* only coalesce jumps */
		if (get_block_succ_next(pred_block, get_block_succ_first(pred_block)) != NULL)
364
365
			continue;

366
		/* schedule the 2 blocks behind each other */
367
		DB((dbg, LEVEL_1, "Coalesce (Jump) %+F -> %+F (%.3g)\n",
368
		           pred_entry->block, entry->block, edge->execfreq));
369
		pred_entry->next = entry;
370
		entry->prev      = pred_entry;
371
372
	}

373
374
375
	/* run2: pick loop fallthroughs */
	clear_loop_links(get_irg_loop(env->irg));

376
	QSORT_ARR(edges, cmp_edges_outedge_penalty);
377
	for (i = 0; i < edge_count; ++i) {
378
		const edge_t *edge  = &edges[i];
379
		ir_node      *block = edge->block;
380
		int           pos   = edge->pos;
381
		ir_node      *pred_block;
382
		blocksched_entry_t *entry, *pred_entry;
383
384
385
386
387
388
		ir_loop      *loop;
		ir_loop      *outer_loop;

		/* already seen all loop outedges? */
		if (edge->outedge_penalty_freq == 0)
			break;
389

390
		/* the block might have been removed already... */
391
		if (is_Bad(get_Block_cfgpred(block, pos)))
392
393
			continue;

394
		pred_block = get_Block_cfgpred_block(block, pos);
Christoph Mallon's avatar
Christoph Mallon committed
395
396
		entry      = get_blocksched_entry(block);
		pred_entry = get_blocksched_entry(pred_block);
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421

		if (pred_entry->next != NULL || entry->prev != NULL)
			continue;

		/* we want at most 1 outedge fallthrough per loop */
		loop = get_irn_loop(pred_block);
		if (get_loop_link(loop) != NULL)
			continue;

		/* schedule the 2 blocks behind each other */
		DB((dbg, LEVEL_1, "Coalesce (Loop Outedge) %+F -> %+F (%.3g)\n",
		           pred_entry->block, entry->block, edge->execfreq));
		pred_entry->next = entry;
		entry->prev      = pred_entry;

		/* all loops left have an outedge now */
		outer_loop = get_irn_loop(block);
		do {
			/* we set loop link to loop to mark it */
			set_loop_link(loop, loop);
			loop = get_loop_outer_loop(loop);
		} while (loop != outer_loop);
	}

	/* sort interblock edges by execution frequency */
422
	QSORT_ARR(edges, cmp_edges);
423
424
425
426
427
428
429
430
431
432
433

	/* run3: remaining edges */
	for (i = 0; i < edge_count; ++i) {
		const edge_t *edge  = &edges[i];
		ir_node      *block = edge->block;
		int           pos   = edge->pos;
		ir_node      *pred_block;
		blocksched_entry_t *entry, *pred_entry;

		/* the block might have been removed already... */
		if (is_Bad(get_Block_cfgpred(block, pos)))
434
435
436
			continue;

		pred_block = get_Block_cfgpred_block(block, pos);
Christoph Mallon's avatar
Christoph Mallon committed
437
438
		entry      = get_blocksched_entry(block);
		pred_entry = get_blocksched_entry(pred_block);
439

Matthias Braun's avatar
Matthias Braun committed
440
		/* is 1 of the blocks already attached to another block? */
441
		if (pred_entry->next != NULL || entry->prev != NULL)
442
443
			continue;

444
		/* schedule the 2 blocks behind each other */
445
		DB((dbg, LEVEL_1, "Coalesce (CondJump) %+F -> %+F (%.3g)\n",
446
		           pred_entry->block, entry->block, edge->execfreq));
447
		pred_entry->next = entry;
448
		entry->prev      = pred_entry;
449
450
451
452
453
	}
}

static void pick_block_successor(blocksched_entry_t *entry, blocksched_env_t *env)
{
454
455
	ir_node            *block = entry->block;
	ir_node            *succ  = NULL;
456
	blocksched_entry_t *succ_entry;
457
	double              best_succ_execfreq;
458

459
	if (irn_visited_else_mark(block))
460
		return;
461

462
463
	env->blockcount++;

464
	DB((dbg, LEVEL_1, "Pick succ of %+F\n", block));
465

466
	/* put all successors into the worklist */
467
468
469
	foreach_block_succ(block, edge) {
		ir_node *succ_block = get_edge_src_irn(edge);

470
		if (irn_visited(succ_block))
471
472
			continue;

473
474
		/* we only need to put the first of a series of already connected
		 * blocks into the worklist */
Christoph Mallon's avatar
Christoph Mallon committed
475
		succ_entry = get_blocksched_entry(succ_block);
476
477
478
		while (succ_entry->prev != NULL) {
			/* break cycles... */
			if (succ_entry->prev->block == succ_block) {
479
				succ_entry->prev->next = NULL;
480
				succ_entry->prev       = NULL;
481
482
483
				break;
			}
			succ_entry = succ_entry->prev;
484
		}
485

486
		if (irn_visited(succ_entry->block))
487
488
			continue;

489
		DB((dbg, LEVEL_1, "Put %+F into worklist\n", succ_entry->block));
490
491
492
		pdeq_putr(env->worklist, succ_entry->block);
	}

493
	if (entry->next != NULL) {
494
495
496
497
		pick_block_successor(entry->next, env);
		return;
	}

498
	DB((dbg, LEVEL_1, "deciding...\n"));
499
	best_succ_execfreq = -1;
500

501
	/* no successor yet: pick the successor block with the highest execution
502
503
	 * frequency which has no predecessor yet */

504
505
506
	foreach_block_succ(block, edge) {
		ir_node *succ_block = get_edge_src_irn(edge);

507
		if (irn_visited(succ_block))
508
509
			continue;

Christoph Mallon's avatar
Christoph Mallon committed
510
		succ_entry = get_blocksched_entry(succ_block);
511
		if (succ_entry->prev != NULL)
512
513
			continue;

514
		double execfreq = get_block_execfreq(succ_block);
515
		if (execfreq > best_succ_execfreq) {
516
517
518
519
520
			best_succ_execfreq = execfreq;
			succ = succ_block;
		}
	}

521
	if (succ == NULL) {
522
		DB((dbg, LEVEL_1, "pick from worklist\n"));
523
524

		do {
525
			if (pdeq_empty(env->worklist)) {
526
				DB((dbg, LEVEL_1, "worklist empty\n"));
527
528
				return;
			}
529
			succ = (ir_node*)pdeq_getl(env->worklist);
530
		} while (irn_visited(succ));
531
532
	}

Christoph Mallon's avatar
Christoph Mallon committed
533
	succ_entry       = get_blocksched_entry(succ);
534
	entry->next      = succ_entry;
535
536
537
538
539
540
541
	succ_entry->prev = entry;

	pick_block_successor(succ_entry, env);
}

static blocksched_entry_t *finish_block_schedule(blocksched_env_t *env)
{
542
543
	ir_graph           *irg        = env->irg;
	ir_node            *startblock = get_irg_start_block(irg);
Christoph Mallon's avatar
Christoph Mallon committed
544
	blocksched_entry_t *entry      = get_blocksched_entry(startblock);
545

546
	ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED);
547
548
549
550
551
552
553
	inc_irg_visited(irg);

	env->worklist = new_pdeq();
	pick_block_successor(entry, env);
	assert(pdeq_empty(env->worklist));
	del_pdeq(env->worklist);

554
	ir_free_resources(irg, IR_RESOURCE_IRN_VISITED);
555

556
557
558
	return entry;
}

static ir_node **create_blocksched_array(blocksched_env_t *env,
                                         blocksched_entry_t *first,
                                         int count, struct obstack* obst)
562
563
564
{
	int                i = 0;
	ir_node            **block_list;
565
	blocksched_entry_t *entry;
566
	(void) env;
567
568

	block_list = NEW_ARR_D(ir_node *, obst, count);
569
	DB((dbg, LEVEL_1, "Blockschedule:\n"));
570
571

	for (entry = first; entry != NULL; entry = entry->next) {
572
573
		assert(i < count);
		block_list[i++] = entry->block;
574
		DB((dbg, LEVEL_1, "\t%+F\n", entry->block));
575
576
577
578
579
580
	}
	assert(i == count);

	return block_list;
}

ir_node **be_create_block_schedule(ir_graph *irg)
582
{
583
	blocksched_env_t   env;
584
	blocksched_entry_t *start_entry;
585
	ir_node            **block_list;
586

587
588
589
	env.irg        = irg;
	env.edges      = NEW_ARR_F(edge_t, 0);
	env.worklist   = NULL;
590
	env.blockcount = 0;
591
	obstack_init(&env.obst);
592

593
	assure_loopinfo(irg);
594

595
596
597
	// collect edge execution frequencies
	irg_block_walk_graph(irg, collect_egde_frequency, NULL, &env);

598
	remove_empty_blocks(irg);
599

600
	coalesce_blocks(&env);
601
602

	start_entry = finish_block_schedule(&env);
603
	block_list  = create_blocksched_array(&env, start_entry, env.blockcount,
604
	                                      be_get_be_obst(irg));
605
606

	DEL_ARR_F(env.edges);
607
	obstack_free(&env.obst, NULL);
608
609
610
611

	return block_list;
}

BE_REGISTER_MODULE_CONSTRUCTOR(be_init_blocksched)
613
void be_init_blocksched(void)
614
{
Matthias Braun's avatar
Matthias Braun committed
615
	FIRM_DBG_REGISTER(dbg, "firm.be.blocksched");
616
}