execfreq.c 11.3 KB
Newer Older
1
/*
Matthias Braun's avatar
Matthias Braun committed
2
 * This file is part of libFirm.
3
 * Copyright (C) 2012 University of Karlsruhe.
Matthias Braun's avatar
Matthias Braun committed
4
5
6
7
8
9
10
 */

/**
 * @file
 * @brief       Compute an estimate of basic block executions.
 * @author      Adam M. Szalkowski
 * @date        28.05.2006
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
 *
 * Estimate execution frequencies. We do this by creating a system of linear
 * equations with the following observations:
 *   - Each edge leaving a block (block successors, not block_cfgpreds) has
 *     a probabilty between 0 and 1.0 that it is taken.
 *   - The execution frequencies of a basic block is the sum of all execution
 *     frequencies of its predecessor blocks scaled by the probability factors
 *     of the edges to the predecessors.
 *   - All outgoing probabilities have a sum of 1.0.
 * We then assign equaly distributed probablilities for normal controlflow
 * splits, and higher probabilities for backedges.
 *
 * Special case: In case of endless loops or "noreturn" calls some blocks have
 * no path to the end node, which produces undesired results (0, infinite
 * execution frequencies). We aleviate that by adding artificial edges from kept
 * blocks with a path to end.
27
28
29
 */
#include <stdio.h>
#include <string.h>
30
#include <limits.h>
Sebastian Hack's avatar
Sebastian Hack committed
31
#include <math.h>
32

33
#include "gaussseidel.h"
34
35
36

#include "set.h"
#include "hashptr.h"
37
#include "debug.h"
Matthias Braun's avatar
Matthias Braun committed
38
#include "statev_t.h"
39
40
#include "dfs_t.h"
#include "absgraph.h"
41
42
43
44
45
46

#include "irprog_t.h"
#include "irgraph_t.h"
#include "irnode_t.h"
#include "irloop.h"
#include "irgwalk.h"
Matthias Braun's avatar
Matthias Braun committed
47
#include "iredges.h"
48
#include "irouts.h"
49
#include "irprintf.h"
50
#include "util.h"
Sebastian Hack's avatar
Sebastian Hack committed
51
#include "irhooks.h"
52
#include "irnodehashmap.h"
53

54
#include "execfreq_t.h"
55

56
#define EPSILON          1e-5
57
58
#define UNDEF(x)         (fabs(x) < EPSILON)
#define SEIDEL_TOLERANCE 1e-7
59
#define KEEP_FAC         0.1
60

61
62
#define MAX_INT_FREQ 1000000

63
static hook_entry_t hook;
Sebastian Hack's avatar
Sebastian Hack committed
64

65
double get_block_execfreq(const ir_node *block)
66
{
67
	return block->attr.block.execfreq;
68
69
}

70
void set_block_execfreq(ir_node *block, double newfreq)
71
{
72
	assert(!isinf(newfreq) && newfreq >= 0);
73
	block->attr.block.execfreq = newfreq;
74
75
}

76
static void exec_freq_node_info(void *ctx, FILE *f, const ir_node *irn)
77
{
78
79
80
	(void)ctx;
	if (!is_Block(irn))
		return;
81
82
83
	double freq = get_block_execfreq(irn);
	if (freq != 0.0)
		fprintf(f, "execution frequency: %g\n", freq);
84
85
}

86
void init_execfreq(void)
87
{
88
89
90
	memset(&hook, 0, sizeof(hook));
	hook.hook._hook_node_info = exec_freq_node_info;
	register_hook(hook_node_info, &hook);
91
92
}

93
void exit_execfreq(void)
94
{
95
	unregister_hook(hook_node_info, &hook);
96
97
}

98

99
static double *solve_lgs(gs_matrix_t *mat, double *x, int size)
100
{
101
	/* better convergence. */
Matthias Braun's avatar
Matthias Braun committed
102
103
	double init = 1.0 / size;
	for (int i = 0; i < size; ++i)
104
105
106
107
		x[i] = init;

	stat_ev_dbl("execfreq_matrix_size", size);
	stat_ev_tim_push();
Matthias Braun's avatar
Matthias Braun committed
108
109
	int    iter = 0;
	double dev;
110
111
	do {
		++iter;
112
		dev = gs_matrix_gauss_seidel(mat, x);
113
	} while (dev > SEIDEL_TOLERANCE);
114
115
116
117
	stat_ev_tim_pop("execfreq_seidel_time");
	stat_ev_dbl("execfreq_seidel_iter", iter);

	return x;
118
119
}

120
static bool has_path_to_end(const ir_node *block)
121
{
122
123
	return Block_block_visited(block);
}
124

125
126
127
128
static bool is_kept_block(const ir_node *block)
{
	return irn_visited(block);
}
129

130
131
132
133
static double get_sum_succ_factors(const ir_node *block, double inv_loop_weight)
{
	const ir_loop *loop  = get_irn_loop(block);
	const int      depth = get_loop_depth(loop);
Matthias Braun's avatar
Matthias Braun committed
134

135
136
	double sum = 0.0;
	foreach_block_succ(block, edge) {
137
138
139
140
		const ir_node *succ       = get_edge_src_irn(edge);
		const ir_loop *succ_loop  = get_irn_loop(succ);
		int            succ_depth = get_loop_depth(succ_loop);

141
142
		double fac = 1.0;
		for (int d = succ_depth; d < depth; ++d) {
143
144
145
			fac *= inv_loop_weight;
		}
		sum += fac;
Matthias Braun's avatar
Matthias Braun committed
146
	}
147

148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
	/* we add an artifical edge from each kept block which has no path to the
	 * end node */
	if (is_kept_block(block) && !has_path_to_end(block))
		sum += KEEP_FAC;

	return sum;
}

/*
 * Determine probability that predecessor pos takes this cf edge.
 */
static double get_cf_probability(const ir_node *bb, int pos,
                                 double inv_loop_weight)
{
	const ir_node *pred = get_Block_cfgpred_block(bb, pos);
163
	if (pred == NULL)
164
165
166
167
168
169
170
171
172
173
174
175
176
		return 0;

	const ir_loop *loop       = get_irn_loop(bb);
	const int      depth      = get_loop_depth(loop);
	const ir_loop *pred_loop  = get_irn_loop(pred);
	const int      pred_depth = get_loop_depth(pred_loop);

	double cur = 1.0;
	for (int d = depth; d < pred_depth; ++d) {
		cur *= inv_loop_weight;
	}
	double sum = get_sum_succ_factors(pred, inv_loop_weight);

Matthias Braun's avatar
Matthias Braun committed
177
	return cur/sum;
178
179
}

180
181
182
static double *freqs;
static double  min_non_zero;
static double  max_freq;
183

184
185
186
187
188
189
190
191
192
static void collect_freqs(ir_node *node, void *data)
{
	(void) data;
	double freq = get_block_execfreq(node);
	if (freq > max_freq)
		max_freq = freq;
	if (freq > 0.0 && freq < min_non_zero)
		min_non_zero = freq;
	ARR_APP1(double, freqs, freq);
Sebastian Hack's avatar
Sebastian Hack committed
193
194
}

195
196
void ir_calculate_execfreq_int_factors(ir_execfreq_int_factors *factors,
                                       ir_graph *irg)
197
{
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
	/* compute m and b of the transformation used to convert the doubles into
	 * scaled ints */
	freqs = NEW_ARR_F(double, 0);
	min_non_zero = HUGE_VAL;
	max_freq     = 0.0;
	irg_block_walk_graph(irg, collect_freqs, NULL, NULL);

	/*
	 * find the smallest difference of the execution frequencies
	 * we try to ressolve it with 1 integer.
	 */
	size_t n_freqs       = ARR_LEN(freqs);
	double smallest_diff = 1.0;
	for (size_t i = 0; i < n_freqs; ++i) {
		if (freqs[i] <= 0.0)
			continue;
214

215
216
		for (size_t j = i + 1; j < n_freqs; ++j) {
			double diff = fabs(freqs[i] - freqs[j]);
217

218
219
220
221
			if (!UNDEF(diff))
				smallest_diff = MIN(diff, smallest_diff);
		}
	}
222

223
224
225
226
	double l2 = min_non_zero;
	double h2 = max_freq;
	double l1 = 1.0;
	double h1 = MAX_INT_FREQ;
227

228
229
230
	/* according to that the slope of the translation function is
	 * 1.0 / smallest_diff */
	factors->m = 1.0 / smallest_diff;
231

232
233
234
	/* the abscissa is then given by */
	factors->b = l1 - factors->m * l2;

235
236
	factors->min_non_zero = min_non_zero;

237
238
239
240
241
242
243
244
245
246
247
	/*
	 * if the slope is so high that the largest integer would be larger than
	 * MAX_INT_FREQ set the largest int freq to that upper limit and recompute
	 * the translation function
	 */
	if (factors->m * h2 + factors->b > MAX_INT_FREQ) {
		factors->m = (h1 - l1) / (h2 - l2);
		factors->b = l1 - factors->m * l2;
	}

	DEL_ARR_F(freqs);
248
249
}

250
251
int get_block_execfreq_int(const ir_execfreq_int_factors *factors,
                           const ir_node *block)
Sebastian Hack's avatar
Sebastian Hack committed
252
{
253
254
	assert(factors->min_non_zero > 0.0);
	assert(factors->m != 0.0);
255
256
257
	double f   = get_block_execfreq(block);
	int    res = (int) (f > factors->min_non_zero ? factors->m * f + factors->b : 1.0);
	return res;
Sebastian Hack's avatar
Sebastian Hack committed
258
259
}

260
261
262
263
264
265
266
267
268
269
270
271
272
static void block_walk_no_keeps(ir_node *block)
{
	if (Block_block_visited(block))
		return;
	mark_Block_block_visited(block);

	for (int n_block_cfgspreds = get_Block_n_cfgpreds(block), i = 0;
	     i < n_block_cfgspreds; ++i) {
	    ir_node *cfgpred_block = get_Block_cfgpred_block(block, i);
	    block_walk_no_keeps(cfgpred_block);
	}
}

273
void ir_estimate_execfreq(ir_graph *irg)
274
{
275
	double loop_weight = 10.0;
276

277
278
	assure_irg_properties(irg,
		IR_GRAPH_PROPERTY_CONSISTENT_OUT_EDGES
279
		| IR_GRAPH_PROPERTY_NO_BADS
280
281
		| IR_GRAPH_PROPERTY_CONSISTENT_LOOPINFO
		| IR_GRAPH_PROPERTY_NO_UNREACHABLE_CODE);
282
283

	/* compute a DFS.
284
285
	 * using a toposort on the CFG (without back edges) will propagate
	 * the values better for the gauss/seidel iteration.
286
	 * => they can "flow" from start to end. */
287
	dfs_t *dfs = dfs_new(&absgraph_irg_cfg_succ, irg);
Sebastian Hack's avatar
Sebastian Hack committed
288

289
290
	int          size = dfs_get_n_nodes(dfs);
	gs_matrix_t *mat  = gs_new_matrix(size, size);
291

292
293
	ir_node *const start_block = get_irg_start_block(irg);
	ir_node *const end_block   = get_irg_end_block(irg);
294
295
296
297
298
299
	const int      start_idx   = size - dfs_get_post_num(dfs, start_block) - 1;

	ir_reserve_resources(irg, IR_RESOURCE_BLOCK_VISITED
	                     | IR_RESOURCE_IRN_VISITED);
	inc_irg_block_visited(irg);
	/* mark all blocks reachable from end_block as (block)visited
Christoph Mallon's avatar
Christoph Mallon committed
300
	 * (so we can detect places like endless-loops/noreturn calls which
301
302
	 *  do not reach the End block) */
	block_walk_no_keeps(end_block);
Christoph Mallon's avatar
Christoph Mallon committed
303
	/* mark all kept blocks as (node)visited */
304
	inc_irg_visited(irg);
305
306
307
308
309
310
311
	const ir_node *end          = get_irg_end(irg);
	int const      n_keepalives = get_End_n_keepalives(end);
	for (int k = n_keepalives - 1; k >= 0; --k) {
		ir_node *keep = get_End_keepalive(end, k);
		if (is_Block(keep))
			mark_irn_visited(keep);
	}
312

313
	double const inv_loop_weight = 1.0 / loop_weight;
314
	for (int idx = size - 1; idx >= 0; --idx) {
315
		const ir_node *bb = (ir_node*)dfs_get_post_num_node(dfs, size-idx-1);
316

yb9976's avatar
yb9976 committed
317
		/* Sum of (execution frequency of predecessor * probability of cf edge) ... */
318
319
320
		for (int i = get_Block_n_cfgpreds(bb) - 1; i >= 0; --i) {
			const ir_node *pred           = get_Block_cfgpred_block(bb, i);
			int            pred_idx       = size - dfs_get_post_num(dfs, pred)-1;
321
			double         cf_probability = get_cf_probability(bb, i, inv_loop_weight);
322
			gs_matrix_set(mat, idx, pred_idx, cf_probability);
323
		}
yb9976's avatar
yb9976 committed
324
		/* ... equals my execution frequency */
325
		gs_matrix_set(mat, idx, idx, -1.0);
326

327
		if (bb == end_block) {
328
329
330
331
332
333
334
335
336
337
			/* Add an edge from end to start.
			 * The problem is then an eigenvalue problem:
			 * Solve A*x = 1*x => (A-I)x = 0
			 */
			gs_matrix_set(mat, start_idx, idx, 1.0);

			/* add artifical edges from "kept blocks without a path to end"
			 * to end */
			for (int k = n_keepalives - 1; k >= 0; --k) {
				ir_node *keep = get_End_keepalive(end, k);
338
339
				if (!is_Block(keep) || has_path_to_end(keep)
				    || keep == start_block)
340
341
342
343
344
					continue;

				double sum      = get_sum_succ_factors(keep, inv_loop_weight);
				double fac      = KEEP_FAC/sum;
				int    keep_idx = size - dfs_get_post_num(dfs, keep)-1;
345
				gs_matrix_set(mat, start_idx, keep_idx, fac);
346
			}
347
348
		}
	}
349
350

	/* solve the system and delete the matrix */
351
	double *x = XMALLOCN(double, size);
352
	//ir_fprintf(stderr, "%+F:\n", irg);
353
	//gs_matrix_dump(mat, stderr);
354
355
356
	solve_lgs(mat, x, size);
	gs_delete_matrix(mat);

357
	/* compute the normalization factor.
358
	 * 1.0 / exec freq of start block.
359
360
	 * (note: start_idx is != 0 in strange cases involving endless loops,
	 *  probably a misfeature/bug)
361
	 */
362
363
	double start_freq = x[start_idx];
	double norm       = start_freq != 0.0 ? 1.0 / start_freq : 1.0;
364
	bool   valid_freq = true;
365
	for (int idx = size - 1; idx >= 0; --idx) {
366
		ir_node *bb = (ir_node *) dfs_get_post_num_node(dfs, size - idx - 1);
367

Sebastian Hack's avatar
Sebastian Hack committed
368
		/* take abs because it sometimes can be -0 in case of endless loops */
369
		double freq = fabs(x[idx]) * norm;
370
371
372
373
374
375

		/* Check for inf, nan and negative values. */
		if (isinf(freq) || !(freq >= 0)) {
			valid_freq = false;
			break;
		}
376
		set_block_execfreq(bb, freq);
377
	}
378

379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
	/* Fallback solution: Use loop weight. */
	if (!valid_freq) {
		valid_freq = true;

		for (int idx = size - 1; idx >= 0; --idx) {
			ir_node       *bb    = (ir_node *) dfs_get_post_num_node(dfs, size - idx - 1);
			const ir_loop *loop  = get_irn_loop(bb);
			const int      depth = get_loop_depth(loop);
			double         freq  = 1.0;

			for (int d = 0; d < depth; ++d) {
				freq *= loop_weight;
			}

			/* Check for inf, nan and negative values. */
			if (isinf(freq) || !(freq >= 0)) {
				valid_freq = false;
				break;
			}
			set_block_execfreq(bb, freq);
		}
	}

	/* Fallback solution: All blocks have the same execution frequency. */
	if (!valid_freq) {
		for (int idx = size - 1; idx >= 0; --idx) {
			ir_node *bb = (ir_node *) dfs_get_post_num_node(dfs, size - idx - 1);
			set_block_execfreq(bb, 1.0);
		}
	}

410
411
	ir_free_resources(irg, IR_RESOURCE_BLOCK_VISITED | IR_RESOURCE_IRN_VISITED);

412
	dfs_free(dfs);
413

414
	free(x);
415
}