Commit 0a7d7324 authored by Matthias Braun's avatar Matthias Braun
Browse files

simplify and cleanup execfreq API

parent 5d18527f
......@@ -32,31 +32,23 @@
/**
* @ingroup irana
* @defgroup execfreq Basic Block Execution Frequency
*
* Execution frequencies specify how often a basic block is expected to get
* executed during execution of a function.
* For example the start block has a natural execution frequency of 1.0, the
* two branches of a simple if 0.5, nodes in a simple loop 10.0 ...
* Execution frequencies can either get estimated based on the structure of the
* control flow graph or can be calculated based on profile information.
* @{
*/
/** Creates execfreq structure (to be used with set_execfreq) */
FIRM_API ir_exec_freq *create_execfreq(ir_graph *irg);
/**
* Sets execution frequency of a basic block
/** Estimates execution frequency of a graph.
* You can query the frequencies with get_block_execfreq().
*/
FIRM_API void set_execfreq(ir_exec_freq *ef, const ir_node *block, double freq);
/** Creates execfreq structure and initialize with estimated frequencies. */
FIRM_API ir_exec_freq *compute_execfreq(ir_graph *irg, double loop_weight);
/** Frees memory occupied by execution frequency structure @p ef. */
FIRM_API void free_execfreq(ir_exec_freq *ef);
FIRM_API void ir_estimate_execfreq(ir_graph *irg);
/** Returns execution frequency of block @p block. */
FIRM_API double get_block_execfreq(const ir_exec_freq *ef,
const ir_node *block);
/** Returns execution frequency of block @p block, scaled into the range
* of an unsigned long type. */
FIRM_API unsigned long get_block_execfreq_ulong(const ir_exec_freq *ef,
const ir_node *block);
FIRM_API double get_block_execfreq(const ir_node *block);
/** @} */
......
......@@ -94,9 +94,6 @@ typedef struct ir_loop ir_loop;
/** @ingroup ir_entity
* Entity */
typedef struct ir_entity ir_entity;
/** @ingroup execfreq
* Execution Frequency Analysis Results */
typedef struct ir_exec_freq ir_exec_freq;
/** @ingroup ir_cdep
* Control Dependence Analysis Results */
typedef struct ir_cdep ir_cdep;
......
......@@ -49,16 +49,9 @@
#include "irprintf.h"
#include "util.h"
#include "irhooks.h"
#include "irnodehashmap.h"
#include "execfreq.h"
/* enable to also solve the equations with Gauss-Jordan */
#undef COMPARE_AGAINST_GAUSSJORDAN
#ifdef COMPARE_AGAINST_GAUSSJORDAN
#include "gaussjordan.h"
#endif
#include "execfreq_t.h"
#define EPSILON 1e-5
#define UNDEF(x) (fabs(x) < EPSILON)
......@@ -67,70 +60,58 @@
#define MAX_INT_FREQ 1000000
typedef struct freq_t {
const ir_node *irn;
int idx;
double freq;
double freq;
} freq_t;
/**
 * Per-graph execution frequency results: the structure handed out by
 * create_execfreq()/compute_execfreq() and released by free_execfreq().
 */
struct ir_exec_freq {
/* set of freq_t entries, keyed by the block node (see cmp_freq()) */
set *freqs;
/* node-info hook; registered for hook_node_info with this struct as context */
hook_entry_t hook;
/* largest block frequency found by compute_execfreq() */
double max;
/* smallest block frequency greater than 0.0 found by compute_execfreq() */
double min_non_zero;
/* slope (m) and offset (b) of the linear double -> integer scaling applied
 * by get_block_execfreq_ulong() */
double m, b;
/* when set, get_block_execfreq() skips the lookup and returns 1.0 */
unsigned infeasible : 1;
};
static ir_nodehashmap_t freq_map;
static struct obstack obst;
static hook_entry_t hook;
static int cmp_freq(const void *a, const void *b, size_t size)
double get_block_execfreq(const ir_node *block)
{
const freq_t *p = (const freq_t*) a;
const freq_t *q = (const freq_t*) b;
(void) size;
return !(p->irn == q->irn);
const freq_t *freq = ir_nodehashmap_get(freq_t, &freq_map, block);
if (freq == NULL)
return 0.0;
return freq->freq;
}
static freq_t *set_find_freq(set *freqs, const ir_node *irn)
void set_block_execfreq(ir_node *block, double newfreq)
{
freq_t query;
query.irn = irn;
return set_find(freq_t, freqs, &query, sizeof(query), hash_ptr(irn));
freq_t *freq = ir_nodehashmap_get(freq_t, &freq_map, block);
if (freq == NULL) {
freq = OALLOC(&obst, freq_t);
ir_nodehashmap_insert(&freq_map, block, freq);
}
freq->freq = newfreq;
}
static freq_t *set_insert_freq(set *freqs, const ir_node *irn)
static void exec_freq_node_info(void *ctx, FILE *f, const ir_node *irn)
{
freq_t query;
query.irn = irn;
query.freq = 0.0;
query.idx = -1;
return set_insert(freq_t, freqs, &query, sizeof(query), hash_ptr(irn));
(void)ctx;
if (!is_Block(irn))
return;
fprintf(f, "execution frequency: %g\n", get_block_execfreq(irn));
}
double get_block_execfreq(const ir_exec_freq *ef, const ir_node *irn)
void init_execfreq(void)
{
if (!ef->infeasible) {
set *freqs = ef->freqs;
freq_t *freq;
assert(is_Block(irn));
freq = set_find_freq(freqs, irn);
assert(freq);
assert(freq->freq >= 0);
return freq->freq;
}
ir_nodehashmap_init(&freq_map);
obstack_init(&obst);
return 1.0;
memset(&hook, 0, sizeof(hook));
hook.hook._hook_node_info = exec_freq_node_info;
register_hook(hook_node_info, &hook);
}
unsigned long
get_block_execfreq_ulong(const ir_exec_freq *ef, const ir_node *bb)
void exit_execfreq(void)
{
double f = get_block_execfreq(ef, bb);
int res = (int) (f > ef->min_non_zero ? ef->m * f + ef->b : 1.0);
return res;
unregister_hook(hook_node_info, &hook);
obstack_free(&obst, NULL);
ir_nodehashmap_destroy(&freq_map);
}
static double *solve_lgs(gs_matrix_t *mat, double *x, int size)
{
double init = 1.0 / size;
......@@ -150,29 +131,13 @@ static double *solve_lgs(gs_matrix_t *mat, double *x, int size)
stat_ev_tim_pop("execfreq_seidel_time");
stat_ev_dbl("execfreq_seidel_iter", iter);
#ifdef COMPARE_AGAINST_GAUSSJORDAN
{
double *nw = XMALLOCN(double, size * size);
double *nx = XMALLOCNZ(double, size);
gs_matrix_export(mat, nw, size);
stat_ev_tim_push();
firm_gaussjordansolve(nw, nx, size);
stat_ev_tim_pop("execfreq_jordan_time");
xfree(nw);
xfree(nx);
}
#endif
return x;
}
/*
* Determine probability that predecessor pos takes this cf edge.
*/
static double get_cf_probability(ir_node *bb, int pos, double loop_weight)
static double get_cf_probability(const ir_node *bb, int pos, double loop_weight)
{
double sum = 0.0;
double cur = 1.0;
......@@ -211,114 +176,125 @@ static double get_cf_probability(ir_node *bb, int pos, double loop_weight)
return cur/sum;
}
static void exec_freq_node_info(void *ctx, FILE *f, const ir_node *irn)
{
ir_exec_freq *ef = (ir_exec_freq*) ctx;
if (!is_Block(irn))
return;
static double *freqs;
static double min_non_zero;
static double max_freq;
fprintf(f, "execution frequency: %g/%lu\n", get_block_execfreq(ef, irn), get_block_execfreq_ulong(ef, irn));
/**
 * Block walker callback: records the execution frequency of @p node.
 *
 * The frequency is appended to the file-level array @c freqs, and the running
 * extremes @c max_freq and @c min_non_zero (smallest frequency above 0.0) are
 * updated.
 *
 * @param node  block whose frequency is recorded
 * @param data  unused walker environment
 */
static void collect_freqs(ir_node *node, void *data)
{
	(void) data;
	const double block_freq = get_block_execfreq(node);

	ARR_APP1(double, freqs, block_freq);

	if (block_freq > 0.0 && block_freq < min_non_zero)
		min_non_zero = block_freq;
	if (block_freq > max_freq)
		max_freq = block_freq;
}
ir_exec_freq *create_execfreq(ir_graph *irg)
void ir_calculate_execfreq_int_factors(ir_execfreq_int_factors *factors,
ir_graph *irg)
{
ir_exec_freq *execfreq = XMALLOCZ(ir_exec_freq);
execfreq->freqs = new_set(cmp_freq, 32);
/* compute m and b of the transformation used to convert the doubles into
* scaled ints */
freqs = NEW_ARR_F(double, 0);
min_non_zero = HUGE_VAL;
max_freq = 0.0;
irg_block_walk_graph(irg, collect_freqs, NULL, NULL);
/*
* find the smallest difference of the execution frequencies
* we try to resolve it with 1 integer.
*/
size_t n_freqs = ARR_LEN(freqs);
double smallest_diff = 1.0;
for (size_t i = 0; i < n_freqs; ++i) {
if (freqs[i] <= 0.0)
continue;
memset(&execfreq->hook, 0, sizeof(execfreq->hook));
for (size_t j = i + 1; j < n_freqs; ++j) {
double diff = fabs(freqs[i] - freqs[j]);
// set reasonable values to convert double execfreq to ulong execfreq
execfreq->m = 1.0;
if (!UNDEF(diff))
smallest_diff = MIN(diff, smallest_diff);
}
}
execfreq->hook.context = execfreq;
execfreq->hook.hook._hook_node_info = exec_freq_node_info;
register_hook(hook_node_info, &execfreq->hook);
(void) irg;
double l2 = min_non_zero;
double h2 = max_freq;
double l1 = 1.0;
double h1 = MAX_INT_FREQ;
return execfreq;
}
/* according to that the slope of the translation function is
* 1.0 / smallest_diff */
factors->m = 1.0 / smallest_diff;
void set_execfreq(ir_exec_freq *execfreq, const ir_node *block, double freq)
{
freq_t *f = set_insert_freq(execfreq->freqs, block);
f->freq = freq;
/* the offset (y-intercept) is then given by */
factors->b = l1 - factors->m * l2;
/*
* if the slope is so high that the largest integer would be larger than
* MAX_INT_FREQ set the largest int freq to that upper limit and recompute
* the translation function
*/
if (factors->m * h2 + factors->b > MAX_INT_FREQ) {
factors->m = (h1 - l1) / (h2 - l2);
factors->b = l1 - factors->m * l2;
}
DEL_ARR_F(freqs);
}
static void collect_blocks(ir_node *bl, void *data)
int get_block_execfreq_int(const ir_execfreq_int_factors *factors,
const ir_node *block)
{
set *freqs = (set*) data;
set_insert_freq(freqs, bl);
double f = get_block_execfreq(block);
int res = (int) (f > factors->min_non_zero ? factors->m * f + factors->b : 1.0);
return res;
}
ir_exec_freq *compute_execfreq(ir_graph *irg, double loop_weight)
void ir_estimate_execfreq(ir_graph *irg)
{
gs_matrix_t *mat;
int size;
int n_keepalives;
int idx;
freq_t *freq, *s, *e;
ir_exec_freq *ef;
ir_node *end = get_irg_end(irg);
set *freqs;
dfs_t *dfs;
double *x;
double norm;
double loop_weight = 10.0;
/*
* compute a DFS.
assure_irg_properties(irg,
IR_GRAPH_PROPERTY_CONSISTENT_OUT_EDGES
| IR_GRAPH_PROPERTY_CONSISTENT_LOOPINFO);
/* compute a DFS.
* using a toposort on the CFG (without back edges) will propagate
* the values better for the gauss/seidel iteration.
* => they can "flow" from start to end.
*/
dfs = dfs_new(&absgraph_irg_cfg_succ, irg);
ef = XMALLOCZ(ir_exec_freq);
ef->min_non_zero = HUGE_VAL; /* initialize with a reasonable large number. */
freqs = ef->freqs = new_set(cmp_freq, dfs_get_n_nodes(dfs));
dfs_t *dfs = dfs_new(&absgraph_irg_cfg_succ, irg);
/*
* Populate the exec freq set.
* The DFS cannot be used alone, since the CFG might not be connected
* due to unreachable code.
*/
irg_block_walk_graph(irg, collect_blocks, NULL, freqs);
int size = dfs_get_n_nodes(dfs);
gs_matrix_t *mat = gs_new_matrix(size, size);
construct_cf_backedges(irg);
assure_edges(irg);
ir_node *end_block = get_irg_end_block(irg);
size = dfs_get_n_nodes(dfs);
mat = gs_new_matrix(size, size);
x = XMALLOCN(double, size);
for (idx = dfs_get_n_nodes(dfs) - 1; idx >= 0; --idx) {
ir_node *bb = (ir_node *) dfs_get_post_num_node(dfs, size - idx - 1);
int i;
freq = set_insert_freq(freqs, bb);
freq->idx = idx;
for (int idx = dfs_get_n_nodes(dfs) - 1; idx >= 0; --idx) {
const ir_node *bb = (ir_node*)dfs_get_post_num_node(dfs, size-idx-1);
/* Sum of (execution frequency of predecessor * probability of cf edge) ... */
for (i = get_Block_n_cfgpreds(bb) - 1; i >= 0; --i) {
ir_node *pred = get_Block_cfgpred_block(bb, i);
int pred_idx = size - dfs_get_post_num(dfs, pred) - 1;
gs_matrix_set(mat, idx, pred_idx, get_cf_probability(bb, i, loop_weight));
for (int i = get_Block_n_cfgpreds(bb) - 1; i >= 0; --i) {
const ir_node *pred = get_Block_cfgpred_block(bb, i);
int pred_idx = size - dfs_get_post_num(dfs, pred)-1;
double cf_probability = get_cf_probability(bb, i, loop_weight);
gs_matrix_set(mat, idx, pred_idx, cf_probability);
}
/* ... equals my execution frequency */
gs_matrix_set(mat, idx, idx, -1.0);
}
dfs_free(dfs);
/*
* Add an edge from end to start.
* The problem is then an eigenvalue problem:
* Solve A*x = 1*x => (A-I)x = 0
*/
s = set_find_freq(freqs, get_irg_start_block(irg));
e = set_find_freq(freqs, get_irg_end_block(irg));
if (e->idx >= 0)
gs_matrix_set(mat, s->idx, e->idx, 1.0);
/* Add an edge from end to start.
* The problem is then an eigenvalue problem:
* Solve A*x = 1*x => (A-I)x = 0
*/
if (bb == end_block) {
const ir_node *start_block = get_irg_start_block(irg);
int s_idx = size - dfs_get_post_num(dfs, start_block)-1;
gs_matrix_set(mat, s_idx, idx, 1.0);
}
}
/*
* Also add an edge for each kept block to start.
......@@ -326,104 +302,43 @@ ir_exec_freq *compute_execfreq(ir_graph *irg, double loop_weight)
* This avoids strange results for e.g. an irg containing an exit() call
* whose block has no cfg successor.
*/
n_keepalives = get_End_n_keepalives(end);
for (idx = n_keepalives - 1; idx >= 0; --idx) {
ir_node *start_block = get_irg_start_block(irg);
int s_idx = size - dfs_get_post_num(dfs, start_block)-1;
const ir_node *end = get_irg_end(irg);
int n_keepalives = get_End_n_keepalives(end);
for (int idx = n_keepalives - 1; idx >= 0; --idx) {
ir_node *keep = get_End_keepalive(end, idx);
if (!is_Block(keep) || get_irn_n_edges_kind(keep, EDGE_KIND_BLOCK) > 0)
continue;
if (is_Block(keep) && get_Block_n_cfg_outs(keep) == 0) {
freq_t *k = set_find_freq(freqs, keep);
if (k->idx >= 0)
gs_matrix_set(mat, s->idx, k->idx, 1.0);
}
int k_idx = size-dfs_get_post_num(dfs, keep)-1;
if (k_idx > 0)
gs_matrix_set(mat, s_idx, k_idx, 1.0);
}
/* solve the system and delete the matrix */
double *x = XMALLOCN(double, size);
solve_lgs(mat, x, size);
gs_delete_matrix(mat);
/*
* compute the normalization factor.
/* compute the normalization factor.
* 1.0 / exec freq of start block.
* (note: start_idx is != 0 in strange cases involving endless loops,
* probably a misfeature/bug)
*/
norm = x[s->idx] != 0.0 ? 1.0 / x[s->idx] : 1.0;
int start_idx = size-dfs_get_post_num(dfs, get_irg_start_block(irg))-1;
double start_freq = x[start_idx];
double norm = start_freq != 0.0 ? 1.0 / start_freq : 1.0;
ef->max = 0.0;
foreach_set(freqs, freq_t, freq) {
idx = freq->idx;
for (int idx = dfs_get_n_nodes(dfs) - 1; idx >= 0; --idx) {
ir_node *bb = (ir_node *) dfs_get_post_num_node(dfs, size - idx - 1);
/* take abs because it sometimes can be -0 in case of endless loops */
freq->freq = fabs(x[idx]) * norm;
/* get the maximum exec freq */
ef->max = MAX(ef->max, freq->freq);
/* Get the minimum non-zero execution frequency. */
if (freq->freq > 0.0)
ef->min_non_zero = MIN(ef->min_non_zero, freq->freq);
double freq = fabs(x[idx]) * norm;
set_block_execfreq(bb, freq);
}
/* compute m and b of the transformation used to convert the doubles into scaled ints */
{
double smallest_diff = 1.0;
double l2 = ef->min_non_zero;
double h2 = ef->max;
double l1 = 1.0;
double h1 = MAX_INT_FREQ;
double *fs = (double*) malloc(set_count(freqs) * sizeof(fs[0]));
int i, j, n = 0;
foreach_set(freqs, freq_t, freq)
fs[n++] = freq->freq;
/*
* find the smallest difference of the execution frequencies
* we try to ressolve it with 1 integer.
*/
for (i = 0; i < n; ++i) {
if (fs[i] <= 0.0)
continue;
for (j = i + 1; j < n; ++j) {
double diff = fabs(fs[i] - fs[j]);
if (!UNDEF(diff))
smallest_diff = MIN(diff, smallest_diff);
}
}
/* according to that the slope of the translation function is 1.0 / smallest diff */
ef->m = 1.0 / smallest_diff;
/* the abscissa is then given by */
ef->b = l1 - ef->m * l2;
/*
* if the slope is so high that the largest integer would be larger than MAX_INT_FREQ
* set the largest int freq to that upper limit and recompute the translation function
*/
if (ef->m * h2 + ef->b > MAX_INT_FREQ) {
ef->m = (h1 - l1) / (h2 - l2);
ef->b = l1 - ef->m * l2;
}
free(fs);
}
memset(&ef->hook, 0, sizeof(ef->hook));
ef->hook.context = ef;
ef->hook.hook._hook_node_info = exec_freq_node_info;
register_hook(hook_node_info, &ef->hook);
dfs_free(dfs);
xfree(x);
return ef;
}
void free_execfreq(ir_exec_freq *ef)
{
del_set(ef->freqs);
unregister_hook(hook_node_info, &ef->hook);
free(ef);
}
/*
* Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
*
* This file is part of libFirm.
*
* This file may be distributed and/or modified under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation and appearing in the file LICENSE.GPL included in the
* packaging of this file.
*
* Licensees holding valid libFirm Professional Edition licenses may use
* this file in accordance with the libFirm Commercial License.
* Agreement provided with the Software.
*
* This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
* WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE.
*/
/**
* @file
* @brief Compute an estimate of basic block executions.
* @author Adam M. Szalkowski
* @date 28.05.2006
*/
#ifndef FIRM_ANA_EXECFREQ_T_H
#define FIRM_ANA_EXECFREQ_T_H
#include "execfreq.h"
void init_execfreq(void);
void exit_execfreq(void);
void set_block_execfreq(ir_node *block, double freq);
/**
 * Parameters of the linear mapping that turns a block's double execution
 * frequency f into a scaled integer (m * f + b), as filled in by
 * ir_calculate_execfreq_int_factors() and consumed by
 * get_block_execfreq_int().
 */
typedef struct ir_execfreq_int_factors {
	/** Largest frequency.
	 *  NOTE(review): no assignment to this field is visible in
	 *  ir_calculate_execfreq_int_factors() - confirm it is set. */
	double max;
	/** Threshold read by get_block_execfreq_int(): frequencies not above it
	 *  are mapped to 1.
	 *  NOTE(review): no assignment to this field is visible in
	 *  ir_calculate_execfreq_int_factors() - confirm it is set. */
	double min_non_zero;
	/** Slope of the mapping. */
	double m;
	/** Offset (y-intercept) of the mapping. */
	double b;
} ir_execfreq_int_factors;
void ir_calculate_execfreq_int_factors(ir_execfreq_int_factors *factors,
ir_graph *irg);
int get_block_execfreq_int(const ir_execfreq_int_factors *factors,
const ir_node *block);
#endif
......@@ -121,7 +121,6 @@ typedef struct blocksched_env_t blocksched_env_t;
struct blocksched_env_t {
ir_graph *irg;
struct obstack *obst;
ir_exec_freq *execfreqs;
edge_t *edges;
pdeq *worklist;
int blockcount;
......@@ -164,11 +163,11 @@ static void collect_egde_frequency(ir_node *block, void *data)
} else if (arity == 1) {
ir_node *pred_block = get_Block_cfgpred_block(block, 0);
ir_loop *pred_loop = get_irn_loop(pred_block);
float freq = (float)get_block_execfreq(env->execfreqs, block);
float freq = (float)get_block_execfreq(block);
/* is it an edge leaving a loop */
if (get_loop_depth(pred_loop) > get_loop_depth(loop)) {
float pred_freq = (float)get_block_execfreq(env->execfreqs, pred_block);
float pred_freq = (float)get_block_execfreq(pred_block);
edge.outedge_penalty_freq = -(pred_freq - freq);
}
......@@ -187,7 +186,7 @@ static void collect_egde_frequency(ir_node *block, void *data)
double execfreq;
ir_node *pred_block = get_Block_cfgpred_block(block, i);