Commit d2da8950 authored by Götz Lindenmaier's avatar Götz Lindenmaier
Browse files

Bugfix in irdom.

  Implemented code placement.

[r323]
parent ba5d7687
......@@ -103,12 +103,17 @@ typedef struct {
} dom_env;
void init_tmp_dom_info(ir_node *bl, tmp_dom_info *parent, tmp_dom_info *tdi_list, int* used) {
/* Walks the Blocks along the out data structure.  If the recursion is
started at the Start block, control-dead blocks are not reached. */
void init_tmp_dom_info(ir_node *bl, tmp_dom_info *parent,
tmp_dom_info *tdi_list, int* used) {
tmp_dom_info *tdi;
int i;
assert(get_irn_op(bl) == op_Block);
if (get_irg_block_visited(current_ir_graph) == get_Block_block_visited(bl)) return;
if (get_irg_block_visited(current_ir_graph) == get_Block_block_visited(bl))
return;
mark_Block_block_visited(bl);
set_Block_pre_num(bl, *used);
......@@ -192,8 +197,8 @@ void compute_doms(ir_graph *irg) {
compute_outs(current_ir_graph);
/** Initialize the temporary information, add link to parent. We don't do
this with a standard walker as passing the parent to the sons isn't
simple. **/
this with a standard walker as passing the parent to the sons isn't
simple. **/
used = 0;
inc_irg_block_visited(current_ir_graph);
init_tmp_dom_info(get_irg_start_block(current_ir_graph), NULL, tdi_list, &used);
......@@ -216,7 +221,7 @@ void compute_doms(ir_graph *irg) {
ir_node *pred = get_nodes_Block(get_Block_cfgpred(w->block, j));
tmp_dom_info *u;
if ((is_Bad(pred)) || (get_Block_pre_num (pred) == -1))
if ((is_Bad(get_Block_cfgpred(w->block, j))) || (get_Block_pre_num (pred) == -1))
continue; /* control-dead */
u = dom_eval (&tdi_list[get_Block_pre_num(pred)]);
......@@ -266,183 +271,6 @@ void free_dom_and_peace(ir_graph *irg) {
assert(get_irg_phase_state(current_ir_graph) != phase_building);
current_ir_graph->dom_state = no_dom;
/* @@@ free */
}
#if 0
/* Dominator Tree */
/* Temporary type used while constructing the dominator tree.  The entries
live in one array; dom_setup fills them in and irg_gen_idom compares
entries by address (e.g. u->semi < w->semi), so the array order is
significant. */
typedef struct tmp_dom_info tmp_dom_info;
struct tmp_dom_info {
/* the node this entry describes (set by dom_setup) */
ir_node *region;
tmp_dom_info *semi; /* semidominator */
/* entry passed down by dom_setup as the caller's own entry; NULL for the
node the walk was started with */
tmp_dom_info *parent;
tmp_dom_info *label; /* used for LINK and EVAL */
tmp_dom_info *ancestor; /* used for LINK and EVAL */
tmp_dom_info *dom; /* After step 3, if the semidominator
of w is its immediate dominator, then w->dom is the immediate
dominator of w. Otherwise w->dom is a vertex v whose number is
smaller than w and whose immediate dominator is also w's immediate
dominator. After step 4, w->dom is the immediate dominator of w. */
tmp_dom_info *bucket; /* set of vertices with same semidominator;
chained through the bucket field as a singly linked list */
};
/* Counts N plus every not yet visited region reachable from N through
   prev_region, marking each counted node with the current ir_visited. */
static int
dom_count_regions (ir_node *n)
{
  int total = 1;   /* n itself */
  int pos;

  n->visit = ir_visited;

  pos = IR_ARITY (n);
  while (pos > 0) {
    ir_node *pred = prev_region (n, pos);

    --pos;
    if (!pred)
      continue;
    if (pred->visit == ir_visited)
      continue;
    total += dom_count_regions (pred);
  }

  return total;
}
/* Fill state for the tmp_dom_info array: DT is the array base and USED
the index of the next entry dom_setup will hand out. */
struct dt_desc { tmp_dom_info *dt; int used;};
/* Assigns the next free tmp_dom_info entry to N, records its index as
   N's pre_num, initialises the entry (semi and label point to the entry
   itself, PARENT is stored as its parent) and then descends into N's
   cfg_out successors.  Already visited nodes are left untouched. */
static void
dom_setup (ir_node *n, tmp_dom_info *parent, struct dt_desc *dt_desc)
{
  tmp_dom_info *slot;
  int k;

  if (n->visit == ir_visited)
    return;
  n->visit = ir_visited;
  assert (IR_CFG_NODE (n));

  /* Claim the next entry of the array for this node. */
  slot = &dt_desc->dt[dt_desc->used];
  n->data.r.pre_num = dt_desc->used;
  dt_desc->used++;

  slot->region   = n;
  slot->parent   = parent;
  slot->semi     = slot;
  slot->label    = slot;
  slot->ancestor = NULL;
  slot->bucket   = NULL;

  for (k = 0; k < n->data.r.cfg_outs; k++)
    dom_setup (n->data.r.cfg_out[k], slot, dt_desc);
}
/* Path compression for EVAL: after the call V's ancestor is the topmost
   ancestor of its chain, and V's label is the label with the smallest
   semi seen on the compressed path.  V must have an ancestor. */
static void
dom_compress (tmp_dom_info *v)
{
  tmp_dom_info *anc = v->ancestor;

  assert (anc);
  if (!anc->ancestor)
    return;   /* ancestor is already a root, nothing to compress */

  dom_compress (anc);
  if (anc->label->semi < v->label->semi)
    v->label = anc->label;
  v->ancestor = anc->ancestor;
}
/* EVAL: a root V (no ancestor) is returned unchanged.  Otherwise the
   path from V to its root is compressed and V's label is returned, i.e.
   the vertex u, not being the root, with minimum u->semi on that path. */
static tmp_dom_info*
dom_eval (tmp_dom_info *v)
{
  if (v->ancestor) {
    dom_compress (v);
    return v->label;
  }
  return v;
}
/* LINK: records V as the ancestor of W. */
static void
dom_link (tmp_dom_info *v, tmp_dom_info *w)
{
  tmp_dom_info **ancestor_slot = &w->ancestor;

  *ancestor_slot = v;
}
/* Computes, for every region counted below, its immediate dominator and
its depth in the dominator tree and stores them in data.r.idom and
data.r.dom_depth.  The temporary tmp_dom_info array is 1-based: entry 0
is allocated but unused, entry 1 belongs to irg->start.
NOTE(review): the semi/label/ancestor/bucket bookkeeping and the step
numbering look like the Lengauer-Tarjan algorithm -- confirm.
NOTE(review): the last statement assigns from `sirg', which is not
declared in this function; verify it exists at file scope before
re-enabling this code. */
void
irg_gen_idom (ir_graph *irg)
{
int regions, i;
tmp_dom_info *dt;
struct dt_desc dt_desc;
/* The walk below relies on the cfg_out information. */
if (!(irg->state & irgs_has_CFG)) irg_gen_out (irg);
++ir_visited;
regions = 0;
/* walk all the artificially kept alive parts of the CFG instead of
the CFG beginning from the Start just for fun and safety */
keep_alives_in_arr (irg);
for (i = ARR_LEN (irg->keep.alive) - 1; i >= 0; --i)
if ( IR_CFG_NODE (irg->keep.alive[i])
&& irg->keep.alive[i]->visit != ir_visited)
regions += dom_count_regions (irg->keep.alive[i]);
/* One extra entry because the array is used 1-based. */
dt = alloca ((regions+1) * sizeof (tmp_dom_info));
memset (dt, 0, (regions+1) * sizeof (tmp_dom_info));
/* Step 1 */
dt_desc.dt = dt;
dt_desc.used = 1;
/* Fresh visited mark so that dom_setup sees every node as unvisited. */
++ir_visited;
dom_setup (irg->start, NULL, &dt_desc);
/* This assert will fail, if not all Regions are reachable by
walking the CFG starting from Start, that is when there is
[control] dead code, violating the single entry precondition of
this algorithm. */
assert (dt_desc.used == regions + 1);
/* Process all entries except the first one in reverse numbering order. */
for (i = regions; i > 1; --i) {
tmp_dom_info *w = &dt[i];
tmp_dom_info *v;
int j, r_ins;
/* Step 2 */
r_ins = IR_ARITY (w->region);
for (j = 1; j <= r_ins; ++j) {
ir_node *prev = prev_region (w->region, j);
tmp_dom_info *u;
if (!prev) continue; /* control-dead */
u = dom_eval (&dt[prev->data.r.pre_num]);
/* Entries are compared by address, i.e. by their position in dt. */
if (u->semi < w->semi) w->semi = u->semi;
}
/* Add w to w->semi's bucket. w is in exactly one bucket, so
buckets can be implemented as linked lists. */
w->bucket = w->semi->bucket;
w->semi->bucket = w;
dom_link (w->parent, w);
/* Step 3 */
while ((v = w->parent->bucket)) {
tmp_dom_info *u;
/* remove v from w->parent->bucket */
w->parent->bucket = v->bucket;
v->bucket = NULL;
u = dom_eval (v);
v->dom = u->semi < v->semi ? u : w->parent;
}
}
/* Step 4 */
dt[1].dom = NULL;
dt[1].region->data.r.idom = NULL;
dt[1].region->data.r.dom_depth = 1;
/* Ascending order: w->dom->dom and the dom_depth of w->dom's region are
already final when w is handled. */
for (i = 2; i <= regions; ++i) {
tmp_dom_info *w = &dt[i];
if (w->dom != w->semi) w->dom = w->dom->dom;
w->region->data.r.idom = w->dom->region;
w->region->data.r.dom_depth = w->dom->region->data.r.dom_depth + 1;
}
current_ir_graph = sirg;
}
#endif
......@@ -909,6 +909,7 @@ dump_ir_graph (ir_graph *irg)
/***********************************************************************/
/* Returns nonzero iff both the opcode of N and the current graph are in
   the pinned state `floats'. */
int node_floats(ir_node *n) {
  if (get_op_pinned(get_irn_op(n)) != floats)
    return 0;
  return (get_irg_pinned(current_ir_graph) == floats);
}
......@@ -1012,7 +1013,10 @@ dump_block_to_cfg (ir_node *block, void *env) {
/* This is a block. Dump a node for the block. */
xfprintf (F, "node: {title:\""); PRINT_NODEID(block);
xfprintf (F, "\" label: \"%I ", block->op->name); PRINT_NODEID(block);
xfprintf (F, "\"}\n");
xfprintf (F, "\" ");
if (dump_dominator_information_flag)
xfprintf(F, "info1:\"dom depth %d\"", get_Block_dom_depth(block));
xfprintf (F, "}\n");
/* Dump the edges */
for ( i = 0; i < get_Block_n_cfgpreds(block); i++)
if (get_irn_op(skip_Proj(get_Block_cfgpred(block, i))) != op_Bad) {
......@@ -1024,7 +1028,7 @@ dump_block_to_cfg (ir_node *block, void *env) {
fprintf (F, "\" }\n");
}
/* Dump dominator information */
/* Dump dominator edge */
if (dump_dominator_information_flag && get_Block_idom(block)) {
pred = get_Block_idom(block);
xfprintf (F, "edge: { sourcename: \"");
......
......@@ -25,6 +25,8 @@
# include "irgmod.h"
# include "array.h"
# include "pset.h"
# include "pdeq.h" /* provisional, for code placement */
# include "irouts.h"
/* Defined in iropt.c */
pset *new_identities (void);
......@@ -737,3 +739,255 @@ void inline_small_irgs(ir_graph *irg, int size) {
current_ir_graph = rem;
}
/********************************************************************/
/* Code Placement. Pins all floating nodes to a block where they */
/* will be executed only if needed. */
/********************************************************************/
static pdeq *worklist; /* worklist of ir_node*s */
/* Early placement of N.  A floating node is moved into the block of its
   dominance-deepest operand; floating operands that have not been
   visited yet are placed first by recursion.  Afterwards all operands of
   N that are still unvisited are appended to the worklist.
   N must not have been visited in the current walk. */
static void
place_floats_early (ir_node *n)
{
  int pos, first;

  /* we must not run into an infinite loop */
  assert (irn_not_visited(n));
  mark_irn_visited(n);

  if (get_op_pinned(get_irn_op(n)) == floats) {
    ir_node *target = new_Bad();   /* block chosen for n */
    int target_depth = 0;          /* dominance depth of target */

    assert(get_irn_op(n) != op_Block);

    /* Const, SymConst and Bad start out in the Start block; the operand
       loop below may still pick a deeper block for them. */
    if ((get_irn_op(n) == op_Const) ||
        (get_irn_op(n) == op_SymConst) ||
        (is_Bad(n))) {
      target = get_irg_start_block(current_ir_graph);
      target_depth = 1;
    }

    pos = 0;
    while (pos < get_irn_arity(n)) {
      ir_node *operand = get_irn_n(n, pos);
      ir_node *operand_block;

      if ((irn_not_visited(operand)) &&
          (get_op_pinned(get_irn_op(operand)) == floats))
        place_floats_early (operand);

      /* Every loop contains at least one pinned node, so at this point
         the operand is either pinned or its early placement is done. */
      operand_block = get_nodes_Block(operand);
      if (!is_Bad(operand_block)) {
        if (get_Block_dom_depth(operand_block) > target_depth) {
          target = operand_block;
          target_depth = get_Block_dom_depth(operand_block);
        }
      }

      /* Keep a node that currently lives below the Start block out of
         the Start block: use the first cfg successor of Start instead. */
      if ((target_depth == 1) &&
          (get_Block_dom_depth(get_nodes_Block(n)) > 1)) {
        target = get_Block_cfg_out(get_irg_start_block(current_ir_graph), 0);
        assert(target != get_irg_start_block(current_ir_graph));
        target_depth = 2;
      }

      pos++;
    }

    set_nodes_Block(n, target);
  }

  /* Queue the unvisited operands.  For everything but a Block, position
     -1 (the node's block) is an operand too. */
  if (get_irn_op(n) == op_Block)
    first = 0;
  else
    first = -1;

  for (pos = first; pos < get_irn_arity(n); pos++) {
    ir_node *operand = get_irn_n(n, pos);

    if (irn_not_visited(operand))
      pdeq_putr (worklist, operand);
  }
}
/* Floating nodes form subgraphs that begin at nodes such as Const, Load,
   Start, Call and end at pinned nodes such as Store, Call.
   place_floats_early places all floating nodes reachable from its
   argument through floating nodes and queues the remaining unvisited
   operands on the worklist.

   place_early starts that process at the End node of current_ir_graph
   and drains the worklist.  Afterwards the out information is marked
   inconsistent (nodes have changed blocks) and the graph is set to state
   `pinned'.  The global worklist must have been created by the caller. */
inline void place_early (void) {
  assert(worklist);
  inc_irg_visited(current_ir_graph);

  /* this inits the worklist */
  place_floats_early (get_irg_end(current_ir_graph));

  /* Work the content of the worklist.  A node may have been queued
     several times or been placed by recursion in the meantime. */
  while (!pdeq_empty (worklist)) {
    ir_node *n = pdeq_getl (worklist);
    if (irn_not_visited(n)) place_floats_early (n);
  }

  set_irg_outs_inconsistent(current_ir_graph);
  current_ir_graph->pinned = pinned;
}
/* Returns the deepest block that is an ancestor of both A and B in the
   dominator tree.  Both blocks are first lifted to the same dominance
   depth and then walked upwards in lock step until they meet. */
static ir_node *
blocks_dom_dca (ir_node *a, ir_node *b)
{
  while (get_Block_dom_depth(a) > get_Block_dom_depth(b))
    a = get_Block_idom(a);
  while (get_Block_dom_depth(b) > get_Block_dom_depth(a))
    b = get_Block_idom(b);
  while (a != b) {
    a = get_Block_idom(a);
    b = get_Block_idom(b);
  }
  return a;
}

/* Deepest common dominance ancestor of DCA and of the block(s) in which
   CONSUMER uses PRODUCER.

   dca       common ancestor accumulated so far, or NULL for the first
             consumer.
   consumer  a user of PRODUCER.
   producer  the node that is being placed.

   For an ordinary consumer the use happens in the consumer's own block.
   For a Phi the use happens at the end of every control flow predecessor
   through which PRODUCER reaches the Phi; if PRODUCER feeds several Phi
   operands, all of these predecessor blocks are taken into account.
   Returns the new common ancestor. */
static ir_node *
consumer_dom_dca (ir_node *dca, ir_node *consumer, ir_node *producer)
{
  /* Stays NULL if a Phi consumer has no operand equal to PRODUCER, so
     that the assert below catches it instead of reading garbage. */
  ir_node *block = NULL;

  /* Compute the latest block into which we can place a node so that it is
     before consumer. */
  if (get_irn_op(consumer) == op_Phi) {
    int i;
    ir_node *phi_block = get_nodes_Block(consumer);

    for (i = 0; i < get_irn_arity(consumer); i++) {
      if (get_irn_n(consumer, i) == producer) {
        ir_node *use_block = get_nodes_Block(get_Block_cfgpred(phi_block, i));

        block = block ? blocks_dom_dca(block, use_block) : use_block;
      }
    }
  } else {
    assert(is_no_Block(consumer));
    block = get_nodes_Block(consumer);
  }

  /* Compute the deepest common ancestor of block and dca. */
  assert(block);
  if (!dca) return block;
  return blocks_dom_dca(dca, block);
}
#if 0
/* @@@ Needs loop information. Will be implemented later, interprocedurally.
Disabled sketch: walks from DCA up the dominator tree until the block
N currently sits in is reached (or no idom exists) and remembers the
block with the smallest loop depth seen on the way; N is moved there
if that block is not dominance-shallower than N's current block.
NOTE(review): get_Block_loop_depth is not defined in the visible
sources -- confirm it exists before enabling this. */
static void
move_out_of_loops (ir_node *n, ir_node *dca)
{
assert(dca);
/* Find the region deepest in the dominator tree dominating
dca with the least loop nesting depth, but still dominated
by our early placement. */
ir_node *best = dca;
while (dca != get_nodes_Block(n)) {
dca = get_Block_idom(dca);
if (!dca) break; /* should we put assert(dca)? */
/* Strictly smaller: on equal loop depth the deeper block found earlier wins. */
if (get_Block_loop_depth(dca) < get_Block_loop_depth(best)) {
best = dca;
}
}
/* Only move if the chosen block is at least as deep as the current one. */
if (get_Block_dom_depth(best) >= get_Block_dom_depth(get_nodes_Block(n)))
set_nodes_Block(n, best);
}
#endif
/* Find the latest legal block for N and place N into the
   `optimal' Block between the latest and earliest legal block.
   The `optimal' block is the dominance-deepest block of those
   with the least loop-nesting-depth.  This places N out of as many
   loops as possible and then makes it as control dependent as
   possible.  (The loop part is disabled until loop information is
   available, so currently N simply goes to the latest legal block.)

   N must not have been visited in the current walk.  Uses the out
   edges of current_ir_graph, which therefore have to be up to date. */
static void
place_floats_late (ir_node *n)
{
  int i;

  assert (irn_not_visited(n)); /* no multiple placement */

  /* no need to place block nodes, control nodes are already placed. */
  if ((get_irn_op(n) != op_Block) && (!is_cfop(n)) && (get_irn_mode(n) != mode_X)) {
    /* Assure that our users are all placed, except the Phi-nodes.
       --- Each dataflow cycle contains at least one Phi-node.  We
       have to break the `user has to be placed before the
       producer' dependence cycle and the Phi-nodes are the
       place to do so, because we need to base our placement on the
       final region of our users, which is OK with Phi-nodes, as they
       are pinned, and they never have to be placed after a
       producer of one of their inputs in the same block anyway. */
    for (i = 0; i < get_irn_n_outs(n); i++) {
      ir_node *succ = get_irn_out(n, i);
      if (irn_not_visited(succ) && (get_irn_op(succ) != op_Phi))
        place_floats_late (succ);
    }

    /* We have to determine the final block of this node... except for constants. */
    if ((get_op_pinned(get_irn_op(n)) == floats) &&
        (get_irn_op(n) != op_Const) &&
        (get_irn_op(n) != op_SymConst)) {
      ir_node *dca = NULL;  /* deepest common ancestor in the
                               dominator tree of all nodes'
                               blocks depending on us; our final
                               placement has to dominate DCA. */
      for (i = 0; i < get_irn_n_outs(n); i++) {
        dca = consumer_dom_dca (dca, get_irn_out(n, i), n);
      }
      /* Without any recorded user there is no DCA; keep the block the
         node already has instead of storing NULL as its block. */
      if (dca) {
        set_nodes_Block(n, dca);
#if 0
        move_out_of_loops (n, dca);
#endif
      }
    }
  }

  mark_irn_visited(n);

  /* Put all users of N that are still unvisited on the worklist.  (Users
     of floating nodes other than Phis have been placed by the recursion
     above and are therefore marked already.) */
  for (i = 0; i < get_irn_n_outs(n); i++) {
    if (irn_not_visited(get_irn_out(n, i))) {
      pdeq_putr (worklist, get_irn_out(n, i));
    }
  }
}
/* Late placement driver: starts a new visited walk at the Start block of
   current_ir_graph and processes the worklist until it is empty.  The
   global worklist must have been created by the caller. */
inline void place_late() {
  assert(worklist);
  inc_irg_visited(current_ir_graph);

  /* Seed the worklist. */
  place_floats_late(get_irg_start_block(current_ir_graph));

  /* Drain it; entries that were placed in the meantime are skipped. */
  for (;;) {
    ir_node *next;

    if (pdeq_empty (worklist))
      break;
    next = pdeq_getl (worklist);
    if (!irn_not_visited(next))
      continue;
    place_floats_late(next);
  }
}
/* Code placement for IRG: pins every floating node to a block.

   Does nothing unless both get_optimize() and get_opt_global_cse() are
   set.  IRG must not be in phase_building.  Dominator information is
   computed if it is not consistent.  The out information is recomputed
   in between and is marked inconsistent on return, because late
   placement moves nodes to other blocks.  current_ir_graph is set to IRG
   for the duration of the call and restored afterwards. */
void place_code(ir_graph *irg) {
  ir_graph *rem;

  /* Check the options before touching current_ir_graph, so that the
     early exit leaves the global state untouched. */
  if (!(get_optimize() && get_opt_global_cse())) return;

  rem = current_ir_graph;
  current_ir_graph = irg;

  /* Handle graph state */
  assert(get_irg_phase_state(irg) != phase_building);
  if (get_irg_dom_state(irg) != dom_consistent)
    compute_doms(irg);

  /* Place all floating nodes as early as possible. This guarantees
     a legal code placement. */
  worklist = new_pdeq ();
  place_early();

  /* place_early invalidates the outs, place_late needs them. */
  compute_outs(irg);
  /* Now move the nodes down in the dominator tree. This reduces the
     unnecessary executions of the node. */
  place_late();

  /* place_late has moved nodes again, the outs computed above are stale. */
  set_irg_outs_inconsistent(current_ir_graph);

  del_pdeq (worklist);
  current_ir_graph = rem;
}
......@@ -72,4 +72,19 @@ void inline_method(ir_node *call, ir_graph *called_graph);
combination as control flow operation. */
void inline_small_irgs(ir_graph *irg, int size);
/* Code Placement. Pins all floating nodes to a block where they
will be executed only if needed. Depends on the flag opt_global_cse.
The graph must not be in phase_building. Does not schedule control-dead
code. Uses dominator information, which it computes if the irg is not
in state dom_consistent. Destroys the out information, as it moves nodes
to other blocks. Optimizes Tuples in control edges. (@@@ This
is not tested!)
@@@ A more powerful code placement would move operations past Phi nodes
out of loops. Furthermore, the control flow should be normalized before
computing the dominator information, so that there exists an optimal block
for all operations. */
void place_code(ir_graph *irg);
# endif /* _IRGOPT_H_ */
......@@ -308,6 +308,12 @@ mark_irn_visited (ir_node *node) {
node->visited = current_ir_graph->visited;
}
/* Returns 1 if NODE's visited counter is smaller than the visited
   counter of current_ir_graph, 0 otherwise. */
inline int
irn_not_visited (ir_node *node) {
  assert (node);
  if (node->visited < current_ir_graph->visited)
    return 1;
  return 0;
}
inline void
set_irn_link (ir_node *node, ir_node *link) {
assert (node);
......
/* Copyright (C) 1998 - 2000 by Universitaet Karlsruhe
/* Copyright (C) 1998 - 2000 by Universitaet Karlsruhe 3 2002/02/28 13:33:52
** All rights reserved.
**
** Authors: Martin Trapp, Christian Schaefer
......@@ -93,6 +93,8 @@ inline void set_irn_visited (ir_node *node, unsigned long visited);
inline unsigned long get_irn_visited (ir_node *node);
/* Sets visited to get_irg_visited(current_ir_graph) */
inline void mark_irn_visited (ir_node *node);
/* Returns 1 if visited < get_irg_visited(current_ir_graph). */
inline int irn_not_visited (ir_node *node);
inline void set_irn_link (ir_node *node, ir_node *link);
inline ir_node *get_irn_link (ir_node *node);
#ifdef DEBUG_libfirm
......
......@@ -1141,5 +1141,9 @@ optimize_in_place (ir_node *n) {
set_irg_pinned(current_ir_graph, floats);
if (get_irg_outs_state(current_ir_graph) == outs_consistent)
set_irg_outs_inconsistent(current_ir_graph);
/* Maybe we could also test whether optimizing the node can
change the control graph. */
if (get_irg_dom_state(current_ir_graph) == dom_consistent)
set_irg_dom_inconsistent(current_ir_graph);
return optimize_in_place_2 (n);
}
......@@ -485,6 +485,8 @@ irg_vrfy (ir_graph *irg)
rem = current_ir_graph;
current_ir_graph = irg;
assert(get_irg_pinned(irg) == pinned);
irg_walk(irg->end, vrfy_wrap, NULL, NULL);
current_ir_graph = rem;
......
......@@ -18,7 +18,8 @@
Checknode must be in current_ir_graph. */
void irn_vrfy (struct ir_node *checknode);
/* Calls irn_vrfy for each node in irg. */
/* Calls irn_vrfy for each node in irg.
Graph must be in state "pinned".*/
void irg_vrfy (ir_graph *irg);
......
......@@ -49,7 +49,7 @@ main(void)
set_optimize(1);
set_opt_constant_folding(1);
set_opt_cse(1);
set_opt_global_cse(1);
set_opt_global_cse(0);
set_opt_dead_node_elimination (1);
prim_t_int = new_type_primitive(id_from_str ("int", 3), mode_i);
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment