irgopt.c 43.2 KB
Newer Older
1
/* Copyright (C) 1998 - 2000 by Universitaet Karlsruhe
Christian Schäfer's avatar
Christian Schäfer committed
2
3
4
5
** All rights reserved.
**
** Author: Christian Schaefer
**
Götz Lindenmaier's avatar
Götz Lindenmaier committed
6
** Optimizations for a whole ir graph, i.e., a procedure.
Christian Schäfer's avatar
Christian Schäfer committed
7
8
*/

Boris Boesler's avatar
Boris Boesler committed
9
10
/* $Id$ */

Boris Boesler's avatar
added    
Boris Boesler committed
11
12
13
14
#ifdef HAVE_CONFIG_H
# include <config.h>
#endif

Götz Lindenmaier's avatar
Götz Lindenmaier committed
15
16
# include <assert.h>

17
# include "irprog.h"
Christian Schäfer's avatar
Christian Schäfer committed
18
# include "irgopt.h"
Götz Lindenmaier's avatar
Götz Lindenmaier committed
19
# include "irnode_t.h"
Götz Lindenmaier's avatar
Götz Lindenmaier committed
20
# include "irgraph_t.h"
21
# include "iropt_t.h"
Christian Schäfer's avatar
Christian Schäfer committed
22
23
# include "irgwalk.h"
# include "ircons.h"
Götz Lindenmaier's avatar
Götz Lindenmaier committed
24
# include "misc.h"
Götz Lindenmaier's avatar
Götz Lindenmaier committed
25
# include "irgmod.h"
26
# include "array.h"
Götz Lindenmaier's avatar
Götz Lindenmaier committed
27
# include "pset.h"
28
# include "pdeq.h"       /* for code placement */
Götz Lindenmaier's avatar
Götz Lindenmaier committed
29
# include "irouts.h"
30
31

/* Defined in iropt.c */
Götz Lindenmaier's avatar
Götz Lindenmaier committed
32
33
pset *new_identities (void);
void  del_identities (pset *value_table);
34
void  add_identities   (pset *value_table, ir_node *node);
35

Götz Lindenmaier's avatar
Götz Lindenmaier committed
36
37
/********************************************************************/
/* apply optimizations of iropt to all nodes.                       */
Götz Lindenmaier's avatar
Götz Lindenmaier committed
38
/********************************************************************/
Götz Lindenmaier's avatar
Götz Lindenmaier committed
39

40
41
42
43
/* Walker callback: clear the link field of every node so later phases
   can use it as scratch storage.  The env argument is unused. */
void init_link (ir_node *n, void *env) {
  set_irn_link(n, NULL);
}

Christian Schäfer's avatar
Christian Schäfer committed
44
45
/* Walker callback: locally optimize all predecessors of n and rewire the
   optimized versions.  Blocks themselves are optimized as well and, when
   the optimizer produces a different node, exchanged in place. */
void
optimize_in_place_wrapper (ir_node *n, void *env) {
  int idx;
  ir_node *opt;

  /* Optimize each input and wire the (possibly new) node back in. */
  for (idx = 0; idx < get_irn_arity(n); idx++) {
    opt = optimize_in_place_2(get_irn_n(n, idx));
    set_irn_n(n, idx, opt);
  }

  /* Blocks are exchanged wholesale if optimization returned a new node. */
  if (get_irn_op(n) == op_Block) {
    opt = optimize_in_place_2(n);
    if (opt != n) exchange (n, opt);
  }
}

/* Applies local optimizations (iropt) to every node of the graph irg.
   Adjusts the graph state (pinned/outs/dom) and rebuilds the cse value
   table before walking. */
void
local_optimize_graph (ir_graph *irg) {
  ir_graph *saved = current_ir_graph;

  current_ir_graph = irg;

  /* Handle graph state */
  assert(get_irg_phase_state(irg) != phase_building);
  if (get_opt_global_cse())
    set_irg_pinned(current_ir_graph, floats);
  if (get_irg_outs_state(current_ir_graph) == outs_consistent)
    set_irg_outs_inconsistent(current_ir_graph);
  if (get_irg_dom_state(current_ir_graph) == dom_consistent)
    set_irg_dom_inconsistent(current_ir_graph);

  /* Start the cse with a fresh, empty value table. */
  del_identities(irg->value_table);
  irg->value_table = new_identities();

  /* Walk over the graph, clearing links first, then optimizing. */
  irg_walk(irg->end, init_link, optimize_in_place_wrapper, NULL);

  current_ir_graph = saved;
}

Götz Lindenmaier's avatar
Götz Lindenmaier committed
84
85
86
/********************************************************************/
/* Routines for dead node elimination / copying garbage collection  */
/* of the obstack.                                                  */
Götz Lindenmaier's avatar
Götz Lindenmaier committed
87
/********************************************************************/
Götz Lindenmaier's avatar
Götz Lindenmaier committed
88

Götz Lindenmaier's avatar
Götz Lindenmaier committed
89
90
/* Remember the new node in the old node by using a field all nodes have. */
inline void
Christian Schäfer's avatar
Christian Schäfer committed
91
92
set_new_node (ir_node *old, ir_node *new)
{
Götz Lindenmaier's avatar
Götz Lindenmaier committed
93
  old->link = new;
Christian Schäfer's avatar
Christian Schäfer committed
94
95
96
}

/* Get this new node, before the old node is forgotten. */
Götz Lindenmaier's avatar
Götz Lindenmaier committed
97
/* Retrieve the copy previously stored in n's link field by set_new_node. */
inline ir_node *
get_new_node (ir_node * n) {
  return n->link;
}

Götz Lindenmaier's avatar
Götz Lindenmaier committed
103
104
/* We use the block_visited flag to mark that we have computed the
   number of useful predecessors for this block.
105
106
107
108
   Further we encode the new arity in this flag in the old blocks.
   Remembering the arity is useful, as it saves a lot of pointer
   accesses.  This function is called for all Phi and Block nodes
   in a Block. */
Götz Lindenmaier's avatar
Götz Lindenmaier committed
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
/* Returns the number of non-Bad predecessors of block b.
   The result is cached in the block's block_visited flag, encoded as
   (current irg block-visited counter + arity), so repeated calls for the
   same block avoid re-scanning the in array. */
inline int
compute_new_arity(ir_node *b) {
  int pos, good_preds;
  int graph_mark, block_mark;

  graph_mark = get_irg_block_visited(current_ir_graph);
  block_mark = get_Block_block_visited(b);
  if (block_mark >= graph_mark) {
    /* Already computed: the arity was encoded into the visited flag. */
    return block_mark - graph_mark;
  }

  /* Count the useful (non-Bad) predecessors. */
  good_preds = get_irn_arity(b);
  for (pos = 0; pos < get_irn_arity(b); pos++)
    if (get_irn_opcode(get_irn_n(b, pos)) == iro_Bad)
      good_preds--;

  /* Cache the result in the flag for subsequent calls. */
  set_Block_block_visited(b, graph_mark + good_preds);
  return good_preds;
}

/* Copies the node to the new obstack. The Ins of the new node point to
132
133
   the predecessors on the old obstack.  For block/phi nodes not all
   predecessors might be copied.  n->link points to the new node.
Götz Lindenmaier's avatar
Götz Lindenmaier committed
134
   For Phi and Block nodes the function allocates in-arrays with an arity
Götz Lindenmaier's avatar
Götz Lindenmaier committed
135
136
   only for useful predecessors.  The arity is determined by counting
   the non-bad predecessors of the block. */
137
void
Götz Lindenmaier's avatar
Götz Lindenmaier committed
138
139
copy_node (ir_node *n, void *env) {
  ir_node *nn, *block;
Götz Lindenmaier's avatar
Götz Lindenmaier committed
140
  int new_arity;
141

Götz Lindenmaier's avatar
Götz Lindenmaier committed
142
143
  if (get_irn_opcode(n) == iro_Block) {
    block = NULL;
Götz Lindenmaier's avatar
Götz Lindenmaier committed
144
    new_arity = compute_new_arity(n);
Götz Lindenmaier's avatar
Götz Lindenmaier committed
145
146
  } else {
    block = get_nodes_Block(n);
Götz Lindenmaier's avatar
Götz Lindenmaier committed
147
148
149
150
151
    if (get_irn_opcode(n) == iro_Phi) {
      new_arity = compute_new_arity(block);
    } else {
      new_arity = get_irn_arity(n);
    }
Götz Lindenmaier's avatar
Götz Lindenmaier committed
152
153
154
155
156
  }
  nn = new_ir_node(current_ir_graph,
		   block,
		   get_irn_op(n),
		   get_irn_mode(n),
Götz Lindenmaier's avatar
Götz Lindenmaier committed
157
		   new_arity,
Götz Lindenmaier's avatar
Götz Lindenmaier committed
158
		   get_irn_in(n));
Götz Lindenmaier's avatar
Götz Lindenmaier committed
159
160
161
  /* Copy the attributes.  These might point to additional data.  If this
     was allocated on the old obstack the pointers now are dangling.  This
     frees e.g. the memory of the graph_arr allocated in new_immBlock. */
Götz Lindenmaier's avatar
Götz Lindenmaier committed
162
163
  copy_attrs(n, nn);
  set_new_node(n, nn);
164
165
166
167

  /*  printf("\n old node: "); DDMSG2(n);
      printf(" new node: "); DDMSG2(nn); */

Christian Schäfer's avatar
Christian Schäfer committed
168
169
}

Götz Lindenmaier's avatar
Götz Lindenmaier committed
170
171
/* Copies new predecessors of old node to new node remembered in link.
   Spare the Bad predecessors of Phi and Block nodes. */
172
/* Second walker pass: wires the new node (remembered in n->link) to the
   copies of n's predecessors.  Bad predecessors of Phi and Block nodes
   are skipped, compacting the in arrays. */
void
copy_preds (ir_node *n, void *env) {
  /* NOTE(review): 'on' is declared but never used in this function --
     candidate for removal. */
  ir_node *nn, *block, *on;
  int i, j;

  nn = get_new_node(n);

  if (get_irn_opcode(n) == iro_Block) {
    /* Don't copy Bad nodes: compact the in array while wiring. */
    j = 0;
    for (i = 0; i < get_irn_arity(n); i++)
      if (get_irn_opcode(get_irn_n(n, i)) != iro_Bad) {
        set_irn_n (nn, j, get_new_node(get_irn_n(n, i)));
        j++;
      }
    /* repair the block visited flag from above misuse (arity caching in
       compute_new_arity). Repair it in both graphs so that the old one
       can still be used. */
    set_Block_block_visited(nn, 0);
    set_Block_block_visited(n, 0);
    /* Local optimization could not merge two subsequent blocks if
       in array contained Bads.  Now it's possible.
       We don't call optimize_in_place as it requires
       that the fields in ir_graph are set properly. */
    if (get_Block_n_cfgpreds(nn) == 1
        && get_irn_op(get_Block_cfgpred(nn, 0)) == op_Jmp)
      exchange(nn, get_nodes_Block(get_Block_cfgpred(nn, 0)));
  } else if (get_irn_opcode(n) == iro_Phi) {
    /* Don't copy node if corresponding predecessor in block is Bad.
       The Block itself should not be Bad. */
    block = get_nodes_Block(n);
    set_irn_n (nn, -1, get_new_node(block));
    j = 0;
    for (i = 0; i < get_irn_arity(n); i++)
      if (get_irn_opcode(get_irn_n(block, i)) != iro_Bad) {
        set_irn_n (nn, j, get_new_node(get_irn_n(n, i)));
        j++;
      }
    /* If the pre walker reached this Phi after the post walker visited the
       block block_visited is > 0. */
    set_Block_block_visited(get_nodes_Block(n), 0);
    /* Compacting the Phi's ins might generate Phis with only one
       predecessor. */
    /* NOTE(review): this exchanges the OLD node n using its OLD arity,
       not the compacted copy nn -- confirm this is intended. */
    if (get_irn_arity(n) == 1)
      exchange(n, get_irn_n(n, 0));
  } else {
    /* Ordinary node: wire block (-1) and all data predecessors 1:1. */
    for (i = -1; i < get_irn_arity(n); i++)
      set_irn_n (nn, i, get_new_node(get_irn_n(n, i)));
  }
  /* Now the new node is complete.  We can add it to the hash table for cse.
     @@@ inlinening aborts if we identify End. Why? */
  if(get_irn_op(nn) != op_End)
    add_identities (current_ir_graph->value_table, nn);
}

230
/* Copies the graph recursively, compacts the keepalive of the end node. */
231
232
233
/* Copies all nodes reachable from the End node of current_ir_graph onto
   the (already swapped-in) new obstack, then re-attaches the keep-alive
   edges.  Blocks kept alive are walked before Phis so that blocks only
   reachable through Phis are not overlooked. */
void
copy_graph () {
  ir_node *oe, *ne; /* old end, new end */
  ir_node *ka;      /* keep alive */
  int i;

  oe = get_irg_end(current_ir_graph);
  /* copy the end node by hand, allocate dynamic in array! */
  ne = new_ir_node(current_ir_graph,
                   NULL,
                   op_End,
                   mode_X,
                   -1,
                   NULL);
  /* Copy the attributes.  Well, there might be some in the future... */
  copy_attrs(oe, ne);
  set_new_node(oe, ne);

  /* copy the live nodes */
  irg_walk(get_nodes_Block(oe), copy_node, copy_preds, NULL);
  /* copy_preds for the end node ... */
  set_nodes_Block(ne, get_new_node(get_nodes_Block(oe)));

  /** ... and now the keep alives. **/
  /* First pick the not marked block nodes and walk them.  We must pick these
     first as else we will oversee blocks reachable from Phis. */
  for (i = 0; i < get_irn_arity(oe); i++) {
    ka = get_irn_n(oe, i);
    if ((get_irn_op(ka) == op_Block) &&
        (get_irn_visited(ka) < get_irg_visited(current_ir_graph))) {
      /* We must keep the block alive and copy everything reachable */
      set_irg_visited(current_ir_graph, get_irg_visited(current_ir_graph)-1);
      irg_walk(ka, copy_node, copy_preds, NULL);
      add_End_keepalive(ne, get_new_node(ka));
    }
  }

  /* Now pick the Phis.  Here we will keep all! */
  for (i = 0; i < get_irn_arity(oe); i++) {
    ka = get_irn_n(oe, i);
    if ((get_irn_op(ka) == op_Phi)) {
      if (get_irn_visited(ka) < get_irg_visited(current_ir_graph)) {
        /* We didn't copy the Phi yet.  */
        set_irg_visited(current_ir_graph, get_irg_visited(current_ir_graph)-1);
        irg_walk(ka, copy_node, copy_preds, NULL);
      }
      add_End_keepalive(ne, get_new_node(ka));
    }
  }
}

Götz Lindenmaier's avatar
Götz Lindenmaier committed
282
/* Copies the graph reachable from current_ir_graph->end to the obstack
283
   in current_ir_graph and fixes the environment.
Götz Lindenmaier's avatar
Götz Lindenmaier committed
284
285
286
   Then fixes the fields in current_ir_graph containing nodes of the
   graph.  */
/* Copies the graph reachable from current_ir_graph->end to the obstack
   in current_ir_graph, then redirects all the anchor fields of the graph
   (end, start, frame, globals, args, bad, ...) to the copies.  Anchors
   that were not reachable from End are copied by hand afterwards. */
void
copy_graph_env () {
  /* Not all nodes remembered in current_ir_graph might be reachable
     from the end node.  Assure their link is set to NULL, so that
     we can test whether new nodes have been computed. */
  set_irn_link(get_irg_frame  (current_ir_graph), NULL);
  set_irn_link(get_irg_globals(current_ir_graph), NULL);
  set_irn_link(get_irg_args   (current_ir_graph), NULL);

  /* we use the block walk flag for removing Bads from Blocks ins. */
  inc_irg_block_visited(current_ir_graph);

  /* copy the graph */
  copy_graph();

  /* fix the fields in current_ir_graph */
  free_End(get_irg_end(current_ir_graph));
  set_irg_end        (current_ir_graph, get_new_node(get_irg_end(current_ir_graph)));
  set_irg_end_block  (current_ir_graph, get_new_node(get_irg_end_block(current_ir_graph)));
  /* Anchors not reached by the walk (link still NULL) are copied by hand. */
  if (get_irn_link(get_irg_frame(current_ir_graph)) == NULL) {
    copy_node (get_irg_frame(current_ir_graph), NULL);
    copy_preds(get_irg_frame(current_ir_graph), NULL);
  }
  if (get_irn_link(get_irg_globals(current_ir_graph)) == NULL) {
    copy_node (get_irg_globals(current_ir_graph), NULL);
    copy_preds(get_irg_globals(current_ir_graph), NULL);
  }
  if (get_irn_link(get_irg_args(current_ir_graph)) == NULL) {
    copy_node (get_irg_args(current_ir_graph), NULL);
    copy_preds(get_irg_args(current_ir_graph), NULL);
  }
  set_irg_start  (current_ir_graph, get_new_node(get_irg_start(current_ir_graph)));

  set_irg_start_block(current_ir_graph,
                      get_new_node(get_irg_start_block(current_ir_graph)));
  set_irg_frame  (current_ir_graph, get_new_node(get_irg_frame(current_ir_graph)));
  set_irg_globals(current_ir_graph, get_new_node(get_irg_globals(current_ir_graph)));
  set_irg_args   (current_ir_graph, get_new_node(get_irg_args(current_ir_graph)));
  if (get_irn_link(get_irg_bad(current_ir_graph)) == NULL) {
    copy_node(get_irg_bad(current_ir_graph), NULL);
    copy_preds(get_irg_bad(current_ir_graph), NULL);
  }
  set_irg_bad(current_ir_graph, get_new_node(get_irg_bad(current_ir_graph)));
}

331
332
333
334
335
336
/* Copies all reachable nodes to a new obstack.  Removes bad inputs
   from block nodes and the corresponding inputs from Phi nodes.
   Merges single exit blocks with single entry blocks and removes
   1-input Phis.
   Adds all new nodes to a new hash table for cse.  Does not
   perform cse, so the hash table might contain common subexpressions. */
Götz Lindenmaier's avatar
Götz Lindenmaier committed
337
338
339
340
341
342
343
344
345
346
347
/* Amroq call this emigrate() */
/* Removes all dead nodes of irg by copying the reachable nodes to a fresh
   obstack (copying garbage collection) and freeing the old one.  Also
   rebuilds the cse hash table.  Does nothing when the dead-node-
   elimination option is disabled. */
void
dead_node_elimination(ir_graph *irg) {
  ir_graph *rem;
  struct obstack *graveyard_obst = NULL;
  struct obstack *rebirth_obst   = NULL;

  /* Remember external state of current_ir_graph. */
  rem = current_ir_graph;
  current_ir_graph = irg;

  /* Handle graph state */
  assert(get_irg_phase_state(current_ir_graph) != phase_building);
  /* Out edges point into the old obstack and become invalid. */
  free_outs(current_ir_graph);

  if (get_optimize() && get_opt_dead_node_elimination()) {

    /* A quiet place, where the old obstack can rest in peace,
       until it will be cremated. */
    graveyard_obst = irg->obst;

    /* A new obstack, where the reachable nodes will be copied to. */
    rebirth_obst = (struct obstack *) xmalloc (sizeof (struct obstack));
    current_ir_graph->obst = rebirth_obst;
    obstack_init (current_ir_graph->obst);

    /* We also need a new hash table for cse */
    del_identities (irg->value_table);
    irg->value_table = new_identities ();

    /* Copy the graph from the old to the new obstack */
    copy_graph_env();

    /* Free memory from old unoptimized obstack */
    obstack_free(graveyard_obst, 0);  /* First empty the obstack ... */
    xfree (graveyard_obst);           /* ... then free it.           */
  }

  current_ir_graph = rem;
}
377
378
379
380
381

/**********************************************************************/
/*  Functionality for inlining                                        */
/**********************************************************************/

Götz Lindenmaier's avatar
Götz Lindenmaier committed
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
/* Copy node for inlineing.  Copies the node by calling copy_node and
   then updates the entity if it's a local one.  env must be a pointer
   to the frame type of the procedure. The new entities must be in
   the link field of the entities. */
/* Copy node for inlining.  Copies the node by calling copy_node and then
   updates the entity if it's a Sel into the callee's frame.  env must be
   a pointer to the frame type of the inlined procedure; the replicated
   entities must already sit in the link fields of the originals. */
void
copy_node_inline (ir_node *n, void *env) {
  type *frame_tp = (type *)env;
  ir_node *copy;

  copy_node(n, NULL);
  if (get_irn_op(n) != op_Sel) return;

  copy = get_new_node (n);
  assert(get_irn_op(copy) == op_Sel);
  /* Redirect accesses to the callee's frame to the replicated entities. */
  if (get_entity_owner(get_Sel_entity(n)) == frame_tp)
    set_Sel_entity(copy, get_entity_link(get_Sel_entity(n)));
}

401
402
403
404
405
406
407
408
409
/* Inlines called_graph at the given Call node.
   The Call is split into a pre_call Tuple (replacing the callee's Start)
   and the old Call node, which is turned into a Tuple collecting the
   callee's results.  The callee's frame entities are replicated, the
   callee's body is copied into the caller by a dead-node-elimination
   style walk, and Return/exception control flow is merged with the call
   site.
   Preconditions: both graphs are pinned and the Call's type matches the
   callee's entity type.  Self-recursive calls are not inlined.
   Fix: the early return for self-recursive calls used to leave the
   global optimize flag switched off (set_optimize(0) without restoring
   rem_opt); it is restored on every exit now. */
void inline_method(ir_node *call, ir_graph *called_graph) {
  ir_node *pre_call;
  ir_node *post_call, *post_bl;
  ir_node *in[5];
  ir_node *end, *end_bl;
  ir_node **res_pred;
  ir_node **cf_pred;
  ir_node *ret, *phi;
  ir_node *cf_op, *bl;
  int arity, n_ret, n_exc, n_res, i, j, rem_opt;
  type *called_frame;

  if (!get_optimize() || !get_opt_inline()) return;
  /** Turn off optimizations, this can cause problems when allocating new nodes. **/
  rem_opt = get_optimize();
  set_optimize(0);

  /* Handle graph state */
  assert(get_irg_phase_state(current_ir_graph) != phase_building);
  assert(get_irg_pinned(current_ir_graph) == pinned);
  assert(get_irg_pinned(called_graph) == pinned);
  if (get_irg_outs_state(current_ir_graph) == outs_consistent)
    set_irg_outs_inconsistent(current_ir_graph);

  /** Check preconditions **/
  assert(get_irn_op(call) == op_Call);
  assert(get_Call_type(call) == get_entity_type(get_irg_ent(called_graph)));
  assert(get_type_tpop(get_Call_type(call)) == type_method);
  if (called_graph == current_ir_graph) {
    /* Cannot inline a procedure into itself.  Restore the optimize flag
       before bailing out (was missing: left optimization disabled). */
    set_optimize(rem_opt);
    return;
  }

  /** Part the Call node into two nodes.  Pre_call collects the parameters of
      the procedure and later replaces the Start node of the called graph.
      Post_call is the old Call node and collects the results of the called
      graph. Both will end up being a tuple.  **/
  post_bl = get_nodes_Block(call);
  set_irg_current_block(current_ir_graph, post_bl);
  /* XxMxPxP of Start + parameter of Call */
  in[0] = new_Jmp();
  in[1] = get_Call_mem(call);
  in[2] = get_irg_frame(current_ir_graph);
  in[3] = get_irg_globals(current_ir_graph);
  in[4] = new_Tuple (get_Call_n_params(call), get_Call_param_arr(call));
  pre_call = new_Tuple(5, in);
  post_call = call;

  /** Part the block of the Call node into two blocks.
      The new block gets the ins of the old block, pre_call and all its
      predecessors and all Phi nodes. **/
  part_block(pre_call);

  /** Prepare state for dead node elimination **/
  /* Visited flags in calling irg must be >= flag in called irg.
     Else walker and arity computation will not work. */
  if (get_irg_visited(current_ir_graph) <= get_irg_visited(called_graph))
    set_irg_visited(current_ir_graph, get_irg_visited(called_graph)+1);
  if (get_irg_block_visited(current_ir_graph) < get_irg_block_visited(called_graph))
    set_irg_block_visited(current_ir_graph, get_irg_block_visited(called_graph));
  /* Set pre_call as new Start node in link field of the start node of
     calling graph and pre_calls block as new block for the start block
     of calling graph.
     Further mark these nodes so that they are not visited by the
     copying. */
  set_irn_link(get_irg_start(called_graph), pre_call);
  set_irn_visited(get_irg_start(called_graph),
                  get_irg_visited(current_ir_graph));
  set_irn_link(get_irg_start_block(called_graph),
               get_nodes_Block(pre_call));
  set_irn_visited(get_irg_start_block(called_graph),
                  get_irg_visited(current_ir_graph));

  /* Initialize for compaction of in arrays */
  inc_irg_block_visited(current_ir_graph);
  /*
      set_Block_block_visited(get_irg_start_block(called_graph),
      get_irg_block_visited(current_ir_graph) +1 +1);  (count for self edge) */

  /*** Replicate local entities of the called_graph ***/
  called_frame = get_irg_frame_type(called_graph);
  for (i = 0; i < get_class_n_member(called_frame); i++) {
    entity *new_ent, *old_ent;
    old_ent = get_class_member(called_frame, i);
    new_ent = copy_entity_own(old_ent, get_cur_frame_type());
    set_entity_link(old_ent, new_ent);
  }

  /* visited is > than that of called graph.  With this trick visited will
     remain unchanged so that an outer walker, e.g., searching the call nodes
     to inline, calling this inline will not visit the inlined nodes. */
  set_irg_visited(current_ir_graph, get_irg_visited(current_ir_graph)-1);

  /** Performing dead node elimination inlines the graph **/
  /* Copies the nodes to the obstack of current_ir_graph. Updates links to new
     entities. */
  /* @@@ endless loops are not copied!! */
  irg_walk(get_irg_end(called_graph), copy_node_inline, copy_preds,
           get_irg_frame_type(called_graph));

  /* Repair called_graph */
  set_irg_visited(called_graph, get_irg_visited(current_ir_graph));
  set_irg_block_visited(called_graph, get_irg_block_visited(current_ir_graph));
  set_Block_block_visited(get_irg_start_block(called_graph), 0);

  /*** Merge the end of the inlined procedure with the call site ***/
  /* We will turn the old Call node into a Tuple with the following
     predecessors:
     -1: Block of Tuple.
      0: Phi of all Memories of Return statements.
      1: Jmp from new Block that merges the control flow from all exception
         predecessors of the old end block.
      2: Tuple of all arguments.
      3: Phi of Exception memories.
  */

  /** Precompute some values **/
  end_bl = get_new_node(get_irg_end_block(called_graph));
  end = get_new_node(get_irg_end(called_graph));
  arity = get_irn_arity(end_bl);    /* arity = n_exc + n_ret  */
  n_res = get_method_n_res(get_Call_type(call));

  /* NOTE(review): malloc results are unchecked, matching the file's
     existing style; consider xmalloc as used by dead_node_elimination. */
  res_pred = (ir_node **) malloc (n_res * sizeof (ir_node *));
  cf_pred = (ir_node **) malloc (arity * sizeof (ir_node *));

  set_irg_current_block(current_ir_graph, post_bl); /* just to make sure */

  /** Preserve the keepalives of the inlined end node in the caller. **/
  for (i = 0; i < get_irn_arity(end); i++)
    add_End_keepalive(get_irg_end(current_ir_graph), get_irn_n(end, i));
  /* The new end node will die, but the in array is not on the obstack ... */
  free_End(end);

  /** Collect control flow from Return blocks to post_calls block. Replace
      Return nodes by Jump nodes. **/
  n_ret = 0;
  for (i = 0; i < arity; i++) {
    ir_node *pred;
    pred = get_irn_n(end_bl, i);
    if (get_irn_op(pred) == op_Return) {
      cf_pred[n_ret] = new_r_Jmp(current_ir_graph, get_nodes_Block(pred));
      n_ret++;
    }
  }
  set_irn_in(post_bl, n_ret, cf_pred);

  /** Collect results from Return nodes to post_call. Post_call is
      turned into a tuple. **/
  turn_into_tuple(post_call, 4);
  /* First the Memory-Phi */
  n_ret = 0;
  for (i = 0; i < arity; i++) {
    ret = get_irn_n(end_bl, i);
    if (get_irn_op(ret) == op_Return) {
      cf_pred[n_ret] = get_Return_mem(ret);
      n_ret++;
    }
  }
  phi = new_Phi(n_ret, cf_pred, mode_M);
  set_Tuple_pred(call, 0, phi);
  set_irn_link(phi, get_irn_link(post_bl));  /* Conserve Phi-list for further inlinings */
  set_irn_link(post_bl, phi);
  /* Now the real results */
  if (n_res > 0) {
    for (j = 0; j < n_res; j++) {
      n_ret = 0;
      for (i = 0; i < arity; i++) {
        ret = get_irn_n(end_bl, i);
        if (get_irn_op(ret) == op_Return) {
          cf_pred[n_ret] = get_Return_res(ret, j);
          n_ret++;
        }
      }
      phi = new_Phi(n_ret, cf_pred, get_irn_mode(cf_pred[0]));
      res_pred[j] = phi;
      set_irn_link(phi, get_irn_link(post_bl));  /* Conserve Phi-list for further inlinings */
      set_irn_link(post_bl, phi);
    }
    set_Tuple_pred(call, 2, new_Tuple(n_res, res_pred));
  } else {
    set_Tuple_pred(call, 2, new_Bad());
  }
  /* Finally the exception control flow.  We need to add a Phi node to
     collect the memory containing the exception objects.  Further we need
     to add another block to get a correct representation of this Phi.  To
     this block we add a Jmp that resolves into the X output of the Call
     when the Call is turned into a tuple. */
  n_exc = 0;
  for (i = 0; i < arity; i++) {
    ir_node *pred;
    pred = get_irn_n(end_bl, i);
    if (is_fragile_op(skip_Proj(pred)) || (get_irn_op(skip_Proj(pred)) == op_Raise)) {
      cf_pred[n_exc] = pred;
      n_exc++;
    }
  }
  if (n_exc > 0) {
    new_Block(n_exc, cf_pred);      /* watch it: current_block is changed! */
    set_Tuple_pred(call, 1, new_Jmp());
    /* The Phi for the memories with the exception objects */
    n_exc = 0;
    for (i = 0; i < arity; i++) {
      ir_node *pred;
      pred = skip_Proj(get_irn_n(end_bl, i));
      if (get_irn_op(pred) == op_Call) {
        cf_pred[n_exc] = new_r_Proj(current_ir_graph, get_nodes_Block(pred), pred, mode_M, 3);
        n_exc++;
      } else if (is_fragile_op(pred)) {
        /* We rely that all cfops have the memory output at the same position. */
        cf_pred[n_exc] = new_r_Proj(current_ir_graph, get_nodes_Block(pred), pred, mode_M, 0);
        n_exc++;
      } else if (get_irn_op(pred) == op_Raise) {
        cf_pred[n_exc] = new_r_Proj(current_ir_graph, get_nodes_Block(pred), pred, mode_M, 1);
        n_exc++;
      }
    }
    set_Tuple_pred(call, 3, new_Phi(n_exc, cf_pred, mode_M));
  } else {
    set_Tuple_pred(call, 1, new_Bad());
    set_Tuple_pred(call, 3, new_Bad());
  }
  free(res_pred);
  free(cf_pred);

  /*** Correct the control flow to the end node.
       If the exception control flow from the Call directly branched to the
       end block we now have the following control flow predecessor pattern:
       ProjX -> Tuple -> Jmp.
       We must remove the Jmp along with it's empty block and add Jmp's
       predecessors as predecessors of this end block. ***/
  /* find the problematic predecessor of the end block. */
  end_bl = get_irg_end_block(current_ir_graph);
  for (i = 0; i < get_Block_n_cfgpreds(end_bl); i++) {
    cf_op = get_Block_cfgpred(end_bl, i);
    if (get_irn_op(cf_op) == op_Proj) {
      cf_op = get_Proj_pred(cf_op);
      if (get_irn_op(cf_op) == op_Tuple) {
        cf_op = get_Tuple_pred(cf_op, 1);
        assert(get_irn_op(cf_op) == op_Jmp);
        break;
      }
    }
  }
  /* repair */
  if (i < get_Block_n_cfgpreds(end_bl)) {
    bl = get_nodes_Block(cf_op);
    arity = get_Block_n_cfgpreds(end_bl) + get_Block_n_cfgpreds(bl) - 1;
    cf_pred = (ir_node **) malloc (arity * sizeof (ir_node *));
    for (j = 0; j < i; j++)
      cf_pred[j] = get_Block_cfgpred(end_bl, j);
    for (; j < i + get_Block_n_cfgpreds(bl); j++)
      cf_pred[j] = get_Block_cfgpred(bl, j-i);
    for (; j < arity; j++)
      cf_pred[j] = get_Block_cfgpred(end_bl, j-get_Block_n_cfgpreds(bl) +1);
    set_irn_in(end_bl, arity, cf_pred);
    free(cf_pred);
  }

  /** Turn optimizations back on. **/
  set_optimize(rem_opt);
}
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739

/********************************************************************/
/* Apply inlineing to small methods.                                */
/********************************************************************/

static int pos;

/* It makes no sense to inline too many calls in one procedure. Anyways,
   I didn't get a version with NEW_ARR_F to run. */
#define MAX_INLINE 1024

/* Walker callback: remember in the buffer passed via env every Call node
   whose address is a Const pointing to an entity with a known ir_graph
   (i.e. a locally compiled method).  At most MAX_INLINE calls are kept;
   the module-static `pos` counts the filled slots. */
static void collect_calls(ir_node *node, void *env) {
  ir_node **call_buf = (ir_node **)env;
  ir_node *addr;
  tarval *tv;
  ir_graph *callee_irg;

  /* Only Call nodes are of interest. */
  if (get_irn_op(node) != op_Call) return;

  /* Only calls through a constant address can be resolved here. */
  addr = get_Call_ptr(node);
  if (get_irn_op(addr) != op_Const) return;

  /* Check whether the constant is the pointer to a compiled entity. */
  tv = get_Const_tarval(addr);
  if (!tv->u.p.ent) return;

  callee_irg = get_entity_irg(tv->u.p.ent);
  if (callee_irg && pos < MAX_INLINE) {
    /* The Call node calls a locally defined method.  Remember to inline. */
    call_buf[pos++] = node;
  }
}


/* Inlines all small methods at call sites where the called address comes
   from a Const node that references the entity representing the called
   method.
   The size argument is a rough measure for the code size of the method:
   Methods where the obstack containing the firm graph is smaller than
   size are inlined. */
/* Inlines all small methods at call sites where the called address comes
   from a Const node that references the entity representing the called
   method.
   The size argument is a rough measure for the code size of the method:
   Methods where the obstack containing the firm graph is smaller than
   size are inlined. */
void inline_small_irgs(ir_graph *irg, int size) {
  int i;
  ir_node *calls[MAX_INLINE];
  ir_graph *rem = current_ir_graph;

  if (!(get_optimize() && get_opt_inline())) return;

  current_ir_graph = irg;
  /* Handle graph state */
  assert(get_irg_phase_state(current_ir_graph) != phase_building);

  /* Find Call nodes to inline.
     (We can not inline during a walk of the graph, as inlineing the same
     method several times changes the visited flag of the walked graph:
     after the first inlineing visited of the callee equals visited of
     the caller.  With the next inlineing both are increased.) */
  pos = 0;
  irg_walk(get_irg_end(irg), NULL, collect_calls, (void *) calls);

  /* collect_calls stores only while pos < MAX_INLINE, so every slot up to
     pos (at most MAX_INLINE) is valid.  The former test `pos < MAX_INLINE`
     wrongly skipped inlining altogether when the buffer was exactly full. */
  if (pos > 0) {
    /* There are calls to inline */
    collect_phiprojs(irg);
    for (i = 0; i < pos; i++) {
      tarval *tv;
      ir_graph *callee;
      tv = get_Const_tarval(get_Call_ptr(calls[i]));
      callee = get_entity_irg(tv->u.p.ent);
      /* Inline only if the callee's graph obstack is small enough. */
      if ((_obstack_memory_used(callee->obst) - obstack_room(callee->obst)) < size) {
	inline_method(calls[i], callee);
      }
    }
  }

  current_ir_graph = rem;
}
Götz Lindenmaier's avatar
Götz Lindenmaier committed
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988


/********************************************************************/
/*  Code Placement.  Pinns all floating nodes to a block where they */
/*  will be executed only if needed.                                */
/********************************************************************/

static pdeq *worklist;		/* worklist of ir_node*s */

/* Find the earliest correct block for N.  --- Place N into the
   same Block as its dominance-deepest Input.  */
static void
place_floats_early (ir_node *n)
{
  int i, start;

  /* we must not run into an infinite loop */
  assert (irn_not_visited(n));
  mark_irn_visited(n);

  /* Place floating nodes: pin n to the dominance-deepest block among the
     blocks of its inputs (recursively placing floating inputs first). */
  if (get_op_pinned(get_irn_op(n)) == floats) {
    int depth = 0;
    ir_node *b = new_Bad();   /* The block to place this node in */

    assert(get_irn_op(n) != op_Block);

    if ((get_irn_op(n) == op_Const) ||
	(get_irn_op(n) == op_SymConst) ||
	(is_Bad(n))) {
      /* These nodes will not be placed by the loop below
         (they have no data inputs), so default to the start block. */
      b = get_irg_start_block(current_ir_graph);
      depth = 1;
    }

    /* find the block for this node. */
    for (i = 0; i < get_irn_arity(n); i++) {
      ir_node *dep = get_irn_n(n, i);
      ir_node *dep_block;
      if ((irn_not_visited(dep)) &&
	  (get_op_pinned(get_irn_op(dep)) == floats)) {
	place_floats_early (dep);
      }
      /* Because all loops contain at least one pinned node, now all
         our inputs are either pinned or place_early has already
         been finished on them.  We do not have any unfinished inputs!  */
      dep_block = get_nodes_Block(dep);
      if ((!is_Bad(dep_block)) &&
	  (get_Block_dom_depth(dep_block) > depth)) {
	b = dep_block;
	depth = get_Block_dom_depth(dep_block);
      }
      /* Avoid that the node is placed in the Start block */
      if ((depth == 1) && (get_Block_dom_depth(get_nodes_Block(n)) > 1)) {
	b = get_Block_cfg_out(get_irg_start_block(current_ir_graph), 0);
	assert(b != get_irg_start_block(current_ir_graph));
	depth = 2;
      }
    }
    set_nodes_Block(n, b);
  }

  /* Add predecessors of non floating nodes on worklist.
     (start at -1 for non-Block nodes to include the block input, too.) */
  start = (get_irn_op(n) == op_Block) ? 0 : -1;
  for (i = start; i < get_irn_arity(n); i++) {
    ir_node *pred = get_irn_n(n, i);
    if (irn_not_visited(pred)) {
      pdeq_putr (worklist, pred);
    }
  }
}

/* Floating nodes form subgraphs that begin at nodes as Const, Load,
   Start, Call and end at pinned nodes as Store, Call.  Place_early
   places all floating nodes reachable from its argument through floating
   nodes and adds all beginnings at pinned nodes to the worklist. */
/* Floating nodes form subgraphs that begin at nodes as Const, Load,
   Start, Call and end at pinned nodes as Store, Call.  Place_early
   places all floating nodes reachable from its argument through floating
   nodes and adds all beginnings at pinned nodes to the worklist.
   Fixed: removed the unused locals `i` and `del_me`. */
inline void place_early () {
  assert(worklist);
  inc_irg_visited(current_ir_graph);

  /* this inits the worklist */
  place_floats_early (get_irg_end(current_ir_graph));

  /* Work the content of the worklist. */
  while (!pdeq_empty (worklist)) {
    ir_node *n = pdeq_getl (worklist);
    if (irn_not_visited(n)) place_floats_early (n);
  }

  /* Moving nodes between blocks invalidates the out edges. */
  set_irg_outs_inconsistent(current_ir_graph);
  current_ir_graph->pinned = pinned;
}


/* deepest common dominance ancestor of DCA and CONSUMER of PRODUCER */
/* Deepest common dominance ancestor of DCA and CONSUMER of PRODUCER.
   Pass dca == NULL to start the computation; the result dominates all
   effective use points of producer seen so far. */
static ir_node *
consumer_dom_dca (ir_node *dca, ir_node *consumer, ir_node *producer)
{
  ir_node *block = NULL;  /* fixed: was uninitialized; reading it below was
                             undefined behavior if no Phi input matched */

  /* Compute the latest block into which we can place a node so that it is
     before consumer. */
  if (get_irn_op(consumer) == op_Phi) {
    /* our consumer is a Phi-node, the effective use is in all those
       blocks through which the Phi-node reaches producer */
    int i;
    ir_node *phi_block = get_nodes_Block(consumer);
    for (i = 0;  i < get_irn_arity(consumer); i++) {
      if (get_irn_n(consumer, i) == producer) {
	block = get_nodes_Block(get_Block_cfgpred(phi_block, i));
      }
    }
  } else {
    assert(is_no_Block(consumer));
    block = get_nodes_Block(consumer);
  }

  /* Compute the deepest common ancestor of block and dca. */
  assert(block);  /* producer must actually be an input of consumer */
  if (!dca) return block;
  /* First equalize the dominator depths, then walk up in lock-step. */
  while (get_Block_dom_depth(block) > get_Block_dom_depth(dca))
    block = get_Block_idom(block);
  while (get_Block_dom_depth(dca) > get_Block_dom_depth(block))
    dca = get_Block_idom(dca);
  while (block != dca)
    { block = get_Block_idom(block); dca = get_Block_idom(dca); }

  return dca;
}

#if 0
/* @@@ Needs loop informations.  Will implement later interprocedural. */
/* Move n from its current latest-legal block dca upwards in the dominator
   tree to the dominated block with the smallest loop nesting depth, but
   never above n's early placement.
   NOTE(review): `ir_node *best` is declared after a statement — fine in
   C99, but would need moving to the top of the block for C89 when this
   code is ever enabled. */
static void
move_out_of_loops (ir_node *n, ir_node *dca)
{
  assert(dca);

  /* Find the region deepest in the dominator tree dominating
     dca with the least loop nesting depth, but still dominated
     by our early placement. */
  ir_node *best = dca;
  while (dca != get_nodes_Block(n)) {
    dca = get_Block_idom(dca);
    if (!dca) break; /* should we put assert(dca)? */
    if (get_Block_loop_depth(dca) < get_Block_loop_depth(best)) {
      best = dca;
    }
  }
  if (get_Block_dom_depth(best) >= get_Block_dom_depth(get_nodes_Block(n)))
    set_nodes_Block(n, best);
}
#endif

/* Find the latest legal block for N and place N into the
   `optimal' Block between the latest and earliest legal block.
   The `optimal' block is the dominance-deepest block of those
   with the least loop-nesting-depth.  This places N out of as many
   loops as possible and then makes it as controldependant as
   possible. */
static void
place_floats_late (ir_node *n)
{
  int i;

  assert (irn_not_visited(n)); /* no multiple placement */

  /* no need to place block nodes, control nodes are already placed. */
  if ((get_irn_op(n) != op_Block) && (!is_cfop(n)) && (get_irn_mode(n) != mode_X)) {
    /* Assure that our users are all placed, except the Phi-nodes.
       --- Each dataflow cycle contains at least one Phi-node.  We
       have to break the `user has to be placed before the
       producer' dependance cycle and the Phi-nodes are the
       place to do so, because we need to base our placement on the
       final region of our users, which is OK with Phi-nodes, as they
       are pinned, and they never have to be placed after a
       producer of one of their inputs in the same block anyway. */
    for (i = 0; i < get_irn_n_outs(n); i++) {
      ir_node *succ = get_irn_out(n, i);
      if (irn_not_visited(succ) && (get_irn_op(succ) != op_Phi))
	place_floats_late (succ);
    }

    /* We have to determine the final block of this node... except for constants.
       (Constants stay where place_early put them: the start block.) */
    if ((get_op_pinned(get_irn_op(n)) == floats) &&
	(get_irn_op(n) != op_Const) &&
	(get_irn_op(n) != op_SymConst)) {
      ir_node *dca = NULL;	/* deepest common ancestor in the
				   dominator tree of all nodes'
				   blocks depending on us; our final
				   placement has to dominate DCA. */
      for (i = 0; i < get_irn_n_outs(n); i++) {
	dca = consumer_dom_dca (dca, get_irn_out(n, i), n);
      }
      set_nodes_Block(n, dca);
#if 0
      move_out_of_loops (n, dca);
#endif
    }
  }

  mark_irn_visited(n);

  /* Add predecessors of all non-floating nodes on list. (Those of floating
     nodes are placeded already and therefore are marked.)  */
  for (i = 0; i < get_irn_n_outs(n); i++) {
    if (irn_not_visited(get_irn_out(n, i))) {
      pdeq_putr (worklist, get_irn_out(n, i));
    }
  }
}

/* Drive the late-placement phase: seed the worklist from the start block
   and keep placing nodes until the worklist runs dry. */
inline void place_late() {
  ir_node *node;

  assert(worklist);
  inc_irg_visited(current_ir_graph);

  /* This fills the worklist initially. */
  place_floats_late(get_irg_start_block(current_ir_graph));

  /* ... then drain it. */
  for (;;) {
    if (pdeq_empty (worklist)) break;
    node = pdeq_getl (worklist);
    if (irn_not_visited(node))
      place_floats_late(node);
  }
}

/* Place all floating nodes of irg: first as early as possible (legal
   placement), then as late as possible within the dominator tree to
   minimize unnecessary executions.  Requires consistent dominator
   information; computes it if missing.
   Fixed: the early return for disabled optimization happened AFTER
   current_ir_graph had been switched to irg, leaving the global graph
   pointer changed without restoring `rem`.  Check first, switch after. */
void place_code(ir_graph *irg) {
  ir_graph *rem = current_ir_graph;

  if (!(get_optimize() && get_opt_global_cse())) return;

  current_ir_graph = irg;

  /* Handle graph state */
  assert(get_irg_phase_state(irg) != phase_building);
  if (get_irg_dom_state(irg) != dom_consistent)
    compute_doms(irg);

  /* Place all floating nodes as early as possible. This guarantees
     a legal code placement. */
  worklist = new_pdeq ();
  place_early();

  /* place_early invalidates the outs, place_late needs them. */
  compute_outs(irg);
  /* Now move the nodes down in the dominator tree. This reduces the
     unnecessary executions of the node. */
  place_late();

  set_irg_outs_inconsistent(current_ir_graph);
  del_pdeq (worklist);
  current_ir_graph = rem;
}
993
994
995
996
997
998
999
1000



/********************************************************************/
/* Control flow optimization.                                       */
/* Removes Bad control flow predecessors and empty blocks.  A block */
/* is empty if it contains only a Jmp node.                         */
/* */
For faster browsing, not all history is shown. View entire blame