irgopt.c 43.3 KB
Newer Older
1
/* Copyright (C) 1998 - 2000 by Universitaet Karlsruhe
Christian Schäfer's avatar
Christian Schäfer committed
2
3
4
5
** All rights reserved.
**
** Author: Christian Schaefer
**
Götz Lindenmaier's avatar
Götz Lindenmaier committed
6
** Optimizations for a whole ir graph, i.e., a procedure.
Christian Schäfer's avatar
Christian Schäfer committed
7
8
*/

Boris Boesler's avatar
Boris Boesler committed
9
10
/* $Id$ */

Boris Boesler's avatar
added    
Boris Boesler committed
11
12
13
14
#ifdef HAVE_CONFIG_H
# include <config.h>
#endif

Götz Lindenmaier's avatar
Götz Lindenmaier committed
15
16
# include <assert.h>

17
# include "irprog.h"
Christian Schäfer's avatar
Christian Schäfer committed
18
# include "irgopt.h"
Götz Lindenmaier's avatar
Götz Lindenmaier committed
19
# include "irnode_t.h"
Götz Lindenmaier's avatar
Götz Lindenmaier committed
20
# include "irgraph_t.h"
21
# include "iropt_t.h"
Christian Schäfer's avatar
Christian Schäfer committed
22
23
# include "irgwalk.h"
# include "ircons.h"
Götz Lindenmaier's avatar
Götz Lindenmaier committed
24
# include "misc.h"
Götz Lindenmaier's avatar
Götz Lindenmaier committed
25
# include "irgmod.h"
26
# include "array.h"
Götz Lindenmaier's avatar
Götz Lindenmaier committed
27
# include "pset.h"
28
# include "pdeq.h"       /* For code placement */
Götz Lindenmaier's avatar
Götz Lindenmaier committed
29
# include "irouts.h"
30
31

/* Defined in iropt.c */
Götz Lindenmaier's avatar
Götz Lindenmaier committed
32
33
pset *new_identities (void);
void  del_identities (pset *value_table);
34
void  add_identities   (pset *value_table, ir_node *node);
35

Götz Lindenmaier's avatar
Götz Lindenmaier committed
36
37
/********************************************************************/
/* apply optimizations of iropt to all nodes.                       */
Götz Lindenmaier's avatar
Götz Lindenmaier committed
38
/********************************************************************/
Götz Lindenmaier's avatar
Götz Lindenmaier committed
39

40
41
42
43
/* Pre-walker: clear every node's link field so later passes can use it
   to remember per-node data (e.g. the copy of a node). */
void init_link (ir_node *n, void *env) {
  set_irn_link(n, NULL);
}

Christian Schäfer's avatar
Christian Schäfer committed
44
45
/* Post-walker: replace every operand of n by its locally optimized
   version.  Since the walker visits predecessors first, the operands'
   own operands are already optimized when we get here.  For Block
   nodes we start at operand 0 (a Block has no useful -1 predecessor);
   for all other nodes the -1 entry (the node's block) is included. */
void
optimize_in_place_wrapper (ir_node *n, void *env) {
  int i, first;
  ir_node *opt;

  first = (get_irn_op(n) == op_Block) ? 0 : -1;

  for (i = first; i < get_irn_arity(n); i++) {
    opt = optimize_in_place_2(get_irn_n(n, i));
    set_irn_n(n, i, opt);
    /* optimize_in_place_2 must never hand back an Id node. */
    assert(get_irn_op(opt) != op_Id);
  }
}

/* Applies local optimizations (see iropt) to all nodes reachable from the
   end node of irg.  Saves and restores current_ir_graph around the walk. */
void
local_optimize_graph (ir_graph *irg) {
  ir_graph *saved = current_ir_graph;

  current_ir_graph = irg;

  /* Must not run while the graph is still being constructed. */
  assert(get_irg_phase_state(irg) != phase_building);

  /* Global cse lets nodes float out of their blocks; mark analyses
     that the walk may invalidate as inconsistent. */
  if (get_opt_global_cse())
    set_irg_pinned(current_ir_graph, floats);
  if (get_irg_outs_state(current_ir_graph) == outs_consistent)
    set_irg_outs_inconsistent(current_ir_graph);
  if (get_irg_dom_state(current_ir_graph) == dom_consistent)
    set_irg_dom_inconsistent(current_ir_graph);

  /* Start over with a fresh value table for cse. */
  del_identities(irg->value_table);
  irg->value_table = new_identities();

  /* Walk the graph: clear links on the way down, optimize on the way up. */
  irg_walk(irg->end, init_link, optimize_in_place_wrapper, NULL);

  current_ir_graph = saved;
}

Götz Lindenmaier's avatar
Götz Lindenmaier committed
87
88
89
/********************************************************************/
/* Routines for dead node elimination / copying garbage collection  */
/* of the obstack.                                                  */
Götz Lindenmaier's avatar
Götz Lindenmaier committed
90
/********************************************************************/
Götz Lindenmaier's avatar
Götz Lindenmaier committed
91

Götz Lindenmaier's avatar
Götz Lindenmaier committed
92
93
/* Remeber the new node in the old node by using a field all nodes have. */
inline void
Christian Schäfer's avatar
Christian Schäfer committed
94
95
set_new_node (ir_node *old, ir_node *new)
{
Götz Lindenmaier's avatar
Götz Lindenmaier committed
96
  old->link = new;
Christian Schäfer's avatar
Christian Schäfer committed
97
98
99
}

/* Get this new node, before the old node is forgotton.*/
Götz Lindenmaier's avatar
Götz Lindenmaier committed
100
inline ir_node *
Christian Schäfer's avatar
Christian Schäfer committed
101
102
get_new_node (ir_node * n)
{
Götz Lindenmaier's avatar
Götz Lindenmaier committed
103
104
105
  return n->link;
}

Götz Lindenmaier's avatar
Götz Lindenmaier committed
106
107
/* We use the block_visited flag to mark that we have computed the
   number of useful predecessors for this block.
108
109
110
111
   Further we encode the new arity in this flag in the old blocks.
   Remembering the arity is useful, as it saves a lot of pointer
   accesses.  This function is called for all Phi and Block nodes
   in a Block. */
Götz Lindenmaier's avatar
Götz Lindenmaier committed
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
/* Compute the number of non-Bad predecessors of block b.  The result is
   cached by encoding it into the block_visited flag: a flag value >= the
   graph's block-visited counter means the cached arity is
   (flag - counter).  The cache pays off because this function is called
   for the block and again for every Phi in it. */
inline int
compute_new_arity(ir_node *b) {
  int i, res, arity;
  int irg_v, block_v;

  irg_v = get_irg_block_visited(current_ir_graph);
  block_v = get_Block_block_visited(b);
  if (block_v >= irg_v) {
    /* We already computed the number of preds for this block and saved
       it in the block_visited flag. */
    return block_v - irg_v;
  } else {
    /* Count the good (non-Bad) predecessors.  The arity is hoisted out
       of the loop condition: it is loop-invariant. */
    arity = get_irn_arity(b);
    res = arity;
    for (i = 0; i < arity; i++)
      if (get_irn_opcode(get_irn_n(b, i)) == iro_Bad) res--;
    /* Cache the result in the flag for subsequent calls. */
    set_Block_block_visited(b, irg_v + res);
    return res;
  }
}

/* Copies the node to the new obstack. The Ins of the new node point to
135
136
   the predecessors on the old obstack.  For block/phi nodes not all
   predecessors might be copied.  n->link points to the new node.
Götz Lindenmaier's avatar
Götz Lindenmaier committed
137
   For Phi and Block nodes the function allocates in-arrays with an arity
Götz Lindenmaier's avatar
Götz Lindenmaier committed
138
139
   only for useful predecessors.  The arity is determined by counting
   the non-bad predecessors of the block. */
140
void
Götz Lindenmaier's avatar
Götz Lindenmaier committed
141
142
copy_node (ir_node *n, void *env) {
  ir_node *nn, *block;
Götz Lindenmaier's avatar
Götz Lindenmaier committed
143
  int new_arity;
144

Götz Lindenmaier's avatar
Götz Lindenmaier committed
145
146
  if (get_irn_opcode(n) == iro_Block) {
    block = NULL;
Götz Lindenmaier's avatar
Götz Lindenmaier committed
147
    new_arity = compute_new_arity(n);
Götz Lindenmaier's avatar
Götz Lindenmaier committed
148
149
  } else {
    block = get_nodes_Block(n);
Götz Lindenmaier's avatar
Götz Lindenmaier committed
150
151
152
153
154
    if (get_irn_opcode(n) == iro_Phi) {
      new_arity = compute_new_arity(block);
    } else {
      new_arity = get_irn_arity(n);
    }
Götz Lindenmaier's avatar
Götz Lindenmaier committed
155
156
157
158
159
  }
  nn = new_ir_node(current_ir_graph,
		   block,
		   get_irn_op(n),
		   get_irn_mode(n),
Götz Lindenmaier's avatar
Götz Lindenmaier committed
160
		   new_arity,
Götz Lindenmaier's avatar
Götz Lindenmaier committed
161
		   get_irn_in(n));
Götz Lindenmaier's avatar
Götz Lindenmaier committed
162
163
164
  /* Copy the attributes.  These might point to additional data.  If this
     was allocated on the old obstack the pointers now are dangling.  This
     frees e.g. the memory of the graph_arr allocated in new_immBlock. */
Götz Lindenmaier's avatar
Götz Lindenmaier committed
165
166
  copy_attrs(n, nn);
  set_new_node(n, nn);
167
168
169
170

  /*  printf("\n old node: "); DDMSG2(n);
      printf(" new node: "); DDMSG2(nn); */

Christian Schäfer's avatar
Christian Schäfer committed
171
172
}

Götz Lindenmaier's avatar
Götz Lindenmaier committed
173
174
/* Copies new predecessors of old node to new node remembered in link.
   Spare the Bad predecessors of Phi and Block nodes. */
175
void
Götz Lindenmaier's avatar
Götz Lindenmaier committed
176
copy_preds (ir_node *n, void *env) {
Götz Lindenmaier's avatar
Götz Lindenmaier committed
177
  ir_node *nn, *block, *on;
Götz Lindenmaier's avatar
Götz Lindenmaier committed
178
  int i, j;
Götz Lindenmaier's avatar
Götz Lindenmaier committed
179
180

  nn = get_new_node(n);
Götz Lindenmaier's avatar
Götz Lindenmaier committed
181

182
183
184
185
  /* printf("\n old node: "); DDMSG2(n);
     printf(" new node: "); DDMSG2(nn);
     printf(" arities: old: %d, new: %d\n", get_irn_arity(n), get_irn_arity(nn)); */

Götz Lindenmaier's avatar
Götz Lindenmaier committed
186
187
188
189
190
191
192
193
  if (get_irn_opcode(n) == iro_Block) {
    /* Don't copy Bad nodes. */
    j = 0;
    for (i = 0; i < get_irn_arity(n); i++)
      if (get_irn_opcode(get_irn_n(n, i)) != iro_Bad) {
	set_irn_n (nn, j, get_new_node(get_irn_n(n, i)));
	j++;
      }
194
195
    /* repair the block visited flag from above misuse. Repair it in both
       graphs so that the old one can still be used. */
Götz Lindenmaier's avatar
Götz Lindenmaier committed
196
    set_Block_block_visited(nn, 0);
197
    set_Block_block_visited(n, 0);
Götz Lindenmaier's avatar
Götz Lindenmaier committed
198
    /* Local optimization could not merge two subsequent blocks if
199
       in array contained Bads.  Now it's possible.
200
       We don't call optimize_in_place as it requires
201
202
203
204
       that the fields in ir_graph are set properly. */
    if (get_Block_n_cfgpreds(nn) == 1
	&& get_irn_op(get_Block_cfgpred(nn, 0)) == op_Jmp)
      exchange(nn, get_nodes_Block(get_Block_cfgpred(nn, 0)));
Götz Lindenmaier's avatar
Götz Lindenmaier committed
205
206
207
208
209
210
211
212
213
214
215
  } else if (get_irn_opcode(n) == iro_Phi) {
    /* Don't copy node if corresponding predecessor in block is Bad.
       The Block itself should not be Bad. */
    block = get_nodes_Block(n);
    set_irn_n (nn, -1, get_new_node(block));
    j = 0;
    for (i = 0; i < get_irn_arity(n); i++)
      if (get_irn_opcode(get_irn_n(block, i)) != iro_Bad) {
	set_irn_n (nn, j, get_new_node(get_irn_n(n, i)));
	j++;
      }
216
217
218
    /* If the pre walker reached this Phi after the post walker visited the
       block block_visited is > 0. */
    set_Block_block_visited(get_nodes_Block(n), 0);
Götz Lindenmaier's avatar
Götz Lindenmaier committed
219
    /* Compacting the Phi's ins might generate Phis with only one
Götz Lindenmaier's avatar
Götz Lindenmaier committed
220
       predecessor. */
Götz Lindenmaier's avatar
Götz Lindenmaier committed
221
    if (get_irn_arity(n) == 1)
222
      exchange(n, get_irn_n(n, 0));
Götz Lindenmaier's avatar
Götz Lindenmaier committed
223
224
225
226
  } else {
    for (i = -1; i < get_irn_arity(n); i++)
      set_irn_n (nn, i, get_new_node(get_irn_n(n, i)));
  }
227
228
229
230
  /* Now the new node is complete.  We can add it to the hash table for cse.
     @@@ inlinening aborts if we identify End. Why? */
  if(get_irn_op(nn) != op_End)
    add_identities (current_ir_graph->value_table, nn);
Götz Lindenmaier's avatar
Götz Lindenmaier committed
231
232
}

233
/* Copies the graph recursively, compacts the keepalive of the end node. */
234
235
236
void
copy_graph () {
  ir_node *oe, *ne; /* old end, new end */
237
  ir_node *ka;      /* keep alive */
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
  int i;

  oe = get_irg_end(current_ir_graph);
  /* copy the end node by hand, allocate dynamic in array! */
  ne = new_ir_node(current_ir_graph,
		   NULL,
		   op_End,
		   mode_X,
		   -1,
		   NULL);
  /* Copy the attributes.  Well, there might be some in the future... */
  copy_attrs(oe, ne);
  set_new_node(oe, ne);

  /* copy the live nodes */
  irg_walk(get_nodes_Block(oe), copy_node, copy_preds, NULL);
  /* copy_preds for the end node ... */
  set_nodes_Block(ne, get_new_node(get_nodes_Block(oe)));

  /** ... and now the keep alives. **/
  /* First pick the not marked block nodes and walk them.  We must pick these
     first as else we will oversee blocks reachable from Phis. */
  for (i = 0; i < get_irn_arity(oe); i++) {
    ka = get_irn_n(oe, i);
    if ((get_irn_op(ka) == op_Block) &&
	(get_irn_visited(ka) < get_irg_visited(current_ir_graph))) {
      /* We must keep the block alive and copy everything reachable */
      set_irg_visited(current_ir_graph, get_irg_visited(current_ir_graph)-1);
      irg_walk(ka, copy_node, copy_preds, NULL);
      add_End_keepalive(ne, get_new_node(ka));
    }
  }

  /* Now pick the Phis.  Here we will keep all! */
  for (i = 0; i < get_irn_arity(oe); i++) {
    ka = get_irn_n(oe, i);
    if ((get_irn_op(ka) == op_Phi)) {
      if (get_irn_visited(ka) < get_irg_visited(current_ir_graph)) {
	/* We didn't copy the Phi yet.  */
	set_irg_visited(current_ir_graph, get_irg_visited(current_ir_graph)-1);
	irg_walk(ka, copy_node, copy_preds, NULL);
      }
      add_End_keepalive(ne, get_new_node(ka));
    }
  }
}

Götz Lindenmaier's avatar
Götz Lindenmaier committed
285
/* Copies the graph reachable from current_ir_graph->end to the obstack
286
   in current_ir_graph and fixes the environment.
Götz Lindenmaier's avatar
Götz Lindenmaier committed
287
288
289
   Then fixes the fields in current_ir_graph containing nodes of the
   graph.  */
void
290
copy_graph_env () {
Götz Lindenmaier's avatar
Götz Lindenmaier committed
291
  /* Not all nodes remembered in current_ir_graph might be reachable
292
     from the end node.  Assure their link is set to NULL, so that
Götz Lindenmaier's avatar
Götz Lindenmaier committed
293
294
295
296
297
298
299
300
301
     we can test whether new nodes have been computed. */
  set_irn_link(get_irg_frame  (current_ir_graph), NULL);
  set_irn_link(get_irg_globals(current_ir_graph), NULL);
  set_irn_link(get_irg_args   (current_ir_graph), NULL);

  /* we use the block walk flag for removing Bads from Blocks ins. */
  inc_irg_block_visited(current_ir_graph);

  /* copy the graph */
302
  copy_graph();
Götz Lindenmaier's avatar
Götz Lindenmaier committed
303
304

  /* fix the fields in current_ir_graph */
305
  free_End(get_irg_end(current_ir_graph));
Götz Lindenmaier's avatar
Götz Lindenmaier committed
306
307
  set_irg_end        (current_ir_graph, get_new_node(get_irg_end(current_ir_graph)));
  set_irg_end_block  (current_ir_graph, get_new_node(get_irg_end_block(current_ir_graph)));
308
309
310
311
312
313
314
315
316
317
318
319
  if (get_irn_link(get_irg_frame(current_ir_graph)) == NULL) {
    copy_node (get_irg_frame(current_ir_graph), NULL);
    copy_preds(get_irg_frame(current_ir_graph), NULL);
  }
  if (get_irn_link(get_irg_globals(current_ir_graph)) == NULL) {
    copy_node (get_irg_globals(current_ir_graph), NULL);
    copy_preds(get_irg_globals(current_ir_graph), NULL);
  }
  if (get_irn_link(get_irg_args(current_ir_graph)) == NULL) {
    copy_node (get_irg_args(current_ir_graph), NULL);
    copy_preds(get_irg_args(current_ir_graph), NULL);
  }
Götz Lindenmaier's avatar
Götz Lindenmaier committed
320
  set_irg_start  (current_ir_graph, get_new_node(get_irg_start(current_ir_graph)));
321

Götz Lindenmaier's avatar
Götz Lindenmaier committed
322
323
324
325
326
327
328
329
330
331
  set_irg_start_block(current_ir_graph,
		      get_new_node(get_irg_start_block(current_ir_graph)));
  set_irg_frame  (current_ir_graph, get_new_node(get_irg_frame(current_ir_graph)));
  set_irg_globals(current_ir_graph, get_new_node(get_irg_globals(current_ir_graph)));
  set_irg_args   (current_ir_graph, get_new_node(get_irg_args(current_ir_graph)));
  if (get_irn_link(get_irg_bad(current_ir_graph)) == NULL) {
    copy_node(get_irg_bad(current_ir_graph), NULL);
    copy_preds(get_irg_bad(current_ir_graph), NULL);
  }
  set_irg_bad(current_ir_graph, get_new_node(get_irg_bad(current_ir_graph)));
Götz Lindenmaier's avatar
Götz Lindenmaier committed
332
333
}

334
335
336
337
338
339
/* Copies all reachable nodes to a new obstack.  Removes bad inputs
   from block nodes and the corresponding inputs from Phi nodes.
   Merges single exit blocks with single entry blocks and removes
   1-input Phis.
   Adds all new nodes to a new hash table for cse.  Does not
   perform cse, so the hash table might contain common subexpressions. */
Götz Lindenmaier's avatar
Götz Lindenmaier committed
340
341
342
343
344
345
346
347
348
349
350
/* Amroq call this emigrate() */
void
dead_node_elimination(ir_graph *irg) {
  ir_graph *rem;
  struct obstack *graveyard_obst = NULL;
  struct obstack *rebirth_obst   = NULL;

  /* Remember external state of current_ir_graph. */
  rem = current_ir_graph;
  current_ir_graph = irg;

351
  /* Handle graph state */
352
  assert(get_irg_phase_state(current_ir_graph) != phase_building);
353
354
  free_outs(current_ir_graph);

Götz Lindenmaier's avatar
Götz Lindenmaier committed
355
356
357
358
359
360
361
362
363
364
365
  if (get_optimize() && get_opt_dead_node_elimination()) {

    /* A quiet place, where the old obstack can rest in peace,
       until it will be cremated. */
    graveyard_obst = irg->obst;

    /* A new obstack, where the reachable nodes will be copied to. */
    rebirth_obst = (struct obstack *) xmalloc (sizeof (struct obstack));
    current_ir_graph->obst = rebirth_obst;
    obstack_init (current_ir_graph->obst);

366
367
368
369
    /* We also need a new hash table for cse */
    del_identities (irg->value_table);
    irg->value_table = new_identities ();

Götz Lindenmaier's avatar
Götz Lindenmaier committed
370
    /* Copy the graph from the old to the new obstack */
371
    copy_graph_env();
Götz Lindenmaier's avatar
Götz Lindenmaier committed
372
373
374
375
376
377
378
379

    /* Free memory from old unoptimized obstack */
    obstack_free(graveyard_obst, 0);  /* First empty the obstack ... */
    xfree (graveyard_obst);           /* ... then free it.           */
  }

  current_ir_graph = rem;
}
380
381
382
383
384

/**********************************************************************/
/*  Functionality for inlining                                        */
/**********************************************************************/

Götz Lindenmaier's avatar
Götz Lindenmaier committed
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
/* Copy a node during inlining: delegates to copy_node and then, for Sel
   nodes addressing an entity of the callee's frame type, redirects the
   entity to the replica stored in the entity's link field.  env must be
   a pointer to the frame type of the inlined procedure. */
void
copy_node_inline (ir_node *n, void *env) {
  ir_node *nn;
  type *frame_tp = (type *)env;

  copy_node(n, NULL);
  if (get_irn_op(n) == op_Sel) {
    nn = get_new_node (n);
    assert(get_irn_op(nn) == op_Sel);
    if (get_entity_owner(get_Sel_entity(n)) == frame_tp) {
      /* Local entity: point the copy at the replicated entity. */
      set_Sel_entity(nn, get_entity_link(get_Sel_entity(n)));
    }
  }
}

404
405
406
407
408
409
410
411
412
/* Inlines the method represented by called_graph at the Call node call.
   The Call node is turned into a Tuple; the callee's Start is replaced
   by a Tuple collecting the actual arguments; Return nodes become Jmps
   into the block after the call; exceptional control flow is merged into
   a new block.  cse is switched off while the new nodes are allocated.
   Preconditions: both graphs pinned, phases past building, call is a
   Call whose type matches the callee's entity type. */
void inline_method(ir_node *call, ir_graph *called_graph) {
  ir_node *pre_call;
  ir_node *post_call, *post_bl;
  ir_node *in[5];
  ir_node *end, *end_bl;
  ir_node **res_pred;
  ir_node **cf_pred;
  ir_node *ret, *phi;
  ir_node *cf_op, *bl;
  int arity, n_ret, n_exc, n_res, i, j, rem_opt;
  type *called_frame;

  if (!get_opt_inline()) return;

  /* Handle graph state */
  assert(get_irg_phase_state(current_ir_graph) != phase_building);
  assert(get_irg_pinned(current_ir_graph) == pinned);
  assert(get_irg_pinned(called_graph) == pinned);
  if (get_irg_outs_state(current_ir_graph) == outs_consistent)
    set_irg_outs_inconsistent(current_ir_graph);

  /** Check preconditions **/
  assert(get_irn_op(call) == op_Call);
  assert(get_Call_type(call) == get_entity_type(get_irg_ent(called_graph)));
  assert(get_type_tpop(get_Call_type(call)) == type_method);
  /* Do not inline recursive calls into themselves. */
  if (called_graph == current_ir_graph) return;

  /** Turn off cse, this can cause problems when allocating new nodes. **/
  rem_opt = get_opt_cse();
  set_opt_cse(0);

  /** Part the Call node into two nodes.  Pre_call collects the parameters
      of the procedure and later replaces the Start node of the called
      graph.  Post_call is the old Call node and collects the results of
      the called graph.  Both will end up being a tuple.  **/
  post_bl = get_nodes_Block(call);
  set_irg_current_block(current_ir_graph, post_bl);
  /* XxMxPxP of Start + parameter of Call */
  in[0] = new_Jmp();
  in[1] = get_Call_mem(call);
  in[2] = get_irg_frame(current_ir_graph);
  in[3] = get_irg_globals(current_ir_graph);
  in[4] = new_Tuple (get_Call_n_params(call),
                     get_Call_param_arr(call));
  pre_call = new_Tuple(5, in);
  post_call = call;

  /** Part the block of the Call node into two blocks.
      The new block gets the ins of the old block, pre_call and all its
      predecessors and all Phi nodes. **/
  part_block(pre_call);

  /** Prepare state for dead node elimination **/
  /* Visited flags in calling irg must be >= flag in called irg.
     Else walker and arity computation will not work. */
  if (get_irg_visited(current_ir_graph) <= get_irg_visited(called_graph))
    set_irg_visited(current_ir_graph, get_irg_visited(called_graph)+1);
  if (get_irg_block_visited(current_ir_graph)< get_irg_block_visited(called_graph))
    set_irg_block_visited(current_ir_graph, get_irg_block_visited(called_graph));
  /* Set pre_call as new Start node in link field of the start node of
     calling graph and pre_call's block as new block for the start block
     of calling graph.
     Further mark these nodes so that they are not visited by the
     copying. */
  set_irn_link(get_irg_start(called_graph), pre_call);
  set_irn_visited(get_irg_start(called_graph),
                  get_irg_visited(current_ir_graph));
  set_irn_link(get_irg_start_block(called_graph),
               get_nodes_Block(pre_call));
  set_irn_visited(get_irg_start_block(called_graph),
                  get_irg_visited(current_ir_graph));

  /* Initialize for compaction of in arrays */
  inc_irg_block_visited(current_ir_graph);

  /*** Replicate local entities of the called_graph ***/
  /* copy the entities. */
  called_frame = get_irg_frame_type(called_graph);
  for (i = 0; i < get_class_n_member(called_frame); i++) {
    entity *new_ent, *old_ent;
    old_ent = get_class_member(called_frame, i);
    new_ent = copy_entity_own(old_ent, get_cur_frame_type());
    set_entity_link(old_ent, new_ent);
  }

  /* visited is > than that of called graph.  With this trick visited will
     remain unchanged so that an outer walker, e.g., searching the call
     nodes to inline, calling this inline will not visit the inlined
     nodes. */
  set_irg_visited(current_ir_graph, get_irg_visited(current_ir_graph)-1);

  /** Performing dead node elimination inlines the graph **/
  /* Copies the nodes to the obstack of current_ir_graph.  Updates links
     to new entities. */
  /* @@@ endless loops are not copied!! */
  irg_walk(get_irg_end(called_graph), copy_node_inline, copy_preds,
           get_irg_frame_type(called_graph));

  /* Repair called_graph */
  set_irg_visited(called_graph, get_irg_visited(current_ir_graph));
  set_irg_block_visited(called_graph, get_irg_block_visited(current_ir_graph));
  set_Block_block_visited(get_irg_start_block(called_graph), 0);

  /*** Merge the end of the inlined procedure with the call site ***/
  /* We will turn the old Call node into a Tuple with the following
     predecessors:
     -1: Block of Tuple.
      0: Phi of all Memories of Return statements.
      1: Jmp from new Block that merges the control flow from all
         exception predecessors of the old end block.
      2: Tuple of all arguments.
      3: Phi of Exception memories. */

  /** Precompute some values **/
  end_bl = get_new_node(get_irg_end_block(called_graph));
  end = get_new_node(get_irg_end(called_graph));
  arity = get_irn_arity(end_bl);    /* arity = n_exc + n_ret  */
  n_res = get_method_n_res(get_Call_type(call));

  res_pred = (ir_node **) malloc (n_res * sizeof (ir_node *));
  cf_pred = (ir_node **) malloc (arity * sizeof (ir_node *));

  set_irg_current_block(current_ir_graph, post_bl); /* just to make sure */

  /** archive keepalives **/
  for (i = 0; i < get_irn_arity(end); i++)
    add_End_keepalive(get_irg_end(current_ir_graph), get_irn_n(end, i));
  /* The new end node will die, but the in array is not on the obstack ... */
  free_End(end);

  /** Collect control flow from Return blocks to post_call's block.
      Replace Return nodes by Jmp nodes. **/
  n_ret = 0;
  for (i = 0; i < arity; i++) {
    ir_node *ret;
    ret = get_irn_n(end_bl, i);
    if (get_irn_op(ret) == op_Return) {
      cf_pred[n_ret] = new_r_Jmp(current_ir_graph, get_nodes_Block(ret));
      n_ret++;
    }
  }
  set_irn_in(post_bl, n_ret, cf_pred);

  /** Collect results from Return nodes to post_call. Post_call is
      turned into a tuple. **/
  turn_into_tuple(post_call, 4);
  /* First the Memory-Phi */
  n_ret = 0;
  for (i = 0; i < arity; i++) {
    ret = get_irn_n(end_bl, i);
    if (get_irn_op(ret) == op_Return) {
      cf_pred[n_ret] = get_Return_mem(ret);
      n_ret++;
    }
  }
  phi = new_Phi(n_ret, cf_pred, mode_M);
  set_Tuple_pred(call, 0, phi);
  set_irn_link(phi, get_irn_link(post_bl));  /* Conserve Phi-list for further inlinings */
  set_irn_link(post_bl, phi);
  /* Now the real results */
  if (n_res > 0) {
    for (j = 0; j < n_res; j++) {
      n_ret = 0;
      for (i = 0; i < arity; i++) {
        ret = get_irn_n(end_bl, i);
        if (get_irn_op(ret) == op_Return) {
          cf_pred[n_ret] = get_Return_res(ret, j);
          n_ret++;
        }
      }
      phi = new_Phi(n_ret, cf_pred, get_irn_mode(cf_pred[0]));
      res_pred[j] = phi;
      set_irn_link(phi, get_irn_link(post_bl));  /* Conserve Phi-list for further inlinings */
      set_irn_link(post_bl, phi);
    }
    set_Tuple_pred(call, 2, new_Tuple(n_res, res_pred));
  } else {
    set_Tuple_pred(call, 2, new_Bad());
  }
  /* Finally the exception control flow.  We need to add a Phi node to
     collect the memory containing the exception objects.  Further we need
     to add another block to get a correct representation of this Phi.  To
     this block we add a Jmp that resolves into the X output of the Call
     when the Call is turned into a tuple. */
  n_exc = 0;
  for (i = 0; i < arity; i++) {
    ir_node *ret;
    ret = get_irn_n(end_bl, i);
    if (is_fragile_op(skip_Proj(ret)) || (get_irn_op(skip_Proj(ret)) == op_Raise)) {
      cf_pred[n_exc] = ret;
      n_exc++;
    }
  }
  if (n_exc > 0) {
    new_Block(n_exc, cf_pred);      /* watch it: current_block is changed! */
    set_Tuple_pred(call, 1, new_Jmp());
    /* The Phi for the memories with the exception objects */
    n_exc = 0;
    for (i = 0; i < arity; i++) {
      ir_node *ret;
      ret = skip_Proj(get_irn_n(end_bl, i));
      if (get_irn_op(ret) == op_Call) {
        cf_pred[n_exc] = new_r_Proj(current_ir_graph, get_nodes_Block(ret), ret, mode_M, 3);
        n_exc++;
      } else if (is_fragile_op(ret)) {
        /* We rely that all cfops have the memory output at the same
           position. */
        cf_pred[n_exc] = new_r_Proj(current_ir_graph, get_nodes_Block(ret), ret, mode_M, 0);
        n_exc++;
      } else if (get_irn_op(ret) == op_Raise) {
        cf_pred[n_exc] = new_r_Proj(current_ir_graph, get_nodes_Block(ret), ret, mode_M, 1);
        n_exc++;
      }
    }
    set_Tuple_pred(call, 3, new_Phi(n_exc, cf_pred, mode_M));
  } else {
    set_Tuple_pred(call, 1, new_Bad());
    set_Tuple_pred(call, 3, new_Bad());
  }
  free(res_pred);
  free(cf_pred);

  /*** Correct the control flow to the end node.
       If the exception control flow from the Call directly branched to the
       end block we now have the following control flow predecessor pattern:
       ProjX -> Tuple -> Jmp.
       We must remove the Jmp along with its empty block and add Jmp's
       predecessors as predecessors of this end block. ***/
  /* find the problematic predecessor of the end block. */
  end_bl = get_irg_end_block(current_ir_graph);
  for (i = 0; i < get_Block_n_cfgpreds(end_bl); i++) {
    cf_op = get_Block_cfgpred(end_bl, i);
    if (get_irn_op(cf_op) == op_Proj) {
      cf_op = get_Proj_pred(cf_op);
      if (get_irn_op(cf_op) == op_Tuple) {
        cf_op = get_Tuple_pred(cf_op, 1);
        assert(get_irn_op(cf_op) == op_Jmp);
        break;
      }
    }
  }
  /* repair */
  if (i < get_Block_n_cfgpreds(end_bl)) {
    bl = get_nodes_Block(cf_op);
    arity = get_Block_n_cfgpreds(end_bl) + get_Block_n_cfgpreds(bl) - 1;
    cf_pred = (ir_node **) malloc (arity * sizeof (ir_node *));
    for (j = 0; j < i; j++)
      cf_pred[j] = get_Block_cfgpred(end_bl, j);
    for (j = j; j < i + get_Block_n_cfgpreds(bl); j++)
      cf_pred[j] = get_Block_cfgpred(bl, j-i);
    for (j = j; j < arity; j++)
      cf_pred[j] = get_Block_cfgpred(end_bl, j-get_Block_n_cfgpreds(bl) +1);
    set_irn_in(end_bl, arity, cf_pred);
    free(cf_pred);
  }

  /** Turn cse back on. **/
  set_opt_cse(rem_opt);
}
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743

/********************************************************************/
/* Apply inlineing to small methods.                                */
/********************************************************************/

static int pos;

/* It makes no sense to inline too many calls in one procedure. Anyways,
   I didn't get a version with NEW_ARR_F to run. */
#define MAX_INLINE 1024

/* Walker callback: remember inlineable Call nodes.
   If CALL is a Call whose address operand is a Const referencing an
   entity with a locally defined graph, append it to the buffer passed
   in ENV.  At most MAX_INLINE calls are recorded (counter: global pos). */
static void collect_calls(ir_node *call, void *env) {
  ir_node **call_buf = (ir_node **)env;
  ir_node *ptr;
  tarval *tv;
  ir_graph *callee_irg;

  if (get_irn_op(call) != op_Call) return;

  ptr = get_Call_ptr(call);
  if (get_irn_op(ptr) != op_Const) return;

  /* Check whether the constant is the pointer to a compiled entity. */
  tv = get_Const_tarval(ptr);
  if (!tv->u.p.ent) return;

  callee_irg = get_entity_irg(tv->u.p.ent);
  if (callee_irg && pos < MAX_INLINE) {
    /* The Call node calls a locally defined method.  Remember to inline. */
    call_buf[pos++] = call;
  }
}


/* Inlines all small methods at call sites where the called address comes
   from a Const node that references the entity representing the called
   method.
   The size argument is a rough measure for the code size of the method:
   Methods where the obstack containing the firm graph is smaller than
   size are inlined. */
void inline_small_irgs(ir_graph *irg, int size) {
  int i;
  ir_node *calls[MAX_INLINE];
  ir_graph *rem = current_ir_graph;

  if (!(get_optimize() && get_opt_inline())) return;

  current_ir_graph = irg;
  /* Handle graph state */
  assert(get_irg_phase_state(current_ir_graph) != phase_building);

  /* Find Call nodes to inline.
     (We can not inline during a walk of the graph, as inlineing the same
     method several times changes the visited flag of the walked graph:
     after the first inlineing visited of the callee equals visited of
     the caller.  With the next inlineing both are increased.) */
  pos = 0;
  irg_walk(get_irg_end(irg), NULL, collect_calls, (void *) calls);

  /* collect_calls() stores at most MAX_INLINE entries, so pos > 0 is the
     correct "anything to do" test.  (The former condition additionally
     required pos < MAX_INLINE, which wrongly disabled inlineing entirely
     whenever the buffer was exactly full.) */
  if (pos > 0) {
    /* There are calls to inline */
    collect_phiprojs(irg);
    for (i = 0; i < pos; i++) {
      tarval *tv;
      ir_graph *callee;
      tv = get_Const_tarval(get_Call_ptr(calls[i]));
      callee = get_entity_irg(tv->u.p.ent);
      /* Inline only callees whose graph obstack occupies less than size
         bytes of live memory. */
      if ((_obstack_memory_used(callee->obst) - obstack_room(callee->obst)) < size) {
	inline_method(calls[i], callee);
      }
    }
  }

  current_ir_graph = rem;
}
Götz Lindenmaier's avatar
Götz Lindenmaier committed
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992


/********************************************************************/
/*  Code Placement.  Pinns all floating nodes to a block where they */
/*  will be executed only if needed.                                */
/********************************************************************/

static pdeq *worklist;		/* worklist of ir_node*s, shared state of
				   the place_early/place_late passes below */

/* Find the earliest correct block for N.  --- Place N into the
   same Block as its dominance-deepest Input.
   Recurses depth-first through floating (not pinned) predecessors;
   the visited flag guarantees each node is placed exactly once.
   Predecessors of pinned nodes are pushed on the global worklist. */
static void
place_floats_early (ir_node *n)
{
  int i, start;

  /* we must not run into an infinite loop */
  assert (irn_not_visited(n));
  mark_irn_visited(n);

  /* Place floating nodes. */
  if (get_op_pinned(get_irn_op(n)) == floats) {
    int depth = 0;
    ir_node *b = new_Bad();   /* The block to place this node in */

    assert(get_irn_op(n) != op_Block);

    if ((get_irn_op(n) == op_Const) ||
	(get_irn_op(n) == op_SymConst) ||
	(is_Bad(n))) {
      /* These nodes will not be placed by the loop below. */
      /* Constants and Bad have no data inputs, so default them to the
         start block (dom depth 1). */
      b = get_irg_start_block(current_ir_graph);
      depth = 1;
    }

    /* find the block for this node. */
    /* The earliest legal block is the dominance-deepest block among the
       blocks of all inputs. */
    for (i = 0; i < get_irn_arity(n); i++) {
      ir_node *dep = get_irn_n(n, i);
      ir_node *dep_block;
      if ((irn_not_visited(dep)) &&
	  (get_op_pinned(get_irn_op(dep)) == floats)) {
	place_floats_early (dep);
      }
      /* Because all loops contain at least one pinned node, now all
         our inputs are either pinned or place_early has already
         been finished on them.  We do not have any unfinished inputs!  */
      dep_block = get_nodes_Block(dep);
      if ((!is_Bad(dep_block)) &&
	  (get_Block_dom_depth(dep_block) > depth)) {
	b = dep_block;
	depth = get_Block_dom_depth(dep_block);
      }
      /* Avoid that the node is placed in the Start block */
      /* NOTE(review): uses the start block's first cfg successor instead;
         presumably the start block must stay free of ordinary code —
         confirm against the backend's assumptions. */
      if ((depth == 1) && (get_Block_dom_depth(get_nodes_Block(n)) > 1)) {
	b = get_Block_cfg_out(get_irg_start_block(current_ir_graph), 0);
	assert(b != get_irg_start_block(current_ir_graph));
	depth = 2;
      }
    }
    set_nodes_Block(n, b);
  }

  /* Add predecessors of non floating nodes on worklist. */
  /* For Blocks start at 0 (all cfg preds); for other nodes start at -1
     to include the block the node lives in. */
  start = (get_irn_op(n) == op_Block) ? 0 : -1;
  for (i = start; i < get_irn_arity(n); i++) {
    ir_node *pred = get_irn_n(n, i);
    if (irn_not_visited(pred)) {
      pdeq_putr (worklist, pred);
    }
  }
}

/* Floating nodes form subgraphs that begin at nodes as Const, Load,
   Start, Call and end at pinned nodes as Store, Call.  Place_early
   places all floating nodes reachable from its argument through floating
   nodes and adds all beginnings at pinned nodes to the worklist.
   Uses and drains the global worklist; invalidates the out edges and
   marks the graph as pinned. */
inline void place_early () {
  /* (Removed the unused locals `int i; bool del_me;' of the old
     version.) */
  assert(worklist);
  inc_irg_visited(current_ir_graph);

  /* this inits the worklist */
  place_floats_early (get_irg_end(current_ir_graph));

  /* Work the content of the worklist. */
  while (!pdeq_empty (worklist)) {
    ir_node *n = pdeq_getl (worklist);
    if (irn_not_visited(n)) place_floats_early (n);
  }

  /* Nodes were moved between blocks: the out information is stale now. */
  set_irg_outs_inconsistent(current_ir_graph);
  current_ir_graph->pinned = pinned;
}


/* Deepest common dominance ancestor of DCA and the effective use
   block(s) of PRODUCER at CONSUMER.
   For a Phi consumer the effective use is in the predecessor block(s)
   through which the Phi reaches producer; otherwise it is the
   consumer's own block.  A NULL dca means "no constraint yet". */
static ir_node *
consumer_dom_dca (ir_node *dca, ir_node *consumer, ir_node *producer)
{
  /* Initialized to NULL so the assert below fires deterministically if
     a Phi consumer has no input equal to producer (the old version read
     an uninitialized pointer in that case — undefined behavior). */
  ir_node *block = NULL;

  /* Compute the latest block into which we can place a node so that it is
     before consumer. */
  if (get_irn_op(consumer) == op_Phi) {
    /* our consumer is a Phi-node, the effective use is in all those
       blocks through which the Phi-node reaches producer */
    int i;
    ir_node *phi_block = get_nodes_Block(consumer);
    for (i = 0;  i < get_irn_arity(consumer); i++) {
      if (get_irn_n(consumer, i) == producer) {
	block = get_nodes_Block(get_Block_cfgpred(phi_block, i));
      }
    }
  } else {
    assert(is_no_Block(consumer));
    block = get_nodes_Block(consumer);
  }

  /* Compute the deepest common ancestor of block and dca. */
  assert(block);
  if (!dca) return block;
  /* Walk both blocks up to equal dominator depth, then in lockstep
     until they meet. */
  while (get_Block_dom_depth(block) > get_Block_dom_depth(dca))
    block = get_Block_idom(block);
  while (get_Block_dom_depth(dca) > get_Block_dom_depth(block))
    dca = get_Block_idom(dca);
  while (block != dca)
    { block = get_Block_idom(block); dca = get_Block_idom(dca); }

  return dca;
}

#if 0
/* @@@ Needs loop informations.  Will implement later interprocedural. */
/* NOTE(review): disabled code.  Intended to hoist N from its late
   placement DCA to the shallowest-loop-nesting dominator that is still
   dominated by N's early placement.  Also note the declaration of
   `best' after statements — not valid C89; fix before enabling. */
static void
move_out_of_loops (ir_node *n, ir_node *dca)
{
  assert(dca);

  /* Find the region deepest in the dominator tree dominating
     dca with the least loop nesting depth, but still dominated
     by our early placement. */
  ir_node *best = dca;
  while (dca != get_nodes_Block(n)) {
    dca = get_Block_idom(dca);
    if (!dca) break; /* should we put assert(dca)? */
    if (get_Block_loop_depth(dca) < get_Block_loop_depth(best)) {
      best = dca;
    }
  }
  if (get_Block_dom_depth(best) >= get_Block_dom_depth(get_nodes_Block(n)))
    set_nodes_Block(n, best);
}
#endif

/* Find the latest legal block for N and place N into the
   `optimal' Block between the latest and earliest legal block.
   The `optimal' block is the dominance-deepest block of those
   with the least loop-nesting-depth.  This places N out of as many
   loops as possible and then makes it as controldependant as
   possible.
   Walks the out edges (users) recursively; the visited flag prevents
   multiple placement.  Users of pinned nodes go on the worklist. */
static void
place_floats_late (ir_node *n)
{
  int i;

  assert (irn_not_visited(n)); /* no multiple placement */

  /* no need to place block nodes, control nodes are already placed. */
  if ((get_irn_op(n) != op_Block) && (!is_cfop(n)) && (get_irn_mode(n) != mode_X)) {
    /* Assure that our users are all placed, except the Phi-nodes.
       --- Each dataflow cycle contains at least one Phi-node.  We
       have to break the `user has to be placed before the
       producer' dependance cycle and the Phi-nodes are the
       place to do so, because we need to base our placement on the
       final region of our users, which is OK with Phi-nodes, as they
       are pinned, and they never have to be placed after a
       producer of one of their inputs in the same block anyway. */
    for (i = 0; i < get_irn_n_outs(n); i++) {
      ir_node *succ = get_irn_out(n, i);
      if (irn_not_visited(succ) && (get_irn_op(succ) != op_Phi))
	place_floats_late (succ);
    }

    /* We have to determine the final block of this node... except for constants. */
    /* (Constants keep the start-block placement chosen by place_early.) */
    if ((get_op_pinned(get_irn_op(n)) == floats) &&
	(get_irn_op(n) != op_Const) &&
	(get_irn_op(n) != op_SymConst)) {
      ir_node *dca = NULL;	/* deepest common ancestor in the
				   dominator tree of all nodes'
				   blocks depending on us; our final
				   placement has to dominate DCA. */
      for (i = 0; i < get_irn_n_outs(n); i++) {
	dca = consumer_dom_dca (dca, get_irn_out(n, i), n);
      }
      set_nodes_Block(n, dca);
#if 0
      move_out_of_loops (n, dca);
#endif
    }
  }

  mark_irn_visited(n);

  /* Add predecessors of all non-floating nodes on list. (Those of floating
     nodes are placeded already and therefore are marked.)  */
  for (i = 0; i < get_irn_n_outs(n); i++) {
    if (irn_not_visited(get_irn_out(n, i))) {
      pdeq_putr (worklist, get_irn_out(n, i));
    }
  }
}

/* Drive the late-placement pass: seed the worklist from the start
   block, then keep processing until no unvisited node remains. */
inline void place_late() {
  ir_node *node;

  assert(worklist);
  inc_irg_visited(current_ir_graph);

  /* Seeding: placing the start block fills the worklist. */
  place_floats_late(get_irg_start_block(current_ir_graph));

  /* Drain the worklist, skipping nodes that were handled recursively. */
  while (!pdeq_empty(worklist)) {
    node = pdeq_getl(worklist);
    if (irn_not_visited(node))
      place_floats_late(node);
  }
}

/* Place the code of graph IRG: first push all floating nodes as early
   as legal (guaranteeing a correct placement), then sink them down the
   dominator tree to reduce unnecessary executions.
   Requires a finished graph (not phase_building); computes dominator
   and out information as needed; leaves the outs inconsistent. */
void place_code(ir_graph *irg) {
  ir_graph *rem = current_ir_graph;

  if (!(get_optimize() && get_opt_global_cse())) return;

  /* Switch graphs only after the early exit above, so current_ir_graph
     is always restored on return (the old code set it before the check
     and leaked it to the caller when placement was disabled). */
  current_ir_graph = irg;

  /* Handle graph state */
  assert(get_irg_phase_state(irg) != phase_building);
  if (get_irg_dom_state(irg) != dom_consistent)
    compute_doms(irg);

  /* Place all floating nodes as early as possible. This guarantees
     a legal code placement. */
  worklist = new_pdeq ();
  place_early();

  /* place_early invalidates the outs, place_late needs them. */
  compute_outs(irg);
  /* Now move the nodes down in the dominator tree. This reduces the
     unnecessary executions of the node. */
  place_late();

  /* Moving nodes invalidated the out edges once more. */
  set_irg_outs_inconsistent(current_ir_graph);

  del_pdeq (worklist);
  current_ir_graph = rem;
}
997
998
999
1000



/********************************************************************/
For faster browsing, not all history is shown. View entire blame