irgopt.c 33.6 KB
Newer Older
1
/* Copyright (C) 1998 - 2000 by Universitaet Karlsruhe
Christian Schäfer's avatar
Christian Schäfer committed
2
3
4
5
** All rights reserved.
**
** Author: Christian Schaefer
**
Götz Lindenmaier's avatar
Götz Lindenmaier committed
6
** Optimizations for a whole ir graph, i.e., a procedure.
Christian Schäfer's avatar
Christian Schäfer committed
7
8
*/

Boris Boesler's avatar
Boris Boesler committed
9
10
/* $Id$ */

Boris Boesler's avatar
added    
Boris Boesler committed
11
12
13
14
#ifdef HAVE_CONFIG_H
# include <config.h>
#endif

Götz Lindenmaier's avatar
Götz Lindenmaier committed
15
16
# include <assert.h>

17
# include "irprog.h"
Christian Schäfer's avatar
Christian Schäfer committed
18
# include "irgopt.h"
Götz Lindenmaier's avatar
Götz Lindenmaier committed
19
# include "irnode_t.h"
Götz Lindenmaier's avatar
Götz Lindenmaier committed
20
# include "irgraph_t.h"
21
# include "iropt_t.h"
Christian Schäfer's avatar
Christian Schäfer committed
22
23
# include "irgwalk.h"
# include "ircons.h"
Götz Lindenmaier's avatar
Götz Lindenmaier committed
24
# include "misc.h"
Götz Lindenmaier's avatar
Götz Lindenmaier committed
25
# include "irgmod.h"
26
# include "array.h"
Götz Lindenmaier's avatar
Götz Lindenmaier committed
27
# include "pset.h"
Götz Lindenmaier's avatar
Götz Lindenmaier committed
28
29
# include "pdeq.h"  /* provisorisch fuer code placement */
# include "irouts.h"
30
31

/* Defined in iropt.c */
Götz Lindenmaier's avatar
Götz Lindenmaier committed
32
33
pset *new_identities (void);
void  del_identities (pset *value_table);
34
void  add_identities   (pset *value_table, ir_node *node);
35

36
37
#if 0  /* Warum ist das hier nochmal definiert?
	  Hat's nicht gelinkt wegen typeO tities - tity ?? */
38
39
40
41
42
/* To fill the hash table */
void
add_identity (pset *value_table, ir_node *n) {
  /* identify_remember (value_table, n);*/
}
43
#endif
Götz Lindenmaier's avatar
Götz Lindenmaier committed
44

Götz Lindenmaier's avatar
Götz Lindenmaier committed
45
46
/********************************************************************/
/* apply optimizations of iropt to all nodes.                       */
Götz Lindenmaier's avatar
Götz Lindenmaier committed
47
/********************************************************************/
Götz Lindenmaier's avatar
Götz Lindenmaier committed
48

49
50
51
52
/* Walker callback: reset the link field of a node to NULL so that it can
   later be used to store per-node data (e.g. the old->new mapping). */
void init_link (ir_node *n, void *env) {
  set_irn_link(n, NULL);
}

Christian Schäfer's avatar
Christian Schäfer committed
53
54
/* Walker callback: replace every predecessor of n by its locally
   optimized version.  For a Block only the control-flow predecessors
   (indices 0..arity-1) are handled; for all other nodes the loop also
   covers index -1, i.e. the node's Block. */
void
optimize_in_place_wrapper (ir_node *n, void *env) {
  int start, i;
  ir_node *optimized;

  /* Blocks have no predecessor at index -1; every other node does. */
  if (get_irn_op(n) == op_Block)
    start = 0;
  else
    start = -1;

  /* optimize all sons after recursion, i.e., the sons' sons are
     optimized already. */
  for (i = start; i < get_irn_arity(n); i++) {
    optimized = optimize_in_place_2(get_irn_n(n, i));
    set_irn_n(n, i, optimized);
    /* The optimizer must have resolved Id nodes already. */
    assert(get_irn_op(optimized) != op_Id);
  }
}

/* Applies local optimizations (see iropt.c) to all nodes in the graph
   irg.  Temporarily makes irg the current_ir_graph and restores the
   previous one before returning. */
void
local_optimize_graph (ir_graph *irg) {
  ir_graph *rem = current_ir_graph;
  current_ir_graph = irg;

  /* Handle graph state */
  assert(get_irg_phase_state(irg) != phase_building);
  /* Global cse moves nodes between blocks, so pinning is lost. */
  if (get_opt_global_cse())
    set_irg_pinned(current_ir_graph, floats);
  /* Rewiring edges invalidates any previously computed out edges. */
  if (get_irg_outs_state(current_ir_graph) == outs_consistent)
    set_irg_outs_inconsistent(current_ir_graph);

  /* Clean the value_table in irg for the cse. */
  del_identities(irg->value_table);
  irg->value_table = new_identities();

  /* walk over the graph: clear links on the way down, optimize on the
     way up so that each node's predecessors are already optimized. */
  irg_walk(irg->end, init_link, optimize_in_place_wrapper, NULL);

  current_ir_graph = rem;
}

Götz Lindenmaier's avatar
Götz Lindenmaier committed
94
95
96
/********************************************************************/
/* Routines for dead node elimination / copying garbage collection  */
/* of the obstack.                                                  */
Götz Lindenmaier's avatar
Götz Lindenmaier committed
97
/********************************************************************/
Götz Lindenmaier's avatar
Götz Lindenmaier committed
98

Götz Lindenmaier's avatar
Götz Lindenmaier committed
99
100
/* Remember the new node in the old node by using a field all nodes have. */
inline void
Christian Schäfer's avatar
Christian Schäfer committed
101
102
set_new_node (ir_node *old, ir_node *new)
{
Götz Lindenmaier's avatar
Götz Lindenmaier committed
103
  old->link = new;
Christian Schäfer's avatar
Christian Schäfer committed
104
105
106
}

/* Get this new node, before the old node is forgotten. */
Götz Lindenmaier's avatar
Götz Lindenmaier committed
107
/* Get the copy remembered by set_new_node, before the old node is
   forgotten. */
inline ir_node *
get_new_node (ir_node * n)
{
  return n->link;
}

Götz Lindenmaier's avatar
Götz Lindenmaier committed
113
114
/* We use the block_visited flag to mark that we have computed the
   number of useful predecessors for this block.
115
116
117
118
   Further we encode the new arity in this flag in the old blocks.
   Remembering the arity is useful, as it saves a lot of pointer
   accesses.  This function is called for all Phi and Block nodes
   in a Block. */
Götz Lindenmaier's avatar
Götz Lindenmaier committed
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
/* Returns the number of non-Bad predecessors of block b.  The result is
   cached by encoding it in the block_visited flag (as irg_v + result),
   so repeated calls for the same block are cheap.  Called for all Phi
   and Block nodes in a Block during copying. */
inline int
compute_new_arity(ir_node *b) {
  int irg_v = get_irg_block_visited(current_ir_graph);
  int block_v = get_Block_block_visited(b);

  if (block_v >= irg_v)
    return block_v - irg_v;   /* already computed; decode from the flag */

  {
    /* Count the useful (non-Bad) predecessors directly. */
    int i, n_good = 0;
    for (i = 0; i < get_irn_arity(b); i++)
      if (get_irn_opcode(get_irn_n(b, i)) != iro_Bad)
	n_good++;
    /* save it in the flag for subsequent calls. */
    set_Block_block_visited(b, irg_v + n_good);
    return n_good;
  }
}

/* Copies the node to the new obstack. The Ins of the new node point to
142
143
   the predecessors on the old obstack.  For block/phi nodes not all
   predecessors might be copied.  n->link points to the new node.
Götz Lindenmaier's avatar
Götz Lindenmaier committed
144
   For Phi and Block nodes the function allocates in-arrays with an arity
Götz Lindenmaier's avatar
Götz Lindenmaier committed
145
146
   only for useful predecessors.  The arity is determined by counting
   the non-bad predecessors of the block. */
147
void
Götz Lindenmaier's avatar
Götz Lindenmaier committed
148
149
copy_node (ir_node *n, void *env) {
  ir_node *nn, *block;
Götz Lindenmaier's avatar
Götz Lindenmaier committed
150
  int new_arity;
151

Götz Lindenmaier's avatar
Götz Lindenmaier committed
152
153
  if (get_irn_opcode(n) == iro_Block) {
    block = NULL;
Götz Lindenmaier's avatar
Götz Lindenmaier committed
154
    new_arity = compute_new_arity(n);
Götz Lindenmaier's avatar
Götz Lindenmaier committed
155
156
  } else {
    block = get_nodes_Block(n);
Götz Lindenmaier's avatar
Götz Lindenmaier committed
157
158
159
160
161
    if (get_irn_opcode(n) == iro_Phi) {
      new_arity = compute_new_arity(block);
    } else {
      new_arity = get_irn_arity(n);
    }
Götz Lindenmaier's avatar
Götz Lindenmaier committed
162
163
164
165
166
  }
  nn = new_ir_node(current_ir_graph,
		   block,
		   get_irn_op(n),
		   get_irn_mode(n),
Götz Lindenmaier's avatar
Götz Lindenmaier committed
167
		   new_arity,
Götz Lindenmaier's avatar
Götz Lindenmaier committed
168
		   get_irn_in(n));
Götz Lindenmaier's avatar
Götz Lindenmaier committed
169
170
171
  /* Copy the attributes.  These might point to additional data.  If this
     was allocated on the old obstack the pointers now are dangling.  This
     frees e.g. the memory of the graph_arr allocated in new_immBlock. */
Götz Lindenmaier's avatar
Götz Lindenmaier committed
172
173
  copy_attrs(n, nn);
  set_new_node(n, nn);
174
175
176
177

  /*  printf("\n old node: "); DDMSG2(n);
      printf(" new node: "); DDMSG2(nn); */

Christian Schäfer's avatar
Christian Schäfer committed
178
179
}

Götz Lindenmaier's avatar
Götz Lindenmaier committed
180
181
/* Copies new predecessors of old node to new node remembered in link.
   Spare the Bad predecessors of Phi and Block nodes. */
182
void
Götz Lindenmaier's avatar
Götz Lindenmaier committed
183
copy_preds (ir_node *n, void *env) {
Götz Lindenmaier's avatar
Götz Lindenmaier committed
184
  ir_node *nn, *block, *on;
Götz Lindenmaier's avatar
Götz Lindenmaier committed
185
  int i, j;
Götz Lindenmaier's avatar
Götz Lindenmaier committed
186
187

  nn = get_new_node(n);
Götz Lindenmaier's avatar
Götz Lindenmaier committed
188

189
190
191
192
  /* printf("\n old node: "); DDMSG2(n);
     printf(" new node: "); DDMSG2(nn);
     printf(" arities: old: %d, new: %d\n", get_irn_arity(n), get_irn_arity(nn)); */

Götz Lindenmaier's avatar
Götz Lindenmaier committed
193
194
195
196
197
198
199
200
  if (get_irn_opcode(n) == iro_Block) {
    /* Don't copy Bad nodes. */
    j = 0;
    for (i = 0; i < get_irn_arity(n); i++)
      if (get_irn_opcode(get_irn_n(n, i)) != iro_Bad) {
	set_irn_n (nn, j, get_new_node(get_irn_n(n, i)));
	j++;
      }
201
202
    /* repair the block visited flag from above misuse. Repair it in both
       graphs so that the old one can still be used. */
Götz Lindenmaier's avatar
Götz Lindenmaier committed
203
    set_Block_block_visited(nn, 0);
204
    set_Block_block_visited(n, 0);
Götz Lindenmaier's avatar
Götz Lindenmaier committed
205
    /* Local optimization could not merge two subsequent blocks if
206
       in array contained Bads.  Now it's possible.
207
       We don't call optimize_in_place as it requires
208
209
210
211
       that the fields in ir_graph are set properly. */
    if (get_Block_n_cfgpreds(nn) == 1
	&& get_irn_op(get_Block_cfgpred(nn, 0)) == op_Jmp)
      exchange(nn, get_nodes_Block(get_Block_cfgpred(nn, 0)));
Götz Lindenmaier's avatar
Götz Lindenmaier committed
212
213
214
215
216
217
218
219
220
221
222
  } else if (get_irn_opcode(n) == iro_Phi) {
    /* Don't copy node if corresponding predecessor in block is Bad.
       The Block itself should not be Bad. */
    block = get_nodes_Block(n);
    set_irn_n (nn, -1, get_new_node(block));
    j = 0;
    for (i = 0; i < get_irn_arity(n); i++)
      if (get_irn_opcode(get_irn_n(block, i)) != iro_Bad) {
	set_irn_n (nn, j, get_new_node(get_irn_n(n, i)));
	j++;
      }
223
224
225
    /* If the pre walker reached this Phi after the post walker visited the
       block block_visited is > 0. */
    set_Block_block_visited(get_nodes_Block(n), 0);
Götz Lindenmaier's avatar
Götz Lindenmaier committed
226
    /* Compacting the Phi's ins might generate Phis with only one
Götz Lindenmaier's avatar
Götz Lindenmaier committed
227
       predecessor. */
Götz Lindenmaier's avatar
Götz Lindenmaier committed
228
    if (get_irn_arity(n) == 1)
229
      exchange(n, get_irn_n(n, 0));
Götz Lindenmaier's avatar
Götz Lindenmaier committed
230
231
232
233
  } else {
    for (i = -1; i < get_irn_arity(n); i++)
      set_irn_n (nn, i, get_new_node(get_irn_n(n, i)));
  }
234
235
236
237
  /* Now the new node is complete.  We can add it to the hash table for cse.
     @@@ inlinening aborts if we identify End. Why? */
  if(get_irn_op(nn) != op_End)
    add_identities (current_ir_graph->value_table, nn);
Götz Lindenmaier's avatar
Götz Lindenmaier committed
238
239
}

240
/* Copies the graph recursively, compacts the keepalive of the end node. */
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
/* Copies all nodes reachable from the End node of current_ir_graph onto
   the graph's (new) obstack.  The End node itself is copied by hand
   first; the walker then copies everything reachable from its block,
   and finally the keep-alive edges of the old End are transferred. */
void
copy_graph () {
  ir_node *oe, *ne; /* old end, new end */
  ir_node *ka; /* keep alive */
  int i;

  oe = get_irg_end(current_ir_graph);
  /* copy the end node by hand, allocate dynamic in array! */
  ne = new_ir_node(current_ir_graph,
		   NULL,
		   op_End,
		   mode_X,
		   -1,
		   NULL);
  /* Copy the attributes.  Well, there might be some in the future... */
  copy_attrs(oe, ne);
  set_new_node(oe, ne);

  /* copy the live nodes */
  irg_walk(get_nodes_Block(oe), copy_node, copy_preds, NULL);
  /* copy_preds for the end node ... */
  set_nodes_Block(ne, get_new_node(get_nodes_Block(oe)));

  /** ... and now the keep alives. **/
  /* First pick the not marked block nodes and walk them.  We must pick these
     first as else we will oversee blocks reachable from Phis. */
  for (i = 0; i < get_irn_arity(oe); i++) {
    ka = get_irn_n(oe, i);
    if ((get_irn_op(ka) == op_Block) &&
	(get_irn_visited(ka) < get_irg_visited(current_ir_graph))) {
      /* We must keep the block alive and copy everything reachable */
      /* Decrement the visited counter so the walker re-enters this
	 not-yet-copied subgraph. */
      set_irg_visited(current_ir_graph, get_irg_visited(current_ir_graph)-1);
      irg_walk(ka, copy_node, copy_preds, NULL);
      add_End_keepalive(ne, get_new_node(ka));
    }
  }

  /* Now pick the Phis.  Here we will keep all! */
  for (i = 0; i < get_irn_arity(oe); i++) {
    ka = get_irn_n(oe, i);
    if ((get_irn_op(ka) == op_Phi)) {
      if (get_irn_visited(ka) < get_irg_visited(current_ir_graph)) {
	/* We didn't copy the Phi yet.  */
	set_irg_visited(current_ir_graph, get_irg_visited(current_ir_graph)-1);
	irg_walk(ka, copy_node, copy_preds, NULL);
      }
      add_End_keepalive(ne, get_new_node(ka));
    }
  }
}

Götz Lindenmaier's avatar
Götz Lindenmaier committed
292
/* Copies the graph reachable from current_ir_graph->end to the obstack
293
   in current_ir_graph and fixes the environment.
Götz Lindenmaier's avatar
Götz Lindenmaier committed
294
295
296
   Then fixes the fields in current_ir_graph containing nodes of the
   graph.  */
void
297
copy_graph_env () {
Götz Lindenmaier's avatar
Götz Lindenmaier committed
298
  /* Not all nodes remembered in current_ir_graph might be reachable
299
     from the end node.  Assure their link is set to NULL, so that
Götz Lindenmaier's avatar
Götz Lindenmaier committed
300
301
302
303
304
305
306
307
308
     we can test whether new nodes have been computed. */
  set_irn_link(get_irg_frame  (current_ir_graph), NULL);
  set_irn_link(get_irg_globals(current_ir_graph), NULL);
  set_irn_link(get_irg_args   (current_ir_graph), NULL);

  /* we use the block walk flag for removing Bads from Blocks ins. */
  inc_irg_block_visited(current_ir_graph);

  /* copy the graph */
309
  copy_graph();
Götz Lindenmaier's avatar
Götz Lindenmaier committed
310
311

  /* fix the fields in current_ir_graph */
312
  free_End(get_irg_end(current_ir_graph));
Götz Lindenmaier's avatar
Götz Lindenmaier committed
313
314
  set_irg_end        (current_ir_graph, get_new_node(get_irg_end(current_ir_graph)));
  set_irg_end_block  (current_ir_graph, get_new_node(get_irg_end_block(current_ir_graph)));
315
316
317
318
319
320
321
322
323
324
325
326
  if (get_irn_link(get_irg_frame(current_ir_graph)) == NULL) {
    copy_node (get_irg_frame(current_ir_graph), NULL);
    copy_preds(get_irg_frame(current_ir_graph), NULL);
  }
  if (get_irn_link(get_irg_globals(current_ir_graph)) == NULL) {
    copy_node (get_irg_globals(current_ir_graph), NULL);
    copy_preds(get_irg_globals(current_ir_graph), NULL);
  }
  if (get_irn_link(get_irg_args(current_ir_graph)) == NULL) {
    copy_node (get_irg_args(current_ir_graph), NULL);
    copy_preds(get_irg_args(current_ir_graph), NULL);
  }
Götz Lindenmaier's avatar
Götz Lindenmaier committed
327
  set_irg_start  (current_ir_graph, get_new_node(get_irg_start(current_ir_graph)));
328

Götz Lindenmaier's avatar
Götz Lindenmaier committed
329
330
331
332
333
334
335
336
337
338
  set_irg_start_block(current_ir_graph,
		      get_new_node(get_irg_start_block(current_ir_graph)));
  set_irg_frame  (current_ir_graph, get_new_node(get_irg_frame(current_ir_graph)));
  set_irg_globals(current_ir_graph, get_new_node(get_irg_globals(current_ir_graph)));
  set_irg_args   (current_ir_graph, get_new_node(get_irg_args(current_ir_graph)));
  if (get_irn_link(get_irg_bad(current_ir_graph)) == NULL) {
    copy_node(get_irg_bad(current_ir_graph), NULL);
    copy_preds(get_irg_bad(current_ir_graph), NULL);
  }
  set_irg_bad(current_ir_graph, get_new_node(get_irg_bad(current_ir_graph)));
Götz Lindenmaier's avatar
Götz Lindenmaier committed
339
340
}

341
342
343
344
345
346
/* Copies all reachable nodes to a new obstack.  Removes bad inputs
   from block nodes and the corresponding inputs from Phi nodes.
   Merges single exit blocks with single entry blocks and removes
   1-input Phis.
   Adds all new nodes to a new hash table for cse.  Does not
   perform cse, so the hash table might contain common subexpressions. */
Götz Lindenmaier's avatar
Götz Lindenmaier committed
347
348
349
350
351
352
353
354
355
356
357
/* Amroq call this emigrate() */
/* Copies all reachable nodes to a new obstack and frees the old one —
   i.e., garbage-collects dead nodes.  Removes Bad inputs from Block
   nodes and the corresponding inputs from Phi nodes, merges single-exit
   with single-entry blocks and removes 1-input Phis.  Adds all new
   nodes to a new hash table for cse (does not itself perform cse).
   Amroq calls this emigrate(). */
void
dead_node_elimination(ir_graph *irg) {
  ir_graph *rem;
  struct obstack *graveyard_obst = NULL;
  struct obstack *rebirth_obst   = NULL;

  /* Remember external state of current_ir_graph. */
  rem = current_ir_graph;
  current_ir_graph = irg;

  /* Handle graph state */
  assert(get_irg_phase_state(current_ir_graph) != phase_building);
  /* Node addresses change, so the out edges become stale. */
  free_outs(current_ir_graph);

  if (get_optimize() && get_opt_dead_node_elimination()) {

    /* A quiet place, where the old obstack can rest in peace,
       until it will be cremated. */
    graveyard_obst = irg->obst;

    /* A new obstack, where the reachable nodes will be copied to. */
    rebirth_obst = (struct obstack *) xmalloc (sizeof (struct obstack));
    current_ir_graph->obst = rebirth_obst;
    obstack_init (current_ir_graph->obst);

    /* We also need a new hash table for cse: the old one holds pointers
       into the graveyard obstack. */
    del_identities (irg->value_table);
    irg->value_table = new_identities ();

    /* Copy the graph from the old to the new obstack */
    copy_graph_env();

    /* Free memory from old unoptimized obstack */
    obstack_free(graveyard_obst, 0);  /* First empty the obstack ... */
    xfree (graveyard_obst);           /* ... then free it.           */
  }

  current_ir_graph = rem;
}
387
388
389
390
391

/**********************************************************************/
/*  Functionality for inlining                                        */
/**********************************************************************/

Götz Lindenmaier's avatar
Götz Lindenmaier committed
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
/* Copy node for inlineing.  Copies the node by calling copy_node and
   then updates the entity if it's a local one.  env must be a pointer
   to the frame type of the procedure. The new entities must be in
   the link field of the entities. */
/* Copy node for inlineing.  Copies the node by calling copy_node and
   then updates the entity if it's a local one.  env must be a pointer
   to the frame type of the procedure.  The new entities must be in
   the link field of the entities. */
void
copy_node_inline (ir_node *n, void *env) {
  type *frame_tp = (type *)env;

  copy_node(n, NULL);
  if (get_irn_op(n) == op_Sel) {
    ir_node *copy = get_new_node (n);
    assert(get_irn_op(copy) == op_Sel);
    /* Redirect Sels into the old frame to the replicated entities. */
    if (get_entity_owner(get_Sel_entity(n)) == frame_tp) {
      set_Sel_entity(copy, get_entity_link(get_Sel_entity(n)));
    }
  }
}

411
412
413
414
415
416
417
418
419
420
/* Inlines the body of called_graph at the Call node call in
   current_ir_graph.  The Call is split into a pre_call Tuple (feeding
   the inlined Start) and a post_call Tuple (collecting the inlined
   results); local entities of the callee's frame are replicated into
   the caller's frame; then a dead-node-elimination-style copy walks the
   callee into the caller's obstack. */
void inline_method(ir_node *call, ir_graph *called_graph) {
  ir_node *pre_call;
  ir_node *post_call, *post_bl;
  ir_node *in[5];
  ir_node *end, *end_bl;
  ir_node **res_pred;
  ir_node **cf_pred;
  ir_node *ret, *phi;
  ir_node *cf_op, *bl;
  int arity, n_ret, n_exc, n_res, i, j;
  type *called_frame;

  if (!get_opt_inline()) return;

  /* Handle graph state */
  assert(get_irg_phase_state(current_ir_graph) != phase_building);
  if (get_irg_outs_state(current_ir_graph) == outs_consistent)
    set_irg_outs_inconsistent(current_ir_graph);

  /** Check preconditions **/
  assert(get_irn_op(call) == op_Call);
  assert(get_Call_type(call) == get_entity_type(get_irg_ent(called_graph)));
  assert(get_type_tpop(get_Call_type(call)) == type_method);
  /* Recursive calls cannot be inlined into themselves. */
  if (called_graph == current_ir_graph) return;

  /** Part the Call node into two nodes.  Pre_call collects the parameters
      of the procedure and later replaces the Start node of the called
      graph.  Post_call is the old Call node and collects the results of
      the called graph.  Both will end up being a tuple. **/
  post_bl = get_nodes_Block(call);
  set_irg_current_block(current_ir_graph, post_bl);
  /* XxMxPxP of Start + parameter of Call */
  in[0] = new_Jmp();
  in[1] = get_Call_mem(call);
  in[2] = get_irg_frame(current_ir_graph);
  in[3] = get_irg_globals(current_ir_graph);
  in[4] = new_Tuple (get_Call_n_params(call),
		     get_Call_param_arr(call));
  pre_call = new_Tuple(5, in);
  post_call = call;

  /** Part the block of the Call node into two blocks.
      The new block gets the ins of the old block, pre_call and all its
      predecessors and all Phi nodes. **/
  part_block(pre_call);

  /** Prepare state for dead node elimination **/
  /* Visited flags in calling irg must be >= flag in called irg.
     Else walker and arity computation will not work. */
  if (get_irg_visited(current_ir_graph) <= get_irg_visited(called_graph))
    set_irg_visited(current_ir_graph, get_irg_visited(called_graph)+1);
  if (get_irg_block_visited(current_ir_graph) < get_irg_block_visited(called_graph))
    set_irg_block_visited(current_ir_graph, get_irg_block_visited(called_graph));
  /* Set pre_call as new Start node in link field of the start node of
     calling graph and pre_call's block as new block for the start block
     of calling graph.  Further mark these nodes so that they are not
     visited by the copying. */
  set_irn_link(get_irg_start(called_graph), pre_call);
  set_irn_visited(get_irg_start(called_graph),
		  get_irg_visited(current_ir_graph));
  set_irn_link(get_irg_start_block(called_graph),
	       get_nodes_Block(pre_call));
  set_irn_visited(get_irg_start_block(called_graph),
		  get_irg_visited(current_ir_graph));

  /* Initialize for compaction of in arrays */
  inc_irg_block_visited(current_ir_graph);
  /* NOTE(review): the original carried a commented-out
     set_Block_block_visited(get_irg_start_block(called_graph),
     get_irg_block_visited(current_ir_graph)+1+1) to count a self edge;
     the nested comment was malformed.  Kept disabled here. */

  /*** Replicate local entities of the called_graph ***/
  /* copy the entities. */
  called_frame = get_irg_frame_type(called_graph);
  for (i = 0; i < get_class_n_member(called_frame); i++) {
    entity *new_ent, *old_ent;
    old_ent = get_class_member(called_frame, i);
    new_ent = copy_entity_own(old_ent, get_cur_frame_type());
    set_entity_link(old_ent, new_ent);
  }

  /* visited is > than that of called graph.  With this trick visited will
     remain unchanged so that an outer walker, e.g., searching the call
     nodes to inline, calling this inline will not visit the inlined
     nodes. */
  set_irg_visited(current_ir_graph, get_irg_visited(current_ir_graph)-1);

  /** Performing dead node elimination inlines the graph **/
  /* Copies the nodes to the obstack of current_ir_graph. Updates links to
     new entities. */
  /* @@@ endless loops are not copied!! */
  irg_walk(get_irg_end(called_graph), copy_node_inline, copy_preds,
	   get_irg_frame_type(called_graph));

  /* Repair called_graph */
  set_irg_visited(called_graph, get_irg_visited(current_ir_graph));
  set_irg_block_visited(called_graph, get_irg_block_visited(current_ir_graph));
  set_Block_block_visited(get_irg_start_block(called_graph), 0);

  /*** Merge the end of the inlined procedure with the call site ***/
  /* We will turn the old Call node into a Tuple with the following
     predecessors:
     -1: Block of Tuple.
      0: Phi of all Memories of Return statements.
      1: Jmp from new Block that merges the control flow from all
	 exception predecessors of the old end block.
      2: Tuple of all arguments.
      3: Phi of Exception memories.
  */

  /** Precompute some values **/
  end_bl = get_new_node(get_irg_end_block(called_graph));
  end = get_new_node(get_irg_end(called_graph));
  arity = get_irn_arity(end_bl);    /* arity = n_exc + n_ret  */
  n_res = get_method_n_res(get_Call_type(call));

  /* NOTE(review): malloc results are not checked here, matching the
     file's style; xmalloc would abort on OOM instead. */
  res_pred = (ir_node **) malloc ((n_res) * sizeof (ir_node *));
  cf_pred = (ir_node **) malloc (arity * sizeof (ir_node *));

  set_irg_current_block(current_ir_graph, post_bl); /* just to make sure */

  /** archive keepalives **/
  for (i = 0; i < get_irn_arity(end); i++)
    add_End_keepalive(get_irg_end(current_ir_graph), get_irn_n(end, i));
  /* The new end node will die, but the in array is not on the obstack ... */
  free_End(end);

  /** Collect control flow from Return blocks to post_call's block.
      Replace Return nodes by Jmp nodes. **/
  n_ret = 0;
  for (i = 0; i < arity; i++) {
    ir_node *ret;
    ret = get_irn_n(end_bl, i);
    if (get_irn_op(ret) == op_Return) {
      cf_pred[n_ret] = new_r_Jmp(current_ir_graph, get_nodes_Block(ret));
      n_ret++;
    }
  }
  set_irn_in(post_bl, n_ret, cf_pred);

  /** Collect results from Return nodes to post_call. Post_call is
      turned into a tuple. **/
  turn_into_tuple(post_call, 4);
  /* First the Memory-Phi */
  n_ret = 0;
  for (i = 0; i < arity; i++) {
    ret = get_irn_n(end_bl, i);
    if (get_irn_op(ret) == op_Return) {
      cf_pred[n_ret] = get_Return_mem(ret);
      n_ret++;
    }
  }
  phi = new_Phi(n_ret, cf_pred, mode_M);
  set_Tuple_pred(call, 0, phi);
  set_irn_link(phi, get_irn_link(post_bl));  /* Conserve Phi-list for further inlinings */
  set_irn_link(post_bl, phi);
  /* Now the real results */
  if (n_res > 0) {
    for (j = 0; j < n_res; j++) {
      n_ret = 0;
      for (i = 0; i < arity; i++) {
	ret = get_irn_n(end_bl, i);
	if (get_irn_op(ret) == op_Return) {
	  cf_pred[n_ret] = get_Return_res(ret, j);
	  n_ret++;
	}
      }
      phi = new_Phi(n_ret, cf_pred, get_irn_mode(cf_pred[0]));
      res_pred[j] = phi;
      set_irn_link(phi, get_irn_link(post_bl));  /* Conserve Phi-list for further inlinings */
      set_irn_link(post_bl, phi);
    }
    set_Tuple_pred(call, 2, new_Tuple(n_res, res_pred));
  } else {
    set_Tuple_pred(call, 2, new_Bad());
  }
  /* Finally the exception control flow.  We need to add a Phi node to
     collect the memory containing the exception objects.  Further we need
     to add another block to get a correct representation of this Phi.  To
     this block we add a Jmp that resolves into the X output of the Call
     when the Call is turned into a tuple. */
  n_exc = 0;
  for (i = 0; i < arity; i++) {
    ir_node *ret;
    ret = get_irn_n(end_bl, i);
    if (is_fragile_op(skip_Proj(ret)) || (get_irn_op(skip_Proj(ret)) == op_Raise)) {
      cf_pred[n_exc] = ret;
      n_exc++;
    }
  }
  if (n_exc > 0) {
    new_Block(n_exc, cf_pred);      /* watch it: current_block is changed! */
    set_Tuple_pred(call, 1, new_Jmp());
    /* The Phi for the memories with the exception objects */
    n_exc = 0;
    for (i = 0; i < arity; i++) {
      ir_node *ret;
      ret = skip_Proj(get_irn_n(end_bl, i));
      if (get_irn_op(ret) == op_Call) {
	cf_pred[n_exc] = new_r_Proj(current_ir_graph, get_nodes_Block(ret), ret, mode_M, 3);
	n_exc++;
      } else if (is_fragile_op(ret)) {
	/* We rely that all cfops have the memory output at the same position. */
	cf_pred[n_exc] = new_r_Proj(current_ir_graph, get_nodes_Block(ret), ret, mode_M, 0);
	n_exc++;
      } else if (get_irn_op(ret) == op_Raise) {
	cf_pred[n_exc] = new_r_Proj(current_ir_graph, get_nodes_Block(ret), ret, mode_M, 1);
	n_exc++;
      }
    }
    set_Tuple_pred(call, 3, new_Phi(n_exc, cf_pred, mode_M));
  } else {
    set_Tuple_pred(call, 1, new_Bad());
    set_Tuple_pred(call, 3, new_Bad());
  }
  free(res_pred);
  free(cf_pred);

  /*** Correct the control flow to the end node.
       If the exception control flow from the Call directly branched to the
       end block we now have the following control flow predecessor pattern:
       ProjX -> Tuple -> Jmp.
       We must remove the Jmp along with its empty block and add Jmp's
       predecessors as predecessors of this end block. ***/
  /* find the problematic predecessor of the end block. */
  end_bl = get_irg_end_block(current_ir_graph);
  for (i = 0; i < get_Block_n_cfgpreds(end_bl); i++) {
    cf_op = get_Block_cfgpred(end_bl, i);
    if (get_irn_op(cf_op) == op_Proj) {
      cf_op = get_Proj_pred(cf_op);
      if (get_irn_op(cf_op) == op_Tuple) {
	cf_op = get_Tuple_pred(cf_op, 1);
	assert(get_irn_op(cf_op) == op_Jmp);
	break;
      }
    }
  }
  /* repair */
  if (i < get_Block_n_cfgpreds(end_bl)) {
    bl = get_nodes_Block(cf_op);
    arity = get_Block_n_cfgpreds(end_bl) + get_Block_n_cfgpreds(bl) - 1;
    cf_pred = (ir_node **) malloc (arity * sizeof (ir_node *));
    /* preds before the problematic one stay in place, ... */
    for (j = 0; j < i; j++)
      cf_pred[j] = get_Block_cfgpred(end_bl, j);
    /* ... the Jmp's block preds are spliced in, ... */
    for (j = j; j < i + get_Block_n_cfgpreds(bl); j++)
      cf_pred[j] = get_Block_cfgpred(bl, j-i);
    /* ... and the remaining end-block preds follow. */
    for (j = j; j < arity; j++)
      cf_pred[j] = get_Block_cfgpred(end_bl, j-get_Block_n_cfgpreds(bl) +1);
    set_irn_in(end_bl, arity, cf_pred);
    free(cf_pred);
  }
}
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741

/********************************************************************/
/* Apply inlineing to small methods.                                */
/********************************************************************/

static int pos;

/* It makes no sense to inline too many calls in one procedure. Anyways,
   I didn't get a version with NEW_ARR_F to run. */
#define MAX_INLINE 1024

/* Walker callback: records in env (an array of MAX_INLINE Call nodes,
   filled up to the global index `pos') every Call node whose address
   operand is a Const referencing an entity that has a firm graph, i.e.,
   a locally defined method we could inline. */
static void collect_calls(ir_node *call, void *env) {
  ir_node **call_buf = (ir_node **)env;
  ir_node *call_ptr;
  tarval *const_val;

  /* Only Call nodes are interesting. */
  if (get_irn_op(call) != op_Call) return;

  call_ptr = get_Call_ptr(call);
  if (get_irn_op(call_ptr) != op_Const) return;

  /* Check whether the constant is the pointer to a compiled entity. */
  const_val = get_Const_tarval(call_ptr);
  if (!const_val->u.p.ent) return;

  if (get_entity_irg(const_val->u.p.ent) && pos < MAX_INLINE) {
    /* The Call node calls a locally defined method.  Remember to inline. */
    call_buf[pos++] = call;
  }
}


/* Inlines all small methods at call sites where the called address comes
   from a Const node that references the entity representing the called
   method.
   The size argument is a rough measure for the code size of the method:
   Methods where the obstack containing the firm graph is smaller than
   size are inlined. */
void inline_small_irgs(ir_graph *irg, int size) {
  int i;
  ir_node *calls[MAX_INLINE];
  ir_graph *rem = current_ir_graph;

  if (!(get_optimize() && get_opt_inline())) return;

  current_ir_graph = irg;
  /* Handle graph state */
  assert(get_irg_phase_state(current_ir_graph) != phase_building);

  /* Find Call nodes to inline.
     (We can not inline during a walk of the graph, as inlineing the same
     method several times changes the visited flag of the walked graph:
     after the first inlineing visited of the callee equals visited of
     the caller.  With the next inlineing both are increased.) */
  pos = 0;
  irg_walk(get_irg_end(irg), NULL, collect_calls, (void *) calls);

  /* collect_calls never writes past MAX_INLINE, so pos <= MAX_INLINE here.
     The old test `pos < MAX_INLINE' wrongly skipped inlining entirely when
     the buffer was exactly full. */
  if (pos > 0) {
    /* There are calls to inline */
    collect_phiprojs(irg);
    for (i = 0; i < pos; i++) {
      tarval *tv;
      ir_graph *callee;
      tv = get_Const_tarval(get_Call_ptr(calls[i]));
      callee = get_entity_irg(tv->u.p.ent);
      /* Inline only if the callee's graph obstack is smaller than size. */
      if ((_obstack_memory_used(callee->obst) - obstack_room(callee->obst)) < size) {
	inline_method(calls[i], callee);
      }
    }
  }

  current_ir_graph = rem;
}
Götz Lindenmaier's avatar
Götz Lindenmaier committed
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993


/********************************************************************/
/*  Code Placement.  Pins all floating nodes to a block where they  */
/*  will be executed only if needed.                                */
/********************************************************************/

static pdeq *worklist;		/* worklist of ir_node*s */

/* Find the earliest correct block for N.  --- Place N into the
   same Block as its dominance-deepest Input.
   Recurses depth-first over floating inputs; relies on the graph's
   visited flag (set by the caller via inc_irg_visited) to terminate.
   Pinned predecessors that are not yet visited are pushed on the
   global worklist instead of being recursed into. */
static void
place_floats_early (ir_node *n)
{
  int i, start;

  /* we must not run into an infinite loop */
  assert (irn_not_visited(n));
  mark_irn_visited(n);

  /* Place floating nodes. */
  if (get_op_pinned(get_irn_op(n)) == floats) {
    int depth = 0;            /* dom depth of the best block found so far */
    ir_node *b = new_Bad();   /* The block to place this node in */

    assert(get_irn_op(n) != op_Block);

    if ((get_irn_op(n) == op_Const) ||
	(get_irn_op(n) == op_SymConst) ||
	(is_Bad(n))) {
      /* These nodes will not be placed by the loop below. */
      b = get_irg_start_block(current_ir_graph);
      depth = 1;
    }

    /* find the block for this node: the dominance-deepest block among
       the blocks of all inputs. */
    for (i = 0; i < get_irn_arity(n); i++) {
      ir_node *dep = get_irn_n(n, i);
      ir_node *dep_block;
      if ((irn_not_visited(dep)) &&
	  (get_op_pinned(get_irn_op(dep)) == floats)) {
	place_floats_early (dep);
      }
      /* Because all loops contain at least one pinned node, now all
         our inputs are either pinned or place_early has already
         been finished on them.  We do not have any unfinished inputs!  */
      dep_block = get_nodes_Block(dep);
      if ((!is_Bad(dep_block)) &&
	  (get_Block_dom_depth(dep_block) > depth)) {
	b = dep_block;
	depth = get_Block_dom_depth(dep_block);
      }
      /* Avoid that the node is placed in the Start block */
      if ((depth == 1) && (get_Block_dom_depth(get_nodes_Block(n)) > 1)) {
	/* Move it to the start block's (only) control-flow successor
	   instead.  NOTE(review): assumes out edges are computed here —
	   confirm against callers. */
	b = get_Block_cfg_out(get_irg_start_block(current_ir_graph), 0);
	assert(b != get_irg_start_block(current_ir_graph));
	depth = 2;
      }
    }
    set_nodes_Block(n, b);
  }

  /* Add predecessors of non floating nodes on worklist.  For Blocks the
     control-flow predecessors start at index 0; other nodes also have
     their block at index -1, hence the different start index. */
  start = (get_irn_op(n) == op_Block) ? 0 : -1;
  for (i = start; i < get_irn_arity(n); i++) {
    ir_node *pred = get_irn_n(n, i);
    if (irn_not_visited(pred)) {
      pdeq_putr (worklist, pred);
    }
  }
}

/* Floating nodes form subgraphs that begin at nodes as Const, Load,
   Start, Call and end at pinned nodes as Store, Call.  Place_early
   places all floating nodes reachable from its argument through floating
   nodes and adds all beginnings at pinned nodes to the worklist.
   Requires `worklist' to be allocated by the caller; invalidates the
   out edges of the current graph. */
inline void place_early () {
  /* Removed the unused locals `int i' and `bool del_me'. */
  assert(worklist);
  inc_irg_visited(current_ir_graph);

  /* this inits the worklist */
  place_floats_early (get_irg_end(current_ir_graph));

  /* Work the content of the worklist. */
  while (!pdeq_empty (worklist)) {
    ir_node *n = pdeq_getl (worklist);
    if (irn_not_visited(n)) place_floats_early (n);
  }

  /* Moving nodes between blocks invalidates the out edges; afterwards
     every node has a fixed block, so the graph counts as pinned. */
  set_irg_outs_inconsistent(current_ir_graph);
  current_ir_graph->pinned = pinned;
}


/* Deepest common dominance ancestor of DCA and the block(s) in which
   CONSUMER effectively uses PRODUCER.  Returns that common ancestor;
   if dca is NULL, returns consumer's effective-use block itself. */
static ir_node *
consumer_dom_dca (ir_node *dca, ir_node *consumer, ir_node *producer)
{
  /* Initialize to NULL: if consumer is a Phi and producer is not among
     its inputs the loop below assigns nothing, and the old code read an
     uninitialized pointer (undefined behavior).  Now the assert fires
     deterministically instead. */
  ir_node *block = NULL;

  /* Compute the latest block into which we can place a node so that it is
     before consumer. */
  if (get_irn_op(consumer) == op_Phi) {
    /* our consumer is a Phi-node, the effective use is in all those
       blocks through which the Phi-node reaches producer */
    int i;
    ir_node *phi_block = get_nodes_Block(consumer);
    for (i = 0;  i < get_irn_arity(consumer); i++) {
      if (get_irn_n(consumer, i) == producer) {
	block = get_nodes_Block(get_Block_cfgpred(phi_block, i));
      }
    }
  } else {
    assert(is_no_Block(consumer));
    block = get_nodes_Block(consumer);
  }

  /* Compute the deepest common ancestor of block and dca. */
  assert(block);  /* consumer must actually use producer */
  if (!dca) return block;
  /* Walk both blocks up the dominator tree to equal depth, then in
     lock-step until they meet. */
  while (get_Block_dom_depth(block) > get_Block_dom_depth(dca))
    block = get_Block_idom(block);
  while (get_Block_dom_depth(dca) > get_Block_dom_depth(block))
    dca = get_Block_idom(dca);
  while (block != dca)
    { block = get_Block_idom(block); dca = get_Block_idom(dca); }

  return dca;
}

#if 0
/* @@@ Needs loop informations.  Will implement later interprocedural. */
/* Disabled: hoist N out of loops by choosing, among the dominators of
   dca that are still dominated by N's early placement, the block with
   the smallest loop nesting depth.
   NOTE(review): the declaration of `best' after statements is not valid
   C89; move it to the top of the block before re-enabling. */
static void
move_out_of_loops (ir_node *n, ir_node *dca)
{
  assert(dca);

  /* Find the region deepest in the dominator tree dominating
     dca with the least loop nesting depth, but still dominated
     by our early placement. */
  ir_node *best = dca;
  while (dca != get_nodes_Block(n)) {
    dca = get_Block_idom(dca);
    if (!dca) break; /* should we put assert(dca)? */
    if (get_Block_loop_depth(dca) < get_Block_loop_depth(best)) {
      best = dca;
    }
  }
  /* Only move if the chosen block is still dominated by (or equal to)
     the early placement, so all inputs remain available. */
  if (get_Block_dom_depth(best) >= get_Block_dom_depth(get_nodes_Block(n)))
    set_nodes_Block(n, best);
}
#endif

/* Find the latest legal block for N and place N into the
   `optimal' Block between the latest and earliest legal block.
   The `optimal' block is the dominance-deepest block of those
   with the least loop-nesting-depth.  This places N out of as many
   loops as possible and then makes it as control dependent as
   possible.
   Recurses depth-first over N's users (out edges); Phi-nodes break the
   cycles.  Requires out edges and dominator information. */
static void
place_floats_late (ir_node *n)
{
  int i;

  assert (irn_not_visited(n)); /* no multiple placement */

  /* no need to place block nodes, control nodes are already placed. */
  if ((get_irn_op(n) != op_Block) && (!is_cfop(n)) && (get_irn_mode(n) != mode_X)) {
    /* Assure that our users are all placed, except the Phi-nodes.
       --- Each dataflow cycle contains at least one Phi-node.  We
       have to break the `user has to be placed before the
       producer' dependance cycle and the Phi-nodes are the
       place to do so, because we need to base our placement on the
       final region of our users, which is OK with Phi-nodes, as they
       are pinned, and they never have to be placed after a
       producer of one of their inputs in the same block anyway. */
    for (i = 0; i < get_irn_n_outs(n); i++) {
      ir_node *succ = get_irn_out(n, i);
      if (irn_not_visited(succ) && (get_irn_op(succ) != op_Phi))
	place_floats_late (succ);
    }

    /* We have to determine the final block of this node... except for constants. */
    if ((get_op_pinned(get_irn_op(n)) == floats) &&
	(get_irn_op(n) != op_Const) &&
	(get_irn_op(n) != op_SymConst)) {
      ir_node *dca = NULL;	/* deepest common ancestor in the
				   dominator tree of all nodes'
				   blocks depending on us; our final
				   placement has to dominate DCA. */
      for (i = 0; i < get_irn_n_outs(n); i++) {
	dca = consumer_dom_dca (dca, get_irn_out(n, i), n);
      }
      set_nodes_Block(n, dca);
#if 0
      move_out_of_loops (n, dca);
#endif
    }
  }

  mark_irn_visited(n);

  /* Add predecessors of all non-floating nodes on list. (Those of floating
     nodes are placed already and therefore are marked.)  */
  for (i = 0; i < get_irn_n_outs(n); i++) {
    if (irn_not_visited(get_irn_out(n, i))) {
      pdeq_putr (worklist, get_irn_out(n, i));
    }
  }
}

/* Second placement pass: move all floating nodes as late (dominance-deep)
   as legal, starting from the start block and draining the worklist that
   place_floats_late refills. */
inline void place_late() {
  assert(worklist);
  inc_irg_visited(current_ir_graph);

  /* Seed the worklist. */
  place_floats_late(get_irg_start_block(current_ir_graph));

  /* Drain it until no unvisited nodes remain. */
  while (!pdeq_empty(worklist)) {
    ir_node *node = pdeq_getl(worklist);
    if (irn_not_visited(node))
      place_floats_late(node);
  }
}

/* Pin every floating node of irg to a block: first as early as legal,
   then as late as legal (reducing unnecessary executions).  Computes
   dominator and out-edge information as needed. */
void place_code(ir_graph *irg) {
  ir_graph *rem;

  /* Check the options BEFORE touching current_ir_graph: the old code
     set current_ir_graph = irg first and then returned early, leaving
     the global clobbered and never restored. */
  if (!(get_optimize() && get_opt_global_cse())) return;

  rem = current_ir_graph;
  current_ir_graph = irg;

  /* Handle graph state */
  assert(get_irg_phase_state(irg) != phase_building);
  if (get_irg_dom_state(irg) != dom_consistent)
    compute_doms(irg);

  /* Place all floating nodes as early as possible. This guarantees
     a legal code placement. */
  worklist = new_pdeq ();
  place_early();

  /* place_early invalidates the outs, place_late needs them. */
  compute_outs(irg);
  /* Now move the nodes down in the dominator tree. This reduces the
     unnecessary executions of the node. */
  place_late();

  del_pdeq (worklist);
  current_ir_graph = rem;
}