/*
 * This file is part of libFirm.
 * Copyright (C) 2012 University of Karlsruhe.
 */

/**
 * @file
 * @brief   parallelizing Load/Store optimization
 * @author  Christoph Mallon
 */
11
12
#include "iroptimize.h"

#include "array.h"
#include "ircons.h"
#include "irgraph_t.h"
#include "irgmod.h"
#include "irgopt.h"
#include "irgwalk.h"
#include "irmemory.h"
#include "irnode_t.h"
#include "irnodeset.h"
#include "obst.h"
#include "irdump.h"
#include "irflag_t.h"
#include "iredges_t.h"
#include "type_t.h"

28
/**
 * State shared by one parallelization attempt for a single
 * Load/Store/CopyB (the "origin" operation).
 */
typedef struct parallelize_info
{
	ir_node      *origin_block; /**< Block of the origin operation; the walk never leaves it. */
	ir_node      *origin_ptr;   /**< Address the origin operation accesses (unused for CopyB). */
	ir_type      *origin_type;  /**< Type of ptr destination. */
	unsigned      origin_size;  /**< size of memory access. */
	ir_nodeset_t  this_mem;     /**< Memory nodes the origin must still depend on. */
	ir_nodeset_t  user_mem;     /**< Memory ops proven independent of the origin. */
	ir_nodeset_t  all_visited;  /**< All nodes seen so far, to avoid re-walking subgraphs. */
} parallelize_info;
38

39
/**
 * Walks the memory chain above a Load, partitioning the visited nodes:
 * memory ops in the origin block that provably do not conflict with the
 * Load (other Loads, non-aliasing Stores/CopyBs) go into pi->user_mem
 * and are skipped past; every node where the walk has to stop goes into
 * pi->this_mem and remains a dependency of the Load.
 *
 * @param pi   parallelization state for the origin Load
 * @param irn  current memory node (mode_M) being classified
 */
static void parallelize_load(parallelize_info *pi, ir_node *irn)
{
	/* There is no point in investigating the same subgraph twice */
	if (ir_nodeset_contains(&pi->all_visited, irn))
		return;

	ir_nodeset_insert(&pi->all_visited, irn);

	/* Only look at memory ops in the same block as the origin Load. */
	if (get_nodes_block(irn) == pi->origin_block) {
		if (is_Proj(irn)) {
			ir_node *pred = get_Proj_pred(irn);
			if (is_Load(pred) &&
					get_Load_volatility(pred) == volatility_non_volatile) {
				/* Two Loads never conflict: skip past unconditionally. */
				ir_node *mem = get_Load_mem(pred);
				ir_nodeset_insert(&pi->user_mem, irn);
				parallelize_load(pi, mem);
				return;
			} else if (is_Store(pred) &&
					get_Store_volatility(pred) == volatility_non_volatile) {
				ir_type *org_type   = pi->origin_type;
				unsigned org_size   = pi->origin_size;
				ir_node *org_ptr    = pi->origin_ptr;
				ir_type *store_type = get_Store_type(pred);
				unsigned store_size = get_mode_size_bytes(get_irn_mode(get_Store_value(pred)));
				ir_node *store_ptr  = get_Store_ptr(pred);
				/* A Store may be bypassed only if it provably does not
				 * write the memory the Load reads. */
				if (get_alias_relation(org_ptr, org_type, org_size,
				                       store_ptr, store_type, store_size) == ir_no_alias) {
					ir_node *mem = get_Store_mem(pred);
					ir_nodeset_insert(&pi->user_mem, irn);
					parallelize_load(pi, mem);
					return;
				}
			}
		} else if (is_Sync(irn)) {
			/* Classify every operand of the Sync individually. */
			for (int i = 0, n = get_Sync_n_preds(irn); i < n; ++i) {
				ir_node *sync_pred = get_Sync_pred(irn, i);
				parallelize_load(pi, sync_pred);
			}
			return;
		} else if (is_CopyB(irn) &&
		           get_CopyB_volatility(irn) == volatility_non_volatile) {
			ir_type *org_type   = pi->origin_type;
			unsigned org_size   = pi->origin_size;
			ir_node *org_ptr    = pi->origin_ptr;
			ir_type *copyB_type = get_CopyB_type(irn);
			ir_node *copyB_dst  = get_CopyB_dst(irn);
			unsigned copyB_size = get_type_size_bytes(copyB_type);
			/* Only the CopyB's destination can conflict with a Load;
			 * its source is merely read. */
			if (get_alias_relation(org_ptr, org_type, org_size,
			                       copyB_dst, copyB_type, copyB_size) == ir_no_alias) {
				ir_node *mem = get_CopyB_mem(irn);
				ir_nodeset_insert(&pi->user_mem, irn);
				parallelize_load(pi, mem);
				return;
			}
		}
	}
	/* Cannot skip past this node: the Load must keep depending on it. */
	ir_nodeset_insert(&pi->this_mem, irn);
}

98
/**
 * Walks the memory chain above a Store, collecting in pi->user_mem all
 * memory ops in the origin block that provably do not conflict with the
 * Store, and in pi->this_mem all nodes the Store must still depend on.
 *
 * @param pi   parallelization state for the origin Store
 * @param irn  current memory node (mode_M) being classified
 */
static void parallelize_store(parallelize_info *pi, ir_node *irn)
{
	/* Never investigate the same subgraph twice. */
	if (ir_nodeset_contains(&pi->all_visited, irn))
		return;
	ir_nodeset_insert(&pi->all_visited, irn);

	if (get_nodes_block(irn) == pi->origin_block) {
		ir_node *origin_ptr  = pi->origin_ptr;
		ir_type *origin_type = pi->origin_type;
		unsigned origin_size = pi->origin_size;

		if (is_Proj(irn)) {
			ir_node *op = get_Proj_pred(irn);
			if (is_Load(op)
			    && get_Load_volatility(op) == volatility_non_volatile) {
				ir_node *ptr  = get_Load_ptr(op);
				ir_type *type = get_Load_type(op);
				unsigned size = get_mode_size_bytes(get_Load_mode(op));
				/* A Load may be bypassed if it cannot read what the
				 * origin Store writes. */
				if (get_alias_relation(origin_ptr, origin_type, origin_size,
				                       ptr, type, size) == ir_no_alias) {
					ir_nodeset_insert(&pi->user_mem, irn);
					parallelize_store(pi, get_Load_mem(op));
					return;
				}
			} else if (is_Store(op)
			           && get_Store_volatility(op) == volatility_non_volatile) {
				ir_node *ptr  = get_Store_ptr(op);
				ir_type *type = get_Store_type(op);
				unsigned size = get_mode_size_bytes(get_irn_mode(get_Store_value(op)));
				/* Two Stores may run in parallel if their targets
				 * provably do not overlap. */
				if (get_alias_relation(origin_ptr, origin_type, origin_size,
				                       ptr, type, size) == ir_no_alias) {
					ir_nodeset_insert(&pi->user_mem, irn);
					parallelize_store(pi, get_Store_mem(op));
					return;
				}
			}
		} else if (is_Sync(irn)) {
			/* Classify each Sync operand on its own. */
			int n = get_Sync_n_preds(irn);
			for (int i = 0; i < n; ++i)
				parallelize_store(pi, get_Sync_pred(irn, i));
			return;
		} else if (is_CopyB(irn)
		           && get_CopyB_volatility(irn) == volatility_non_volatile) {
			ir_node *src  = get_CopyB_src(irn);
			ir_node *dst  = get_CopyB_dst(irn);
			ir_type *type = get_CopyB_type(irn);
			unsigned size = get_type_size_bytes(type);
			/* Both the CopyB's source (read) and destination (write)
			 * must be independent of the origin Store's target. */
			if (get_alias_relation(origin_ptr, origin_type, origin_size,
			                       src, type, size) == ir_no_alias
			    && get_alias_relation(origin_ptr, origin_type, origin_size,
			                          dst, type, size) == ir_no_alias) {
				ir_nodeset_insert(&pi->user_mem, irn);
				parallelize_store(pi, get_CopyB_mem(irn));
				return;
			}
		}
	}
	/* Walk ends here: this node stays a dependency of the Store. */
	ir_nodeset_insert(&pi->this_mem, irn);
}

/**
 * Walks the memory chain above a CopyB, collecting in pi->user_mem all
 * memory ops in the origin block that provably conflict with neither the
 * CopyB's source nor its destination, and in pi->this_mem all nodes the
 * CopyB must still depend on.
 *
 * @param pi      parallelization state (origin_ptr is unused here)
 * @param origin  the origin CopyB node itself; carries both the src and
 *                dst pointers, which do not fit into parallelize_info
 * @param irn     current memory node (mode_M) being classified
 */
static void parallelize_copyB(parallelize_info *pi, ir_node *origin, ir_node *irn)
{
	/* There is no point in investigating the same subgraph twice */
	if (ir_nodeset_contains(&pi->all_visited, irn))
		return;

	ir_nodeset_insert(&pi->all_visited, irn);

	if (get_nodes_block(irn) == pi->origin_block) {
		if (is_Proj(irn)) {
			ir_node *pred = get_Proj_pred(irn);
			if (is_Load(pred)
			    && get_Load_volatility(pred) == volatility_non_volatile) {
				/* A Load only conflicts with what the CopyB writes,
				 * i.e. the CopyB's destination. */
				ir_node *org_ptr   = get_CopyB_dst(origin);
				ir_type *org_type  = pi->origin_type;
				unsigned org_size  = pi->origin_size;
				ir_node *load_ptr  = get_Load_ptr(pred);
				ir_type *load_type = get_Load_type(pred);
				unsigned load_size = get_mode_size_bytes(get_Load_mode(pred));
				if (get_alias_relation(org_ptr, org_type, org_size,
				                       load_ptr, load_type, load_size) == ir_no_alias) {
					ir_node *mem = get_Load_mem(pred);
					ir_nodeset_insert(&pi->user_mem, irn);
					parallelize_copyB(pi, origin, mem);
					return;
				}
			} else if (is_Store(pred)
			           && get_Store_volatility(pred) == volatility_non_volatile) {
				/* A Store must not touch the CopyB's source (which it
				 * reads) nor its destination (which it writes). */
				ir_node *org_src    = get_CopyB_src(origin);
				ir_node *org_dst    = get_CopyB_dst(origin);
				ir_type *org_type   = pi->origin_type;
				unsigned org_size   = pi->origin_size;
				ir_node *store_ptr  = get_Store_ptr(pred);
				ir_type *store_type = get_Store_type(pred);
				unsigned store_size = get_mode_size_bytes(get_irn_mode(get_Store_value(pred)));
				if (get_alias_relation(org_src, org_type, org_size,
				                       store_ptr, store_type, store_size) == ir_no_alias &&
				    get_alias_relation(org_dst, org_type, org_size,
				                       store_ptr, store_type, store_size) == ir_no_alias) {
					ir_node *mem = get_Store_mem(pred);
					ir_nodeset_insert(&pi->user_mem, irn);
					parallelize_copyB(pi, origin, mem);
					return;
				}
			}
		} else if (is_Sync(irn)) {
			/* Classify every operand of the Sync individually. */
			for (int i = 0, n = get_Sync_n_preds(irn); i < n; ++i) {
				ir_node *sync_pred = get_Sync_pred(irn, i);
				parallelize_copyB(pi, origin, sync_pred);
			}
			return;
		} else if (is_CopyB(irn)
		           && get_CopyB_volatility(irn) == volatility_non_volatile) {
			/* Another CopyB conflicts if its write overlaps our read or
			 * write, or its read overlaps our write.  (read/read overlap
			 * — org_src vs copyB_src — is harmless and not checked.) */
			ir_node *org_src    = get_CopyB_src(origin);
			ir_node *org_dst    = get_CopyB_dst(origin);
			ir_type *org_type   = pi->origin_type;
			unsigned org_size   = pi->origin_size;
			ir_node *copyB_src  = get_CopyB_src(irn);
			ir_node *copyB_dst  = get_CopyB_dst(irn);
			ir_type *copyB_type = get_CopyB_type(irn);
			unsigned copyB_size = get_type_size_bytes(copyB_type);
			if (get_alias_relation(org_src, org_type, org_size,
			                       copyB_dst, copyB_type, copyB_size) == ir_no_alias &&
			    get_alias_relation(org_dst, org_type, org_size,
			                       copyB_src, copyB_type, copyB_size) == ir_no_alias &&
			    get_alias_relation(org_dst, org_type, org_size,
			                       copyB_dst, copyB_type, copyB_size) == ir_no_alias) {
				ir_node *mem = get_CopyB_mem(irn);
				ir_nodeset_insert(&pi->user_mem, irn);
				parallelize_copyB(pi, origin, mem);
				return;
			}
		}
	}
	/* Walk ends here: this node stays a dependency of the CopyB. */
	ir_nodeset_insert(&pi->this_mem, irn);
}

/**
 * Walker callback: if @p proj is (the memory result of) a non-volatile
 * Load, Store or CopyB, partition the memory ops above it into those it
 * can run in parallel with and those it must stay sequenced behind, then
 * rewire the graph with Sync nodes accordingly.
 *
 * Fixes versus the previous version: the two `pi.origin_block = block,`
 * statements used a stray comma operator instead of a semicolon, and
 * pi.origin_ptr was left uninitialized on the CopyB path.
 *
 * @param proj  any node; non-memory nodes are ignored
 * @param env   unused walker environment
 */
static void walker(ir_node *proj, void *env)
{
	(void)env;

	/* Only memory nodes are interesting. */
	if (get_irn_mode(proj) != mode_M)
		return;
	ir_node *mem_op = is_Proj(proj) ? get_Proj_pred(proj) : proj;

	ir_node          *pred;
	ir_node          *block;
	parallelize_info  pi;
	if (is_Load(mem_op)) {
		if (get_Load_volatility(mem_op) != volatility_non_volatile)
			return;

		block = get_nodes_block(mem_op);
		pred  = get_Load_mem(mem_op);

		pi.origin_block = block;
		pi.origin_ptr   = get_Load_ptr(mem_op);
		pi.origin_size  = get_mode_size_bytes(get_Load_mode(mem_op));
		pi.origin_type  = get_Load_type(mem_op);
		ir_nodeset_init(&pi.this_mem);
		ir_nodeset_init(&pi.user_mem);
		ir_nodeset_init(&pi.all_visited);

		parallelize_load(&pi, pred);
	} else if (is_Store(mem_op)) {
		if (get_Store_volatility(mem_op) != volatility_non_volatile)
			return;

		block = get_nodes_block(mem_op);
		pred  = get_Store_mem(mem_op);

		pi.origin_block = block;
		pi.origin_ptr   = get_Store_ptr(mem_op);
		pi.origin_size  = get_mode_size_bytes(get_irn_mode(get_Store_value(mem_op)));
		pi.origin_type  = get_Store_type(mem_op);
		ir_nodeset_init(&pi.this_mem);
		ir_nodeset_init(&pi.user_mem);
		ir_nodeset_init(&pi.all_visited);

		parallelize_store(&pi, pred);
	} else if (is_CopyB(mem_op)) {
		if (get_CopyB_volatility(mem_op) != volatility_non_volatile)
			return;

		block = get_nodes_block(mem_op);
		pred  = get_CopyB_mem(mem_op);

		pi.origin_block = block;
		/* parallelize_copyB uses the node itself, because the
		 * information does not fit in a parallelize_info. */
		pi.origin_ptr   = NULL;
		pi.origin_type  = get_CopyB_type(mem_op);
		pi.origin_size  = get_type_size_bytes(pi.origin_type);
		ir_nodeset_init(&pi.this_mem);
		ir_nodeset_init(&pi.user_mem);
		ir_nodeset_init(&pi.all_visited);

		parallelize_copyB(&pi, mem_op, pred);
	} else {
		return;
	}

	size_t n = ir_nodeset_size(&pi.user_mem);
	if (n > 0) { /* nothing happened otherwise */
		/* Route all independent users through a Sync that also contains
		 * the origin's memory result, making them parallel to it. */
		ir_node **in = XMALLOCN(ir_node*, n + 1);
		size_t    i  = 0;
		in[i++] = proj;
		foreach_ir_nodeset(&pi.user_mem, node, iter) {
			in[i++] = node;
		}
		assert(i == n + 1);
		ir_node *sync = new_r_Sync(block, i, in);
		free(in);
		edges_reroute_except(proj, sync, sync);

		/* The origin itself now only depends on the remaining nodes. */
		n = ir_nodeset_size(&pi.this_mem);
		if (n == 1) {
			sync = ir_nodeset_first(&pi.this_mem);
		} else {
			in = XMALLOCN(ir_node*, n);
			i  = 0;
			foreach_ir_nodeset(&pi.this_mem, node, iter) {
				in[i++] = node;
			}
			assert(i == n);
			sync = new_r_Sync(block, i, in);
			free(in);
		}
		set_memop_mem(mem_op, sync);
	}

	ir_nodeset_destroy(&pi.this_mem);
	ir_nodeset_destroy(&pi.user_mem);
	ir_nodeset_destroy(&pi.all_visited);
}

/*
 * Elimination of unnecessary Sync edges
 *
 * This code removes Sync inputs, where the immediate predecessor is
 * reachable by another one of the Sync's inputs.
 */

/** Callbacks and state for the edge-wise DFS in dfs_step(). */
typedef struct {
	irg_walk_func *pre;       /**< Called before a node is visited the first time (may be NULL). */
	irg_walk_func *post;      /**< Called after a node's predecessors were processed (may be NULL). */
	irg_walk_func *again;     /**< Called on each re-visit through another edge (may be NULL). */
	ir_nodeset_t *visited;    /**< Set of nodes already visited. */
	void *inner_env;          /**< User environment passed to the callbacks. */
} dfs_env_t;

/* WARNING: Stops at Phi nodes for now */
static void dfs_step(ir_node *irn, dfs_env_t *env)
{
	if (ir_nodeset_contains(env->visited, irn)) {
		/* Reached again through a different edge: only report it. */
		if (env->again != NULL)
			env->again(irn, env->inner_env);
		return;
	}

	if (env->pre != NULL)
		env->pre(irn, env->inner_env);
	ir_nodeset_insert(env->visited, irn);

	if (is_Proj(irn)) {
		/* Projs are transparent: step straight to their operand. */
		dfs_step(get_Proj_pred(irn), env);
	} else {
		/* Follow only memory edges and do not cross Phis. */
		foreach_irn_in(irn, i, pred) {
			if (get_irn_mode(pred) != mode_M || is_Phi(pred))
				continue;
			dfs_step(pred, env);
		}
	}

	if (env->post != NULL)
		env->post(irn, env->inner_env);
}

/**
 * Performs a DFS beginning at @c irn, but traversing each edge once
 * rather than visiting each node once.
 *
 * When a node is visited again through another edge, the @c again
 * callback is called.
 *
 * WARNING: Because this particular use of DFS needs it, the search
 * stops at Phi nodes.
 */
static void dfs_by_edges_from(ir_node *irn,
			      irg_walk_func *pre, irg_walk_func *post, irg_walk_func *again,
			      void *env)
{
	ir_nodeset_t visited;
	ir_nodeset_init(&visited);

	dfs_env_t dfs_env = {
		.pre       = pre,
		.post      = post,
		.again     = again,
		.visited   = &visited,
		.inner_env = env,
	};
	dfs_step(irn, &dfs_env);

	ir_nodeset_destroy(&visited);
}

/**
 * Per-node bookkeeping for simplify_Sync(), stored in the node's link
 * field.  Tracks how often a direct Sync predecessor is reached by the
 * DFS; a predecessor reached more than once is redundant.
 */
typedef struct {
	bool is_Sync_pred;      /**< Node is a direct predecessor of the current Sync. */
	int  sync_pred_count;   /**< Visit count; starts at -1 for direct preds, so <= 0 means "reached at most once". */
} sync_pred_info_t;

/** Walker: attach a fresh, zeroed sync_pred_info_t to @p irn's link field. */
static void prepare_links_Sync(ir_node *irn, void *e)
{
	struct obstack *obst = (struct obstack*)e;

	sync_pred_info_t *record = obstack_alloc(obst, sizeof(*record));
	record->is_Sync_pred    = false;
	record->sync_pred_count = 0;
	set_irn_link(irn, record);
}

/** DFS callback: count each visit of a direct Sync predecessor. */
static void visit_Sync_pred(ir_node *irn, void *e)
{
	(void)e;

	sync_pred_info_t *record = (sync_pred_info_t*)get_irn_link(irn);
	if (record->is_Sync_pred)
		++record->sync_pred_count;
}

/**
 * Walker: removes redundant Sync inputs, i.e. predecessors that are also
 * reachable through another input of the same Sync.
 *
 * Protocol: every direct predecessor starts with sync_pred_count == -1;
 * the DFS increments the count once per edge that reaches it.  A final
 * count <= 0 means it was reached through at most one edge (its own),
 * so it is necessary; a positive count means another path covers it.
 *
 * @param irn  any node; non-Sync nodes are ignored
 * @param e    the obstack used for sync_pred_info_t allocations
 */
static void simplify_Sync(ir_node *irn, void *e)
{
	struct obstack *obst = (struct obstack*) e;

	if (!is_Sync(irn))
		return;

	ir_node **preds   = get_Sync_pred_arr(irn);
	int       n_preds = get_Sync_n_preds(irn);

	/* Mark all direct predecessors */
	for (int i = 0; i < n_preds; i++) {
		sync_pred_info_t *info = (sync_pred_info_t*) get_irn_link(preds[i]);
		info->is_Sync_pred = true;
		info->sync_pred_count--;
	}

	/* Do a DFS from the Sync, stopping at Phi nodes */
	dfs_by_edges_from(irn, visit_Sync_pred, NULL, visit_Sync_pred, NULL);

	/* Find unnecessary preds, reset sync_pred_infos. */
	ir_node **necessary = NEW_ARR_F(ir_node*, 0);
	for (int i = 0; i < n_preds; i++) {
		sync_pred_info_t *info = (sync_pred_info_t*) get_irn_link(preds[i]);

		if (info->sync_pred_count <= 0) {
			ARR_APP1(ir_node*, necessary, preds[i]);
		}

		info->is_Sync_pred = false;
		info->sync_pred_count = 0;
	}

	/* Rebuild the Sync from the necessary inputs only.  new_r_Sync may
	 * return an existing (possibly the same) node via CSE, so the new
	 * node needs its own link record before a potential exchange. */
	ir_node *block = get_nodes_block(irn);
	ir_node *new_Sync = new_r_Sync(block, ARR_LEN(necessary), necessary);
	prepare_links_Sync(new_Sync, obst);

	if (irn != new_Sync)
		exchange(irn, new_Sync);

	DEL_ARR_F(necessary);
}

/**
 * Removes redundant Sync inputs in the whole graph: first attaches
 * bookkeeping records to every node, then simplifies each Sync.
 *
 * @param irg  the graph to clean up (requires the IRN_LINK resource)
 */
static void eliminate_sync_edges(ir_graph *irg)
{
	struct obstack obst;
	obstack_init(&obst);

	irg_walk_graph(irg, prepare_links_Sync, NULL, &obst);
	irg_walk_graph(irg, simplify_Sync, NULL, &obst);

	obstack_free(&obst, NULL);
}

496
497
/**
 * Parallelizing Load/Store optimization: makes independent memory
 * operations parallel by inserting Sync nodes, then removes Sync inputs
 * made redundant in the process.
 *
 * @param irg  the graph to optimize
 */
void opt_parallelize_mem(ir_graph *irg)
{
	/* The walker needs out edges for rerouting and dominance for the
	 * block-wise top-down order. */
	assure_irg_properties(irg, IR_GRAPH_PROPERTY_CONSISTENT_OUT_EDGES
	                           | IR_GRAPH_PROPERTY_CONSISTENT_DOMINANCE);
	irg_walk_blkwise_dom_top_down(irg, NULL, walker, NULL);
	/* eliminate_sync_edges stores per-node records in the link field. */
	ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK);
	eliminate_sync_edges(irg);
	ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
	confirm_irg_properties(irg, IR_GRAPH_PROPERTIES_CONTROL_FLOW);
}