/*
 * This file is part of libFirm.
 * Copyright (C) 2012 University of Karlsruhe.
 */

/**
 * @file
 * @brief   parallelizing Load/Store optimization
 * @author  Christoph Mallon
 */
#include "iroptimize.h"

#include "array.h"
#include "debug.h"
#include "ircons.h"
#include "irdump.h"
#include "iredges.h"
#include "irflag_t.h"
#include "irgmod.h"
#include "irgopt.h"
#include "irgraph_t.h"
#include "irgwalk.h"
#include "irmemory.h"
#include "irnode_t.h"
#include "irnodeset.h"
#include "obst.h"
#include "type_t.h"

29
/**
 * State for one run of parallelize_load/parallelize_store/parallelize_copyB:
 * describes the memory operation whose dependency chain is being shortened,
 * and collects the partition of the visited memory nodes.
 */
typedef struct parallelize_info
{
	ir_node      *origin_block; /**< Block of the origin operation; the walk never leaves it. */
	ir_node      *origin_ptr;   /**< Address the origin operation accesses. */
	ir_type      *origin_type;  /**< Type if ptr destination. */
	unsigned      origin_size;  /**< size of memory access. */
	ir_nodeset_t  this_mem;     /**< Nodes the origin must still depend on. */
	ir_nodeset_t  user_mem;     /**< Independent nodes that may run in parallel with the origin. */
	ir_nodeset_t  all_visited;  /**< Every node seen so far (prevents re-walking subgraphs). */
} parallelize_info;
39

40
/**
 * Walks the memory dependency chain of a non-volatile Load (described in
 * @p pi) starting at @p irn.  Memory nodes in the origin's block that
 * provably do not conflict with the Load are put into pi->user_mem (the
 * Load may execute in parallel with them); the first node on each path
 * that cannot be skipped is put into pi->this_mem (it remains a real
 * dependency).
 */
static void parallelize_load(parallelize_info *pi, ir_node *irn)
{
	/* There is no point in investigating the same subgraph twice */
	if (ir_nodeset_contains(&pi->all_visited, irn))
		return;

	ir_nodeset_insert(&pi->all_visited, irn);

	/* Never walk out of the origin's block. */
	if (get_nodes_block(irn) == pi->origin_block) {
		if (is_Proj(irn)) {
			ir_node *pred = get_Proj_pred(irn);
			if (is_Load(pred) &&
					get_Load_volatility(pred) == volatility_non_volatile) {
				/* Another Load does not modify memory, so it can always
				 * be skipped — no alias check necessary. */
				ir_node *mem = get_Load_mem(pred);
				ir_nodeset_insert(&pi->user_mem, irn);
				parallelize_load(pi, mem);
				return;
			} else if (is_Store(pred) &&
					get_Store_volatility(pred) == volatility_non_volatile) {
				/* A Store can only be skipped if it provably writes a
				 * location the Load does not read. */
				ir_type *org_type   = pi->origin_type;
				unsigned org_size   = pi->origin_size;
				ir_node *org_ptr    = pi->origin_ptr;
				ir_type *store_type = get_Store_type(pred);
				unsigned store_size = get_mode_size_bytes(get_irn_mode(get_Store_value(pred)));
				ir_node *store_ptr  = get_Store_ptr(pred);
				if (get_alias_relation(org_ptr, org_type, org_size,
				                       store_ptr, store_type, store_size) == ir_no_alias) {
					ir_node *mem = get_Store_mem(pred);
					ir_nodeset_insert(&pi->user_mem, irn);
					parallelize_load(pi, mem);
					return;
				}
			}
		} else if (is_Sync(irn)) {
			/* A Sync merges several memory chains: investigate each of them. */
			for (int i = 0, n = get_Sync_n_preds(irn); i < n; ++i) {
				ir_node *sync_pred = get_Sync_pred(irn, i);
				parallelize_load(pi, sync_pred);
			}
			return;
		} else if (is_CopyB(irn) &&
		           get_CopyB_volatility(irn) == volatility_non_volatile) {
			/* A CopyB only writes through its destination pointer; it can
			 * be skipped if that does not alias the Load's address. */
			ir_type *org_type   = pi->origin_type;
			unsigned org_size   = pi->origin_size;
			ir_node *org_ptr    = pi->origin_ptr;
			ir_type *copyB_type = get_CopyB_type(irn);
			ir_node *copyB_dst  = get_CopyB_dst(irn);
			unsigned copyB_size = get_type_size_bytes(copyB_type);
			if (get_alias_relation(org_ptr, org_type, org_size,
			                       copyB_dst, copyB_type, copyB_size) == ir_no_alias) {
				ir_node *mem = get_CopyB_mem(irn);
				ir_nodeset_insert(&pi->user_mem, irn);
				parallelize_load(pi, mem);
				return;
			}
		}
	}
	/* Could not skip this node: it stays a dependency of the Load. */
	ir_nodeset_insert(&pi->this_mem, irn);
}

99
/**
 * Walks the memory dependency chain of a non-volatile Store (described in
 * @p pi) starting at @p irn.  Unlike the Load case, Loads on the chain
 * must also pass the alias check, because the Store must not overwrite a
 * location they still read.  Independent nodes go into pi->user_mem,
 * remaining dependencies into pi->this_mem.
 */
static void parallelize_store(parallelize_info *pi, ir_node *irn)
{
	/* There is no point in investigating the same subgraph twice */
	if (ir_nodeset_contains(&pi->all_visited, irn))
		return;

	ir_nodeset_insert(&pi->all_visited, irn);

	/* Never walk out of the origin's block. */
	if (get_nodes_block(irn) == pi->origin_block) {
		if (is_Proj(irn)) {
			ir_node *pred = get_Proj_pred(irn);
			if (is_Load(pred)
			    && get_Load_volatility(pred) == volatility_non_volatile) {
				/* The Load may only be bypassed if it reads a location
				 * the Store does not write. */
				ir_node *org_ptr   = pi->origin_ptr;
				ir_type *org_type  = pi->origin_type;
				unsigned org_size  = pi->origin_size;
				ir_node *load_ptr  = get_Load_ptr(pred);
				ir_type *load_type = get_Load_type(pred);
				unsigned load_size = get_mode_size_bytes(get_Load_mode(pred));
				if (get_alias_relation(org_ptr, org_type, org_size,
				                       load_ptr, load_type, load_size) == ir_no_alias) {
					ir_node *mem = get_Load_mem(pred);
					ir_nodeset_insert(&pi->user_mem, irn);
					parallelize_store(pi, mem);
					return;
				}
			} else if (is_Store(pred)
			           && get_Store_volatility(pred) == volatility_non_volatile) {
				/* Two Stores commute only if their target regions are
				 * disjoint. */
				ir_node *org_ptr    = pi->origin_ptr;
				ir_type *org_type   = pi->origin_type;
				unsigned org_size   = pi->origin_size;
				ir_node *store_ptr  = get_Store_ptr(pred);
				ir_type *store_type = get_Store_type(pred);
				unsigned store_size = get_mode_size_bytes(get_irn_mode(get_Store_value(pred)));
				if (get_alias_relation(org_ptr, org_type, org_size,
				                       store_ptr, store_type, store_size) == ir_no_alias) {
					ir_node *mem = get_Store_mem(pred);
					ir_nodeset_insert(&pi->user_mem, irn);
					parallelize_store(pi, mem);
					return;
				}
			}
		} else if (is_Sync(irn)) {
			/* A Sync merges several memory chains: investigate each of them. */
			for (int i = 0, n = get_Sync_n_preds(irn); i < n; ++i) {
				ir_node *sync_pred = get_Sync_pred(irn, i);
				parallelize_store(pi, sync_pred);
			}
			return;
		} else if (is_CopyB(irn)
		           && get_CopyB_volatility(irn) == volatility_non_volatile) {
			/* A CopyB both reads (src) and writes (dst), so the Store must
			 * be independent of both regions. */
			ir_node *org_ptr    = pi->origin_ptr;
			ir_type *org_type   = pi->origin_type;
			unsigned org_size   = pi->origin_size;
			ir_node *copyB_src  = get_CopyB_src(irn);
			ir_node *copyB_dst  = get_CopyB_dst(irn);
			ir_type *copyB_type = get_CopyB_type(irn);
			unsigned copyB_size = get_type_size_bytes(copyB_type);
			if (get_alias_relation(org_ptr, org_type, org_size,
			                       copyB_src, copyB_type, copyB_size) == ir_no_alias &&
			    get_alias_relation(org_ptr, org_type, org_size,
			                       copyB_dst, copyB_type, copyB_size) == ir_no_alias) {
				ir_node *mem = get_CopyB_mem(irn);
				ir_nodeset_insert(&pi->user_mem, irn);
				parallelize_store(pi, mem);
				return;
			}
		}
	}
	/* Could not skip this node: it stays a dependency of the Store. */
	ir_nodeset_insert(&pi->this_mem, irn);
}

/**
 * Walks the memory dependency chain of a non-volatile CopyB @p origin
 * starting at @p irn.  The CopyB both reads from its src and writes to
 * its dst, so every node on the chain must be independent of the region
 * the CopyB writes, and writers on the chain must additionally be
 * independent of the region it reads.  @p origin is passed explicitly
 * because a parallelize_info holds only a single pointer.
 */
static void parallelize_copyB(parallelize_info *pi, ir_node *origin, ir_node *irn)
{
	/* There is no point in investigating the same subgraph twice */
	if (ir_nodeset_contains(&pi->all_visited, irn))
		return;

	ir_nodeset_insert(&pi->all_visited, irn);

	/* Never walk out of the origin's block. */
	if (get_nodes_block(irn) == pi->origin_block) {
		if (is_Proj(irn)) {
			ir_node *pred = get_Proj_pred(irn);
			if (is_Load(pred)
			    && get_Load_volatility(pred) == volatility_non_volatile) {
				/* A Load only conflicts with what the CopyB writes (dst). */
				ir_node *org_ptr   = get_CopyB_dst(origin);
				ir_type *org_type  = pi->origin_type;
				unsigned org_size  = pi->origin_size;
				ir_node *load_ptr  = get_Load_ptr(pred);
				ir_type *load_type = get_Load_type(pred);
				unsigned load_size = get_mode_size_bytes(get_Load_mode(pred));
				if (get_alias_relation(org_ptr, org_type, org_size,
				                       load_ptr, load_type, load_size) == ir_no_alias) {
					ir_node *mem = get_Load_mem(pred);
					ir_nodeset_insert(&pi->user_mem, irn);
					parallelize_copyB(pi, origin, mem);
					return;
				}
			} else if (is_Store(pred)
			           && get_Store_volatility(pred) == volatility_non_volatile) {
				/* A Store must touch neither the CopyB's source nor its
				 * destination region. */
				ir_node *org_src    = get_CopyB_src(origin);
				ir_node *org_dst    = get_CopyB_dst(origin);
				ir_type *org_type   = pi->origin_type;
				unsigned org_size   = pi->origin_size;
				ir_node *store_ptr  = get_Store_ptr(pred);
				ir_type *store_type = get_Store_type(pred);
				unsigned store_size = get_mode_size_bytes(get_irn_mode(get_Store_value(pred)));
				if (get_alias_relation(org_src, org_type, org_size,
				                       store_ptr, store_type, store_size) == ir_no_alias &&
				    get_alias_relation(org_dst, org_type, org_size,
				                       store_ptr, store_type, store_size) == ir_no_alias) {
					ir_node *mem = get_Store_mem(pred);
					ir_nodeset_insert(&pi->user_mem, irn);
					parallelize_copyB(pi, origin, mem);
					return;
				}
			}
		} else if (is_Sync(irn)) {
			/* A Sync merges several memory chains: investigate each of them. */
			for (int i = 0, n = get_Sync_n_preds(irn); i < n; ++i) {
				ir_node *sync_pred = get_Sync_pred(irn, i);
				parallelize_copyB(pi, origin, sync_pred);
			}
			return;
		} else if (is_CopyB(irn)
		           && get_CopyB_volatility(irn) == volatility_non_volatile) {
			/* Another CopyB: its write (dst) must not touch what the origin
			 * reads or writes, and its read (src) must not touch what the
			 * origin writes.  (src/src overlap is harmless: both only read.) */
			ir_node *org_src    = get_CopyB_src(origin);
			ir_node *org_dst    = get_CopyB_dst(origin);
			ir_type *org_type   = pi->origin_type;
			unsigned org_size   = pi->origin_size;
			ir_node *copyB_src  = get_CopyB_src(irn);
			ir_node *copyB_dst  = get_CopyB_dst(irn);
			ir_type *copyB_type = get_CopyB_type(irn);
			unsigned copyB_size = get_type_size_bytes(copyB_type);
			if (get_alias_relation(org_src, org_type, org_size,
			                       copyB_dst, copyB_type, copyB_size) == ir_no_alias &&
			    get_alias_relation(org_dst, org_type, org_size,
			                       copyB_src, copyB_type, copyB_size) == ir_no_alias &&
			    get_alias_relation(org_dst, org_type, org_size,
			                       copyB_dst, copyB_type, copyB_size) == ir_no_alias) {
				ir_node *mem = get_CopyB_mem(irn);
				ir_nodeset_insert(&pi->user_mem, irn);
				parallelize_copyB(pi, origin, mem);
				return;
			}
		}
	}
	/* Could not skip this node: it stays a dependency of the CopyB. */
	ir_nodeset_insert(&pi->this_mem, irn);
}

/**
 * Graph-walker callback: tries to parallelize the memory operation behind
 * the memory Proj (or memory-mode node) @p proj.
 *
 * Collects the independent predecessors of a Load/Store/CopyB via the
 * parallelize_* helpers.  If any were found, a Sync of @p proj and the
 * independent users replaces @p proj at all its users, and the operation's
 * memory input is rewired to a Sync of the remaining true dependencies.
 *
 * Fix: `pi.origin_block = block` was terminated with a comma operator
 * instead of a semicolon in the Load and Store branches — behaviorally
 * identical here, but a latent typo that would silently swallow a
 * statement if lines were ever reordered.
 */
static void walker(ir_node *proj, void *env)
{
	(void)env;

	/* Accept both a Proj M of a memory op and a bare memory-mode node. */
	ir_node *mem_op;
	if (is_Proj(proj) && get_irn_mode(proj) == mode_M) {
		mem_op = get_Proj_pred(proj);
	} else if (get_irn_mode(proj) == mode_M) {
		mem_op = proj;
	} else {
		return;
	}

	ir_node          *pred;
	ir_node          *block;
	parallelize_info  pi;
	if (is_Load(mem_op)) {
		if (get_Load_volatility(mem_op) != volatility_non_volatile) return;

		block = get_nodes_block(mem_op);
		pred  = get_Load_mem(mem_op);

		pi.origin_block = block;
		pi.origin_ptr   = get_Load_ptr(mem_op);
		pi.origin_size  = get_mode_size_bytes(get_Load_mode(mem_op));
		pi.origin_type  = get_Load_type(mem_op);
		ir_nodeset_init(&pi.this_mem);
		ir_nodeset_init(&pi.user_mem);
		ir_nodeset_init(&pi.all_visited);

		parallelize_load(&pi, pred);
	} else if (is_Store(mem_op)) {
		if (get_Store_volatility(mem_op) != volatility_non_volatile) return;

		block = get_nodes_block(mem_op);
		pred  = get_Store_mem(mem_op);

		pi.origin_block = block;
		pi.origin_ptr   = get_Store_ptr(mem_op);
		pi.origin_size  = get_mode_size_bytes(get_irn_mode(get_Store_value(mem_op)));
		pi.origin_type  = get_Store_type(mem_op);
		ir_nodeset_init(&pi.this_mem);
		ir_nodeset_init(&pi.user_mem);
		ir_nodeset_init(&pi.all_visited);

		parallelize_store(&pi, pred);
	} else if (is_CopyB(mem_op)) {
		if (get_CopyB_volatility(mem_op) != volatility_non_volatile) return;

		block = get_nodes_block(mem_op);
		pred  = get_CopyB_mem(mem_op);

		pi.origin_block = block;
		pi.origin_type  = get_CopyB_type(mem_op);
		pi.origin_size  = get_type_size_bytes(pi.origin_type);
		/* pi.origin_ptr stays unset: parallelize_copyB uses the node
		 * itself, because the information does not fit in a
		 * parallelize_info. */
		ir_nodeset_init(&pi.this_mem);
		ir_nodeset_init(&pi.user_mem);
		ir_nodeset_init(&pi.all_visited);

		parallelize_copyB(&pi, mem_op, pred);
	} else {
		return;
	}

	size_t n = ir_nodeset_size(&pi.user_mem);
	if (n > 0) { /* nothing happened otherwise */
		/* Replace proj at its users by Sync(proj, independent users),
		 * making them all run "in parallel" with mem_op. */
		ir_node **in = XMALLOCN(ir_node*, n+1);

		size_t i = 0;
		in[i++] = proj;
		foreach_ir_nodeset(&pi.user_mem, node, iter) {
			in[i++] = node;
		}
		assert(i == n + 1);
		ir_node *sync = new_r_Sync(block, i, in);
		free(in);
		edges_reroute_except(proj, sync, sync);

		/* mem_op now only depends on the nodes that could not be skipped. */
		n = ir_nodeset_size(&pi.this_mem);
		if (n == 1) {
			sync = ir_nodeset_first(&pi.this_mem);
		} else {
			in = XMALLOCN(ir_node*, n);
			i = 0;
			foreach_ir_nodeset(&pi.this_mem, node, iter) {
				in[i++] = node;
			}
			assert(i == n);
			sync = new_r_Sync(block, i, in);
			free(in);
		}
		set_memop_mem(mem_op, sync);
	}

	ir_nodeset_destroy(&pi.this_mem);
	ir_nodeset_destroy(&pi.user_mem);
	ir_nodeset_destroy(&pi.all_visited);
}

/*
 * Elimination of unnecessary Sync edges
 *
 * This code removes Sync inputs whose immediate predecessor is already
 * reachable through another one of the Sync's inputs.
 */

/** Callback environment for the edge-wise DFS in dfs_step(). */
typedef struct {
	irg_walk_func *pre;     /**< Called on the first visit of a node (may be NULL). */
	irg_walk_func *post;    /**< Called after a node's predecessors are done (may be NULL). */
	irg_walk_func *again;   /**< Called on every revisit through another edge (may be NULL). */
	ir_nodeset_t *visited;  /**< Nodes already entered. */
	void *inner_env;        /**< User environment passed to the callbacks. */
} dfs_env_t;

/* WARNING: Stops at Phi nodes for now */
/**
 * One step of a DFS that traverses every edge once (rather than every
 * node once): revisits trigger env->again instead of descending again.
 * Follows Proj predecessors and memory-mode (mode_M), non-Phi inputs.
 */
static void dfs_step(ir_node *irn, dfs_env_t *env) {
	/* Seen before through another edge: report and prune this branch. */
	if (ir_nodeset_contains(env->visited, irn)) {
		if (env->again) env->again(irn, env->inner_env);
		return;
	} else {
		if (env->pre) env->pre(irn, env->inner_env);
		ir_nodeset_insert(env->visited, irn);
	}

	if (is_Proj(irn)) {
		/* A Proj has exactly its predecessor to follow. */
		dfs_step(get_Proj_pred(irn), env);
	} else {
		/* Descend only along the memory chain; stop at Phis (see warning). */
		foreach_irn_in(irn, i, pred) {
			if (get_irn_mode(pred) == mode_M && !is_Phi(pred)) {
				dfs_step(pred, env);
			}
		}
	}

	if (env->post) env->post(irn, env->inner_env);
}

/**
 * Performs a DFS beginning at @c irn, but traversing each edge once
 * rather than visiting each node once.
 *
 * When a node is visited again through another edge, the @c again
 * callback is called.
 *
 * WARNING: Because this particular use of DFS needs it, the search
 * stops at Phi nodes.
 */
static void dfs_by_edges_from(ir_node *irn,
			      irg_walk_func *pre, irg_walk_func *post, irg_walk_func *again,
			      void *env)
{
	ir_nodeset_t seen;
	ir_nodeset_init(&seen);

	dfs_env_t walk_env = {
		.pre       = pre,
		.post      = post,
		.again     = again,
		.visited   = &seen,
		.inner_env = env,
	};
	dfs_step(irn, &walk_env);

	ir_nodeset_destroy(&seen);
}

/** Per-node bookkeeping (stored in the irn link) for simplify_Sync(). */
typedef struct {
	bool is_Sync_pred;    /**< Node is a direct predecessor of the Sync being inspected. */
	int  sync_pred_count; /**< Visit counter; starts at -1 for direct preds, incremented per DFS arrival. */
} sync_pred_info_t;

/** Attaches a fresh, zeroed sync_pred_info_t (obstack-allocated) to @p irn's link. */
static void prepare_links_Sync(ir_node *irn, void *e)
{
	struct obstack *obst = (struct obstack*) e;
	sync_pred_info_t *link_info = obstack_alloc(obst, sizeof(*link_info));

	link_info->is_Sync_pred    = false;
	link_info->sync_pred_count = 0;
	set_irn_link(irn, link_info);
}

/** DFS callback: counts each arrival at a direct Sync predecessor. */
static void visit_Sync_pred(ir_node *irn, void *e)
{
	(void)e;

	sync_pred_info_t *link_info = (sync_pred_info_t*) get_irn_link(irn);
	if (link_info->is_Sync_pred)
		++link_info->sync_pred_count;
}

/**
 * Rebuilds the Sync @p irn without redundant inputs.
 *
 * Each direct predecessor starts with sync_pred_count == -1; the edge-wise
 * DFS then increments the counter once per arrival.  A predecessor that is
 * only reachable through its own direct edge ends at 0 and is kept; one
 * that is also reachable through another input ends > 0 and is dropped.
 */
static void simplify_Sync(ir_node *irn, void *e)
{
	struct obstack *obst = (struct obstack*) e;

	if (!is_Sync(irn))
		return;

	ir_node **preds   = get_Sync_pred_arr(irn);
	int       n_preds = get_Sync_n_preds(irn);

	/* Mark all direct predecessors */
	for (int i = 0; i < n_preds; i++) {
		sync_pred_info_t *info = (sync_pred_info_t*) get_irn_link(preds[i]);
		info->is_Sync_pred = true;
		info->sync_pred_count--;
	}

	/* Do a DFS from the Sync, stopping at Phi nodes */
	dfs_by_edges_from(irn, visit_Sync_pred, NULL, visit_Sync_pred, NULL);

	/* Find unnecessary preds, reset sync_pred_infos. */
	ir_node **necessary = NEW_ARR_F(ir_node*, 0);
	for (int i = 0; i < n_preds; i++) {
		sync_pred_info_t *info = (sync_pred_info_t*) get_irn_link(preds[i]);

		if (info->sync_pred_count <= 0) {
			ARR_APP1(ir_node*, necessary, preds[i]);
		}

		/* Reset so the next simplify_Sync sees clean state. */
		info->is_Sync_pred = false;
		info->sync_pred_count = 0;
	}

	/* Rebuild the Sync from the necessary inputs and give the new node
	 * its own link info (it was created after prepare_links_Sync ran). */
	ir_node *block = get_nodes_block(irn);
	ir_node *new_Sync = new_r_Sync(block, ARR_LEN(necessary), necessary);
	prepare_links_Sync(new_Sync, obst);

	/* new_r_Sync may return the unchanged node (e.g. via CSE). */
	if (irn != new_Sync)
		exchange(irn, new_Sync);

	DEL_ARR_F(necessary);
}

/** Removes redundant Sync inputs throughout @p irg. */
static void eliminate_sync_edges(ir_graph *irg)
{
	struct obstack mem;
	obstack_init(&mem);

	/* Pass 1 attaches bookkeeping to every node's link,
	 * pass 2 rebuilds the Syncs. */
	irg_walk_graph(irg, prepare_links_Sync, NULL, &mem);
	irg_walk_graph(irg, simplify_Sync, NULL, &mem);

	obstack_free(&mem, NULL);
}

497
498
/**
 * Entry point of the pass: parallelizes the memory chains of all
 * Loads/Stores/CopyBs in @p irg, then removes the redundant Sync edges
 * that step may have left behind.
 */
void opt_parallelize_mem(ir_graph *irg)
{
	/* Out edges are needed for rerouting, dominance for the block-wise walk. */
	assure_irg_properties(irg, IR_GRAPH_PROPERTY_CONSISTENT_OUT_EDGES
	                           | IR_GRAPH_PROPERTY_CONSISTENT_DOMINANCE);
	irg_walk_blkwise_dom_top_down(irg, NULL, walker, NULL);
	/* eliminate_sync_edges stores per-node info in the irn links. */
	ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK);
	eliminate_sync_edges(irg);
	ir_free_resources(irg, IR_RESOURCE_IRN_LINK);
	confirm_irg_properties(irg, IR_GRAPH_PROPERTIES_CONTROL_FLOW);
}