parallelize_mem.c 5.69 KB
Newer Older
1
2
/*
 * This file is part of libFirm.
3
 * Copyright (C) 2012 University of Karlsruhe.
4
5
6
7
8
9
10
11
12
 */

/**
 * @file
 * @brief   parallelizing Load/Store optimisation
 * @author  Christoph Mallon
 */
#include "config.h"

13
14
#include "iroptimize.h"

15
#include "array_t.h"
16
17
#include "debug.h"
#include "ircons.h"
18
#include "irgraph_t.h"
19
20
21
22
23
24
25
#include "irgmod.h"
#include "irgopt.h"
#include "irgwalk.h"
#include "irmemory.h"
#include "irnode.h"
#include "irnodeset.h"
#include "obst.h"
26
#include "irdump.h"
Michael Beck's avatar
Michael Beck committed
27
#include "irflag_t.h"
28
#include "irpass.h"
29
#include "iredges.h"
30

31
typedef struct parallelize_info
32
33
34
35
36
37
{
	ir_node      *origin_block;
	ir_node      *origin_ptr;
	ir_mode      *origin_mode;
	ir_nodeset_t  this_mem;
	ir_nodeset_t  user_mem;
38
} parallelize_info;
39

40
static void parallelize_load(parallelize_info *pi, ir_node *irn)
41
{
42
43
44
45
	/* There is no point in investigating the same subgraph twice */
	if (ir_nodeset_contains(&pi->user_mem, irn))
		return;

46
47
48
49
50
51
52
53
	if (get_nodes_block(irn) == pi->origin_block) {
		if (is_Proj(irn)) {
			ir_node *pred = get_Proj_pred(irn);
			if (is_Load(pred) &&
					get_Load_volatility(pred) == volatility_non_volatile) {
				ir_node *mem = get_Load_mem(pred);
				//ir_nodeset_insert(&pi->this_mem, mem);
				ir_nodeset_insert(&pi->user_mem, irn);
54
				parallelize_load(pi, mem);
55
56
57
58
59
60
61
				return;
			} else if (is_Store(pred) &&
					get_Store_volatility(pred) == volatility_non_volatile) {
				ir_mode *org_mode   = pi->origin_mode;
				ir_node *org_ptr    = pi->origin_ptr;
				ir_mode *store_mode = get_irn_mode(get_Store_value(pred));
				ir_node *store_ptr  = get_Store_ptr(pred);
62
				if (get_alias_relation(org_ptr, org_mode, store_ptr, store_mode) == ir_no_alias) {
63
64
					ir_node *mem = get_Store_mem(pred);
					ir_nodeset_insert(&pi->user_mem, irn);
65
					parallelize_load(pi, mem);
66
67
68
69
70
71
72
73
74
					return;
				}
			}
		} else if (is_Sync(irn)) {
			int n = get_Sync_n_preds(irn);
			int i;

			for (i = 0; i < n; ++i) {
				ir_node *sync_pred = get_Sync_pred(irn, i);
75
				parallelize_load(pi, sync_pred);
76
77
78
79
80
81
82
			}
			return;
		}
	}
	ir_nodeset_insert(&pi->this_mem, irn);
}

83
static void parallelize_store(parallelize_info *pi, ir_node *irn)
84
{
85
86
87
88
	/* There is no point in investigating the same subgraph twice */
	if (ir_nodeset_contains(&pi->user_mem, irn))
		return;

89
90
91
92
93
94
95
96
97
	if (get_nodes_block(irn) == pi->origin_block) {
		if (is_Proj(irn)) {
			ir_node *pred = get_Proj_pred(irn);
			if (is_Load(pred) &&
					get_Load_volatility(pred) == volatility_non_volatile) {
				ir_mode *org_mode  = pi->origin_mode;
				ir_node *org_ptr   = pi->origin_ptr;
				ir_mode *load_mode = get_Load_mode(pred);
				ir_node *load_ptr  = get_Load_ptr(pred);
98
				if (get_alias_relation(org_ptr, org_mode, load_ptr, load_mode) == ir_no_alias) {
99
100
					ir_node *mem = get_Load_mem(pred);
					ir_nodeset_insert(&pi->user_mem, irn);
101
					parallelize_store(pi, mem);
102
103
104
105
106
107
108
109
					return;
				}
			} else if (is_Store(pred) &&
					get_Store_volatility(pred) == volatility_non_volatile) {
				ir_mode *org_mode   = pi->origin_mode;
				ir_node *org_ptr    = pi->origin_ptr;
				ir_mode *store_mode = get_irn_mode(get_Store_value(pred));
				ir_node *store_ptr  = get_Store_ptr(pred);
110
				if (get_alias_relation(org_ptr, org_mode, store_ptr, store_mode) == ir_no_alias) {
Michael Beck's avatar
Michael Beck committed
111
112
					ir_node *mem;

113
					ir_nodeset_insert(&pi->user_mem, irn);
Michael Beck's avatar
Michael Beck committed
114
					mem = get_Store_mem(pred);
115
					parallelize_store(pi, mem);
116
117
118
119
120
121
122
123
124
					return;
				}
			}
		} else if (is_Sync(irn)) {
			int n = get_Sync_n_preds(irn);
			int i;

			for (i = 0; i < n; ++i) {
				ir_node *sync_pred = get_Sync_pred(irn, i);
125
				parallelize_store(pi, sync_pred);
126
127
128
129
130
131
132
133
134
135
136
137
			}
			return;
		}
	}
	ir_nodeset_insert(&pi->this_mem, irn);
}

static void walker(ir_node *proj, void *env)
{
	ir_node          *mem_op;
	ir_node          *pred;
	ir_node          *block;
138
	size_t            n;
139
	parallelize_info  pi;
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158

	(void)env;

	if (!is_Proj(proj)) return;
	if (get_irn_mode(proj) != mode_M) return;

	mem_op = get_Proj_pred(proj);
	if (is_Load(mem_op)) {
		if (get_Load_volatility(mem_op) != volatility_non_volatile) return;

		block = get_nodes_block(mem_op);
		pred  = get_Load_mem(mem_op);

		pi.origin_block = block,
		pi.origin_ptr   = get_Load_ptr(mem_op);
		pi.origin_mode  = get_Load_mode(mem_op);
		ir_nodeset_init(&pi.this_mem);
		ir_nodeset_init(&pi.user_mem);

159
		parallelize_load(&pi, pred);
160
161
162
163
164
165
166
167
168
169
170
171
	} else if (is_Store(mem_op)) {
		if (get_Store_volatility(mem_op) != volatility_non_volatile) return;

		block = get_nodes_block(mem_op);
		pred  = get_Store_mem(mem_op);

		pi.origin_block = block,
		pi.origin_ptr   = get_Store_ptr(mem_op);
		pi.origin_mode  = get_irn_mode(get_Store_value(mem_op));
		ir_nodeset_init(&pi.this_mem);
		ir_nodeset_init(&pi.user_mem);

172
		parallelize_store(&pi, pred);
173
174
175
176
177
	} else {
		return;
	}

	n = ir_nodeset_size(&pi.user_mem);
178
	if (n > 0) { /* nothing happened otherwise */
179
180
181
		ir_node  *sync;
		ir_node **in   = XMALLOCN(ir_node*, n+1);
		size_t    i;
182
183

		i = 0;
184
185
186
		in[i++] = proj;
		foreach_ir_nodeset(&pi.user_mem, node, iter) {
			in[i++] = node;
187
		}
188
189
190
191
		assert(i == n+1);
		sync = new_r_Sync(block, i, in);
		xfree(in);
		edges_reroute_except(proj, sync, sync);
192
193
194

		n = ir_nodeset_size(&pi.this_mem);
		if (n == 1) {
195
			sync = ir_nodeset_first(&pi.this_mem);
196
		} else {
197
			in = XMALLOCN(ir_node*, n);
198
			i = 0;
199
200
			foreach_ir_nodeset(&pi.this_mem, node, iter) {
				in[i++] = node;
201
202
			}
			assert(i == n);
203
			sync = new_r_Sync(block, i, in);
204
205
206
207
208
209
210
211
		}
		set_memop_mem(mem_op, sync);
	}

	ir_nodeset_destroy(&pi.this_mem);
	ir_nodeset_destroy(&pi.user_mem);
}

212
213
void opt_parallelize_mem(ir_graph *irg)
{
214
	assure_irg_properties(irg, IR_GRAPH_PROPERTY_CONSISTENT_OUT_EDGES);
215
216
	irg_walk_graph(irg, NULL, walker, NULL);
	confirm_irg_properties(irg, IR_GRAPH_PROPERTIES_CONTROL_FLOW);
217
}
Michael Beck's avatar
Michael Beck committed
218

219
ir_graph_pass_t *opt_parallelize_mem_pass(const char *name)
Michael Beck's avatar
Michael Beck committed
220
{
221
	return def_graph_pass(name ? name : "parallelize-mem", opt_parallelize_mem);
Michael Beck's avatar
Michael Beck committed
222
}