arm_optimize.c 6.8 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
/*
 * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
 *
 * This file is part of libFirm.
 *
 * This file may be distributed and/or modified under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation and appearing in the file LICENSE.GPL included in the
 * packaging of this file.
 *
 * Licensees holding valid libFirm Professional Edition licenses may use
 * this file in accordance with the libFirm Commercial License.
 * Agreement provided with the Software.
 *
 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE.
 */

/**
 * @file
 * @brief       Implements several optimizations for ARM.
 * @author      Michael Beck
 */
#include "config.h"

#include "irgmod.h"
#include "ircons.h"
29
#include "iredges.h"
30
31
#include "error.h"

32
#include "benode.h"
33
34
35
36
37
38
#include "bepeephole.h"
#include "besched.h"

#include "arm_optimize.h"
#include "gen_arm_regalloc_if.h"
#include "gen_arm_new_nodes.h"
39
40
#include "arm_nodes_attr.h"
#include "arm_new_nodes.h"
41

42
43
44
static unsigned arm_ror(unsigned v, unsigned ror)
{
	return (v << (32 - ror)) | (v >> ror);
45
46
47
48
49
50
51
52
53
}

/*
 * construct 8bit values and rot amounts for a value.
 */
void arm_gen_vals_from_word(unsigned int value, arm_vals *result)
{
	int initial = 0;

54
	/* TODO: not optimal yet, as we only "shift" the value and don't take advantage of rotations */
55
56

	/* special case: we prefer shift amount 0 */
57
	if (value <= 0xFF) {
58
		result->values[0] = value;
59
		result->rors[0]   = 0;
60
61
62
63
		result->ops       = 1;
		return;
	}

64
65
66
67
68
	result->ops = 0;
	do {
		while ( (value & 0x3) == 0) {
			value  >>= 2;
			initial += 2;
69
70
		}

71
72
73
		result->values[result->ops] = value & 0xFF;
		result->rors[result->ops]   = (32-initial) % 32;
		++result->ops;
74

75
76
		value  >>= 8;
		initial += 8;
77
	} while (value != 0);
78
79
80
}

/**
81
82
 * Returns non.zero if the given offset can be directly encoded into an ARM
 * instruction.
83
 */
84
85
static int allowed_arm_immediate(int offset, arm_vals *result)
{
86
87
88
89
90
91
92
	arm_gen_vals_from_word(offset, result);
	return result->ops <= 1;
}

/**
 * Fix an IncSP node if the offset gets too big
 */
93
94
95
96
static void peephole_be_IncSP(ir_node *node)
{
	ir_node  *first;
	ir_node  *last;
97
	ir_node  *block;
98
99
100
101
102
103
	int       offset;
	int       cnt;
	int       sign = 1;
	arm_vals  v;
	const ir_edge_t *edge;
	const ir_edge_t *next;
104
105

	/* first optimize incsp->incsp combinations */
106
	node = be_peephole_IncSP_IncSP(node);
107
108
109
110
111
112
113
114
115
116

	offset = be_get_IncSP_offset(node);
	/* can be transformed into Add OR Sub */
	if (offset < 0) {
		sign = -1;
		offset = -offset;
	}
	if (allowed_arm_immediate(offset, &v))
		return;

117
	be_set_IncSP_offset(node, sign * arm_ror(v.values[0], v.rors[0]));
118

119
	first = node;
120
121
	block = get_nodes_block(node);
	for (cnt = 1; cnt < v.ops; ++cnt) {
122
123
		int      value = sign * arm_ror(v.values[cnt], v.rors[cnt]);
		ir_node *incsp = be_new_IncSP(&arm_registers[REG_SP], block, node,
124
		                             value, 1);
125
126
		sched_add_after(node, incsp);
		node = incsp;
127
	}
128
129
130
131
132
133
134
135
136
137
138

	/* reattach IncSP users */
	last = node;
	node = sched_next(first);
	foreach_out_edge_safe(first, edge, next) {
		ir_node *user = get_edge_src_irn(edge);
		int      pos  = get_edge_src_pos(edge);
		if (user == node)
			continue;
		set_irn_n(user, pos, last);
	}
139
140
141
142
143
144
145
}

/**
 * creates the address by Adds
 */
static ir_node *gen_ptr_add(ir_node *node, ir_node *frame, arm_vals *v)
{
146
	dbg_info *dbgi  = get_irn_dbg_info(node);
147
148
149
150
	ir_node  *block = get_nodes_block(node);
	int     cnt;
	ir_node *ptr;

151
	ptr = new_bd_arm_Add_imm(dbgi, block, frame, v->values[0], v->rors[0]);
152
	arch_set_irn_register(ptr, &arm_registers[REG_R12]);
153
154
155
	sched_add_before(node, ptr);

	for (cnt = 1; cnt < v->ops; ++cnt) {
156
157
		ir_node *next = new_bd_arm_Add_imm(dbgi, block, ptr, v->values[cnt],
		                                   v->rors[cnt]);
158
		arch_set_irn_register(next, &arm_registers[REG_R12]);
159
160
161
162
163
164
165
166
167
168
169
		sched_add_before(node, next);
		ptr = next;
	}
	return ptr;
}

/**
* creates the address by Subs
*/
static ir_node *gen_ptr_sub(ir_node *node, ir_node *frame, arm_vals *v)
{
170
	dbg_info *dbgi  = get_irn_dbg_info(node);
171
172
173
174
	ir_node  *block = get_nodes_block(node);
	int     cnt;
	ir_node *ptr;

175
	ptr = new_bd_arm_Sub_imm(dbgi, block, frame, v->values[0], v->rors[0]);
176
	arch_set_irn_register(ptr, &arm_registers[REG_R12]);
177
178
179
	sched_add_before(node, ptr);

	for (cnt = 1; cnt < v->ops; ++cnt) {
180
181
		ir_node *next = new_bd_arm_Sub_imm(dbgi, block, ptr, v->values[cnt],
		                                   v->rors[cnt]);
182
		arch_set_irn_register(next, &arm_registers[REG_R12]);
183
184
185
186
187
188
		sched_add_before(node, next);
		ptr = next;
	}
	return ptr;
}

189
190
191
192
193
194
195
196
/** fix frame addresses which are too big */
static void peephole_arm_FrameAddr(ir_node *node)
{
	arm_SymConst_attr_t *attr   = get_arm_SymConst_attr(node);
	int                  offset = attr->fp_offset;
	arm_vals             v;
	ir_node             *base;
	ir_node             *ptr;
197
198
199
200

	if (allowed_arm_immediate(offset, &v))
		return;

201
202
203
	base = get_irn_n(node, n_arm_FrameAddr_base);
	/* TODO: suboptimal */
	ptr = gen_ptr_add(node, base, &v);
204

205
206
	attr->fp_offset = 0;
	set_irn_n(node, n_arm_FrameAddr_base, ptr);
207
208
209
}

/**
210
 * Fix stackpointer relative stores if the offset gets too big
211
 */
212
213
214
215
216
217
218
static void peephole_arm_Str_Ldr(ir_node *node)
{
	arm_load_store_attr_t *attr    = get_arm_load_store_attr(node);
	int                    offset  = attr->offset;
	int                    use_add = 1;
	ir_node               *ptr;
	arm_vals              v;
219
220
221

	if (allowed_arm_immediate(offset, &v))
		return;
222
223
224
225
226
227

	/* we should only have too big offsets for frame entities */
	if (!attr->is_frame_entity) {
		fprintf(stderr,
		        "POSSIBLE ARM BACKEND PROBLEM: offset in Store too big\n");
	}
228
229
230
231
232
	if (offset < 0) {
		use_add = 0;
		offset = -offset;
	}

233
234
	if (is_arm_Str(node)) {
		ptr = get_irn_n(node, n_arm_Str_ptr);
235
	} else {
236
237
		assert(is_arm_Ldr(node));
		ptr = get_irn_n(node, n_arm_Ldr_ptr);
238
239
	}

240
241
	if (use_add) {
		ptr = gen_ptr_add(node, ptr, &v);
242
	} else {
243
		ptr = gen_ptr_sub(node, ptr, &v);
244
245
	}

246
247
248
249
250
251
252
253
254
	/* TODO: sub-optimal, the last offset could probably be left inside the
	   store */
	if (is_arm_Str(node)) {
		set_irn_n(node, n_arm_Str_ptr, ptr);
	} else {
		assert(is_arm_Ldr(node));
		set_irn_n(node, n_arm_Ldr_ptr, ptr);
	}
	attr->offset = 0;
255
256
257
258
259
}

/**
 * Register a peephole optimization function.
 */
260
261
static void register_peephole_optimisation(ir_op *op, peephole_opt_func func)
{
262
263
264
265
266
	assert(op->ops.generic == NULL);
	op->ops.generic = (op_func)func;
}

/* Perform peephole-optimizations. */
267
void arm_peephole_optimization(ir_graph *irg)
268
269
270
{
	/* register peephole optimizations */
	clear_irp_opcodes_generic_func();
271
272
273
274
	register_peephole_optimisation(op_be_IncSP,      peephole_be_IncSP);
	register_peephole_optimisation(op_arm_Str,       peephole_arm_Str_Ldr);
	register_peephole_optimisation(op_arm_Ldr,       peephole_arm_Str_Ldr);
	register_peephole_optimisation(op_arm_FrameAddr, peephole_arm_FrameAddr);
275

276
	be_peephole_opt(irg);
277
}