arm_optimize.c 6.87 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
/*
 * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
 *
 * This file is part of libFirm.
 *
 * This file may be distributed and/or modified under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation and appearing in the file LICENSE.GPL included in the
 * packaging of this file.
 *
 * Licensees holding valid libFirm Professional Edition licenses may use
 * this file in accordance with the libFirm Commercial License.
 * Agreement provided with the Software.
 *
 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE.
 */

/**
 * @file
 * @brief       Implements several optimizations for ARM.
 * @author      Michael Beck
 * @version     $Id: $
 */
#include "config.h"

#include "irgmod.h"
#include "ircons.h"
30
#include "iredges.h"
31
32
#include "error.h"

33
#include "benode.h"
34
35
36
37
38
39
#include "bepeephole.h"
#include "besched.h"

#include "arm_optimize.h"
#include "gen_arm_regalloc_if.h"
#include "gen_arm_new_nodes.h"
40
41
#include "arm_nodes_attr.h"
#include "arm_new_nodes.h"
42
43
44

static arm_code_gen_t  *cg;

45
46
47
static unsigned arm_ror(unsigned v, unsigned ror)
{
	return (v << (32 - ror)) | (v >> ror);
48
49
50
51
52
53
54
55
56
}

/*
 * construct 8bit values and rot amounts for a value.
 */
void arm_gen_vals_from_word(unsigned int value, arm_vals *result)
{
	int initial = 0;

57
	/* TODO: not optimal yet, as we only "shift" the value and don't take advantage of rotations */
58
59

	/* special case: we prefer shift amount 0 */
60
	if (value <= 0xFF) {
61
		result->values[0] = value;
62
		result->rors[0]   = 0;
63
64
65
66
		result->ops       = 1;
		return;
	}

67
68
69
70
71
	result->ops = 0;
	do {
		while ( (value & 0x3) == 0) {
			value  >>= 2;
			initial += 2;
72
73
		}

74
75
76
		result->values[result->ops] = value & 0xFF;
		result->rors[result->ops]   = (32-initial) % 32;
		++result->ops;
77

78
79
		value  >>= 8;
		initial += 8;
80
	} while (value != 0);
81
82
83
}

/**
84
85
 * Returns non.zero if the given offset can be directly encoded into an ARM
 * instruction.
86
 */
87
88
static int allowed_arm_immediate(int offset, arm_vals *result)
{
89
90
91
92
93
94
95
	arm_gen_vals_from_word(offset, result);
	return result->ops <= 1;
}

/**
 * Fix an IncSP node if the offset gets too big
 */
96
97
98
99
static void peephole_be_IncSP(ir_node *node)
{
	ir_node  *first;
	ir_node  *last;
100
	ir_node  *block;
101
102
103
104
105
106
	int       offset;
	int       cnt;
	int       sign = 1;
	arm_vals  v;
	const ir_edge_t *edge;
	const ir_edge_t *next;
107
108

	/* first optimize incsp->incsp combinations */
109
	node = be_peephole_IncSP_IncSP(node);
110
111
112
113
114
115
116
117
118
119

	offset = be_get_IncSP_offset(node);
	/* can be transformed into Add OR Sub */
	if (offset < 0) {
		sign = -1;
		offset = -offset;
	}
	if (allowed_arm_immediate(offset, &v))
		return;

120
	be_set_IncSP_offset(node, sign * arm_ror(v.values[0], v.rors[0]));
121

122
	first = node;
123
124
	block = get_nodes_block(node);
	for (cnt = 1; cnt < v.ops; ++cnt) {
125
126
127
		int value = sign * arm_ror(v.values[cnt], v.rors[cnt]);
		ir_node *next = be_new_IncSP(&arm_gp_regs[REG_SP], block, node,
		                             value, 1);
128
129
130
		sched_add_after(node, next);
		node = next;
	}
131
132
133
134
135
136
137
138
139
140
141

	/* reattach IncSP users */
	last = node;
	node = sched_next(first);
	foreach_out_edge_safe(first, edge, next) {
		ir_node *user = get_edge_src_irn(edge);
		int      pos  = get_edge_src_pos(edge);
		if (user == node)
			continue;
		set_irn_n(user, pos, last);
	}
142
143
144
145
146
147
148
}

/**
 * creates the address by Adds
 */
static ir_node *gen_ptr_add(ir_node *node, ir_node *frame, arm_vals *v)
{
149
	dbg_info *dbgi  = get_irn_dbg_info(node);
150
151
152
153
	ir_node  *block = get_nodes_block(node);
	int     cnt;
	ir_node *ptr;

154
	ptr = new_bd_arm_Add_imm(dbgi, block, frame, v->values[0], v->rors[0]);
155
	arch_set_irn_register(ptr, &arm_gp_regs[REG_R12]);
156
157
158
	sched_add_before(node, ptr);

	for (cnt = 1; cnt < v->ops; ++cnt) {
159
160
		ir_node *next = new_bd_arm_Add_imm(dbgi, block, ptr, v->values[cnt],
		                                   v->rors[cnt]);
161
		arch_set_irn_register(next, &arm_gp_regs[REG_R12]);
162
163
164
165
166
167
168
169
170
171
172
		sched_add_before(node, next);
		ptr = next;
	}
	return ptr;
}

/**
* creates the address by Subs
*/
static ir_node *gen_ptr_sub(ir_node *node, ir_node *frame, arm_vals *v)
{
173
	dbg_info *dbgi  = get_irn_dbg_info(node);
174
175
176
177
	ir_node  *block = get_nodes_block(node);
	int     cnt;
	ir_node *ptr;

178
	ptr = new_bd_arm_Sub_imm(dbgi, block, frame, v->values[0], v->rors[0]);
179
	arch_set_irn_register(ptr, &arm_gp_regs[REG_R12]);
180
181
182
	sched_add_before(node, ptr);

	for (cnt = 1; cnt < v->ops; ++cnt) {
183
184
		ir_node *next = new_bd_arm_Sub_imm(dbgi, block, ptr, v->values[cnt],
		                                   v->rors[cnt]);
185
		arch_set_irn_register(next, &arm_gp_regs[REG_R12]);
186
187
188
189
190
191
		sched_add_before(node, next);
		ptr = next;
	}
	return ptr;
}

192
193
194
195
196
197
198
199
/** fix frame addresses which are too big */
static void peephole_arm_FrameAddr(ir_node *node)
{
	arm_SymConst_attr_t *attr   = get_arm_SymConst_attr(node);
	int                  offset = attr->fp_offset;
	arm_vals             v;
	ir_node             *base;
	ir_node             *ptr;
200
201
202
203

	if (allowed_arm_immediate(offset, &v))
		return;

204
205
206
	base = get_irn_n(node, n_arm_FrameAddr_base);
	/* TODO: suboptimal */
	ptr = gen_ptr_add(node, base, &v);
207

208
209
	attr->fp_offset = 0;
	set_irn_n(node, n_arm_FrameAddr_base, ptr);
210
211
212
}

/**
213
 * Fix stackpointer relative stores if the offset gets too big
214
 */
215
216
217
218
219
220
221
static void peephole_arm_Str_Ldr(ir_node *node)
{
	arm_load_store_attr_t *attr    = get_arm_load_store_attr(node);
	int                    offset  = attr->offset;
	int                    use_add = 1;
	ir_node               *ptr;
	arm_vals              v;
222
223
224

	if (allowed_arm_immediate(offset, &v))
		return;
225
226
227
228
229
230

	/* we should only have too big offsets for frame entities */
	if (!attr->is_frame_entity) {
		fprintf(stderr,
		        "POSSIBLE ARM BACKEND PROBLEM: offset in Store too big\n");
	}
231
232
233
234
235
	if (offset < 0) {
		use_add = 0;
		offset = -offset;
	}

236
237
	if (is_arm_Str(node)) {
		ptr = get_irn_n(node, n_arm_Str_ptr);
238
	} else {
239
240
		assert(is_arm_Ldr(node));
		ptr = get_irn_n(node, n_arm_Ldr_ptr);
241
242
	}

243
244
	if (use_add) {
		ptr = gen_ptr_add(node, ptr, &v);
245
	} else {
246
		ptr = gen_ptr_sub(node, ptr, &v);
247
248
	}

249
250
251
252
253
254
255
256
257
	/* TODO: sub-optimal, the last offset could probably be left inside the
	   store */
	if (is_arm_Str(node)) {
		set_irn_n(node, n_arm_Str_ptr, ptr);
	} else {
		assert(is_arm_Ldr(node));
		set_irn_n(node, n_arm_Ldr_ptr, ptr);
	}
	attr->offset = 0;
258
259
260
261
262
}

/**
 * Register a peephole optimization function.
 */
263
264
static void register_peephole_optimisation(ir_op *op, peephole_opt_func func)
{
265
266
267
268
269
270
271
	assert(op->ops.generic == NULL);
	op->ops.generic = (op_func)func;
}

/* Perform peephole-optimizations. */
void arm_peephole_optimization(arm_code_gen_t *new_cg)
{
272
	cg = new_cg;
273
274
275

	/* register peephole optimizations */
	clear_irp_opcodes_generic_func();
276
277
278
279
	register_peephole_optimisation(op_be_IncSP,      peephole_be_IncSP);
	register_peephole_optimisation(op_arm_Str,       peephole_arm_Str_Ldr);
	register_peephole_optimisation(op_arm_Ldr,       peephole_arm_Str_Ldr);
	register_peephole_optimisation(op_arm_FrameAddr, peephole_arm_FrameAddr);
280
281
282

	be_peephole_opt(cg->birg);
}