arm_optimize.c 6.82 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
/*
 * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
 *
 * This file is part of libFirm.
 *
 * This file may be distributed and/or modified under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation and appearing in the file LICENSE.GPL included in the
 * packaging of this file.
 *
 * Licensees holding valid libFirm Professional Edition licenses may use
 * this file in accordance with the libFirm Commercial License.
 * Agreement provided with the Software.
 *
 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE.
 */

/**
 * @file
 * @brief       Implements several optimizations for ARM.
 * @author      Michael Beck
Michael Beck's avatar
Michael Beck committed
24
 * @version     $Id$
25
26
27
28
29
 */
#include "config.h"

#include "irgmod.h"
#include "ircons.h"
30
#include "iredges.h"
31
32
#include "error.h"

33
#include "benode.h"
34
35
36
37
38
39
#include "bepeephole.h"
#include "besched.h"

#include "arm_optimize.h"
#include "gen_arm_regalloc_if.h"
#include "gen_arm_new_nodes.h"
40
41
#include "arm_nodes_attr.h"
#include "arm_new_nodes.h"
42

43
44
45
static unsigned arm_ror(unsigned v, unsigned ror)
{
	return (v << (32 - ror)) | (v >> ror);
46
47
48
49
50
51
52
53
54
}

/*
 * construct 8bit values and rot amounts for a value.
 */
void arm_gen_vals_from_word(unsigned int value, arm_vals *result)
{
	int initial = 0;

55
	/* TODO: not optimal yet, as we only "shift" the value and don't take advantage of rotations */
56
57

	/* special case: we prefer shift amount 0 */
58
	if (value <= 0xFF) {
59
		result->values[0] = value;
60
		result->rors[0]   = 0;
61
62
63
64
		result->ops       = 1;
		return;
	}

65
66
67
68
69
	result->ops = 0;
	do {
		while ( (value & 0x3) == 0) {
			value  >>= 2;
			initial += 2;
70
71
		}

72
73
74
		result->values[result->ops] = value & 0xFF;
		result->rors[result->ops]   = (32-initial) % 32;
		++result->ops;
75

76
77
		value  >>= 8;
		initial += 8;
78
	} while (value != 0);
79
80
81
}

/**
82
83
 * Returns non.zero if the given offset can be directly encoded into an ARM
 * instruction.
84
 */
85
86
static int allowed_arm_immediate(int offset, arm_vals *result)
{
87
88
89
90
91
92
93
	arm_gen_vals_from_word(offset, result);
	return result->ops <= 1;
}

/**
 * Fix an IncSP node if the offset gets too big
 */
94
95
96
97
static void peephole_be_IncSP(ir_node *node)
{
	ir_node  *first;
	ir_node  *last;
98
	ir_node  *block;
99
100
101
102
103
104
	int       offset;
	int       cnt;
	int       sign = 1;
	arm_vals  v;
	const ir_edge_t *edge;
	const ir_edge_t *next;
105
106

	/* first optimize incsp->incsp combinations */
107
	node = be_peephole_IncSP_IncSP(node);
108
109
110
111
112
113
114
115
116
117

	offset = be_get_IncSP_offset(node);
	/* can be transformed into Add OR Sub */
	if (offset < 0) {
		sign = -1;
		offset = -offset;
	}
	if (allowed_arm_immediate(offset, &v))
		return;

118
	be_set_IncSP_offset(node, sign * arm_ror(v.values[0], v.rors[0]));
119

120
	first = node;
121
122
	block = get_nodes_block(node);
	for (cnt = 1; cnt < v.ops; ++cnt) {
123
		int value = sign * arm_ror(v.values[cnt], v.rors[cnt]);
124
		ir_node *next = be_new_IncSP(&arm_registers[REG_SP], block, node,
125
		                             value, 1);
126
127
128
		sched_add_after(node, next);
		node = next;
	}
129
130
131
132
133
134
135
136
137
138
139

	/* reattach IncSP users */
	last = node;
	node = sched_next(first);
	foreach_out_edge_safe(first, edge, next) {
		ir_node *user = get_edge_src_irn(edge);
		int      pos  = get_edge_src_pos(edge);
		if (user == node)
			continue;
		set_irn_n(user, pos, last);
	}
140
141
142
143
144
145
146
}

/**
 * creates the address by Adds
 */
static ir_node *gen_ptr_add(ir_node *node, ir_node *frame, arm_vals *v)
{
147
	dbg_info *dbgi  = get_irn_dbg_info(node);
148
149
150
151
	ir_node  *block = get_nodes_block(node);
	int     cnt;
	ir_node *ptr;

152
	ptr = new_bd_arm_Add_imm(dbgi, block, frame, v->values[0], v->rors[0]);
153
	arch_set_irn_register(ptr, &arm_registers[REG_R12]);
154
155
156
	sched_add_before(node, ptr);

	for (cnt = 1; cnt < v->ops; ++cnt) {
157
158
		ir_node *next = new_bd_arm_Add_imm(dbgi, block, ptr, v->values[cnt],
		                                   v->rors[cnt]);
159
		arch_set_irn_register(next, &arm_registers[REG_R12]);
160
161
162
163
164
165
166
167
168
169
170
		sched_add_before(node, next);
		ptr = next;
	}
	return ptr;
}

/**
* creates the address by Subs
*/
static ir_node *gen_ptr_sub(ir_node *node, ir_node *frame, arm_vals *v)
{
171
	dbg_info *dbgi  = get_irn_dbg_info(node);
172
173
174
175
	ir_node  *block = get_nodes_block(node);
	int     cnt;
	ir_node *ptr;

176
	ptr = new_bd_arm_Sub_imm(dbgi, block, frame, v->values[0], v->rors[0]);
177
	arch_set_irn_register(ptr, &arm_registers[REG_R12]);
178
179
180
	sched_add_before(node, ptr);

	for (cnt = 1; cnt < v->ops; ++cnt) {
181
182
		ir_node *next = new_bd_arm_Sub_imm(dbgi, block, ptr, v->values[cnt],
		                                   v->rors[cnt]);
183
		arch_set_irn_register(next, &arm_registers[REG_R12]);
184
185
186
187
188
189
		sched_add_before(node, next);
		ptr = next;
	}
	return ptr;
}

190
191
192
193
194
195
196
197
/** fix frame addresses which are too big */
static void peephole_arm_FrameAddr(ir_node *node)
{
	arm_SymConst_attr_t *attr   = get_arm_SymConst_attr(node);
	int                  offset = attr->fp_offset;
	arm_vals             v;
	ir_node             *base;
	ir_node             *ptr;
198
199
200
201

	if (allowed_arm_immediate(offset, &v))
		return;

202
203
204
	base = get_irn_n(node, n_arm_FrameAddr_base);
	/* TODO: suboptimal */
	ptr = gen_ptr_add(node, base, &v);
205

206
207
	attr->fp_offset = 0;
	set_irn_n(node, n_arm_FrameAddr_base, ptr);
208
209
210
}

/**
211
 * Fix stackpointer relative stores if the offset gets too big
212
 */
213
214
215
216
217
218
219
static void peephole_arm_Str_Ldr(ir_node *node)
{
	arm_load_store_attr_t *attr    = get_arm_load_store_attr(node);
	int                    offset  = attr->offset;
	int                    use_add = 1;
	ir_node               *ptr;
	arm_vals              v;
220
221
222

	if (allowed_arm_immediate(offset, &v))
		return;
223
224
225
226
227
228

	/* we should only have too big offsets for frame entities */
	if (!attr->is_frame_entity) {
		fprintf(stderr,
		        "POSSIBLE ARM BACKEND PROBLEM: offset in Store too big\n");
	}
229
230
231
232
233
	if (offset < 0) {
		use_add = 0;
		offset = -offset;
	}

234
235
	if (is_arm_Str(node)) {
		ptr = get_irn_n(node, n_arm_Str_ptr);
236
	} else {
237
238
		assert(is_arm_Ldr(node));
		ptr = get_irn_n(node, n_arm_Ldr_ptr);
239
240
	}

241
242
	if (use_add) {
		ptr = gen_ptr_add(node, ptr, &v);
243
	} else {
244
		ptr = gen_ptr_sub(node, ptr, &v);
245
246
	}

247
248
249
250
251
252
253
254
255
	/* TODO: sub-optimal, the last offset could probably be left inside the
	   store */
	if (is_arm_Str(node)) {
		set_irn_n(node, n_arm_Str_ptr, ptr);
	} else {
		assert(is_arm_Ldr(node));
		set_irn_n(node, n_arm_Ldr_ptr, ptr);
	}
	attr->offset = 0;
256
257
258
259
260
}

/**
 * Register a peephole optimization function.
 */
261
262
static void register_peephole_optimisation(ir_op *op, peephole_opt_func func)
{
263
264
265
266
267
	assert(op->ops.generic == NULL);
	op->ops.generic = (op_func)func;
}

/* Perform peephole-optimizations. */
268
void arm_peephole_optimization(ir_graph *irg)
269
270
271
{
	/* register peephole optimizations */
	clear_irp_opcodes_generic_func();
272
273
274
275
	register_peephole_optimisation(op_be_IncSP,      peephole_be_IncSP);
	register_peephole_optimisation(op_arm_Str,       peephole_arm_Str_Ldr);
	register_peephole_optimisation(op_arm_Ldr,       peephole_arm_Str_Ldr);
	register_peephole_optimisation(op_arm_FrameAddr, peephole_arm_FrameAddr);
276

277
	be_peephole_opt(irg);
278
}