irarch.c 28.7 KB
Newer Older
Christian Würdig's avatar
Christian Würdig committed
1
2
/*
 * This file is part of libFirm.
3
 * Copyright (C) 2012 University of Karlsruhe.
Christian Würdig's avatar
Christian Würdig committed
4
5
 */

Michael Beck's avatar
Michael Beck committed
6
/**
Matthias Braun's avatar
Matthias Braun committed
7
8
9
10
 * @file
 * @brief   Machine dependent Firm optimizations.
 * @date    28.9.2004
 * @author  Sebastian Hack, Michael Beck
11
 *
12
13
 * Implements "Strength Reduction of Multiplications by Integer Constants"
 * by Youfeng Wu.
14
 * Implements Division and Modulo by Consts from "Hackers Delight",
Michael Beck's avatar
Michael Beck committed
15
 */
16
#include <stdlib.h>
17
18
19
20
21
22
23
24
#include <assert.h>

#include "irnode_t.h"
#include "irgraph_t.h"
#include "irmode_t.h"
#include "iropt_t.h"
#include "ircons_t.h"
#include "irgmod.h"
25
#include "irverify.h"
Michael Beck's avatar
Michael Beck committed
26
#include "tv_t.h"
27
28
29
#include "dbginfo_t.h"
#include "iropt_dbg.h"
#include "irflag_t.h"
30
#include "irhooks.h"
31
#include "ircons.h"
32
#include "irarch_t.h"
33
#include "irflag.h"
34
#include "be.h"
Matthias Braun's avatar
Matthias Braun committed
35
#include "panic.h"
36
37
38
39

/** The bit mask, which optimizations to apply. */
static arch_dep_opts_t opts;

40
41
void arch_dep_set_opts(arch_dep_opts_t the_opts)
{
42
	opts = the_opts;
43
44
}

45
/** check, whether a mode allows a Mulh instruction. */
46
static int allow_Mulh(const ir_settings_arch_dep_t *params, ir_mode *mode)
47
{
48
49
	if (get_mode_size_bits(mode) > params->max_bits_for_mulh)
		return 0;
Christoph Mallon's avatar
Christoph Mallon committed
50
	return mode_is_signed(mode) ? params->allow_mulhs : params->allow_mulhu;
51
52
}

53
54
55
56
57
/**
 * An instruction,
 */
typedef struct instruction instruction;
struct instruction {
Matthias Braun's avatar
Matthias Braun committed
58
59
60
61
62
	insn_kind    kind;        /**< the instruction kind */
	instruction *in[2];       /**< the ins */
	unsigned     shift_count; /**< shift count for LEA and SHIFT */
	ir_node     *irn;         /**< the generated node for this instruction */
	int          costs;       /**< the costs for this instruction */
63
64
65
66
67
};

/**
 * The environment for the strength reduction of multiplications.
 */
68
typedef struct mul_env {
Matthias Braun's avatar
Matthias Braun committed
69
	struct obstack                obst;
70
	const ir_settings_arch_dep_t *params;
Matthias Braun's avatar
Matthias Braun committed
71
72
73
74
75
76
77
78
79
80
81
	ir_mode     *mode;     /**< the mode of the multiplication constant */
	unsigned     bits;     /**< number of bits in the mode */
	unsigned     max_S;    /**< the maximum LEA shift value. */
	instruction *root;     /**< the root of the instruction tree */
	ir_node     *op;       /**< the operand that is multiplied */
	ir_node     *blk;      /**< the block where the new graph is built */
	ir_graph    *irg;
	dbg_info    *dbg;      /**< the debug info for the new graph. */
	ir_mode     *shf_mode; /**< the (unsigned) mode for the shift constants */
	bool         fail;     /**< set if the insn sequence fails constraints */
	int          n_shift;  /**< maximum number of allowed shift instructions */
82
83
84
85
86

	evaluate_costs_func evaluate;  /**< the evaluate callback */
} mul_env;

/**
Michael Beck's avatar
Michael Beck committed
87
88
 * Some kind of default evaluator. Return the cost of
 * instructions.
89
 */
90
static int default_evaluate(insn_kind kind, const ir_mode *mode, ir_tarval *tv)
91
{
Matthias Braun's avatar
Matthias Braun committed
92
93
	(void)mode;
	(void)tv;
94
95
96
97
98
99
100
101
	if (kind == MUL)
		return 13;
	return 1;
}

/**
 * emit a LEA (or an Add) instruction
 */
Matthias Braun's avatar
Matthias Braun committed
102
103
static instruction *emit_LEA(mul_env *env, instruction *a, instruction *b,
                             unsigned shift)
104
{
105
	instruction *res = OALLOC(&env->obst, instruction);
106
107
108
109
110
111
112
113
114
115
	res->kind = shift > 0 ? LEA : ADD;
	res->in[0] = a;
	res->in[1] = b;
	res->shift_count = shift;
	res->irn = NULL;
	res->costs = -1;
	return res;
}

/**
Michael Beck's avatar
Michael Beck committed
116
 * emit a SHIFT (or an Add or a Zero) instruction
117
 */
118
119
static instruction *emit_SHIFT(mul_env *env, instruction *a, unsigned shift)
{
120
	instruction *res = OALLOC(&env->obst, instruction);
Michael Beck's avatar
Michael Beck committed
121
122
123
124
125
126
127
	if (shift == env->bits) {
		/* a 2^bits with bits resolution is a zero */
		res->kind = ZERO;
		res->in[0] = NULL;
		res->in[1] = NULL;
		res->shift_count = 0;
	} else if (shift != 1) {
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
		res->kind = SHIFT;
		res->in[0] = a;
		res->in[1] = NULL;
		res->shift_count = shift;
	} else {
		res->kind = ADD;
		res->in[0] = a;
		res->in[1] = a;
		res->shift_count = 0;
	}
	res->irn = NULL;
	res->costs = -1;
	return res;
}

/**
 * emit a SUB instruction
 */
146
147
static instruction *emit_SUB(mul_env *env, instruction *a, instruction *b)
{
148
	instruction *res = OALLOC(&env->obst, instruction);
149
150
151
152
153
154
155
156
157
158
159
160
	res->kind = SUB;
	res->in[0] = a;
	res->in[1] = b;
	res->shift_count = 0;
	res->irn = NULL;
	res->costs = -1;
	return res;
}

/**
 * emit the ROOT instruction
 */
161
162
static instruction *emit_ROOT(mul_env *env, ir_node *root_op)
{
163
	instruction *res = OALLOC(&env->obst, instruction);
164
165
166
167
168
169
170
171
172
173
174
175
	res->kind = ROOT;
	res->in[0] = NULL;
	res->in[1] = NULL;
	res->shift_count = 0;
	res->irn = root_op;
	res->costs = 0;
	return res;
}

/**
 * Returns the condensed representation of the tarval tv
 */
Matthias Braun's avatar
Matthias Braun committed
176
static unsigned char *value_to_condensed(mul_env *env, ir_tarval *tv, int *pr)
177
{
178
179
180
	ir_mode       *mode = get_tarval_mode(tv);
	unsigned       bits = get_mode_size_bits(mode);
	unsigned char *R    = OALLOCN(&env->obst, unsigned char, bits);
181

Matthias Braun's avatar
Matthias Braun committed
182
	int r = 0;
183
184
	for (unsigned i = 0, l = 0; i < bits; ++i) {
		if (tarval_get_bit(tv, i)) {
185
186
187
188
189
190
191
192
193
194
195
196
197
			R[r] = i - l;
			l = i;
			++r;
		}
	}

	*pr = r;
	return R;
}

/**
 * Calculate the gain when using the generalized complementary technique
 */
198
199
static int calculate_gain(unsigned char *R, int r)
{
Michael Beck's avatar
Michael Beck committed
200
	int max_gain = 0;
Matthias Braun's avatar
Matthias Braun committed
201
202
203
	int gain     = 2 - 3 - R[0];
	int idx      = -1;
	for (int i = 2; i < r; ++i) {
204
205
206
207
208
209
210
211
		/* calculate the gain for r from the gain for r-1 */
		gain += 2 - R[i - 1];

		if (gain > max_gain) {
			max_gain = gain;
			idx = i;
		}
	}
Michael Beck's avatar
Michael Beck committed
212
	return idx;
213
214
215
216
217
}

/**
 * Calculates the condensed complement of a given (R,r) tuple
 */
Matthias Braun's avatar
Matthias Braun committed
218
219
static unsigned char *complement_condensed(mul_env *env, unsigned char *R,
                                           int gain, int *prs)
220
{
Matthias Braun's avatar
Matthias Braun committed
221
	unsigned char *value = OALLOCNZ(&env->obst, unsigned char, env->bits);
222

Matthias Braun's avatar
Matthias Braun committed
223
224
	int j = 0;
	for (int i = 0; i < gain; ++i) {
225
226
227
228
229
		j += R[i];
		value[j] = 1;
	}

	/* negate and propagate 1 */
Matthias Braun's avatar
Matthias Braun committed
230
231
	unsigned char c = 1;
	for (int i = 0; i <= j; ++i) {
232
233
234
235
236
237
238
		unsigned char v = !value[i];

		value[i] = v ^ c;
		c = v & c;
	}

	/* condense it again */
Matthias Braun's avatar
Matthias Braun committed
239
240
241
	int l = 0;
	int r = 0;
	for (int i = 0; i <= j; ++i) {
242
		if (value[i] == 1) {
Matthias Braun's avatar
Matthias Braun committed
243
			value[r] = i - l;
244
245
246
247
248
249
			l = i;
			++r;
		}
	}

	*prs = r;
Matthias Braun's avatar
Matthias Braun committed
250
	return value;
251
252
253
254
255
}

/**
 * creates a tarval from a condensed representation.
 */
Matthias Braun's avatar
Matthias Braun committed
256
static ir_tarval *condensed_to_value(mul_env *env, unsigned char *R, int r)
257
{
258
259
260
261
	ir_tarval *tv  = get_mode_one(env->mode);
	ir_tarval *res = NULL;
	for (int i = 0; i < r; ++i) {
		int j = R[i];
Matthias Braun's avatar
Matthias Braun committed
262
		if (j != 0)
263
			tv = tarval_shl_unsigned(tv, j);
264
265
266
267
268
		res = res ? tarval_add(res, tv) : tv;
	}
	return res;
}

Matthias Braun's avatar
Matthias Braun committed
269
270
static instruction *basic_decompose_mul(mul_env *env, unsigned char *R, int r,
                                        ir_tarval *N);
271
272
273
274

/*
 * handle simple cases with up-to 2 bits set
 */
Matthias Braun's avatar
Matthias Braun committed
275
276
static instruction *decompose_simple_cases(mul_env *env, unsigned char *R,
                                           int r)
277
{
278
279
280
281
	if (r == 1) {
		return emit_SHIFT(env, env->root, R[0]);
	} else {
		assert(r == 2);
Matthias Braun's avatar
Matthias Braun committed
282
		instruction *ins = env->root;
283
284
285
286
287
288
289
		if (R[1] <= env->max_S) {
			ins = emit_LEA(env, ins, ins, R[1]);
			if (R[0] != 0) {
				ins = emit_SHIFT(env, ins, R[0]);
			}
			return ins;
		}
290
291
292
293
		if (R[0] != 0) {
			ins = emit_SHIFT(env, ins, R[0]);
		}

Matthias Braun's avatar
Matthias Braun committed
294
		instruction *ins2 = emit_SHIFT(env, env->root, R[0] + R[1]);
295
296
297
298
299
300
301
		return emit_LEA(env, ins, ins2, 0);
	}
}

/**
 * Main decompose driver.
 */
Matthias Braun's avatar
Matthias Braun committed
302
303
static instruction *decompose_mul(mul_env *env, unsigned char *R, int r,
                                  ir_tarval *N)
304
{
305
	if (r <= 2)
Matthias Braun's avatar
Matthias Braun committed
306
		return decompose_simple_cases(env, R, r);
307

308
	if (env->params->also_use_subs) {
Matthias Braun's avatar
Matthias Braun committed
309
		int gain = calculate_gain(R, r);
310
		if (gain > 0) {
Matthias Braun's avatar
Matthias Braun committed
311
312
313
314
			int            r1;
			unsigned char *R1 = complement_condensed(env, R, gain, &r1);
			int            r2 = r - gain + 1;
			unsigned char *R2 = OALLOCN(&env->obst, unsigned char, r2);
315

Matthias Braun's avatar
Matthias Braun committed
316
317
			int k = 1;
			for (int i = 0; i < gain; ++i) {
318
319
320
321
				k += R[i];
			}
			R2[0] = k;
			R2[1] = R[gain] - 1;
Matthias Braun's avatar
Matthias Braun committed
322
			int j = 2;
Michael Beck's avatar
Michael Beck committed
323
324
325
326
327
328
			if (R2[1] == 0) {
				/* Two identical bits: normalize */
				++R2[0];
				--j;
				--r2;
			}
Matthias Braun's avatar
Matthias Braun committed
329
			for (int i = gain + 1; i < r; ++i) {
Michael Beck's avatar
Michael Beck committed
330
				R2[j++] = R[i];
331
332
			}

Matthias Braun's avatar
Matthias Braun committed
333
334
			instruction *instr1 = decompose_mul(env, R1, r1, NULL);
			instruction *instr2 = decompose_mul(env, R2, r2, NULL);
335
336
337
338
339
340
341
			return emit_SUB(env, instr2, instr1);
		}
	}

	if (N == NULL)
		N = condensed_to_value(env, R, r);

Matthias Braun's avatar
Matthias Braun committed
342
	for (unsigned i = env->max_S; i > 0; --i) {
Matthias Braun's avatar
Matthias Braun committed
343
344
		ir_tarval *div_res, *mod_res;
		ir_tarval *tv = new_tarval_from_long((1 << i) + 1, env->mode);
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363

		div_res = tarval_divmod(N, tv, &mod_res);
		if (mod_res == get_mode_null(env->mode)) {
			unsigned char *Rs;
			int rs;

			Rs = value_to_condensed(env, div_res, &rs);
			if (rs < r) {
				instruction *N1 = decompose_mul(env, Rs, rs, div_res);
				return emit_LEA(env, N1, N1, i);
			}
		}
	}
	return basic_decompose_mul(env, R, r, N);
}

/**
 * basic decomposition routine
 */
Matthias Braun's avatar
Matthias Braun committed
364
365
static instruction *basic_decompose_mul(mul_env *env, unsigned char *R, int r,
                                        ir_tarval *N)
366
{
367
	if (R[0] == 0) {                    /* Case 1 */
Matthias Braun's avatar
Matthias Braun committed
368
		unsigned t = R[1] > MAX(env->max_S, R[1]);
369
		R[1] -= t;
Matthias Braun's avatar
Matthias Braun committed
370
371
		instruction *ns = decompose_mul(env, &R[1], r - 1, N);
		return emit_LEA(env, env->root, ns, t);
372
	} else if (R[0] <= env->max_S) {    /* Case 2 */
Matthias Braun's avatar
Matthias Braun committed
373
		unsigned t = R[0];
374
		R[1] += t;
Matthias Braun's avatar
Matthias Braun committed
375
376
		instruction *ns = decompose_mul(env, &R[1], r - 1, N);
		return emit_LEA(env, ns, env->root, t);
377
	} else {
Matthias Braun's avatar
Matthias Braun committed
378
		unsigned t = R[0];
379
		R[0] = 0;
Matthias Braun's avatar
Matthias Braun committed
380
381
		instruction *ns = decompose_mul(env, R, r, N);
		return emit_SHIFT(env, ns, t);
382
383
384
385
	}
}

/**
386
 * Recursively build the graph for the instructions.
387
388
389
390
 *
 * @param env   the environment
 * @param inst  the instruction
 */
391
392
static ir_node *build_graph(mul_env *env, instruction *inst)
{
Matthias Braun's avatar
Matthias Braun committed
393
	if (inst->irn != NULL)
394
395
		return inst->irn;

Matthias Braun's avatar
Matthias Braun committed
396
	ir_graph *irg = env->irg;
397
	switch (inst->kind) {
Matthias Braun's avatar
Matthias Braun committed
398
399
400
401
	case LEA: {
		ir_node *l = build_graph(env, inst->in[0]);
		ir_node *r = build_graph(env, inst->in[1]);
		ir_node *c = new_r_Const_long(irg, env->shf_mode, inst->shift_count);
402
		ir_node *s = new_rd_Shl(env->dbg, env->blk, r, c);
403
		return inst->irn = new_rd_Add(env->dbg, env->blk, l, s);
Matthias Braun's avatar
Matthias Braun committed
404
405
406
407
	}
	case SHIFT: {
		ir_node *l = build_graph(env, inst->in[0]);
		ir_node *c = new_r_Const_long(irg, env->shf_mode, inst->shift_count);
408
		return inst->irn = new_rd_Shl(env->dbg, env->blk, l, c);
Matthias Braun's avatar
Matthias Braun committed
409
410
411
412
	}
	case SUB: {
		ir_node *l = build_graph(env, inst->in[0]);
		ir_node *r = build_graph(env, inst->in[1]);
413
		return inst->irn = new_rd_Sub(env->dbg, env->blk, l, r, env->mode);
Matthias Braun's avatar
Matthias Braun committed
414
415
416
417
	}
	case ADD: {
		ir_node *l = build_graph(env, inst->in[0]);
		ir_node *r = build_graph(env, inst->in[1]);
418
		return inst->irn = new_rd_Add(env->dbg, env->blk, l, r);
Matthias Braun's avatar
Matthias Braun committed
419
	}
Michael Beck's avatar
Michael Beck committed
420
	case ZERO:
421
		return inst->irn = new_r_Const_null(irg, env->mode);
Matthias Braun's avatar
Matthias Braun committed
422
423
424
	case ROOT:
	case MUL:
		break;
425
	}
426
	panic("unsupported instruction kind");
427
428
429
430
}

/**
 * Calculate the costs for the given instruction sequence.
Matthias Braun's avatar
Matthias Braun committed
431
432
 * Note that additional costs due to higher register pressure are NOT
 * evaluated yet
433
 */
434
435
static int evaluate_insn(mul_env *env, instruction *inst)
{
436
437
438
439
440
441
442
443
	if (inst->costs >= 0) {
		/* was already evaluated */
		return 0;
	}

	switch (inst->kind) {
	case LEA:
	case SUB:
Matthias Braun's avatar
Matthias Braun committed
444
445
446
447
	case ADD: {
		int costs = evaluate_insn(env, inst->in[0])
		          + evaluate_insn(env, inst->in[1])
		          + env->evaluate(inst->kind, env->mode, NULL);
448
449
		inst->costs = costs;
		return costs;
Matthias Braun's avatar
Matthias Braun committed
450
	}
451
	case SHIFT:
452
		if (inst->shift_count > env->params->highest_shift_amount)
Matthias Braun's avatar
Matthias Braun committed
453
			env->fail = true;
454
		if (env->n_shift <= 0)
Matthias Braun's avatar
Matthias Braun committed
455
			env->fail = true;
456
457
		else
			--env->n_shift;
Matthias Braun's avatar
Matthias Braun committed
458
459
		int costs = evaluate_insn(env, inst->in[0])
		          + env->evaluate(inst->kind, env->mode, NULL);
460
461
		inst->costs = costs;
		return costs;
Matthias Braun's avatar
Matthias Braun committed
462
463
464
	case ZERO: {
		int costs = env->evaluate(inst->kind, env->mode, NULL);
		inst->costs = costs;
Michael Beck's avatar
Michael Beck committed
465
		return costs;
Matthias Braun's avatar
Matthias Braun committed
466
	}
Matthias Braun's avatar
Matthias Braun committed
467
468
469
	case MUL:
	case ROOT:
		break;
470
	}
471
	panic("unsupported instruction kind");
472
473
474
475
476
}

/**
 * Evaluate the replacement instructions and build a new graph
 * if faster than the Mul.
Michael Beck's avatar
Michael Beck committed
477
 * Returns the root of the new graph then or irn otherwise.
478
479
480
481
482
483
484
 *
 * @param irn      the Mul operation
 * @param operand  the multiplication operand
 * @param tv       the multiplication constant
 *
 * @return the new graph
 */
Matthias Braun's avatar
Matthias Braun committed
485
static ir_node *do_decomposition(ir_node *irn, ir_node *operand, ir_tarval *tv)
486
{
Matthias Braun's avatar
Matthias Braun committed
487
	mul_env env;
488
	obstack_init(&env.obst);
489
	env.params   = be_get_backend_param()->dep_param;
490
	env.mode     = get_tarval_mode(tv);
Michael Beck's avatar
Michael Beck committed
491
	env.bits     = (unsigned)get_mode_size_bits(env.mode);
492
493
	env.max_S    = 3;
	env.root     = emit_ROOT(&env, operand);
Matthias Braun's avatar
Matthias Braun committed
494
	env.fail     = false;
495
	env.n_shift  = env.params->maximum_shifts;
Matthias Braun's avatar
Matthias Braun committed
496
497
	env.evaluate = env.params->evaluate != NULL ? env.params->evaluate
	                                            : default_evaluate;
498
	env.irg      = get_irn_irg(irn);
499

Matthias Braun's avatar
Matthias Braun committed
500
501
502
	int            r;
	unsigned char *R    = value_to_condensed(&env, tv, &r);
	instruction   *inst = decompose_mul(&env, R, r, tv);
503
504

	/* the paper suggests 70% here */
Matthias Braun's avatar
Matthias Braun committed
505
506
	int      mul_costs = (env.evaluate(MUL, env.mode, tv) * 7 + 5) / 10;
	ir_node *res       = irn;
507
508
509
510
511
512
513
514
515
516
517
518
519
520
	if (evaluate_insn(&env, inst) <= mul_costs && !env.fail) {
		env.op       = operand;
		env.blk      = get_nodes_block(irn);
		env.dbg      = get_irn_dbg_info(irn);
		env.shf_mode = find_unsigned_mode(env.mode);
		if (env.shf_mode == NULL)
			env.shf_mode = mode_Iu;

		res = build_graph(&env, inst);
	}
	obstack_free(&env.obst, NULL);
	return res;
}

521
/* Replace Muls with Shifts and Add/Subs. */
522
523
ir_node *arch_dep_replace_mul_with_shifts(ir_node *irn)
{
524
	const ir_settings_arch_dep_t *params = be_get_backend_param()->dep_param;
525

526
527
528
	/* If the architecture dependent optimizations were not initialized
	   or this optimization was not enabled. */
	if (params == NULL || (opts & arch_dep_mul_to_shift) == 0)
Matthias Braun's avatar
Matthias Braun committed
529
		return irn;
530

Matthias Braun's avatar
Matthias Braun committed
531
	assert(is_Mul(irn));
Matthias Braun's avatar
Matthias Braun committed
532
	ir_mode *mode = get_irn_mode(irn);
Matthias Braun's avatar
Matthias Braun committed
533
	if (!mode_is_int(mode))
Matthias Braun's avatar
Matthias Braun committed
534
		return irn;
535
536
537

	/* we should never do the reverse transformations again
	   (like x+x -> 2*x) */
Matthias Braun's avatar
Matthias Braun committed
538
	ir_graph *irg = get_irn_irg(irn);
539
	add_irg_constraints(irg, IR_GRAPH_CONSTRAINT_ARCH_DEP);
540
541

	/* Look, if one operand is a constant. */
Matthias Braun's avatar
Matthias Braun committed
542
543
544
545
	ir_node   *left    = get_binop_left(irn);
	ir_node   *right   = get_binop_right(irn);
	ir_tarval *tv      = NULL;
	ir_node   *operand = NULL;
546
547
548
549
550
551
552
	if (is_Const(left)) {
		tv = get_Const_tarval(left);
		operand = right;
	} else if (is_Const(right)) {
		tv = get_Const_tarval(right);
		operand = left;
	}
553

Matthias Braun's avatar
Matthias Braun committed
554
555
556
	/* multiplications with 0 are a special case which we leave for
	 * equivalent_node_Mul because the code here can't handle them */
	if (tv == get_mode_null(mode))
Matthias Braun's avatar
Matthias Braun committed
557
		return irn;
Matthias Braun's avatar
Matthias Braun committed
558

Matthias Braun's avatar
Matthias Braun committed
559
	ir_node *res = irn;
560
561
562
563
	if (tv != NULL) {
		res = do_decomposition(irn, operand, tv);
		if (res != irn) {
			exchange(irn, res);
564
565
		}
	}
566

567
	return res;
Michael Beck's avatar
Michael Beck committed
568
}
569

570
571
572
/**
 * calculated the ld2 of a tarval if tarval is 2^n, else returns -1.
 */
Matthias Braun's avatar
Matthias Braun committed
573
static int tv_ld2(ir_tarval *tv, int bits)
574
{
Matthias Braun's avatar
Matthias Braun committed
575
576
577
	int num = 0;
	int k   = 0;
	for (int i = 0; i < bits; ++i) {
578
		unsigned char v = get_tarval_sub_bits(tv, i);
Matthias Braun's avatar
Matthias Braun committed
579
580
581
582
583
584
585
		if (v == 0)
			continue;
		for (int j = 0; j < 8; ++j) {
			if ((1 << j) & v) {
				++num;
				k = 8 * i + j;
			}
586
587
588
589
590
		}
	}
	if (num == 1)
		return k;
	return -1;
591
592
593
594
595
596
597
}


/* for shorter lines */
#define ABS(a)    tarval_abs(a)
#define NEG(a)    tarval_neg(a)
#define NOT(a)    tarval_not(a)
598
599
#define SHL(a, b) tarval_shl_unsigned(a, b)
#define SHR(a, b) tarval_shr_unsigned(a, b)
600
#define ADD(a, b) tarval_add(a, b)
601
#define SUB(a, b) tarval_sub(a, b)
602
603
604
605
606
607
608
#define MUL(a, b) tarval_mul(a, b)
#define DIV(a, b) tarval_div(a, b)
#define MOD(a, b) tarval_mod(a, b)
#define CMP(a, b) tarval_cmp(a, b)
#define CNV(a, m) tarval_convert_to(a, m)
#define ONE(m)    get_mode_one(m)
#define ZERO(m)   get_mode_null(m)
609
#define AND(a, b) tarval_and(a, b)
610

611
/** The result of a the magic() function. */
612
struct ms {
Matthias Braun's avatar
Matthias Braun committed
613
614
615
616
	ir_tarval *M;        /**< magic number */
	int        s;        /**< shift amount */
	bool       need_add; /**< an additional add is needed */
	bool       need_sub; /**< an additional sub is needed */
617
618
619
};

/**
Matthias Braun's avatar
Matthias Braun committed
620
621
 * Signed division by constant d: calculate the Magic multiplier M and the
 * shift amount s
622
 *
Matthias Braun's avatar
Matthias Braun committed
623
624
 * see Hacker's Delight: 10-6 Integer Division by Constants: Incorporation
 * into a Compiler
625
 */
Matthias Braun's avatar
Matthias Braun committed
626
static struct ms magic(ir_tarval *d)
627
{
628
	/* we need overflow mode to work correctly */
Matthias Braun's avatar
Matthias Braun committed
629
	assert(tarval_get_wrap_on_overflow());
630

Matthias Braun's avatar
Matthias Braun committed
631
632
633
	ir_mode *mode   = get_tarval_mode(d);
	ir_mode *u_mode = find_unsigned_mode(mode);
	unsigned bits   = get_mode_size_bits(u_mode);
634

Matthias Braun's avatar
Matthias Braun committed
635
636
637
638
639
640
641
642
643
644
645
646
647
	/* 2^(bits-1) */
	ir_tarval *two_bits_1 = SHL(get_mode_one(u_mode), bits-1);

	ir_tarval *ad  = CNV(ABS(d), u_mode);
	ir_tarval *t   = ADD(two_bits_1, SHR(CNV(d, u_mode), bits-1));
	ir_tarval *anc = SUB(SUB(t, ONE(u_mode)), MOD(t, ad)); /* abs(nc) */
	int        p   = bits - 1;
	ir_tarval *q1  = DIV(two_bits_1, anc);          /* q1 = 2^p/|nc| */
	ir_tarval *r1  = SUB(two_bits_1, MUL(q1, anc)); /* r1 = rem(2^p, |nc|) */
	ir_tarval *q2  = DIV(two_bits_1, ad);           /* q2 = 2^p/|d| */
	ir_tarval *r2  = SUB(two_bits_1, MUL(q2, ad));  /* r2 = rem(2^p, |d|) */

	ir_tarval *delta;
648
649
	do {
		++p;
Matthias Braun's avatar
Matthias Braun committed
650
651
		q1 = ADD(q1, q1);                      /* Update q1 = 2^p/|nc| */
		r1 = ADD(r1, r1);                      /* Update r1 = rem(2^p, |nc|) */
652

653
		if (CMP(r1, anc) & ir_relation_greater_equal) {
654
655
656
			q1 = ADD(q1, ONE(u_mode));
			r1 = SUB(r1, anc);
		}
657

Matthias Braun's avatar
Matthias Braun committed
658
659
		q2 = ADD(q2, q2);                      /* Update q2 = 2^p/|d| */
		r2 = ADD(r2, r2);                      /* Update r2 = rem(2^p, |d|) */
660

661
		if (CMP(r2, ad) & ir_relation_greater_equal) {
662
663
664
			q2 = ADD(q2, ONE(u_mode));
			r2 = SUB(r2, ad);
		}
665

666
		delta = SUB(ad, r2);
Matthias Braun's avatar
Matthias Braun committed
667
668
669
	} while (CMP(q1, delta) & ir_relation_less
	         || (CMP(q1, delta) & ir_relation_equal
	             && CMP(r1, ZERO(u_mode)) & ir_relation_equal));
670

Matthias Braun's avatar
Matthias Braun committed
671
	ir_relation d_cmp = CMP(d, ZERO(mode));
672

Matthias Braun's avatar
Matthias Braun committed
673
	struct ms mag;
674
	if (d_cmp & ir_relation_greater_equal)
675
676
677
		mag.M = ADD(CNV(q2, mode), ONE(mode));
	else
		mag.M = SUB(ZERO(mode), ADD(CNV(q2, mode), ONE(mode)));
678

Matthias Braun's avatar
Matthias Braun committed
679
	ir_relation M_cmp = CMP(mag.M, ZERO(mode));
680

681
	mag.s = p - bits;
682

683
	/* need an add if d > 0 && M < 0 */
684
	mag.need_add = d_cmp & ir_relation_greater && M_cmp & ir_relation_less;
685

686
	/* need a sub if d < 0 && M > 0 */
687
	mag.need_sub = d_cmp & ir_relation_less && M_cmp & ir_relation_greater;
688

689
	return mag;
690
691
692
}

/**
Matthias Braun's avatar
Matthias Braun committed
693
694
 * Unsigned division by constant d: calculate the Magic multiplier M and the
 * shift amount s
695
 *
Matthias Braun's avatar
Matthias Braun committed
696
697
 * see Faster Unsigned Division by Constants
 *  (http://ridiculousfish.com/blog/posts/labor-of-division-episode-iii.html)
698
 */
699
struct magicu_info
700
{
701
702
703
	ir_tarval *multiplier; /* the "magic number" multiplier */
	unsigned   pre_shift;  /* shift for the dividend before multiplying */
	unsigned   post_shift; /* shift for the dividend after multiplying */
Matthias Braun's avatar
Matthias Braun committed
704
705
	unsigned   increment;  /* 0 or 1; if set then increment the numerator,
	                          using one of the two strategies */
706
};
707

Matthias Braun's avatar
Matthias Braun committed
708
709
static struct magicu_info compute_unsigned_magic_info(ir_tarval *divisor,
                                                      unsigned num_bits)
710
711
712
713
714
715
716
717
718
719
720
721
722
723
{
	ir_mode *mode = get_tarval_mode(divisor);

	/* divisor must be larger than zero and not a power of 2
	 * D & (D-1) > 0 */
	assert(get_tarval_long(AND(divisor, SUB(divisor, ONE(mode)))));

	/* Bits in ir_tarval */
	const unsigned UINT_BITS = get_mode_size_bits(mode);

	/* The extra shift implicit in the difference between UINT_BITS and num_bits */
	const unsigned extra_shift = UINT_BITS - num_bits;

	/* The initial power of 2 is one less than the first one that can possibly work */
724
	ir_tarval *initial_power_of_2 = SHL(ONE(mode), UINT_BITS - 1);
725
726
727
728
729
730
731
732
733
734
735

	/* The remainder and quotient of our power of 2 divided by divisor */
	ir_tarval *quotient  = DIV(initial_power_of_2, divisor);
	ir_tarval *remainder = MOD(initial_power_of_2, divisor);

	/* The magic info for the variant "round down" algorithm */
	ir_tarval *down_multiplier = ZERO(mode);
	unsigned   down_exponent   = 0;
	int        has_magic_down  = 0;

	/* Compute ceil(log_2 D) */
Matthias Braun's avatar
Matthias Braun committed
736
	unsigned ceil_log_2_D = 0;
737
	for (ir_tarval *tmp = divisor; CMP(tmp, ZERO(mode)) & ir_relation_greater; tmp = SHR(tmp, 1))
738
739
		ceil_log_2_D++;

Matthias Braun's avatar
Matthias Braun committed
740
741
742
743
744
745
	/* Begin a loop that increments the exponent, until we find a power of 2
	 * that works. */
	unsigned exponent = 0;
	for (;; ++exponent) {
		/* Quotient and remainder is from previous exponent; compute it for
		 * this exponent. */
746
747
748
749
750
751
752
753
754
		ir_tarval *two = new_tarval_from_long(2, mode);
		if (CMP(remainder, SUB(divisor, remainder)) & ir_relation_greater_equal) {
			/* Doubling remainder will wrap around divisor */
			quotient  = ADD(MUL(quotient, two), ONE(mode));
			remainder = SUB(MUL(remainder, two), divisor);
		} else {
			/* Remainder will not wrap */
			quotient  = MUL(quotient, two);
			remainder = MUL(remainder, two);
755
756
		}

757
758
759
760
		/* We are done if this exponent works for the round_up algorithm.
		 * Note that exponent may be larger than the maximum shift supported,
		 * so the check for >= ceil_log_2_D is critical. */
		if ((exponent + extra_shift >= ceil_log_2_D) ||
sebastian.buchwald1's avatar
sebastian.buchwald1 committed
761
762
		    /* (divisor - remainder) <= (1 << exponent + extra_shift) */
		    (CMP(SUB(divisor, remainder), SHL(ONE(mode), exponent + extra_shift)) & ir_relation_less_equal))
763
764
			break;

Matthias Braun's avatar
Matthias Braun committed
765
766
767
		/* Set magic_down if we have not set it yet and this exponent works for
		 * the round_down algorithm */
		if (!has_magic_down &&
sebastian.buchwald1's avatar
sebastian.buchwald1 committed
768
769
		    (CMP(remainder, SHL(ONE(mode), exponent + extra_shift)) &
		    ir_relation_less_equal)) {
770
771
772
			has_magic_down  = 1;
			down_multiplier = quotient;
			down_exponent   = exponent;
773
		}
774
	}
775

Matthias Braun's avatar
Matthias Braun committed
776
	struct magicu_info result;
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
	if (exponent < ceil_log_2_D) {
		/* magic_up is efficient */
		result.multiplier = ADD(quotient, ONE(mode));
		result.pre_shift  = 0;
		result.post_shift = exponent;
		result.increment  = 0;
	} else if (CMP(AND(divisor, ONE(mode)), ZERO(mode)) & ir_relation_greater) {
		/* Odd divisor, so use magic_down, which must have been set */
		assert(has_magic_down);
		result.multiplier = down_multiplier;
		result.pre_shift  = 0;
		result.post_shift = down_exponent;
		result.increment  = 1;
	} else {
		/* Even divisor, so use a prefix-shifted dividend */
		unsigned pre_shift   = 0;
		ir_tarval *shifted_D = divisor;
		while (CMP(AND(shifted_D, ONE(mode)), ZERO(mode)) & ir_relation_equal) {
795
			shifted_D  = SHR(shifted_D, 1);
796
			pre_shift += 1;
797
		}
798
799
800
801
802
803
804
		result = compute_unsigned_magic_info(shifted_D, num_bits - pre_shift);
		/* expect no increment or pre_shift in this path */
		assert(result.increment == 0 && result.pre_shift == 0);
		result.pre_shift = pre_shift;
	}
	return result;
}
805

806
807
808
809
static struct magicu_info get_magic_info(ir_tarval *d)
{
	unsigned num_bits = get_mode_size_bits(get_tarval_mode(d));
	return compute_unsigned_magic_info(d, num_bits);
810
811
812
}

/**
813
 * Build the Mulh replacement code for n / tv.
814
 *
Matthias Braun's avatar
Matthias Braun committed
815
 * Note that 'div' might be a Mod operation as well
816
 */
Matthias Braun's avatar
Matthias Braun committed
817
static ir_node *replace_div_by_mulh(ir_node *div, ir_tarval *tv)
818
{
Matthias Braun's avatar
Matthias Braun committed
819
	/* Beware: do not transform bad code */
820
	ir_node *n     = get_binop_left(div);
821
	ir_node *block = get_nodes_block(div);
822
823
824
	if (is_Bad(n) || is_Bad(block))
		return div;

Matthias Braun's avatar
Matthias Braun committed
825
826
827
828
	dbg_info *dbg   = get_irn_dbg_info(div);
	ir_mode  *mode  = get_irn_mode(n);
	int       bits  = get_mode_size_bits(mode);
	ir_node  *res;
829
	if (mode_is_signed(mode)) {
830
		ir_graph *irg = get_irn_irg(div);
831
832
833
		struct ms mag = magic(tv);

		/* generate the Mulh instruction */
834
		ir_node *c = new_r_Const(irg, mag.M);
835
		ir_node *q = new_rd_Mulh(dbg, block, n, c);
836
837
838

		/* do we need an Add or Sub */
		if (mag.need_add)
839
			q = new_rd_Add(dbg, block, q, n);
840
		else if (mag.need_sub)
841
			q = new_rd_Sub(dbg, block, q, n, mode);
842
843
844

		/* Do we need the shift */
		if (mag.s > 0) {
845
			c = new_r_Const_long(irg, mode_Iu, mag.s);
846
			q = new_rd_Shrs(dbg, block, q, c);
847
848
849
		}

		/* final */
Matthias Braun's avatar
Matthias Braun committed
850
		ir_node *c2 = new_r_Const_long(irg, mode_Iu, bits - 1);
851
		ir_node *t  = new_rd_Shr(dbg, block, q, c2);
852
		res = new_rd_Add(dbg, block, q, t);
853
	} else {
854
		struct magicu_info mafo = get_magic_info(tv);
Matthias Braun's avatar
Matthias Braun committed
855
		ir_graph          *irg  = get_irn_irg(div);
856

857
		if (mafo.pre_shift > 0) {
Matthias Braun's avatar
Matthias Braun committed
858
			ir_node *c = new_r_Const_long(irg, mode_Iu, mafo.pre_shift);
859
			n = new_rd_Shr(dbg, block, n, c);
860
		}
861

862
863
864
865
		if (mafo.increment) {
			ir_node *no_mem    = new_rd_NoMem(dbg, irg);
			ir_node *in[1]     = {n};
			ir_type *utype     = get_unknown_type();
Matthias Braun's avatar
Matthias Braun committed
866
867
			ir_node *increment = new_rd_Builtin(dbg, block, no_mem, 1, in,
			                                    ir_bk_saturating_increment, utype);
868

869
870
			n = new_r_Proj(increment, mode_Iu, pn_Builtin_max + 1);
		}
871

872
		/* generate the Mulh instruction */
Matthias Braun's avatar
Matthias Braun committed
873
		ir_node *c = new_r_Const(irg, mafo.multiplier);
874
		ir_node *q = new_rd_Mulh(dbg, block, n, c);
875
		c = new_r_Const_long(irg, mode_Iu, get_mode_size_bits(get_tarval_mode(tv)));
876
		res = new_rd_Shr(dbg, block, q, c);
877
878
		if (mafo.post_shift > 0) {
			c = new_r_Const_long(irg, mode_Iu, mafo.post_shift);
879
			res = new_rd_Shr(dbg, block, res, c);
880
881
		}
	}
Matthias Braun's avatar
Matthias Braun committed
882
	return res;
883
884
}

885
/* Replace Divs with Shifts and Add/Subs and Mulh. */
886
887
ir_node *arch_dep_replace_div_by_const(ir_node *irn)
{
888
	/* If the architecture dependent optimizations were not initialized
889
		or this optimization was not enabled. */
Matthias Braun's avatar
Matthias Braun committed
890
	const ir_settings_arch_dep_t *params = be_get_backend_param()->dep_param;
891
892
	if (params == NULL || (opts & arch_dep_div_by_const) == 0)
		return irn;
893
894
	if (!is_Div(irn))
		return irn;
895

896
	ir_node *c = get_Div_right(irn);
Matthias Braun's avatar
Matthias Braun committed
897
	if (!is_Const(c))
898
		return irn;
899

900
	/* check for division by zero */
Matthias Braun's avatar
Matthias Braun committed
901
	ir_tarval *tv = get_Const_tarval(c);
902
903
	if (tarval_is_null(tv))
		return irn;
Michael Beck's avatar
Michael Beck committed
904

905
	/* can only handle integer Div's */
Matthias Braun's avatar
Matthias Braun committed
906
907
	ir_node *left = get_Div_left(irn);
	ir_mode *mode = get_irn_mode(left);
908
909
	if (!mode_is_int(mode))
		return irn;
910

Matthias Braun's avatar
Matthias Braun committed
911
912
913
914
915
916
	ir_node  *block  = get_nodes_block(irn);
	dbg_info *dbg    = get_irn_dbg_info(irn);
	int       bits   = get_mode_size_bits(mode);
	int       n      = (bits + 7) / 8;
	int       k      = -1;
	bool      n_flag = false;
917
	if (mode_is_signed(mode)) {
Matthias Braun's avatar
Matthias Braun committed
918
919
920
921
		/* for signed divisions, the algorithm works for a / -2^k by negating
		 * the result */
		ir_tarval *ntv = tarval_neg(tv);
		n_flag = true;
922
923
924
		k = tv_ld2(ntv, n);
	}
	if (k < 0) {
Matthias Braun's avatar
Matthias Braun committed
925
		n_flag = false;
926
927
		k = tv_ld2(tv, n);
	}
928

Matthias Braun's avatar
Matthias Braun committed
929
	ir_node *res = irn;
Andreas Zwinkau's avatar
Andreas Zwinkau committed
930
	if (k > 0) { /* division by 2^k or -2^k */
931
932
933
934
935
		ir_graph *irg = get_irn_irg(irn);
		if (mode_is_signed(mode)) {
			ir_node *curr = left;

			/* create the correction code for signed values only if there might be a remainder */
Matthias Braun's avatar
Matthias Braun committed
936
			if (!get_Div_no_remainder(irn)) {
937
				if (k != 1) {
Matthias Braun's avatar
Matthias Braun committed
938
					ir_node *k_node = new_r_Const_long(irg, mode_Iu, k - 1);
939
					curr   = new_rd_Shrs(dbg, block, left, k_node);
940
941
				}

Matthias Braun's avatar
Matthias Braun committed
942
				ir_node *k_node = new_r_Const_long(irg, mode_Iu, bits - k);
943
				curr = new_rd_Shr(dbg, block, curr, k_node);
944
945
946
				/* curr is now 2^(k-1) in case left <  0
				 *          or       0 in case left >= 0
				 *
Matthias Braun's avatar
Matthias Braun committed
947
				 * For an example, where this fixup is necessary consider -3/2,
948
949
950
951
				 * which should compute to -1,
				 * but simply shifting right by one computes -2.
				 */

952
				curr = new_rd_Add(dbg, block, left, curr);
953
			}
954

Matthias Braun's avatar
Matthias Braun committed
955
			ir_node *k_node = new_r_Const_long(irg, mode_Iu, k);
956
			res = new_rd_Shrs(dbg, block, curr, k_node);
957

958
			if (n_flag) { /* negate the result */
Matthias Braun's avatar
Matthias Braun committed
959
				ir_node *k_node = new_r_Const_null(irg, mode);
960
				res = new_rd_Sub(dbg, block, k_node, res, mode);
961
			}
962
		} else {      /* unsigned case */
Matthias Braun's avatar
Matthias Braun committed
963
			ir_node *k_node = new_r_Const_long(irg, mode_Iu, k);
964
			res    = new_rd_Shr(dbg, block, left, k_node);
965
		}
Andreas Zwinkau's avatar
Andreas Zwinkau committed
966
	} else if (k != 0) {
967
968
969
		/* other constant */
		if (allow_Mulh(params, mode))
			res = replace_div_by_mulh(irn, tv);
Andreas Zwinkau's avatar
Andreas Zwinkau committed
970
971
	} else { /* k == 0  i.e. division by 1 */
		res = left;
972
973
974
	}

	return res;
975
976
}

977
/* Replace Mods with Shifts and Add/Subs and Mulh. */
Christoph Mallon's avatar