Commit 081ef4f7 authored by Michael Beck
Browse files

- add optimisation for size

[r19340]
parent 12130465
......@@ -113,6 +113,7 @@ enum cpu_support {
cpu_c3_2 = arch_ppro | arch_feature_sse1, /* really no 3DNow! */
};
static int opt_size = 0;
static cpu_support arch = cpu_generic;
static cpu_support opt_arch = cpu_core2;
static int use_sse2 = 0;
......@@ -191,6 +192,7 @@ static lc_opt_enum_int_var_t fp_unit_var = {
};
static const lc_opt_table_entry_t ia32_architecture_options[] = {
LC_OPT_ENT_BOOL("size", "optimize for size", &opt_size),
LC_OPT_ENT_ENUM_INT("arch", "select the instruction architecture",
&arch_var),
LC_OPT_ENT_ENUM_INT("opt", "optimize for instruction architecture",
......@@ -215,6 +217,18 @@ typedef struct insn_const {
unsigned label_alignment_max_skip; /**< maximum skip for alignment of loops labels */
} insn_const;
/**
 * Cost table used when optimizing for size.
 *
 * set_arch_costs() installs this table and returns early whenever opt_size
 * is set, so it takes precedence over every per-architecture table.
 * All alignment entries are 0 and a multiply carries no per-set-bit
 * surcharge.
 * NOTE(review): the non-zero costs presumably approximate the encoded
 * instruction length rather than latency -- confirm.
 */
static const insn_const size_cost = {
2, /* cost of an add instruction */
3, /* cost of a lea instruction */
3, /* cost of a constant shift instruction */
3, /* starting cost of a multiply instruction */
0, /* cost of multiply for every set bit (no extra charge per bit) */
0, /* logarithm for alignment of function labels */
0, /* logarithm for alignment of loop labels */
0, /* maximum skip for alignment of loop labels */
};
/* costs for the i386 */
static const insn_const i386_cost = {
1, /* cost of an add instruction */
......@@ -375,6 +389,10 @@ static const insn_const *arch_costs = &generic32_cost;
static void set_arch_costs(void)
{
if (opt_size) {
arch_costs = &size_cost;
return;
}
switch (opt_arch & arch_mask) {
case arch_i386:
arch_costs = &i386_cost;
......@@ -459,12 +477,13 @@ void ia32_setup_cg_config(void)
set_arch_costs();
ia32_cg_config.optimize_size = opt_size != 0;
/* on newer intel cpus mov, pop is often faster than leave although it has a
* longer opcode */
ia32_cg_config.use_leave = FLAGS(opt_arch, arch_i386 | arch_all_amd | arch_core2);
/* P4s don't like inc/decs because they only partially write the flags
register which produces false dependencies */
ia32_cg_config.use_incdec = !FLAGS(opt_arch, arch_netburst | arch_nocona | arch_geode);
ia32_cg_config.use_incdec = !FLAGS(opt_arch, arch_netburst | arch_nocona | arch_geode) || opt_size;
ia32_cg_config.use_sse2 = use_sse2;
ia32_cg_config.use_ffreep = FLAGS(opt_arch, arch_athlon_plus);
ia32_cg_config.use_ftst = !FLAGS(arch, arch_feature_p6_insn);
......@@ -475,17 +494,21 @@ void ia32_setup_cg_config(void)
ia32_cg_config.use_modeD_moves = FLAGS(opt_arch, arch_athlon_plus | arch_geode | arch_ppro |
arch_netburst | arch_nocona | arch_core2 | arch_generic32);
ia32_cg_config.use_add_esp_4 = FLAGS(opt_arch, arch_geode | arch_athlon_plus |
arch_netburst | arch_nocona | arch_core2 | arch_generic32);
arch_netburst | arch_nocona | arch_core2 | arch_generic32) &&
!opt_size;
ia32_cg_config.use_add_esp_8 = FLAGS(opt_arch, arch_geode | arch_athlon_plus |
arch_i386 | arch_i486 | arch_ppro | arch_netburst |
arch_nocona | arch_core2 | arch_generic32);
arch_nocona | arch_core2 | arch_generic32) &&
!opt_size;
ia32_cg_config.use_sub_esp_4 = FLAGS(opt_arch, arch_athlon_plus | arch_ppro |
arch_netburst | arch_nocona | arch_core2 | arch_generic32);
arch_netburst | arch_nocona | arch_core2 | arch_generic32) &&
!opt_size;
ia32_cg_config.use_sub_esp_8 = FLAGS(opt_arch, arch_athlon_plus | arch_i386 | arch_i486 |
arch_ppro | arch_netburst | arch_nocona | arch_core2 | arch_generic32);
ia32_cg_config.use_imul_mem_imm32 = !FLAGS(opt_arch, arch_k8 | arch_k10);
ia32_cg_config.use_mov_0 = FLAGS(opt_arch, arch_k6);
ia32_cg_config.use_pad_return = FLAGS(opt_arch, arch_athlon_plus | cpu_core2 | arch_generic32);
arch_ppro | arch_netburst | arch_nocona | arch_core2 | arch_generic32) &&
!opt_size;
ia32_cg_config.use_imul_mem_imm32 = !FLAGS(opt_arch, arch_k8 | arch_k10) || opt_size;
ia32_cg_config.use_mov_0 = FLAGS(opt_arch, arch_k6) && !opt_size;
ia32_cg_config.use_pad_return = FLAGS(opt_arch, arch_athlon_plus | cpu_core2 | arch_generic32) && !opt_size;
ia32_cg_config.optimize_cc = opt_cc;
ia32_cg_config.use_unsafe_floatconv = opt_unsafe_floatconv;
......
......@@ -27,6 +27,8 @@
#define FIRM_BE_IA32_ARCHITECTURE_H
typedef struct {
/** optimize for size */
unsigned optimize_size:1;
/** use leave in function epilogue */
unsigned use_leave:1;
/** use inc, dec instead of add ,1 and add, -1 */
......
......@@ -2025,31 +2025,33 @@ static void ia32_emit_block_header(ir_node *block, ir_node *prev_block)
}
}
/* align the current block if:
* a) it should be aligned due to its execution frequency
* b) there is no fall-through here
*/
if (should_align_block(block, prev_block)) {
ia32_emit_align_label();
} else {
/* if the predecessor block has no fall-through,
we can always align the label. */
int i;
ir_node *check_node = NULL;
if (ia32_cg_config.label_alignment > 0) {
/* align the current block if:
* a) it should be aligned due to its execution frequency
* b) there is no fall-through here
*/
if (should_align_block(block, prev_block)) {
ia32_emit_align_label();
} else {
/* if the predecessor block has no fall-through,
we can always align the label. */
int i;
ir_node *check_node = NULL;
for (i = n_cfgpreds - 1; i >= 0; --i) {
ir_node *cfg_pred = get_Block_cfgpred(block, i);
for (i = n_cfgpreds - 1; i >= 0; --i) {
ir_node *cfg_pred = get_Block_cfgpred(block, i);
if (get_nodes_block(skip_Proj(cfg_pred)) == prev_block) {
check_node = cfg_pred;
break;
if (get_nodes_block(skip_Proj(cfg_pred)) == prev_block) {
check_node = cfg_pred;
break;
}
}
if (check_node == NULL || !is_fallthrough(check_node))
ia32_emit_align_label();
}
if (check_node == NULL || !is_fallthrough(check_node))
ia32_emit_align_label();
}
if(need_label) {
if (need_label) {
ia32_emit_block_name(block);
be_emit_char(':');
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment