Commit 50fcbd2c authored by Matthias Braun
Browse files

ia32: fix and optimize bswap sequence on i386

parent 5f5fba36
......@@ -108,7 +108,7 @@ enum cpu_arch_features {
* CPU's.
*/
typedef enum cpu_support {
cpu_generic = arch_generic32,
cpu_generic = arch_generic32,
/* intel CPUs */
cpu_i386 = arch_i386,
......@@ -916,7 +916,7 @@ void ia32_setup_cg_config(void)
c->use_sse_prefetch = FLAGS(arch, (arch_feature_3DNowE | arch_feature_sse1));
c->use_3dnow_prefetch = FLAGS(arch, arch_feature_3DNow);
c->use_popcnt = FLAGS(arch, arch_feature_popcnt);
c->use_i486 = (arch & arch_mask) >= arch_i486;
c->use_bswap = (arch & arch_mask) >= arch_i486;
c->optimize_cc = opt_cc;
c->use_unsafe_floatconv = opt_unsafe_floatconv;
c->emit_machcode = emit_machcode;
......
......@@ -80,7 +80,7 @@ typedef struct {
/** use SSE4.2 or SSE4a popcnt instruction */
unsigned use_popcnt:1;
/** use i486 instructions */
unsigned use_i486:1;
unsigned use_bswap:1;
/** optimize calling convention where possible */
unsigned optimize_cc:1;
/**
......
......@@ -746,7 +746,7 @@ Rol => {
out => [ "in_r1 !in_r2", "flags" ] },
ins => [ "val", "count" ],
outs => [ "res", "flags" ],
emit => '. rol%M %SB1, %S0',
emit => '. rol%M %SB1, %DS0',
units => [ "GP" ],
latency => 1,
mode => $mode_gp,
......
......@@ -5283,27 +5283,23 @@ static ir_node *gen_bswap(ir_node *node)
ir_node *new_block = be_transform_node(block);
ir_mode *mode = get_irn_mode(param);
unsigned size = get_mode_size_bits(mode);
ir_node *m1, *m2, *m3, *m4, *s1, *s2, *s3, *s4;
switch (size) {
case 32:
if (ia32_cg_config.use_i486) {
if (ia32_cg_config.use_bswap) {
/* swap available */
return new_bd_ia32_Bswap(dbgi, new_block, param);
} else {
ir_node *i8 = ia32_create_Immediate(NULL, 0, 8);
ir_node *rol1 = new_bd_ia32_Rol(dbgi, new_block, param, i8);
ir_node *i16 = ia32_create_Immediate(NULL, 0, 16);
ir_node *rol2 = new_bd_ia32_Rol(dbgi, new_block, rol1, i16);
ir_node *rol3 = new_bd_ia32_Rol(dbgi, new_block, rol2, i8);
set_ia32_ls_mode(rol1, mode_Hu);
set_ia32_ls_mode(rol2, mode_Iu);
set_ia32_ls_mode(rol3, mode_Hu);
return rol3;
}
s1 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
s2 = new_bd_ia32_Shl(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
m1 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s2, ia32_create_Immediate(NULL, 0, 0xFF00));
m2 = new_bd_ia32_Lea(dbgi, new_block, s1, m1);
s3 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 8));
m3 = new_bd_ia32_And(dbgi, new_block, noreg_GP, noreg_GP, nomem, s3, ia32_create_Immediate(NULL, 0, 0xFF0000));
m4 = new_bd_ia32_Lea(dbgi, new_block, m2, m3);
s4 = new_bd_ia32_Shr(dbgi, new_block, param, ia32_create_Immediate(NULL, 0, 24));
return new_bd_ia32_Lea(dbgi, new_block, m4, s4);
case 16:
/* swap16 always available */
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment