Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
Zwinkau
libfirm
Commits
c1106f5b
Commit
c1106f5b
authored
Apr 18, 2013
by
Matthias Braun
Browse files
ia32: use builtin lowerer for popcount instead of handcrafted transformer
parent
173b3a62
Changes
2
Hide whitespace changes
Inline
Side-by-side
ir/be/ia32/bearch_ia32.c
View file @
c1106f5b
...
...
@@ -33,6 +33,7 @@
#include "lower_calls.h"
#include "lower_mode_b.h"
#include "lower_softfloat.h"
#include "lower_builtins.h"
#include "firmstat_t.h"
#include "beabi.h"
...
...
@@ -1779,7 +1780,7 @@ static int ia32_is_valid_clobber(const char *clobber)
static
void
ia32_lower_for_target
(
void
)
{
ir_mode
*
mode_gp
=
ia32_reg_classes
[
CLASS_ia32_gp
].
mode
;
size_t
i
,
n_irgs
=
get_irp_n_irgs
();
size_t
n_irgs
=
get_irp_n_irgs
();
/* perform doubleword lowering */
lwrdw_param_t
lower_dw_params
=
{
...
...
@@ -1803,7 +1804,28 @@ static void ia32_lower_for_target(void)
lower_floating_point
();
}
for
(
i
=
0
;
i
<
n_irgs
;
++
i
)
{
ir_builtin_kind
supported
[
32
];
size_t
s
=
0
;
supported
[
s
++
]
=
ir_bk_trap
;
supported
[
s
++
]
=
ir_bk_debugbreak
;
supported
[
s
++
]
=
ir_bk_return_address
;
supported
[
s
++
]
=
ir_bk_frame_address
;
supported
[
s
++
]
=
ir_bk_prefetch
;
supported
[
s
++
]
=
ir_bk_ffs
;
supported
[
s
++
]
=
ir_bk_clz
;
supported
[
s
++
]
=
ir_bk_ctz
;
supported
[
s
++
]
=
ir_bk_parity
;
supported
[
s
++
]
=
ir_bk_bswap
;
supported
[
s
++
]
=
ir_bk_outport
;
supported
[
s
++
]
=
ir_bk_inport
;
supported
[
s
++
]
=
ir_bk_inner_trampoline
;
supported
[
s
++
]
=
ir_bk_saturating_increment
;
if
(
ia32_cg_config
.
use_popcnt
)
supported
[
s
++
]
=
ir_bk_popcount
;
assert
(
s
<
ARRAY_SIZE
(
supported
));
lower_builtins
(
s
,
supported
);
for
(
size_t
i
=
0
;
i
<
n_irgs
;
++
i
)
{
ir_graph
*
irg
=
get_irp_irg
(
i
);
/* break up switches with wide ranges */
lower_switch
(
irg
,
4
,
256
,
mode_gp
);
...
...
@@ -1812,13 +1834,13 @@ static void ia32_lower_for_target(void)
ir_prepare_dw_lowering
(
&
lower_dw_params
);
ir_lower_dw_ops
();
for
(
i
=
0
;
i
<
n_irgs
;
++
i
)
{
for
(
size_t
i
=
0
;
i
<
n_irgs
;
++
i
)
{
ir_graph
*
irg
=
get_irp_irg
(
i
);
/* lower for mode_b stuff */
ir_lower_mode_b
(
irg
,
mode_Iu
);
}
for
(
i
=
0
;
i
<
n_irgs
;
++
i
)
{
for
(
size_t
i
=
0
;
i
<
n_irgs
;
++
i
)
{
ir_graph
*
irg
=
get_irp_irg
(
i
);
/* Turn all small CopyBs into loads/stores, keep medium-sized CopyBs,
* so we can generate rep movs later, and turn all big CopyBs into
...
...
ir/be/ia32/ia32_transform.c
View file @
c1106f5b
...
...
@@ -4695,6 +4695,10 @@ static ir_node *gen_be_Call(ir_node *node)
am
.
new_op2
,
sp
,
fpcw
,
eax
,
ecx
,
edx
,
pop
,
call_tp
);
ir_set_throws_exception
(
call
,
throws_exception
);
set_am_attributes
(
call
,
&
am
);
arch_set_irn_register_out
(
call
,
pn_ia32_Call_stack
,
&
ia32_registers
[
REG_ESP
]);
arch_set_irn_register_out
(
call
,
pn_ia32_Call_fpcw
,
&
ia32_registers
[
REG_FPCW
]);
call
=
fix_mem_proj
(
call
,
&
am
);
if
(
get_irn_pinned
(
node
)
==
op_pin_state_pinned
)
...
...
@@ -5035,109 +5039,26 @@ static ir_node *gen_parity(ir_node *node)
*/
static ir_node *gen_popcount(ir_node *node)
{
	/* Only the hardware popcnt path lives here: ia32_lower_for_target() lists
	 * ir_bk_popcount as supported solely when use_popcnt is set, so the
	 * builtin lowerer should have replaced the popcount if !use_popcnt. */
	assert(ia32_cg_config.use_popcnt);

	ir_node            *param = get_Builtin_param(node, 0);
	ir_node            *block = get_nodes_block(node);
	ia32_address_mode_t am;

	/* Match the single operand; the flags presumably allow it to be folded
	 * into an address mode (including 16-bit operands) -- see
	 * match_arguments() for the exact semantics. */
	match_arguments(&am, block, NULL, param, NULL,
	                match_am | match_16bit_am | match_upconv);

	ia32_address_t *addr      = &am.addr;
	dbg_info       *dbgi      = get_irn_dbg_info(node);
	ir_node        *new_block = be_transform_node(block);
	ir_node        *cnt       = new_bd_ia32_Popcnt(dbgi, new_block, addr->base,
	                                               addr->index, addr->mem,
	                                               am.new_op2);
	set_am_attributes(cnt, &am);
	/* The load/store mode of the Popcnt follows the mode of the parameter. */
	set_ia32_ls_mode(cnt, get_irn_mode(param));

	SET_IA32_ORIG_NODE(cnt, node);
	return fix_mem_proj(cnt, &am);
}
/**
...
...
@@ -5466,18 +5387,6 @@ found:;
}
res
=
new_rd_Proj
(
dbgi
,
new_call
,
mode
,
proj
);
/* TODO arch_set_irn_register() only operates on Projs, need variant with index */
switch
(
proj
)
{
case
pn_ia32_Call_stack
:
arch_set_irn_register
(
res
,
&
ia32_registers
[
REG_ESP
]);
break
;
case
pn_ia32_Call_fpcw
:
arch_set_irn_register
(
res
,
&
ia32_registers
[
REG_FPCW
]);
break
;
}
return
res
;
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment