Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
Zwinkau
libfirm
Commits
8234c50c
Commit
8234c50c
authored
Nov 25, 2010
by
Andreas Zwinkau
Browse files
Add -march=native parameter
[r28154]
parent
b6497b15
Changes
1
Hide whitespace changes
Inline
Side-by-side
ir/be/ia32/ia32_architecture.c
View file @
8234c50c
...
...
@@ -127,6 +127,8 @@ typedef enum cpu_support {
cpu_winchip2
=
arch_i486
|
arch_feature_mmx
|
arch_feature_3DNow
,
cpu_c3
=
arch_i486
|
arch_feature_mmx
|
arch_feature_3DNow
,
cpu_c3_2
=
arch_ppro
|
arch_feature_p6_insn
|
arch_sse1_insn
,
/* really no 3DNow! */
cpu_autodetect
=
0
,
}
cpu_support
;
static
int
opt_size
=
0
;
...
...
@@ -188,6 +190,8 @@ static const lc_opt_enum_int_items_t arch_items[] = {
{
"generic"
,
cpu_generic
},
{
"generic32"
,
cpu_generic
},
{
"native"
,
cpu_autodetect
},
{
NULL
,
0
}
};
...
...
@@ -484,6 +488,124 @@ int ia32_evaluate_insn(insn_kind kind, const ir_mode *mode, ir_tarval *tv)
}
}
static
struct
cpu_info_t
{
char
cpu_model
;
char
cpu_family
;
char
cpu_type
;
char
cpu_ext_model
;
char
cpu_ext_family
;
unsigned
edx_features
;
unsigned
ecx_features
;
};
static
void
auto_detect_Intel
(
struct
cpu_info_t
info
);
static
void
auto_detect_AMD
(
struct
cpu_info_t
info
);
static
void
autodetect_arch
()
{
/* We use the cpuid instruction to detect the CPU features */
/* If bit 21 of the EFLAGS register can be changed, the cpuid instruction is available */
unsigned
eflags_before
;
unsigned
eflags_after
;
asm
(
"pushf
\n\t
"
"popl %0
\n\t
"
"movl %0, %1
\n\t
"
"xorl $0x00200000, %1
\n\t
"
"pushl %1
\n\t
"
"popf
\n\t
"
"pushf
\n\t
"
"popl %1
\n\t
"
:
"=r"
(
eflags_before
),
"=r"
(
eflags_after
)
::
"cc"
);
if
(
eflags_before
==
eflags_after
)
{
panic
(
"arch autodetection impossible: no cpuid instruction available"
);
}
unsigned
highest_calling_parameter
,
vid0
,
vid1
,
vid2
;
__asm__
(
"xorl %%eax, %%eax
\n\t
"
// 0 in eax for vendor ID
"cpuid
\n\t
"
:
"=a"
(
highest_calling_parameter
),
"=b"
(
vid0
),
"=d"
(
vid1
),
"=c"
(
vid2
)
);
int
vendorid
[
4
]
=
{
vid0
,
vid1
,
vid2
,
0
};
unsigned
cpu_signature
,
edx_features
,
ecx_features
,
add_feature_flags
;
__asm__
(
"movl $1, %%eax
\n\t
"
// 1 in eax for processor info and feature bits
"cpuid
\n\t
"
:
"=a"
(
cpu_signature
),
"=b"
(
add_feature_flags
),
"=d"
(
ecx_features
),
"=c"
(
edx_features
)
);
struct
cpu_info_t
cpu_info
=
{
(
cpu_signature
>>
4
)
&
((
1
<<
4
)
-
1
),
(
cpu_signature
>>
8
)
&
((
1
<<
4
)
-
1
),
(
cpu_signature
>>
12
)
&
((
1
<<
2
)
-
1
),
(
cpu_signature
>>
16
)
&
((
1
<<
4
)
-
1
),
(
cpu_signature
>>
20
)
&
((
1
<<
8
)
-
1
),
edx_features
,
ecx_features
,
};
if
(
0
==
strcmp
((
char
*
)
vendorid
,
"GenuineIntel"
))
{
auto_detect_Intel
(
cpu_info
);
}
else
if
(
0
==
strcmp
((
char
*
)
vendorid
,
"AuthenticAMD"
))
{
auto_detect_AMD
(
cpu_info
);
}
else
{
panic
(
"Unknown Vendor ID for arch autodetection: %s
\n
"
,
vendorid
);
}
}
static
void
auto_detect_Intel
(
const
struct
cpu_info_t
info
)
{
cpu_support
auto_arch
=
cpu_generic
;
switch
(
info
.
cpu_family
)
{
case
4
:
auto_arch
=
arch_i486
;
break
;
case
5
:
auto_arch
=
arch_pentium
;
break
;
case
6
:
auto_arch
=
arch_ppro
;
break
;
case
15
:
auto_arch
=
arch_netburst
;
break
;
default:
panic
(
"Unknown cpu family for arch autodetection: %X
\n
"
,
info
.
cpu_family
);
}
if
(
info
.
edx_features
&
(
1
<<
23
))
auto_arch
|=
arch_feature_mmx
;
if
(
info
.
edx_features
&
(
1
<<
25
))
auto_arch
|=
arch_feature_sse1
;
if
(
info
.
edx_features
&
(
1
<<
26
))
auto_arch
|=
arch_feature_sse2
;
if
(
info
.
ecx_features
&
(
1
<<
0
))
auto_arch
|=
arch_feature_sse3
;
if
(
info
.
ecx_features
&
(
1
<<
9
))
auto_arch
|=
arch_feature_ssse3
;
if
(
info
.
ecx_features
&
(
1
<<
19
))
auto_arch
|=
arch_feature_sse4_1
;
if
(
info
.
ecx_features
&
(
1
<<
20
))
auto_arch
|=
arch_feature_sse4_2
;
arch
=
auto_arch
;
opt_arch
=
auto_arch
;
}
static
void
auto_detect_AMD
(
const
struct
cpu_info_t
info
)
{
cpu_support
auto_arch
=
cpu_generic
;
/* TODO find documentation on the cpu_family bits
for now we fall back to arch_k6. */
auto_arch
=
arch_k6
;
if
(
info
.
edx_features
&
(
1
<<
23
))
auto_arch
|=
arch_feature_mmx
;
if
(
info
.
edx_features
&
(
1
<<
25
))
auto_arch
|=
arch_feature_sse1
;
if
(
info
.
edx_features
&
(
1
<<
26
))
auto_arch
|=
arch_feature_sse2
;
if
(
info
.
ecx_features
&
(
1
<<
0
))
auto_arch
|=
arch_feature_sse3
;
if
(
info
.
ecx_features
&
(
1
<<
9
))
auto_arch
|=
arch_feature_ssse3
;
if
(
info
.
ecx_features
&
(
1
<<
19
))
auto_arch
|=
arch_feature_sse4_1
;
if
(
info
.
ecx_features
&
(
1
<<
20
))
auto_arch
|=
arch_feature_sse4_2
;
arch
=
auto_arch
;
opt_arch
=
auto_arch
;
}
void
ia32_setup_cg_config
(
void
)
{
ia32_code_gen_config_t
*
const
c
=
&
ia32_cg_config
;
...
...
@@ -491,6 +613,8 @@ void ia32_setup_cg_config(void)
set_arch_costs
();
if
(
arch
==
0
)
autodetect_arch
();
c
->
optimize_size
=
opt_size
!=
0
;
/* on newer intel cpus mov, pop is often faster than leave although it has a
* longer opcode */
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment