Commit 590df148 authored by Christoph Mallon's avatar Christoph Mallon
Browse files

- Allow an arbitrary (for arbitrary < 32) number of in_rBAR and !in_rBAR constraints per node.

- Remove !in and arch_register_req_type_should_be_different_from_all.
- Specify the output constraints of ia32 ShlD and ShrD more accurately as in_r1 !in_r2 !in_r3 instead of !in. This usually saves the fourth register.

[r16320]
parent 49945632
......@@ -91,13 +91,27 @@ static void dump_reg_req(FILE *F, ir_node *n, const arch_register_req_t **reqs,
}
if (reqs[i]->type & arch_register_req_type_should_be_same) {
ir_fprintf(F, " same as %+F", get_irn_n(n, reqs[i]->other_same[0]));
if (reqs[i]->other_same[1] != -1)
ir_fprintf(F, " or %+F", get_irn_n(n, reqs[i]->other_same[1]));
const unsigned other = reqs[i]->other_same;
int i;
ir_fprintf(F, " same as");
for (i = 0; 1U << i <= other; ++i) {
if (other & (1U << i)) {
ir_fprintf(F, " %+F", get_irn_n(n, i));
}
}
}
if (reqs[i]->type & arch_register_req_type_should_be_different) {
ir_fprintf(F, " different from %+F", get_irn_n(n, reqs[i]->other_different));
const unsigned other = reqs[i]->other_different;
int i;
ir_fprintf(F, " different from");
for (i = 0; 1U << i <= other; ++i) {
if (other & (1U << i)) {
ir_fprintf(F, " %+F", get_irn_n(n, i));
}
}
}
fprintf(F, "\n");
......
......@@ -100,13 +100,27 @@ static void dump_reg_req(FILE *F, const ir_node *node,
}
if (reqs[i]->type & arch_register_req_type_should_be_same) {
ir_fprintf(F, " same as %+F", get_irn_n(node, reqs[i]->other_same[0]));
if (reqs[i]->other_same[1] != -1)
ir_fprintf(F, " or %+F", get_irn_n(node, reqs[i]->other_same[1]));
const unsigned other = reqs[i]->other_same;
int i;
ir_fprintf(F, " same as");
for (i = 0; 1U << i <= other; ++i) {
if (other & (1U << i)) {
ir_fprintf(F, " %+F", get_irn_n(node, i));
}
}
}
if (reqs[i]->type & arch_register_req_type_should_be_different) {
ir_fprintf(F, " different from %+F", get_irn_n(node, reqs[i]->other_different));
const unsigned other = reqs[i]->other_different;
int i;
ir_fprintf(F, " different from");
for (i = 0; 1U << i <= other; ++i) {
if (other & (1U << i)) {
ir_fprintf(F, " %+F", get_irn_n(node, i));
}
}
}
fprintf(F, "\n");
......
......@@ -333,20 +333,29 @@ extern char *arch_register_req_format(char *buf, size_t len,
}
if(arch_register_req_is(req, should_be_same)) {
const ir_node *same = get_irn_n(skip_Proj_const(node), req->other_same[0]);
ir_snprintf(tmp, sizeof(tmp), " same to: %+F", same);
strncat(buf, tmp, len);
if (req->other_same[1] != -1) {
const ir_node *same2 = get_irn_n(skip_Proj_const(node), req->other_same[1]);
ir_snprintf(tmp, sizeof(tmp), "or %+F", same2);
strncat(buf, tmp, len);
const unsigned other = req->other_same;
int i;
ir_snprintf(tmp, sizeof(tmp), " same to:");
for (i = 0; 1U << i <= other; ++i) {
if (other & (1U << i)) {
ir_snprintf(tmp, sizeof(tmp), " %+F", get_irn_n(skip_Proj_const(node), i));
strncat(buf, tmp, len);
}
}
}
if(arch_register_req_is(req, should_be_different)) {
const ir_node *different = get_irn_n(skip_Proj_const(node), req->other_different);
ir_snprintf(tmp, sizeof(tmp), " different to: %+F", different);
strncat(buf, tmp, len);
const unsigned other = req->other_different;
int i;
ir_snprintf(tmp, sizeof(tmp), " different from:");
for (i = 0; 1U << i <= other; ++i) {
if (other & (1U << i)) {
ir_snprintf(tmp, sizeof(tmp), " %+F", get_irn_n(skip_Proj_const(node), i));
strncat(buf, tmp, len);
}
}
}
return buf;
......@@ -356,7 +365,7 @@ static const arch_register_req_t no_requirement = {
arch_register_req_type_none,
NULL,
NULL,
{ -1, -1 },
-1
0,
0
};
const arch_register_req_t *arch_no_register_req = &no_requirement;
......@@ -88,12 +88,11 @@ typedef enum arch_operand_type_t {
* Different types of register allocation requirements.
*/
typedef enum arch_register_req_type_t {
arch_register_req_type_none = 0, /**< No register requirement. */
arch_register_req_type_normal = 1, /**< All registers in the class are allowed. */
arch_register_req_type_limited = 2, /**< Only a real subset of the class is allowed. */
arch_register_req_type_should_be_same = 4, /**< The register should be equal another one at the node. */
arch_register_req_type_should_be_different = 8, /**< The register must be unequal to some other at the node. */
arch_register_req_type_should_be_different_from_all = 16, /**< The register must be different from all in's at the node */
arch_register_req_type_none = 0, /**< No register requirement. */
arch_register_req_type_normal = 1, /**< All registers in the class are allowed. */
arch_register_req_type_limited = 2, /**< Only a real subset of the class is allowed. */
arch_register_req_type_should_be_same = 4, /**< The register should be equal to another one at the node. */
arch_register_req_type_should_be_different = 8, /**< The register must be unequal from some other at the node. */
} arch_register_req_type_t;
extern const arch_register_req_t *arch_no_register_req;
......
......@@ -132,14 +132,11 @@ struct arch_register_req_t {
const unsigned *limited; /**< allowed register bitset */
int other_same[2]; /**< The in numbers which shall have the
same res (should_be_same). More than
two are unnecessary because there is
no machine with more than two
commutative inputs to one operation */
int other_different; /**< The other node from which this
one's register must be different
(case must_be_different). */
unsigned other_same; /**< Bitmask of ins which should use the
same register (should_be_same). */
unsigned other_different; /**< Bitmask of ins which shall use a
different register
(must_be_different) */
};
/**
......
......@@ -460,42 +460,37 @@ static void co_collect_units(ir_node *irn, void *env) {
/* Src == Tgt of a 2-addr-code instruction */
if (is_2addr_code(req)) {
ir_node *other = get_irn_n(skip_Proj(irn), req->other_same[0]);
ir_node *other2 = NULL;
int count;
if (arch_irn_is(co->aenv, other, ignore) ||
nodes_interfere(co->cenv, irn, other)) {
other = NULL;
}
if (req->other_same[1] != -1) {
other2 = get_irn_n(skip_Proj(irn), req->other_same[1]);
if (arch_irn_is(co->aenv, other2, ignore) ||
nodes_interfere(co->cenv, irn, other2)) {
other2 = NULL;
const unsigned other = req->other_same;
int count = 0;
int i;
for (i = 0; (1U << i) <= other; ++i) {
if (other & (1U << i)) {
ir_node *o = get_irn_n(skip_Proj(irn), i);
if (!arch_irn_is(co->aenv, o, ignore) &&
!nodes_interfere(co->cenv, irn, o)) {
++count;
}
}
}
count = 1 + (other != NULL) + (other2 != NULL && other != other2);
if (count > 1) {
int i = 0;
if (count != 0) {
int k = 0;
++count;
unit->nodes = xmalloc(count * sizeof(*unit->nodes));
unit->costs = xmalloc(count * sizeof(*unit->costs));
unit->node_count = count;
unit->nodes[i] = irn;
if (other != NULL) {
++i;
unit->nodes[i] = other;
unit->costs[i] = co->get_costs(co, irn, other, -1);
}
if (other2 != NULL) {
if (other == other2) {
unit->costs[i] += co->get_costs(co, irn, other2, -1);
} else {
++i;
unit->nodes[i] = other2;
unit->costs[i] = co->get_costs(co, irn, other2, -1);
unit->nodes[k++] = irn;
for (i = 0; 1U << i <= other; ++i) {
if (other & (1U << i)) {
ir_node *o = get_irn_n(skip_Proj(irn), i);
if (!arch_irn_is(co->aenv, o, ignore) &&
!nodes_interfere(co->cenv, irn, o)) {
unit->nodes[k] = o;
unit->costs[k] = co->get_costs(co, irn, o, -1);
++k;
}
}
}
}
......@@ -805,15 +800,15 @@ static void build_graph_walker(ir_node *irn, void *env) {
else { /* 2-address code */
const arch_register_req_t *req = arch_get_register_req(co->aenv, irn, -1);
if (is_2addr_code(req)) {
const int *i;
for (i = req->other_same; i != ENDOF(req->other_same); ++i) {
ir_node *other;
if (*i == -1) break;
other = get_irn_n(skip_Proj(irn), *i);
if (! arch_irn_is(co->aenv, other, ignore))
add_edges(co, irn, other, co->get_costs(co, irn, other, 0));
const unsigned other = req->other_same;
int i;
for (i = 0; 1U << i <= other; ++i) {
if (other & (1U << i)) {
ir_node *other = get_irn_n(skip_Proj(irn), i);
if (! arch_irn_is(co->aenv, other, ignore))
add_edges(co, irn, other, co->get_costs(co, irn, other, 0));
}
}
}
}
......
......@@ -631,12 +631,14 @@ static void assure_different_constraints(ir_node *irn, constraint_env_t *env) {
req = arch_get_register_req(arch_env, irn, -1);
if (arch_register_req_is(req, should_be_different)) {
ir_node *different_from = get_irn_n(belower_skip_proj(irn), req->other_different);
gen_assure_different_pattern(irn, different_from, env);
} else if (arch_register_req_is(req, should_be_different_from_all)) {
int i, n = get_irn_arity(belower_skip_proj(irn));
for (i = 0; i < n; i++) {
gen_assure_different_pattern(irn, get_irn_n(belower_skip_proj(irn), i), env);
const unsigned other = req->other_different;
int i;
for (i = 0; 1U << i <= other; ++i) {
if (other & (1U << i)) {
ir_node *different_from = get_irn_n(belower_skip_proj(irn), i);
gen_assure_different_pattern(irn, different_from, env);
}
}
}
}
......
......@@ -550,8 +550,7 @@ ir_node *be_new_Copy(const arch_register_class_t *cls, ir_graph *irg, ir_node *b
req = get_req(res, OUT_POS(0));
req->cls = cls;
req->type = arch_register_req_type_should_be_same;
req->other_same[0] = 0;
req->other_same[1] = -1;
req->other_same = 1U << 0;
return res;
}
......
......@@ -155,6 +155,16 @@ static INLINE int need_constraint_copy(ir_node *irn) {
! is_ia32_CMov(irn);
}
/**
 * Returns the position of the lowest-numbered input named in the
 * should_be_same bitmask of a register requirement.
 *
 * @param req  register requirement whose other_same mask is inspected
 * @return     index of the lowest set bit of req->other_same, or -1 if the
 *             mask is empty
 */
static int get_first_same(const arch_register_req_t* req)
{
	unsigned mask = req->other_same;
	int      pos;

	/* An empty mask has no first input. Scanning it anyway would never
	 * terminate and would push the shift count past the width of unsigned,
	 * which is undefined behaviour. */
	if (mask == 0) {
		return -1;
	}

	/* Shift the mask down instead of a probe bit up, so the shift count is
	 * always 1 and can never reach the type width. */
	for (pos = 0; (mask & 1U) == 0; ++pos) {
		mask >>= 1;
	}

	return pos;
}
/**
* Insert copies for all ia32 nodes where the should_be_same requirement
* is not fulfilled.
......@@ -200,7 +210,7 @@ static void assure_should_be_same_requirements(ia32_code_gen_t *cg,
if (!arch_register_req_is(req, should_be_same))
continue;
same_pos = req->other_same[0];
same_pos = get_first_same(req);
/* get in and out register */
out_reg = get_ia32_out_reg(node, i);
......@@ -347,7 +357,7 @@ static void fix_am_source(ir_node *irn, void *env) {
if (arch_register_req_is(reqs[i], should_be_same)) {
/* get in and out register */
const arch_register_t *out_reg = get_ia32_out_reg(irn, i);
int same_pos = reqs[i]->other_same[0];
int same_pos = get_first_same(reqs[i]);
ir_node *same_node = get_irn_n(irn, same_pos);
const arch_register_t *same_reg
= arch_get_irn_register(arch_env, same_node);
......
......@@ -96,13 +96,27 @@ static void dump_reg_req(FILE *F, ir_node *n, const arch_register_req_t **reqs,
}
if (reqs[i]->type & arch_register_req_type_should_be_same) {
ir_fprintf(F, " same as %+F", get_irn_n(n, reqs[i]->other_same[0]));
if (reqs[i]->other_same[1] != -1)
ir_fprintf(F, " or %+F", get_irn_n(n, reqs[i]->other_same[1]));
unsigned other = reqs[i]->other_same;
int i;
ir_fprintf(F, " same as");
for (i = 0; 1U << i <= other; ++i) {
if (other & (1U << i)) {
ir_fprintf(F, " %+F", get_irn_n(n, i));
}
}
}
if (reqs[i]->type & arch_register_req_type_should_be_different) {
ir_fprintf(F, " different from %+F", get_irn_n(n, reqs[i]->other_different));
unsigned other = reqs[i]->other_different;
int i;
ir_fprintf(F, " different from");
for (i = 0; 1U << i <= other; ++i) {
if (other & (1U << i)) {
ir_fprintf(F, " %+F", get_irn_n(n, i));
}
}
}
fprintf(F, "\n");
......
......@@ -719,11 +719,9 @@ l_ShlDep => {
ShlD => {
irn_flags => "R",
# out should reuse the register of left_high (in_r1) and must differ from
# the registers of left_low (in_r2) and right (in_r3)
reg_req => { in => [ "gp", "gp", "ecx" ], out => [ "!in" ] },
reg_req => { in => [ "gp", "gp", "ecx" ], out => [ "in_r1 !in_r2 !in_r3" ] },
ins => [ "left_high", "left_low", "right" ],
emit => ". movl %S0, %D0\n".
". shld%M %SB2, %S1, %D0\n",
emit => ". shld%M %SB2, %S1, %D0",
latency => 6,
units => [ "GP" ],
mode => $mode_gp,
......@@ -767,25 +765,10 @@ l_ShrDep => {
},
ShrD => {
# What's going on with the out constraint here? We would like to write
# "in_r2" and be done. However in firm we only support should_be_same
# constraints at the moment, which means they could be left unfulfilled in
# some cases. Now when all values happen to live through the node, out
# is ecx and in_r2 not ecx, then we're screwed. Because in this case we
# need a 4th Register.
#
# The best solution for this is extending the register allocator to support
# must_be_same constraints which create a copy when the in_r2 value
# lives through (this ensures that we have the 4th register in the cases
# when we need it and can always fix the situation).
#
# For now I'm doing this ultra ugly !in hack which allocates 4 registers
# and creates an extra mov
irn_flags => "R",
reg_req => { in => [ "gp", "gp", "ecx" ], out => [ "!in" ] },
irn_flags => "R",
reg_req => { in => [ "gp", "gp", "ecx" ], out => [ "in_r1 !in_r2 !in_r3" ] },
ins => [ "left_high", "left_low", "right" ],
emit => ". movl %S0, %D0\n".
". shrd%M %SB2, %S1, %D0\n",
emit => ". shrd%M %SB2, %S1, %D0",
latency => 6,
units => [ "GP" ],
mode => $mode_gp,
......
......@@ -3187,8 +3187,8 @@ static const arch_register_req_t no_register_req = {
arch_register_req_type_none,
NULL, /* regclass */
NULL, /* limit bitset */
{ -1, -1 }, /* same pos */
-1 /* different pos */
0, /* same pos */
0 /* different pos */
};
/**
......@@ -3406,9 +3406,8 @@ static void parse_asm_constraint(int pos, constraint_t *constraint, const char *
req->cls = other_constr->cls;
req->type = arch_register_req_type_should_be_same;
req->limited = NULL;
req->other_same[0] = pos;
req->other_same[1] = -1;
req->other_different = -1;
req->other_same = 1U << pos;
req->other_different = 0;
/* switch constraints. This is because in firm we have same_as
* constraints on the output constraints while in the gcc asm syntax
......
......@@ -94,13 +94,27 @@ static void dump_reg_req(FILE *F, ir_node *n, const arch_register_req_t **reqs,
}
if (reqs[i]->type & arch_register_req_type_should_be_same) {
ir_fprintf(F, " same as %+F", get_irn_n(n, reqs[i]->other_same[0]));
if (reqs[i]->other_same[1] != -1)
ir_fprintf(F, " or %+F", get_irn_n(n, reqs[i]->other_same[1]));
const unsigned other = reqs[i]->other_same;
int i;
ir_fprintf(F, " same as");
for (i = 0; 1U << i <= other; ++i) {
if (other & (1U << i)) {
ir_fprintf(F, " %+F", i);
}
}
}
if (reqs[i]->type & arch_register_req_type_should_be_different) {
ir_fprintf(F, " different from %+F", get_irn_n(n, reqs[i]->other_different));
const unsigned other = reqs[i]->other_different;
int i;
ir_fprintf(F, " different from");
for (i = 0; 1U << i <= other; ++i) {
if (other & (1U << i)) {
ir_fprintf(F, " %+F", i);
}
}
}
fprintf(F, "\n");
......
......@@ -91,13 +91,27 @@ static void dump_reg_req(FILE *F, ir_node *n, const arch_register_req_t **reqs,
}
if (reqs[i]->type & arch_register_req_type_should_be_same) {
ir_fprintf(F, " same as %+F", get_irn_n(n, reqs[i]->other_same[0]));
if (reqs[i]->other_same[1] != -1)
ir_fprintf(F, " or %+F", get_irn_n(n, reqs[i]->other_same[1]));
const unsigned other = reqs[i]->other_same;
int i;
ir_fprintf(F, " same as");
for (i = 0; 1U << i <= other; ++i) {
if (other & (1U << i)) {
ir_fprintf(F, " %+F", get_irn_n(n, i));
}
}
}
if (reqs[i]->type & arch_register_req_type_should_be_different) {
ir_fprintf(F, " different from %+F", get_irn_n(n, reqs[i]->other_different));
const unsigned other = reqs[i]->other_different;
int i;
ir_fprintf(F, " different from");
for (i = 0; 1U << i <= other; ++i) {
if (other & (1U << i)) {
ir_fprintf(F, " %+F", get_irn_n(n, i));
}
}
}
fprintf(F, "\n");
......
......@@ -976,9 +976,8 @@ sub build_subset_class_func {
my $class = undef;
my $has_limit = 0;
my $limit_name;
my $same_pos = undef;
my $same_pos2 = undef;
my $different_pos = undef;
my $same_pos = 0;
my $different_pos = 0;
my $temp;
my @obst_init;
my @obst_limits;
......@@ -998,26 +997,33 @@ sub build_subset_class_func {
# set/unset registers
CHECK_REQS: foreach (@regs) {
if (!$is_in && /(!)?in_r(\d+)/) {
if (($1 && defined($different_pos)) || (!$1 && defined($same_pos2))) {
print STDERR "Multiple in/out references of same type in one requirement not allowed.\n";
return (undef, undef, undef, undef, undef);
my $bit_pos = 1 << ($2 - 1);
if ($different_pos & $bit_pos) {
if ($1) {
print STDERR "duplicate !in constraint\n";
} else {
print STDERR "conflicting !in and in constraints\n";
}
return (undef, undef, undef, undef);
}
if ($1) {
$different_pos = $2 - 1;
} else {
if (!defined($same_pos)) {
$same_pos = $2 - 1;
if ($same_pos & $bit_pos) {
if ($1) {
print STDERR "conflicting !in and in constraints\n";
} else {
$same_pos2 = $2 - 1;
print STDERR "duplicate in constraint\n";
}
return (undef, undef, undef, undef);
}
if ($1) {
$different_pos |= $bit_pos;
} else {
$same_pos |= $bit_pos;
}
$class = $idx_class[$2 - 1];
next CHECK_REQS;
} elsif (/!in/) {
$class = $idx_class[0];
return ($class, "NULL", undef, undef, 666);
}
# check for negate
......@@ -1026,7 +1032,7 @@ CHECK_REQS: foreach (@regs) {
# we have seen a positive constraint as the first one, but this one is negative
# this doesn't make sense
print STDERR "Mixed positive and negative constraints for the same slot are not allowed.\n";
return (undef, undef, undef, undef, undef);
return (undef, undef, undef, undef);
}
if (!defined($neg)) {
......@@ -1040,7 +1046,7 @@ CHECK_REQS: foreach (@regs) {
# we have seen a negative constraint as first one but this one is positive
# this doesn't make sense
print STDERR "Mixed positive and negative constraints for the same slot are not allowed.\n";
return (undef, undef, undef, undef, undef);
return (undef, undef, undef, undef);
}
$has_limit = 1;
......@@ -1051,7 +1057,7 @@ CHECK_REQS: foreach (@regs) {
$temp = get_reg_class($_);
if (!defined($temp)) {
print STDERR "Unknown register '$_'!\n";
return (undef, undef, undef, undef, undef);
return (undef, undef, undef, undef);
}
# set class
......@@ -1060,7 +1066,7 @@ CHECK_REQS: foreach (@regs) {
} elsif ($class ne $temp) {
# all registers must belong to the same class
print STDERR "Registerclass mismatch. '$_' is not member of class '$class'.\n";
return (undef, undef, undef, undef, undef);
return (undef, undef, undef, undef);
}
# calculate position inside the initializer bitfield (only 32 bits per
......@@ -1090,7 +1096,7 @@ CHECK_REQS: foreach (@regs) {
if(defined($limit_bitsets{$limit_name})) {
$limit_name = $limit_bitsets{$limit_name};
return ($class, $limit_name, $same_pos, $same_pos2, $different_pos);
return ($class, $limit_name, $same_pos, $different_pos);
}
$limit_bitsets{$limit_name} = $limit_name;
......@@ -1128,7 +1134,7 @@ CHECK_REQS: foreach (@regs) {
push(@obst_limit_func, " };\n");
}
return ($class, $limit_name, $same_pos, $same_pos2, $different_pos);
return ($class, $limit_name, $same_pos, $different_pos);
}
###
......@@ -1150,8 +1156,8 @@ sub generate_requirements {