/* * Copyright (C) 1995-2011 University of Karlsruhe. All rights reserved. * * This file is part of libFirm. * * This file may be distributed and/or modified under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation and appearing in the file LICENSE.GPL included in the * packaging of this file. * * Licensees holding valid libFirm Professional Edition licenses may use * this file in accordance with the libFirm Commercial License * Agreement provided with the Software. * * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE. */ /** * @file * @brief Preference Guided Register Assignment * @author Matthias Braun * @date 14.2.2009 * @version $Id$ * * The idea is to allocate registers in 2 passes: * 1. A first pass to determine "preferred" registers for live-ranges. This * calculates for each register and each live-range a value indicating * the usefulness. (You can roughly think of the value as the negative * costs needed for copies when the value is in the specific registers...) * * 2. Walk blocks and assign registers in a greedy fashion, preferring * registers with high preferences. When register constraints are not met, * add copies and split live-ranges. * * TODO: * - make use of free registers in the permute_values code */ #include "config.h" #include <float.h> #include <stdbool.h> #include <math.h> #include "error.h" #include "execfreq.h" #include "ircons.h" #include "irdom.h" #include "iredges_t.h" #include "irgraph_t.h" #include "irgwalk.h" #include "irnode_t.h" #include "irprintf.h" #include "irdump.h" #include "obst.h" #include "raw_bitset.h" #include "unionfind.h" #include "pdeq.h" #include "hungarian.h" #include "beabi.h" #include "bechordal_t.h" #include "be.h" #include "beirg.h" #include "belive_t.h" #include "bemodule.h" #include "benode.h" #include "bera.h" #include "besched.h" #include "bespill.h" #include "bespillutil.h" #include "beverify.h" #include "beutil.h" #include "bestack.h" #define USE_FACTOR 1.0f #define DEF_FACTOR 1.0f #define NEIGHBOR_FACTOR 0.2f #define AFF_SHOULD_BE_SAME 0.5f #define AFF_PHI 1.0f #define SPLIT_DELTA 1.0f #define MAX_OPTIMISTIC_SPLIT_RECURSION 0 DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;) static struct obstack obst; static ir_graph *irg; static const arch_register_class_t *cls; static be_lv_t *lv; static const ir_exec_freq *execfreqs; static unsigned n_regs; static unsigned *normal_regs; static int *congruence_classes; static ir_node **block_order; static int n_block_order; static int create_preferences = true; static int create_congruence_classes = true; static int propagate_phi_registers = true; static const lc_opt_table_entry_t options[] = { LC_OPT_ENT_BOOL("prefs", "use preference based coloring", &create_preferences), LC_OPT_ENT_BOOL("congruences", "create congruence classes", &create_congruence_classes), LC_OPT_ENT_BOOL("prop_phi", "propagate phi registers", &propagate_phi_registers), LC_OPT_LAST }; /** currently active assignments (while processing a basic block) * maps registers to values (their current copies) */ static ir_node **assignments; /** * allocation information: last_uses, register preferences * the information is per firm-node.
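* * A worked example (numbers assumed purely for illustration): a value with * prefs = { r0: 1.5, r1: -0.2, r2: 0.0 } has accumulated execfreq-weighted * bonuses for r0 (e.g. from should_be_same affinities) and penalties for r1 * (e.g. from limited constraints of neighbors); the greedy assignment pass * later sorts the candidate registers by exactly these values.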
*/ struct allocation_info_t { unsigned last_uses[2]; /**< bitset indicating last uses (input pos) */ ir_node *current_value; /**< copy of the value that should be used */ ir_node *original_value; /**< for copies, points to the original value */ float prefs[0]; /**< register preferences */ }; typedef struct allocation_info_t allocation_info_t; /** helper data structure used when sorting register preferences */ struct reg_pref_t { unsigned num; float pref; }; typedef struct reg_pref_t reg_pref_t; /** per basic-block information */ struct block_info_t { bool processed; /**< indicates whether block is processed */ ir_node *assignments[0]; /**< register assignments at end of block */ }; typedef struct block_info_t block_info_t; /** * Get the allocation info for a node. * The info is allocated on the first visit of a node. */ static allocation_info_t *get_allocation_info(ir_node *node) { allocation_info_t *info = (allocation_info_t*)get_irn_link(node); if (info == NULL) { info = OALLOCFZ(&obst, allocation_info_t, prefs, n_regs); info->current_value = node; info->original_value = node; set_irn_link(node, info); } return info; } static allocation_info_t *try_get_allocation_info(const ir_node *node) { return (allocation_info_t*) get_irn_link(node); } /** * Get allocation information for a basic block */ static block_info_t *get_block_info(ir_node *block) { block_info_t *info = (block_info_t*)get_irn_link(block); assert(is_Block(block)); if (info == NULL) { info = OALLOCFZ(&obst, block_info_t, assignments, n_regs); set_irn_link(block, info); } return info; } /** * Link the allocation info of a node to a copy. * Afterwards, both nodes use the same allocation info. * The copy must not have an allocation info assigned yet. * * @param copy the node that gets the allocation info assigned * @param value the original node */ static void mark_as_copy_of(ir_node *copy, ir_node *value) { ir_node *original; allocation_info_t *info = get_allocation_info(value); allocation_info_t *copy_info = get_allocation_info(copy); /* find original value */ original = info->original_value; if (original != value) { info = get_allocation_info(original); } assert(info->original_value == original); info->current_value = copy; /* the copy should not be linked to something else yet */ assert(copy_info->original_value == copy); copy_info->original_value = original; /* copy over allocation preferences */ memcpy(copy_info->prefs, info->prefs, n_regs * sizeof(copy_info->prefs[0])); } /** * Calculate the penalties for every register on a node and its live neighbors.
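* * Example (a sketch with assumed numbers): with penalty 1.0, four registers * and limited = {r0}, the node itself loses 1.0 of preference on each of the * forbidden registers r1..r3, while every live neighbor loses * 1.0 * NEIGHBOR_FACTOR = 0.2 on the allowed register r0. Had the limited set * allowed two registers, the neighbor penalty would shrink further to * 0.2 * 0.8 / 2 = 0.08 per allowed register.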
* * @param live_nodes the set of live nodes at the current position, may be NULL * @param penalty the penalty to subtract from the preferences * @param limited a raw bitset containing the limited set for the node * @param node the node */ static void give_penalties_for_limits(const ir_nodeset_t *live_nodes, float penalty, const unsigned* limited, ir_node *node) { ir_nodeset_iterator_t iter; unsigned r; size_t n_allowed; allocation_info_t *info = get_allocation_info(node); ir_node *neighbor; /* give penalty for all forbidden regs */ for (r = 0; r < n_regs; ++r) { if (rbitset_is_set(limited, r)) continue; info->prefs[r] -= penalty; } /* all other live values should get a penalty for allowed regs */ if (live_nodes == NULL) return; penalty *= NEIGHBOR_FACTOR; n_allowed = rbitset_popcount(limited, n_regs); if (n_allowed > 1) { /* only create a very weak penalty if multiple regs are allowed */ penalty = (penalty * 0.8f) / n_allowed; } foreach_ir_nodeset(live_nodes, neighbor, iter) { allocation_info_t *neighbor_info; /* TODO: if op is used on multiple inputs we might not do a * continue here */ if (neighbor == node) continue; neighbor_info = get_allocation_info(neighbor); for (r = 0; r < n_regs; ++r) { if (!rbitset_is_set(limited, r)) continue; neighbor_info->prefs[r] -= penalty; } } } /** * Calculate the preferences of a definition for the current register class. * If the definition uses a limited set of registers, reduce the preferences * for the limited registers on the node and its neighbors. * * @param live_nodes the set of live nodes at the current node * @param weight the weight * @param node the current node */ static void check_defs(const ir_nodeset_t *live_nodes, float weight, ir_node *node) { const arch_register_req_t *req = arch_get_register_req_out(node); if (req->type & arch_register_req_type_limited) { const unsigned *limited = req->limited; float penalty = weight * DEF_FACTOR; give_penalties_for_limits(live_nodes, penalty, limited, node); } if (req->type & arch_register_req_type_should_be_same) { ir_node *insn = skip_Proj(node); allocation_info_t *info = get_allocation_info(node); int arity = get_irn_arity(insn); int i; float factor = 1.0f / rbitset_popcount(&req->other_same, arity); for (i = 0; i < arity; ++i) { ir_node *op; unsigned r; allocation_info_t *op_info; if (!rbitset_is_set(&req->other_same, i)) continue; op = get_irn_n(insn, i); /* if the value at the should_be_same input doesn't die at the * node, then it is no use to propagate the constraints (since a * copy will emerge anyway) */ if (ir_nodeset_contains(live_nodes, op)) continue; op_info = get_allocation_info(op); for (r = 0; r < n_regs; ++r) { op_info->prefs[r] += info->prefs[r] * factor; } } } } /** * Walker: Runs on a block and calculates the preferences for every * node and every register from the considered register class. */ static void analyze_block(ir_node *block, void *data) { float weight = (float)get_block_execfreq(execfreqs, block); ir_nodeset_t live_nodes; ir_node *node; (void) data; ir_nodeset_init(&live_nodes); be_liveness_end_of_block(lv, cls, block, &live_nodes); sched_foreach_reverse(block, node) { allocation_info_t *info; int i; int arity; if (is_Phi(node)) break; if (create_preferences) { ir_node *value; be_foreach_definition(node, cls, value, check_defs(&live_nodes, weight, value); ); } /* mark last uses */ arity = get_irn_arity(node); /* the allocation info currently uses a fixed-size bitset (last_uses) to mark last used inputs, so we will fail for a node with more inputs than that bitset has bits.
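* (With the current two-element unsigned array that is typically 2 * 32 = 64 * inputs; the check below derives the bound with sizeof, so it cannot drift * from the actual array size.)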
*/ if (arity >= (int) sizeof(info->last_uses) * 8) { panic("Node with more than %d inputs not supported yet", (int) sizeof(info->last_uses) * 8); } info = get_allocation_info(node); for (i = 0; i < arity; ++i) { ir_node *op = get_irn_n(node, i); const arch_register_req_t *req = arch_get_register_req_out(op); if (req->cls != cls) continue; /* last usage of a value? */ if (!ir_nodeset_contains(&live_nodes, op)) { rbitset_set(info->last_uses, i); } } be_liveness_transfer(cls, node, &live_nodes); if (create_preferences) { /* update weights based on usage constraints */ for (i = 0; i < arity; ++i) { const arch_register_req_t *req; const unsigned *limited; ir_node *op = get_irn_n(node, i); if (!arch_irn_consider_in_reg_alloc(cls, op)) continue; req = arch_get_register_req(node, i); if (!(req->type & arch_register_req_type_limited)) continue; limited = req->limited; give_penalties_for_limits(&live_nodes, weight * USE_FACTOR, limited, op); } } } ir_nodeset_destroy(&live_nodes); } static void congruence_def(ir_nodeset_t *live_nodes, const ir_node *node) { const arch_register_req_t *req = arch_get_register_req_out(node); /* should_be_same constraint? */ if (req->type & arch_register_req_type_should_be_same) { const ir_node *insn = skip_Proj_const(node); int arity = get_irn_arity(insn); int i; unsigned node_idx = get_irn_idx(node); node_idx = uf_find(congruence_classes, node_idx); for (i = 0; i < arity; ++i) { ir_node *live; ir_node *op; int op_idx; ir_nodeset_iterator_t iter; bool interferes = false; if (!rbitset_is_set(&req->other_same, i)) continue; op = get_irn_n(insn, i); op_idx = get_irn_idx(op); op_idx = uf_find(congruence_classes, op_idx); /* do we interfere with the value? */ foreach_ir_nodeset(live_nodes, live, iter) { int lv_idx = get_irn_idx(live); lv_idx = uf_find(congruence_classes, lv_idx); if (lv_idx == op_idx) { interferes = true; break; } } /* don't put them in the same affinity class if we interfere */ if (interferes) continue; node_idx = uf_union(congruence_classes, node_idx, op_idx); DB((dbg, LEVEL_3, "Merge %+F and %+F congruence classes\n", node, op)); /* one should_be_same is enough... */ break; } } } static void create_congruence_class(ir_node *block, void *data) { ir_nodeset_t live_nodes; ir_node *node; (void) data; ir_nodeset_init(&live_nodes); be_liveness_end_of_block(lv, cls, block, &live_nodes); /* check should_be_same constraints */ sched_foreach_reverse(block, node) { ir_node *value; if (is_Phi(node)) break; be_foreach_definition(node, cls, value, congruence_def(&live_nodes, value); ); be_liveness_transfer(cls, node, &live_nodes); } /* check phi congruence classes */ sched_foreach_reverse_from(node, node) { int i; int arity; int node_idx; assert(is_Phi(node)); if (!arch_irn_consider_in_reg_alloc(cls, node)) continue; node_idx = get_irn_idx(node); node_idx = uf_find(congruence_classes, node_idx); arity = get_irn_arity(node); for (i = 0; i < arity; ++i) { bool interferes = false; ir_nodeset_iterator_t iter; unsigned r; int old_node_idx; ir_node *live; ir_node *phi; allocation_info_t *head_info; allocation_info_t *other_info; ir_node *op = get_Phi_pred(node, i); int op_idx = get_irn_idx(op); op_idx = uf_find(congruence_classes, op_idx); /* do we interfere with the value? */ foreach_ir_nodeset(&live_nodes, live, iter) { int lv_idx = get_irn_idx(live); lv_idx = uf_find(congruence_classes, lv_idx); if (lv_idx == op_idx) { interferes = true; break; } } /* don't put them in the same affinity class if we interfere */ if (interferes) continue; /* any other phi has the same input?
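* (If another phi in this block reads the same value over the same * control-flow edge, merging our class with the operand's class would demand * one register for two phis that are live at the same time, so we treat this * as interference as well.)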
*/ sched_foreach(block, phi) { ir_node *oop; int oop_idx; if (!is_Phi(phi)) break; if (!arch_irn_consider_in_reg_alloc(cls, phi)) continue; oop = get_Phi_pred(phi, i); if (oop == op) continue; oop_idx = get_irn_idx(oop); oop_idx = uf_find(congruence_classes, oop_idx); if (oop_idx == op_idx) { interferes = true; break; } } if (interferes) continue; /* merge the 2 congruence classes and sum up their preferences */ old_node_idx = node_idx; node_idx = uf_union(congruence_classes, node_idx, op_idx); DB((dbg, LEVEL_3, "Merge %+F and %+F congruence classes\n", node, op)); old_node_idx = node_idx == old_node_idx ? op_idx : old_node_idx; head_info = get_allocation_info(get_idx_irn(irg, node_idx)); other_info = get_allocation_info(get_idx_irn(irg, old_node_idx)); for (r = 0; r < n_regs; ++r) { head_info->prefs[r] += other_info->prefs[r]; } } } } static void set_congruence_prefs(ir_node *node, void *data) { allocation_info_t *info; allocation_info_t *head_info; unsigned node_idx = get_irn_idx(node); unsigned node_set = uf_find(congruence_classes, node_idx); (void) data; /* head of congruence class or not in any class */ if (node_set == node_idx) return; if (!arch_irn_consider_in_reg_alloc(cls, node)) return; head_info = get_allocation_info(get_idx_irn(irg, node_set)); info = get_allocation_info(node); memcpy(info->prefs, head_info->prefs, n_regs * sizeof(info->prefs[0])); } static void combine_congruence_classes(void) { size_t n = get_irg_last_idx(irg); congruence_classes = XMALLOCN(int, n); uf_init(congruence_classes, n); /* create congruence classes */ irg_block_walk_graph(irg, create_congruence_class, NULL, NULL); /* merge preferences */ irg_walk_graph(irg, set_congruence_prefs, NULL, NULL); free(congruence_classes); } /** * Assign register reg to the given node. * * @param node the node * @param reg the register */ static void use_reg(ir_node *node, const arch_register_t *reg) { unsigned r = arch_register_get_index(reg); assignments[r] = node; arch_set_irn_register(node, reg); } static void free_reg_of_value(ir_node *node) { const arch_register_t *reg; unsigned r; if (!arch_irn_consider_in_reg_alloc(cls, node)) return; reg = arch_get_irn_register(node); r = arch_register_get_index(reg); /* assignments[r] may be NULL if a value is used at 2 inputs, so it gets freed twice. */ assert(assignments[r] == node || assignments[r] == NULL); assignments[r] = NULL; } /** * Compare two register preferences in decreasing order. */ static int compare_reg_pref(const void *e1, const void *e2) { const reg_pref_t *rp1 = (const reg_pref_t*) e1; const reg_pref_t *rp2 = (const reg_pref_t*) e2; if (rp1->pref < rp2->pref) return 1; if (rp1->pref > rp2->pref) return -1; return 0; } static void fill_sort_candidates(reg_pref_t *regprefs, const allocation_info_t *info) { unsigned r; for (r = 0; r < n_regs; ++r) { float pref = info->prefs[r]; regprefs[r].num = r; regprefs[r].pref = pref; } /* TODO: use a stable sort here to avoid unnecessary register jumping */ qsort(regprefs, n_regs, sizeof(regprefs[0]), compare_reg_pref); } static bool try_optimistic_split(ir_node *to_split, ir_node *before, float pref, float pref_delta, unsigned *forbidden_regs, int recursion) { const arch_register_t *from_reg; const arch_register_t *reg; ir_node *original_insn; ir_node *block; ir_node *copy; unsigned r; unsigned from_r; unsigned i; allocation_info_t *info = get_allocation_info(to_split); reg_pref_t *prefs; float delta; float split_threshold; (void) pref; /* stupid hack: don't optimistically split dont_spill nodes...
* (so we don't split away the values produced because of * must_be_different constraints) */ original_insn = skip_Proj(info->original_value); if (arch_irn_get_flags(original_insn) & arch_irn_flags_dont_spill) return false; from_reg = arch_get_irn_register(to_split); from_r = arch_register_get_index(from_reg); block = get_nodes_block(before); split_threshold = (float)get_block_execfreq(execfreqs, block) * SPLIT_DELTA; if (pref_delta < split_threshold * 0.5) return false; /* find the best free position where we could move to */ prefs = ALLOCAN(reg_pref_t, n_regs); fill_sort_candidates(prefs, info); for (i = 0; i < n_regs; ++i) { float apref; float apref_delta; bool res; bool old_source_state; /* we need a normal register which is not an output register and which differs from the current register of to_split */ r = prefs[i].num; if (!rbitset_is_set(normal_regs, r)) continue; if (rbitset_is_set(forbidden_regs, r)) continue; if (r == from_r) continue; /* is the split worth it? */ delta = pref_delta + prefs[i].pref; if (delta < split_threshold) { DB((dbg, LEVEL_3, "Not doing optimistic split of %+F (depth %d), win %f too low\n", to_split, recursion, delta)); return false; } /* if the register is free then we can do the split */ if (assignments[r] == NULL) break; /* otherwise we might try recursively calling optimistic_split */ if (recursion+1 > MAX_OPTIMISTIC_SPLIT_RECURSION) continue; apref = prefs[i].pref; apref_delta = i+1 < n_regs ? apref - prefs[i+1].pref : 0; apref_delta += pref_delta - split_threshold; /* our source register isn't a useful destination for recursive splits */ old_source_state = rbitset_is_set(forbidden_regs, from_r); rbitset_set(forbidden_regs, from_r); /* try recursive split */ res = try_optimistic_split(assignments[r], before, apref, apref_delta, forbidden_regs, recursion+1); /* restore our destination */ if (old_source_state) { rbitset_set(forbidden_regs, from_r); } else { rbitset_clear(forbidden_regs, from_r); } if (res) break; } if (i >= n_regs) return false; reg = arch_register_for_index(cls, r); copy = be_new_Copy(cls, block, to_split); mark_as_copy_of(copy, to_split); /* hacky, but correct here */ if (assignments[arch_register_get_index(from_reg)] == to_split) free_reg_of_value(to_split); use_reg(copy, reg); sched_add_before(before, copy); DB((dbg, LEVEL_3, "Optimistic live-range split %+F move %+F(%s) -> %s before %+F (win %f, depth %d)\n", copy, to_split, from_reg->name, reg->name, before, delta, recursion)); return true; } /** * Determine and assign a register for node @p node */ static void assign_reg(const ir_node *block, ir_node *node, unsigned *forbidden_regs) { const arch_register_t *reg; allocation_info_t *info; const arch_register_req_t *req; reg_pref_t *reg_prefs; ir_node *in_node; unsigned i; const unsigned *allowed_regs; unsigned r; assert(!is_Phi(node)); /* preassigned register? */ reg = arch_get_irn_register(node); if (reg != NULL) { DB((dbg, LEVEL_2, "Preassignment %+F -> %s\n", node, reg->name)); use_reg(node, reg); return; } req = arch_get_register_req_out(node); /* ignore reqs must be preassigned */ assert (!
(req->type & arch_register_req_type_ignore)); /* give should_be_same bonuses */ info = get_allocation_info(node); in_node = skip_Proj(node); if (req->type & arch_register_req_type_should_be_same) { float weight = (float)get_block_execfreq(execfreqs, block); int arity = get_irn_arity(in_node); int i; assert(arity <= (int) sizeof(req->other_same) * 8); for (i = 0; i < arity; ++i) { ir_node *in; const arch_register_t *reg; unsigned r; if (!rbitset_is_set(&req->other_same, i)) continue; in = get_irn_n(in_node, i); reg = arch_get_irn_register(in); assert(reg != NULL); r = arch_register_get_index(reg); /* if the value didn't die here then we should not propagate the * should_be_same info */ if (assignments[r] == in) continue; info->prefs[r] += weight * AFF_SHOULD_BE_SAME; } } /* create list of register candidates and sort by their preference */ DB((dbg, LEVEL_2, "Candidates for %+F:", node)); reg_prefs = ALLOCAN(reg_pref_t, n_regs); fill_sort_candidates(reg_prefs, info); for (i = 0; i < n_regs; ++i) { unsigned num = reg_prefs[i].num; const arch_register_t *reg; if (!rbitset_is_set(normal_regs, num)) continue; reg = arch_register_for_index(cls, num); DB((dbg, LEVEL_2, " %s(%f)", reg->name, reg_prefs[i].pref)); } DB((dbg, LEVEL_2, "\n")); allowed_regs = normal_regs; if (req->type & arch_register_req_type_limited) { allowed_regs = req->limited; } for (i = 0; i < n_regs; ++i) { float pref, delta; ir_node *before; bool res; r = reg_prefs[i].num; if (!rbitset_is_set(allowed_regs, r)) continue; if (assignments[r] == NULL) break; pref = reg_prefs[i].pref; delta = i+1 < n_regs ? pref - reg_prefs[i+1].pref : 0; before = skip_Proj(node); res = try_optimistic_split(assignments[r], before, pref, delta, forbidden_regs, 0); if (res) break; } if (i >= n_regs) { /* the common reason to hit this panic is when one of your nodes is not * register pressure faithful */ panic("No register left for %+F\n", node); } reg = arch_register_for_index(cls, r); DB((dbg, LEVEL_2, "Assign %+F -> %s\n", node, reg->name)); use_reg(node, reg); } /** * Add a permutation in front of a node and change the assignments * due to this permutation. * * To understand this imagine a permutation like this: * * 1 -> 2 * 2 -> 3 * 3 -> 1, 5 * 4 -> 6 * 5 * 6 * 7 -> 7 * * First we count how many destinations a single value has. At the same time * we can be sure that each destination register has at most 1 source register * (it can have 0 which means we don't care what value is in it). * We ignore all fulfilled permutations (like 7->7). * In a first pass we create as many copy instructions as possible, as they * are generally cheaper than exchanges. We do this by counting into how many * destinations a register has to be copied (in the example it's 2 for register * 3, or 1 for the registers 1,2,4 and 7). * We can then create a copy into every destination register when the usecount * of that register is 0 (= no one else needs the value in the register). * * After this step we should only have cycles left. We implement a cyclic * permutation of n registers with n-1 transpositions. * * @param live_nodes the set of live nodes, updated due to live range splits * @param before the node before we add the permutation * @param permutation the permutation array: indices are the destination * registers, the values in the array are the source * registers.
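* * For the example above this yields, roughly (register numbering assumed as * in the sketch): Copy r3->r5 and Copy r4->r6 are emitted first, since those * destinations are not needed as sources anymore; the remaining cycle * 1 -> 2 -> 3 -> 1 is then implemented by two Perm transpositions, the first * exchanging r2/r3, the second r1/r2.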
*/ static void permute_values(ir_nodeset_t *live_nodes, ir_node *before, unsigned *permutation) { unsigned *n_used = ALLOCANZ(unsigned, n_regs); ir_node *block; unsigned r; /* determine how often each source register needs to be read */ for (r = 0; r < n_regs; ++r) { unsigned old_reg = permutation[r]; ir_node *value; value = assignments[old_reg]; if (value == NULL) { /* nothing to do here, reg is not live. Mark it as fixpoint * so we ignore it in the next steps */ permutation[r] = r; continue; } ++n_used[old_reg]; } block = get_nodes_block(before); /* step 1: create copies where immediately possible */ for (r = 0; r < n_regs; /* empty */) { ir_node *copy; ir_node *src; const arch_register_t *reg; unsigned old_r = permutation[r]; /* - no need to do anything for fixed points. - we can't copy if the value in the dest reg is still needed */ if (old_r == r || n_used[r] > 0) { ++r; continue; } /* create a copy */ src = assignments[old_r]; copy = be_new_Copy(cls, block, src); sched_add_before(before, copy); reg = arch_register_for_index(cls, r); DB((dbg, LEVEL_2, "Copy %+F (from %+F, before %+F) -> %s\n", copy, src, before, reg->name)); mark_as_copy_of(copy, src); use_reg(copy, reg); if (live_nodes != NULL) { ir_nodeset_insert(live_nodes, copy); } /* old register has 1 user less, permutation is resolved */ assert(arch_register_get_index(arch_get_irn_register(src)) == old_r); permutation[r] = r; assert(n_used[old_r] > 0); --n_used[old_r]; if (n_used[old_r] == 0) { if (live_nodes != NULL) { ir_nodeset_remove(live_nodes, src); } free_reg_of_value(src); } /* advance or jump back (if this copy enabled another copy) */ if (old_r < r && n_used[old_r] == 0) { r = old_r; } else { ++r; } } /* at this point we only have "cycles" left which we have to resolve with * perm instructions * TODO: if we have free registers left, then we should really use copy * instructions for any cycle longer than 2 registers... * (this is probably architecture dependent, there might be archs where * copies are preferable even for 2-cycles) */ /* create perms with the rest */ for (r = 0; r < n_regs; /* empty */) { const arch_register_t *reg; unsigned old_r = permutation[r]; unsigned r2; ir_node *in[2]; ir_node *perm; ir_node *proj0; ir_node *proj1; if (old_r == r) { ++r; continue; } /* we shouldn't have copies from 1 value to multiple destinations left */ assert(n_used[old_r] == 1); /* exchange old_r and r2; after that old_r is a fixed point */ r2 = permutation[old_r]; in[0] = assignments[r2]; in[1] = assignments[old_r]; perm = be_new_Perm(cls, block, 2, in); sched_add_before(before, perm); DB((dbg, LEVEL_2, "Perm %+F (perm %+F,%+F, before %+F)\n", perm, in[0], in[1], before)); proj0 = new_r_Proj(perm, get_irn_mode(in[0]), 0); mark_as_copy_of(proj0, in[0]); reg = arch_register_for_index(cls, old_r); use_reg(proj0, reg); proj1 = new_r_Proj(perm, get_irn_mode(in[1]), 1); mark_as_copy_of(proj1, in[1]); reg = arch_register_for_index(cls, r2); use_reg(proj1, reg); /* 1 value is now in the correct register */ permutation[old_r] = old_r; /* the source of r changed to r2 */ permutation[r] = r2; /* if we have reached a fixpoint, update data structures */ if (live_nodes != NULL) { ir_nodeset_remove(live_nodes, in[0]); ir_nodeset_remove(live_nodes, in[1]); ir_nodeset_remove(live_nodes, proj0); ir_nodeset_insert(live_nodes, proj1); } } #ifdef DEBUG_libfirm /* now we should only have fixpoints left */ for (r = 0; r < n_regs; ++r) { assert(permutation[r] == r); } #endif } /** * Free regs for values last used.
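* (This relies on the last_uses bitset computed in analyze_block(): every * input position whose bit is set releases its operand's register back into * the assignments array and removes the operand from the live set.)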
* * @param live_nodes set of live nodes, will be updated * @param node the node to consider */ static void free_last_uses(ir_nodeset_t *live_nodes, ir_node *node) { allocation_info_t *info = get_allocation_info(node); const unsigned *last_uses = info->last_uses; int arity = get_irn_arity(node); int i; for (i = 0; i < arity; ++i) { ir_node *op; /* check whether this operand is a last use */ if (!rbitset_is_set(last_uses, i)) continue; op = get_irn_n(node, i); free_reg_of_value(op); ir_nodeset_remove(live_nodes, op); } } /** * change inputs of a node to the current value (copies/perms) */ static void rewire_inputs(ir_node *node) { int i; int arity = get_irn_arity(node); for (i = 0; i < arity; ++i) { ir_node *op = get_irn_n(node, i); allocation_info_t *info = try_get_allocation_info(op); if (info == NULL) continue; info = get_allocation_info(info->original_value); if (info->current_value != op) { set_irn_n(node, i, info->current_value); } } } /** * Create a bitset of registers occupied by values living through an * instruction */ static void determine_live_through_regs(unsigned *bitset, ir_node *node) { const allocation_info_t *info = get_allocation_info(node); unsigned r; int i; int arity; /* mark all used registers as potentially live-through */ for (r = 0; r < n_regs; ++r) { if (assignments[r] == NULL) continue; if (!rbitset_is_set(normal_regs, r)) continue; rbitset_set(bitset, r); } /* remove registers of values dying at the instruction */ arity = get_irn_arity(node); for (i = 0; i < arity; ++i) { ir_node *op; const arch_register_t *reg; if (!rbitset_is_set(info->last_uses, i)) continue; op = get_irn_n(node, i); reg = arch_get_irn_register(op); rbitset_clear(bitset, arch_register_get_index(reg)); } } /** * Enforce constraints at a node by live range splits. * * @param live_nodes the set of live nodes, might be changed * @param node the current node * @param forbidden_regs bitset of registers which must not be used for * optimistic splits; the constrained output registers * of @p node are added to it here */ static void enforce_constraints(ir_nodeset_t *live_nodes, ir_node *node, unsigned *forbidden_regs) { int arity = get_irn_arity(node); int i, res; hungarian_problem_t *bp; unsigned l, r; unsigned *assignment; ir_node *value; /* construct a bitset of registers occupied by live-through values */ unsigned *live_through_regs = NULL; /* see if any use constraints are not met */ bool good = true; for (i = 0; i < arity; ++i) { ir_node *op = get_irn_n(node, i); const arch_register_t *reg; const arch_register_req_t *req; const unsigned *limited; unsigned r; if (!arch_irn_consider_in_reg_alloc(cls, op)) continue; /* are there any limitations for the i'th operand? */ req = arch_get_register_req(node, i); if (!(req->type & arch_register_req_type_limited)) continue; limited = req->limited; reg = arch_get_irn_register(op); r = arch_register_get_index(reg); if (!rbitset_is_set(limited, r)) { /* found an assignment outside the limited set */ good = false; break; } } /* is any of the live-throughs using a constrained output register? */ be_foreach_definition(node, cls, value, if (!
(req_->type & arch_register_req_type_limited)) continue; if (live_through_regs == NULL) { rbitset_alloca(live_through_regs, n_regs); determine_live_through_regs(live_through_regs, node); } rbitset_or(forbidden_regs, req_->limited, n_regs); if (rbitsets_have_common(req_->limited, live_through_regs, n_regs)) good = false; ); if (good) return; /* create these arrays if we haven't yet */ if (live_through_regs == NULL) { rbitset_alloca(live_through_regs, n_regs); } /* at this point we have to construct a bipartite matching problem to see * which values should go to which registers * Note: We're building the matrix in "reverse" - source registers are * right, destinations left because this will produce the solution * in the format required for permute_values. */ bp = hungarian_new(n_regs, n_regs, HUNGARIAN_MATCH_PERFECT); /* add all combinations, then remove not allowed ones */ for (l = 0; l < n_regs; ++l) { if (!rbitset_is_set(normal_regs, l)) { hungarian_add(bp, l, l, 1); continue; } for (r = 0; r < n_regs; ++r) { if (!rbitset_is_set(normal_regs, r)) continue; /* live-through values may not use constrained output registers */ if (rbitset_is_set(live_through_regs, l) && rbitset_is_set(forbidden_regs, r)) continue; hungarian_add(bp, r, l, l == r ? 9 : 8); } } for (i = 0; i < arity; ++i) { ir_node *op = get_irn_n(node, i); const arch_register_t *reg; const arch_register_req_t *req; const unsigned *limited; unsigned current_reg; if (!arch_irn_consider_in_reg_alloc(cls, op)) continue; req = arch_get_register_req(node, i); if (!(req->type & arch_register_req_type_limited)) continue; limited = req->limited; reg = arch_get_irn_register(op); current_reg = arch_register_get_index(reg); for (r = 0; r < n_regs; ++r) { if (rbitset_is_set(limited, r)) continue; hungarian_remove(bp, r, current_reg); } } //hungarian_print_cost_matrix(bp, 1); hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL); assignment = ALLOCAN(unsigned, n_regs); res = hungarian_solve(bp, assignment, NULL, 0); assert(res == 0); #if 0 fprintf(stderr, "Swap result:"); for (i = 0; i < (int) n_regs; ++i) { fprintf(stderr, " %d", assignment[i]); } fprintf(stderr, "\n"); #endif hungarian_free(bp); permute_values(live_nodes, node, assignment); } /** test whether node @p test_value is a copy of the value of node @p value */ static bool is_copy_of(ir_node *value, ir_node *test_value) { allocation_info_t *test_info; allocation_info_t *info; if (value == test_value) return true; info = get_allocation_info(value); test_info = get_allocation_info(test_value); return test_info->original_value == info->original_value; } /** * find a value in the end-assignment of a basic block * @returns the index into the assignment array if found * -1 if not found */ static int find_value_in_block_info(block_info_t *info, ir_node *value) { unsigned r; ir_node **assignments = info->assignments; for (r = 0; r < n_regs; ++r) { ir_node *a_value = assignments[r]; if (a_value == NULL) continue; if (is_copy_of(a_value, value)) return (int) r; } return -1; } /** * Create the necessary permutations at the end of a basic block to fulfill * the register assignment for phi-nodes in the next block */ static void add_phi_permutations(ir_node *block, int p) { unsigned r; unsigned *permutation; ir_node **old_assignments; bool need_permutation; ir_node *phi; ir_node *pred = get_Block_cfgpred_block(block, p); block_info_t *pred_info = get_block_info(pred); /* predecessor not processed yet?
nothing to do */ if (!pred_info->processed) return; permutation = ALLOCAN(unsigned, n_regs); for (r = 0; r < n_regs; ++r) { permutation[r] = r; } /* check phi nodes */ need_permutation = false; phi = sched_first(block); for ( ; is_Phi(phi); phi = sched_next(phi)) { const arch_register_t *reg; const arch_register_t *op_reg; int regn; int a; ir_node *op; if (!arch_irn_consider_in_reg_alloc(cls, phi)) continue; op = get_Phi_pred(phi, p); a = find_value_in_block_info(pred_info, op); assert(a >= 0); reg = arch_get_irn_register(phi); regn = arch_register_get_index(reg); /* same register? nothing to do */ if (regn == a) continue; op = pred_info->assignments[a]; op_reg = arch_get_irn_register(op); /* virtual or joker registers are ok too */ if ((op_reg->type & arch_register_type_joker) || (op_reg->type & arch_register_type_virtual)) continue; permutation[regn] = a; need_permutation = true; } if (need_permutation) { /* permute values at end of predecessor */ old_assignments = assignments; assignments = pred_info->assignments; permute_values(NULL, be_get_end_of_block_insertion_point(pred), permutation); assignments = old_assignments; } /* change phi nodes to use the copied values */ phi = sched_first(block); for ( ; is_Phi(phi); phi = sched_next(phi)) { int a; ir_node *op; if (!arch_irn_consider_in_reg_alloc(cls, phi)) continue; op = get_Phi_pred(phi, p); /* we have permuted all values into the correct registers so we can simply query which value occupies the phi's register in the predecessor */ a = arch_register_get_index(arch_get_irn_register(phi)); op = pred_info->assignments[a]; set_Phi_pred(phi, p, op); } } /** * Set preferences for a phi's register based on the registers used at the * phi inputs. */ static void adapt_phi_prefs(ir_node *phi) { int i; int arity = get_irn_arity(phi); ir_node *block = get_nodes_block(phi); allocation_info_t *info = get_allocation_info(phi); for (i = 0; i < arity; ++i) { ir_node *op = get_irn_n(phi, i); const arch_register_t *reg = arch_get_irn_register(op); ir_node *pred_block; block_info_t *pred_block_info; float weight; unsigned r; if (reg == NULL) continue; /* we only give the bonus if the predecessor already has registers * assigned, otherwise we only see a dummy value * and any conclusions about its register are useless */ pred_block = get_Block_cfgpred_block(block, i); pred_block_info = get_block_info(pred_block); if (!pred_block_info->processed) continue; /* give bonus for already assigned register */ weight = (float)get_block_execfreq(execfreqs, pred_block); r = arch_register_get_index(reg); info->prefs[r] += weight * AFF_PHI; } } /** * After a phi has been assigned a register, propagate that register as a * preference to the phi inputs.
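* * Example (a sketch with assumed numbers): if the predecessor block has * execution frequency 2.0, then weight = 2.0 * AFF_PHI = 2.0; every other * preference of the input is clamped down to at most -2.0 and the assigned * register is raised to +2.0, so it wins any later candidate sort unless a * constraint overrides it. Inputs that are themselves phis are handled * recursively.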
*/ static void propagate_phi_register(ir_node *phi, unsigned assigned_r) { int i; ir_node *block = get_nodes_block(phi); int arity = get_irn_arity(phi); for (i = 0; i < arity; ++i) { ir_node *op = get_Phi_pred(phi, i); allocation_info_t *info = get_allocation_info(op); ir_node *pred_block = get_Block_cfgpred_block(block, i); unsigned r; float weight = (float)get_block_execfreq(execfreqs, pred_block) * AFF_PHI; if (info->prefs[assigned_r] >= weight) continue; /* demote all other registers, then promote the assigned one */ for (r = 0; r < n_regs; ++r) { if (info->prefs[r] > -weight) { info->prefs[r] = -weight; } } info->prefs[assigned_r] = weight; if (is_Phi(op)) propagate_phi_register(op, assigned_r); } } static void assign_phi_registers(ir_node *block) { int n_phis = 0; int n; int res; unsigned *assignment; ir_node *node; hungarian_problem_t *bp; /* count phi nodes */ sched_foreach(block, node) { if (!is_Phi(node)) break; if (!arch_irn_consider_in_reg_alloc(cls, node)) continue; ++n_phis; } if (n_phis == 0) return; /* build a bipartite matching problem for all phi nodes */ bp = hungarian_new(n_phis, n_regs, HUNGARIAN_MATCH_PERFECT); n = 0; sched_foreach(block, node) { unsigned r; allocation_info_t *info; if (!is_Phi(node)) break; if (!arch_irn_consider_in_reg_alloc(cls, node)) continue; /* give bonuses for predecessor colorings */ adapt_phi_prefs(node); /* add the phi's preferences to the bipartite problem */ info = get_allocation_info(node); DB((dbg, LEVEL_3, "Prefs for %+F: ", node)); for (r = 0; r < n_regs; ++r) { float costs; if (!rbitset_is_set(normal_regs, r)) continue; costs = info->prefs[r]; costs = costs < 0 ? -logf(-costs+1) : logf(costs+1); costs *= 100; costs += 10000; hungarian_add(bp, n, r, (int)costs); DB((dbg, LEVEL_3, " %s(%f)", arch_register_for_index(cls, r)->name, info->prefs[r])); } DB((dbg, LEVEL_3, "\n")); ++n; } //hungarian_print_cost_matrix(bp, 7); hungarian_prepare_cost_matrix(bp, HUNGARIAN_MODE_MAXIMIZE_UTIL); assignment = ALLOCAN(unsigned, n_regs); res = hungarian_solve(bp, assignment, NULL, 0); assert(res == 0); /* apply results */ n = 0; sched_foreach(block, node) { unsigned r; const arch_register_t *reg; if (!is_Phi(node)) break; if (!arch_irn_consider_in_reg_alloc(cls, node)) continue; r = assignment[n++]; assert(rbitset_is_set(normal_regs, r)); reg = arch_register_for_index(cls, r); DB((dbg, LEVEL_2, "Assign %+F -> %s\n", node, reg->name)); use_reg(node, reg); /* adapt preferences for phi inputs */ if (propagate_phi_registers) propagate_phi_register(node, r); } } /** * Walker: assign registers to all nodes of a block that * need registers from the currently considered register class.
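* * Roughly the steps are: (1) fetch the end-of-block assignments of the * predecessors, (2) walk the live-ins and construct phis where the * predecessors disagree about a value's register, (3) assign all phi * registers at once via a bipartite matching over the accumulated * preferences, (4) process the instructions in schedule order (rewire * inputs, enforce constraints, free last uses, assign definitions), and * (5) insert the permutations needed at the block borders.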
*/ static void allocate_coalesce_block(ir_node *block, void *data) { int i; ir_nodeset_t live_nodes; ir_node *node; int n_preds; block_info_t *block_info; block_info_t **pred_block_infos; ir_node **phi_ins; unsigned *forbidden_regs; /**< collects registers which must not be used for optimistic splits */ (void) data; DB((dbg, LEVEL_2, "* Block %+F\n", block)); /* clear assignments */ block_info = get_block_info(block); assignments = block_info->assignments; ir_nodeset_init(&live_nodes); /* gather regalloc infos of predecessor blocks */ n_preds = get_Block_n_cfgpreds(block); pred_block_infos = ALLOCAN(block_info_t*, n_preds); for (i = 0; i < n_preds; ++i) { ir_node *pred = get_Block_cfgpred_block(block, i); block_info_t *pred_info = get_block_info(pred); pred_block_infos[i] = pred_info; } phi_ins = ALLOCAN(ir_node*, n_preds); /* collect live-in nodes and preassigned values */ be_lv_foreach(lv, block, be_lv_state_in, i) { bool need_phi = false; const arch_register_req_t *req; const arch_register_t *reg; int p; node = be_lv_get_irn(lv, block, i); req = arch_get_register_req_out(node); if (req->cls != cls) continue; if (req->type & arch_register_req_type_ignore) { allocation_info_t *info = get_allocation_info(node); info->current_value = node; reg = arch_get_irn_register(node); assert(reg != NULL); /* ignore values must be preassigned */ use_reg(node, reg); continue; } /* check all predecessors for this value; if it is not the same or unknown everywhere, then we have to construct a phi (we collect the potential phi inputs here) */ for (p = 0; p < n_preds; ++p) { block_info_t *pred_info = pred_block_infos[p]; if (!pred_info->processed) { /* use node for now, it will get fixed later */ phi_ins[p] = node; need_phi = true; } else { int a = find_value_in_block_info(pred_info, node); /* must live out of predecessor */ assert(a >= 0); phi_ins[p] = pred_info->assignments[a]; /* different value from last time? then we need a phi */ if (p > 0 && phi_ins[p-1] != phi_ins[p]) { need_phi = true; } } } if (need_phi) { ir_mode *mode = get_irn_mode(node); ir_node *phi = be_new_Phi(block, n_preds, phi_ins, mode, cls); DB((dbg, LEVEL_3, "Create Phi %+F (for %+F) -", phi, node)); #ifdef DEBUG_libfirm { int i; for (i = 0; i < n_preds; ++i) { DB((dbg, LEVEL_3, " %+F", phi_ins[i])); } DB((dbg, LEVEL_3, "\n")); } #endif mark_as_copy_of(phi, node); sched_add_after(block, phi); node = phi; } else { allocation_info_t *info = get_allocation_info(node); info->current_value = phi_ins[0]; /* Grab one of the inputs we constructed (might not be the same as * "node" as we could see the same copy of the value in all * predecessors) */ node = phi_ins[0]; } /* if the node already has a register assigned use it */ reg = arch_get_irn_register(node); if (reg != NULL) { use_reg(node, reg); } /* remember that this node is live at the beginning of the block */ ir_nodeset_insert(&live_nodes, node); } rbitset_alloca(forbidden_regs, n_regs); /* handle phis...
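* (the registers of all phis in this block are chosen at once by the * bipartite matching in assign_phi_registers(), so conflicting phi * preferences are balanced against each other instead of greedily)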
*/ assign_phi_registers(block); /* all live-ins must have a register */ #ifdef DEBUG_libfirm { ir_nodeset_iterator_t iter; foreach_ir_nodeset(&live_nodes, node, iter) { const arch_register_t *reg = arch_get_irn_register(node); assert(reg != NULL); } } #endif /* assign instructions in the block */ sched_foreach(block, node) { int i; int arity; ir_node *value; /* phis are already assigned */ if (is_Phi(node)) continue; rewire_inputs(node); /* enforce use constraints */ rbitset_clear_all(forbidden_regs, n_regs); enforce_constraints(&live_nodes, node, forbidden_regs); /* rewire again: enforce_constraints() may have inserted copies and perms, changing the current value of our inputs */ rewire_inputs(node); /* registers occupied by the inputs may not be used for optimistic splits */ arity = get_irn_arity(node); for (i = 0; i < arity; ++i) { ir_node *op = get_irn_n(node, i); const arch_register_t *reg; if (!arch_irn_consider_in_reg_alloc(cls, op)) continue; reg = arch_get_irn_register(op); rbitset_set(forbidden_regs, arch_register_get_index(reg)); } /* free registers of values last used at this instruction */ free_last_uses(&live_nodes, node); /* assign output registers */ be_foreach_definition_(node, cls, value, assign_reg(block, value, forbidden_regs); ); } ir_nodeset_destroy(&live_nodes); assignments = NULL; block_info->processed = true; /* permute values at end of predecessor blocks in case of phi-nodes */ if (n_preds > 1) { int p; for (p = 0; p < n_preds; ++p) { add_phi_permutations(block, p); } } /* if we have exactly one successor then we might be able to produce phi copies now */ if (get_irn_n_edges_kind(block, EDGE_KIND_BLOCK) == 1) { const ir_edge_t *edge = get_irn_out_edge_first_kind(block, EDGE_KIND_BLOCK); ir_node *succ = get_edge_src_irn(edge); int p = get_edge_src_pos(edge); block_info_t *succ_info = get_block_info(succ); if (succ_info->processed) { add_phi_permutations(succ, p); } } } typedef struct block_costs_t block_costs_t; struct block_costs_t { float costs; /**< costs of the block */ int dfs_num; /**< depth first search number (to detect backedges) */ }; static int cmp_block_costs(const void *d1, const void *d2) { const ir_node * const *block1 = (const ir_node**)d1; const ir_node * const *block2 = (const ir_node**)d2; const block_costs_t *info1 = (const block_costs_t*)get_irn_link(*block1); const block_costs_t *info2 = (const block_costs_t*)get_irn_link(*block2); return QSORT_CMP(info2->costs, info1->costs); } static void determine_block_order(void) { int i; ir_node **blocklist = be_get_cfgpostorder(irg); int n_blocks = ARR_LEN(blocklist); int dfs_num = 0; pdeq *worklist = new_pdeq(); ir_node **order = XMALLOCN(ir_node*, n_blocks); int order_p = 0; /* clear block links...
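* (the links are reused below to cache the per-block block_costs_t records; * a NULL link later identifies a backedge predecessor whose costs are not * known yet)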
*/ for (i = 0; i < n_blocks; ++i) { ir_node *block = blocklist[i]; set_irn_link(block, NULL); } /* walk blocks in reverse postorder, the costs for each block are the * sum of the costs of its predecessors (excluding the costs on backedges * which we can't determine) */ for (i = n_blocks-1; i >= 0; --i) { block_costs_t *cost_info; ir_node *block = blocklist[i]; float execfreq = (float)get_block_execfreq(execfreqs, block); float costs = execfreq; int n_cfgpreds = get_Block_n_cfgpreds(block); int p; for (p = 0; p < n_cfgpreds; ++p) { ir_node *pred_block = get_Block_cfgpred_block(block, p); block_costs_t *pred_costs = (block_costs_t*)get_irn_link(pred_block); /* we don't have any info for backedges */ if (pred_costs == NULL) continue; costs += pred_costs->costs; } cost_info = OALLOCZ(&obst, block_costs_t); cost_info->costs = costs; cost_info->dfs_num = dfs_num++; set_irn_link(block, cost_info); } /* sort array by block costs */ qsort(blocklist, n_blocks, sizeof(blocklist[0]), cmp_block_costs); ir_reserve_resources(irg, IR_RESOURCE_BLOCK_VISITED); inc_irg_block_visited(irg); for (i = 0; i < n_blocks; ++i) { ir_node *block = blocklist[i]; if (Block_block_visited(block)) continue; /* continually add predecessors with highest costs to worklist * (without using backedges) */ do { block_costs_t *info = (block_costs_t*)get_irn_link(block); ir_node *best_pred = NULL; float best_costs = -1; int n_cfgpred = get_Block_n_cfgpreds(block); int i; pdeq_putr(worklist, block); mark_Block_block_visited(block); for (i = 0; i < n_cfgpred; ++i) { ir_node *pred_block = get_Block_cfgpred_block(block, i); block_costs_t *pred_info = (block_costs_t*)get_irn_link(pred_block); /* ignore backedges */ if (pred_info->dfs_num > info->dfs_num) continue; /* compare the predecessor's costs, not our own, when picking the best predecessor */ if (pred_info->costs > best_costs) { best_costs = pred_info->costs; best_pred = pred_block; } } block = best_pred; } while (block != NULL && !Block_block_visited(block)); /* now put all nodes in the worklist in our final order */ while (!pdeq_empty(worklist)) { ir_node *pblock = (ir_node*)pdeq_getr(worklist); assert(order_p < n_blocks); order[order_p++] = pblock; } } assert(order_p == n_blocks); del_pdeq(worklist); ir_free_resources(irg, IR_RESOURCE_BLOCK_VISITED); DEL_ARR_F(blocklist); obstack_free(&obst, NULL); obstack_init(&obst); block_order = order; n_block_order = n_blocks; } /** * Run the register allocator for the current register class. */ static void be_pref_alloc_cls(void) { int i; lv = be_assure_liveness(irg); be_liveness_assure_sets(lv); ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK); DB((dbg, LEVEL_2, "=== Allocating registers of %s ===\n", cls->name)); be_clear_links(irg); irg_block_walk_graph(irg, NULL, analyze_block, NULL); if (create_congruence_classes) combine_congruence_classes(); for (i = 0; i < n_block_order; ++i) { ir_node *block = block_order[i]; allocate_coalesce_block(block, NULL); } ir_free_resources(irg, IR_RESOURCE_IRN_LINK); } static void dump(int mask, ir_graph *irg, const char *suffix) { if (be_get_irg_options(irg)->dump_flags & mask) dump_ir_graph(irg, suffix); } /** * Run the spiller on the current graph.
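* (be_pre_spill_prepare_constr() first makes the register pressure of the * class explicit, be_do_spill() then reduces the pressure so the greedy * coloring cannot run out of registers, and check_for_memory_operands() * tries to let instructions access spilled values directly from memory.)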
*/ static void spill(void) { /* make sure all nodes show their real register pressure */ be_timer_push(T_RA_CONSTR); be_pre_spill_prepare_constr(irg, cls); be_timer_pop(T_RA_CONSTR); dump(DUMP_RA, irg, "-spillprepare"); /* spill */ be_timer_push(T_RA_SPILL); be_do_spill(irg, cls); be_timer_pop(T_RA_SPILL); be_timer_push(T_RA_SPILL_APPLY); check_for_memory_operands(irg); be_timer_pop(T_RA_SPILL_APPLY); dump(DUMP_RA, irg, "-spill"); } /** * The pref register allocator for a whole procedure. */ static void be_pref_alloc(ir_graph *new_irg) { const arch_env_t *arch_env = be_get_irg_arch_env(new_irg); int n_cls = arch_env->n_register_classes; int c; obstack_init(&obst); irg = new_irg; execfreqs = be_get_irg_exec_freq(irg); /* determine a good coloring order */ determine_block_order(); for (c = 0; c < n_cls; ++c) { cls = &arch_env->register_classes[c]; if (arch_register_class_flags(cls) & arch_register_class_flag_manual_ra) continue; stat_ev_ctx_push_str("regcls", cls->name); n_regs = arch_register_class_n_regs(cls); normal_regs = rbitset_malloc(n_regs); be_set_allocatable_regs(irg, cls, normal_regs); spill(); /* verify schedule and register pressure */ be_timer_push(T_VERIFY); if (be_get_irg_options(irg)->verify_option == BE_VERIFY_WARN) { be_verify_schedule(irg); be_verify_register_pressure(irg, cls); } else if (be_get_irg_options(irg)->verify_option == BE_VERIFY_ASSERT) { assert(be_verify_schedule(irg) && "Schedule verification failed"); assert(be_verify_register_pressure(irg, cls) && "Register pressure verification failed"); } be_timer_pop(T_VERIFY); be_timer_push(T_RA_COLOR); be_pref_alloc_cls(); be_timer_pop(T_RA_COLOR); /* we most probably constructed new Phis so liveness info is invalid * now */ /* TODO: test liveness_introduce */ be_liveness_invalidate(lv); free(normal_regs); stat_ev_ctx_pop("regcls"); } be_timer_push(T_RA_SPILL_APPLY); be_abi_fix_stack_nodes(irg); be_timer_pop(T_RA_SPILL_APPLY); be_timer_push(T_VERIFY); if (be_get_irg_options(irg)->verify_option == BE_VERIFY_WARN) { be_verify_register_allocation(irg); } else if (be_get_irg_options(irg)->verify_option == BE_VERIFY_ASSERT) { assert(be_verify_register_allocation(irg) && "Register allocation invalid"); } be_timer_pop(T_VERIFY); obstack_free(&obst, NULL); } BE_REGISTER_MODULE_CONSTRUCTOR(be_init_pref_alloc); void be_init_pref_alloc(void) { static be_ra_t be_ra_pref = { be_pref_alloc, }; lc_opt_entry_t *be_grp = lc_opt_get_grp(firm_opt_get_root(), "be"); lc_opt_entry_t *prefalloc_group = lc_opt_get_grp(be_grp, "prefalloc"); lc_opt_add_table(prefalloc_group, options); be_register_allocator("pref", &be_ra_pref); FIRM_DBG_REGISTER(dbg, "firm.be.prefalloc"); }