Commit bb3144f0 authored by Manuel Mohr

Improved CopyB lowering, made it part of target lowering.

Backends can configure CopyB lowering, so that it's possible to keep
CopyB nodes in a certain size range for special backend-specific
optimizations.  Furthermore, large CopyBs are turned into memcpy calls.
parent c3d71411
@@ -33,10 +33,25 @@
#include "begin.h"
/**
* Lower CopyB nodes of size smaller than max_size into Loads/Stores
*/
FIRM_API void lower_CopyB(ir_graph *irg, unsigned max_size,
unsigned native_mode_bytes);
* Lower small CopyB nodes to Load/Store nodes, preserve medium-sized CopyB
* nodes and replace large CopyBs by a call to memcpy, depending on the given
* parameters.
*
* Small CopyB nodes (size <= max_small_size) are turned into a series of
* loads and stores.
* Medium-sized CopyB nodes (max_small_size < size < min_large_size) are
* left untouched.
* Large CopyB nodes (size >= min_large_size) are turned into a memcpy call.
*
* @param irg The graph to be lowered.
* @param max_small_size The maximum number of bytes for a CopyB node so
* that it is still considered 'small'.
* @param min_large_size The minimum number of bytes for a CopyB node so
* that it is regarded as 'large'.
* @param native_mode_bytes The size in bytes of the Loads/Stores to generate, typically the native register width.
*/
FIRM_API void lower_CopyB(ir_graph *irg, unsigned max_small_size,
unsigned min_large_size, unsigned native_mode_bytes);
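/* A usage sketch (the sizes below are purely illustrative and assume 4-byte
 * native registers): lower CopyBs of at most 15 bytes to Load/Store
 * sequences, call memcpy from 64 bytes on, and keep everything in between
 * untouched for the backend:
 *
 *     lower_CopyB(irg, 15, 64, 4);
 */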
/**
* Lowers all Switches (Cond nodes with non-boolean mode) depending on spare_size.
@@ -470,8 +470,19 @@ static int amd64_get_reg_class_alignment(const arch_register_class_t *cls)
static void amd64_lower_for_target(void)
{
size_t i, n_irgs = get_irp_n_irgs();
/* lower compound param handling */
lower_calls_with_compounds(LF_RETURN_HIDDEN);
for (i = 0; i < n_irgs; ++i) {
ir_graph *irg = get_irp_irg(i);
/* Turn all small CopyBs into loads/stores, and turn all bigger
* CopyBs into memcpy calls, because we cannot handle CopyB nodes
* during code generation yet.
* TODO: Adapt this once custom CopyB handling is implemented. */
lower_CopyB(irg, 64, 65, 4);
}
}
static int amd64_is_mux_allowed(ir_node *sel, ir_node *mux_false,
@@ -542,6 +542,14 @@ static void arm_lower_for_target(void)
ir_graph *irg = get_irp_irg(i);
lower_switch(irg, 4, 256, true);
}
for (i = 0; i < n_irgs; ++i) {
ir_graph *irg = get_irp_irg(i);
/* Turn all small CopyBs into loads/stores and all bigger CopyBs into
* memcpy calls.
* TODO: These constants need arm-specific tuning. */
lower_CopyB(irg, 31, 32, 4);
}
}
/**
@@ -2049,6 +2049,14 @@ static void ia32_lower_for_target(void)
/* break up switches with wide ranges */
lower_switch(irg, 4, 256, false);
}
for (i = 0; i < n_irgs; ++i) {
ir_graph *irg = get_irp_irg(i);
/* Turn all small CopyBs into loads/stores, keep medium-sized CopyBs
* so that we can generate rep movs for them later, and turn all big
* CopyBs into memcpy calls. */
lower_CopyB(irg, 64, 8193, 4);
}
}
/**
@@ -420,6 +420,7 @@ static void sparc_lower_for_target(void)
sparc_create_set,
0,
};
lower_calls_with_compounds(LF_RETURN_HIDDEN);
if (sparc_isa_template.fpu_arch == SPARC_FPU_ARCH_SOFTFLOAT)
@@ -434,6 +435,13 @@ static void sparc_lower_for_target(void)
ir_lower_mode_b(irg, &lower_mode_b_config);
lower_switch(irg, 4, 256, false);
}
for (i = 0; i < n_irgs; ++i) {
ir_graph *irg = get_irp_irg(i);
/* Turn all small CopyBs into loads/stores and all bigger CopyBs into
* memcpy calls. */
lower_CopyB(irg, 31, 32, 4);
}
}
static int sparc_is_mux_allowed(ir_node *sel, ir_node *mux_false,
@@ -19,8 +19,8 @@
/**
* @file
* @brief Lower small CopyB nodes into a series of Load/store
* @author Michael Beck, Matthias Braun
* @brief Lower small CopyB nodes into a series of Load/Store nodes
* @author Michael Beck, Matthias Braun, Manuel Mohr
* @version $Id$
*/
#include "config.h"
@@ -42,10 +42,53 @@ struct entry {
ir_node *copyb;
};
/**
* Every CopyB is assigned a size category as follows:
* - 'small' iff size <= max_small_size,
* - 'medium' iff max_small_size < size < min_large_size,
* - 'large' iff size >= min_large_size.
*
* The idea is that each backend can apply different optimizations in each
* of the three categories.
*
* For small CopyBs, the x86 backend could, e.g., emit a single SSE
* instruction to copy 16 bytes. Other backends might just go with a series
* of Load/Stores. Therefore, x86 would like to keep the small CopyB nodes
* around whereas other backends would not.
* For medium-sized CopyBs, the x86 backend might generate a rep-prefixed mov
* instruction. Hence, it also wants to keep the CopyBs in these cases. Other
* backends might handle this differently.
* For large CopyBs, a call to memcpy is worth the call overhead, so large
* CopyBs should always be lowered to memcpy calls.
*
* The lowerer performs the following actions if the CopyB is
* - 'small': Replace it with a series of Loads/Stores
* - 'medium': Nothing.
* - 'large': Replace it with a call to memcpy.
*
* max_small_size and min_large_size allow for a flexible configuration.
* For example, one backend could specify max_small_size == 0 and
* min_large_size == 8192 to keep all CopyB nodes smaller than 8192 and get
* memcpy Calls for all others. Here, the set of small CopyBs is empty.
* Another backend could specify max_small_size == 63 and min_large_size == 64
* to lower all small CopyBs to Loads/Stores and all big CopyBs to memcpy.
* Hence, the set of medium-sized CopyBs is empty and this backend never
* sees a CopyB node at all.
* If memcpy is not available, min_large_size can be set to UINT_MAX to prevent
* the creation of calls to memcpy. Note that CopyBs whose size is UINT_MAX
* will still be lowered to memcpy calls because we check if the size is greater
* *or equal* to min_large_size. However, this should never occur in practice.
*/
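/* As concrete calls, the two configurations described above read as follows
 * (a native mode size of 4 bytes is assumed purely for illustration):
 *
 *     lower_CopyB(irg,  0, 8192, 4);   no small CopyBs, memcpy from 8192 on
 *     lower_CopyB(irg, 63,   64, 4);   no medium CopyBs, the backend never
 *                                      sees a CopyB node
 */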
static unsigned max_small_size; /**< The maximum size of a CopyB node
so that it is regarded as 'small'. */
static unsigned min_large_size; /**< The minimum size of a CopyB node
so that it is regarded as 'large'. */
typedef struct walk_env {
unsigned max_size;
struct obstack obst; /**< the obstack where data is allocated on */
struct list_head list; /**< the list of copyb nodes */
struct obstack obst;  /**< the obstack where data is allocated. */
struct list_head list; /**< the list of copyb nodes. */
} walk_env_t;
static ir_mode *get_ir_mode(unsigned bytes)
@@ -62,20 +105,20 @@ static ir_mode *get_ir_mode(unsigned bytes)
}
/**
* lower a CopyB node.
* Turn a small CopyB node into a series of Load/Store nodes.
*/
static void lower_copyb_nodes(ir_node *irn, unsigned mode_bytes)
static void lower_small_copyb_node(ir_node *irn, unsigned mode_bytes)
{
ir_graph *irg = get_irn_irg(irn);
unsigned size;
unsigned offset;
ir_mode *mode;
ir_mode *addr_mode;
ir_node *mem;
ir_node *addr_src;
ir_node *addr_dst;
ir_node *block;
ir_type *tp;
ir_graph *irg = get_irn_irg(irn);
unsigned size;
unsigned offset;
ir_mode *mode;
ir_mode *addr_mode;
ir_node *mem;
ir_node *addr_src;
ir_node *addr_dst;
ir_node *block;
ir_type *tp;
addr_src = get_CopyB_src(irn);
addr_dst = get_CopyB_dst(irn);
@@ -124,8 +167,76 @@ static void lower_copyb_nodes(ir_node *irn, unsigned mode_bytes)
set_Tuple_pred(irn, pn_CopyB_X_except, new_r_Bad(irg, mode_X));
}
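/**
 * Build a Firm method type that matches the C prototype of memcpy,
 * void *memcpy(void *dest, const void *src, size_t n); the size parameter
 * is modelled as mode_Lu here.
 */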
static ir_type *get_memcpy_methodtype(void)
{
ir_type *tp = new_type_method(3, 1);
set_method_param_type(tp, 0, get_type_for_mode(mode_P));
set_method_param_type(tp, 1, get_type_for_mode(mode_P));
set_method_param_type(tp, 2, get_type_for_mode(mode_Lu));
set_method_res_type (tp, 0, get_type_for_mode(mode_P));
return tp;
}
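/**
 * Create a SymConst referencing a "memcpy" entity in the global type, so
 * that the emitted Call binds to the C library's memcpy.
 */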
static ir_node *get_memcpy_symconst(ir_graph *irg)
{
ident *id = new_id_from_str("memcpy");
ir_type *mt = get_memcpy_methodtype();
ir_entity *ent = new_entity(get_glob_type(), id, mt);
symconst_symbol sym;
set_entity_ld_ident(ent, get_entity_ident(ent));
sym.entity_p = ent;
return new_r_SymConst(irg, mode_P_code, sym, symconst_addr_ent);
}
/**
* Turn a large CopyB node into a memcpy call.
*/
static void lower_large_copyb_node(ir_node *irn)
{
ir_graph *irg = get_irn_irg(irn);
ir_node *block = get_nodes_block(irn);
dbg_info *dbgi = get_irn_dbg_info(irn);
ir_node *mem = get_CopyB_mem(irn);
ir_node *addr_src = get_CopyB_src(irn);
ir_node *addr_dst = get_CopyB_dst(irn);
ir_type *copyb_tp = get_CopyB_type(irn);
unsigned size = get_type_size_bytes(copyb_tp);
ir_node *symconst = get_memcpy_symconst(irg);
ir_type *call_tp = get_memcpy_methodtype();
ir_node *in[3];
ir_node *call;
ir_node *call_mem;
in[0] = addr_dst;
in[1] = addr_src;
in[2] = new_r_Const_long(irg, mode_Lu, size);
call = new_rd_Call(dbgi, block, mem, symconst, 3, in, call_tp);
call_mem = new_r_Proj(call, mode_M, pn_Call_M);
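/* Replace the CopyB by a Tuple so that existing memory Projs on it now
 * deliver the memory result of the memcpy Call. */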
turn_into_tuple(irn, 1);
set_irn_n(irn, pn_CopyB_M, call_mem);
}
static void lower_copyb_node(ir_node *irn, unsigned native_mode_bytes)
{
ir_type *tp = get_CopyB_type(irn);
unsigned size = get_type_size_bytes(tp);
if (size <= max_small_size)
lower_small_copyb_node(irn, native_mode_bytes);
else if (size >= min_large_size)
lower_large_copyb_node(irn);
else
assert(!"CopyB of invalid size handed to lower_copyb_node");
}
/**
* Post-Walker: find small CopyB nodes.
* Post-Walker: find CopyB nodes.
*/
static void find_copyb_nodes(ir_node *irn, void *ctx)
{
@@ -133,6 +244,7 @@ static void find_copyb_nodes(ir_node *irn, void *ctx)
ir_type *tp;
unsigned size;
entry_t *entry;
bool medium_sized;
if (is_Proj(irn)) {
ir_node *pred = get_Proj_pred(irn);
@@ -152,11 +264,12 @@ static void find_copyb_nodes(ir_node *irn, void *ctx)
if (get_type_state(tp) != layout_fixed)
return;
size = get_type_size_bytes(tp);
if (size > env->max_size)
return;
size = get_type_size_bytes(tp);
medium_sized = max_small_size < size && size < min_large_size;
if (medium_sized)
return; /* Nothing to do for medium-sized CopyBs. */
/* ok, link it in */
/* Okay, either small or large CopyB, so link it in and lower it later. */
entry = OALLOC(&env->obst, entry_t);
entry->copyb = irn;
INIT_LIST_HEAD(&entry->list);
@@ -164,18 +277,21 @@ static void find_copyb_nodes(ir_node *irn, void *ctx)
list_add_tail(&entry->list, &env->list);
}
void lower_CopyB(ir_graph *irg, unsigned max_size, unsigned native_mode_bytes)
void lower_CopyB(ir_graph *irg, unsigned max_small_sz,
unsigned min_large_sz, unsigned native_mode_bytes)
{
walk_env_t env;
entry_t *entry;
assert(max_small_sz < min_large_sz && "CopyB size ranges must not overlap");
obstack_init(&env.obst);
env.max_size = max_size;
max_small_size = max_small_sz;
min_large_size = min_large_sz;
INIT_LIST_HEAD(&env.list);
irg_walk_graph(irg, NULL, find_copyb_nodes, &env);
list_for_each_entry(entry_t, entry, &env.list, list) {
lower_copyb_nodes(entry->copyb, native_mode_bytes);
lower_copyb_node(entry->copyb, native_mode_bytes);
}
obstack_free(&env.obst, NULL);