Commit a2c0f19e authored by Matthias Braun

cleanup, use C99, move description to public header

parent 4576444a
@@ -32,11 +32,41 @@
* nodes and replace large CopyBs by a call to memcpy, depending on the given
* parameters.
*
* Small CopyB nodes (size <= max_small_size) are turned into a series of
* loads and stores.
* Medium-sized CopyB nodes (max_small_size < size < min_large_size) are
* left untouched.
* Large CopyB nodes (size >= min_large_size) are turned into a memcpy call.
* Every CopyB is assigned a size category as follows:
* - 'small' iff size <= max_small_size,
* - 'medium' iff max_small_size < size < min_large_size,
* - 'large' iff size >= min_large_size.
*
* The idea is that each backend can apply different optimizations in each
* of the three categories.
*
* For small CopyBs, the x86 backend could, e.g., emit a single SSE
* instruction to copy 16 bytes. Other backends might just go with a series
* of Load/Stores. Therefore, x86 would like to keep the small CopyB nodes
* around whereas other backends would not.
* For medium-sized CopyBs, the x86 backend might generate a rep-prefixed mov
* instruction. Hence, it also wants to keep the CopyBs in these cases. Other
* backends might handle this differently.
* For large CopyBs, a call to memcpy is worth the call overhead, so large
* CopyBs should always be lowered to memcpy calls.
*
* The lowerer performs the following actions if the CopyB is
* - 'small': Replace it with a series of Loads/Stores
* - 'medium': Nothing.
* - 'large': Replace it with a call to memcpy.
*
* max_small_size and min_large_size allow for a flexible configuration.
* For example, one backend could specify max_small_size == 0 and
* min_large_size == 8192 to keep all CopyB nodes smaller than 8192 and get
* memcpy Calls for all others. Here, the set of small CopyBs is empty.
* Another backend could specify max_small_size == 63 and min_large_size == 64
* to lower all small CopyBs to Loads/Stores and all big CopyBs to memcpy.
* Hence, the set of medium-sized CopyBs is empty and this backend never
* sees a CopyB node at all.
* If memcpy is not available, min_large_size can be set to UINT_MAX to prevent
* the creation of calls to memcpy. Note that CopyBs whose size is UINT_MAX
* will still be lowered to memcpy calls because we check if the size is greater
* *or equal* to min_large_size. However, this should never occur in practice.
*
* @param irg The graph to be lowered.
* @param max_small_size The maximum number of bytes for a CopyB node so
......
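The size classification spelled out in the new header comment is easy to misread, so here is a minimal sketch of the rule in plain C. `classify_copyb_size` and `copyb_size_class` are illustrative names only, not part of the libfirm API; the thresholds are the max_small_size/min_large_size parameters documented above.

```c
/* Illustration of the size categories described in the comment above. */
typedef enum copyb_size_class {
	COPYB_SMALL,   /* size <= max_small_size: expand to Loads/Stores        */
	COPYB_MEDIUM,  /* max_small_size < size < min_large_size: left in place */
	COPYB_LARGE    /* size >= min_large_size: lowered to a memcpy call      */
} copyb_size_class;

static copyb_size_class classify_copyb_size(unsigned size,
                                             unsigned max_small_size,
                                             unsigned min_large_size)
{
	if (size <= max_small_size)
		return COPYB_SMALL;
	if (size >= min_large_size)
		return COPYB_LARGE;
	return COPYB_MEDIUM;
}

/* Examples from the comment above:
 * - max_small_size == 0,  min_large_size == 8192: everything below 8192
 *   stays a CopyB, everything else becomes a memcpy call.
 * - max_small_size == 63, min_large_size == 64: no medium category, so the
 *   backend never sees a CopyB node at all.
 * - min_large_size == UINT_MAX: only a CopyB of size UINT_MAX would still
 *   become a memcpy call, which should not occur in practice. */
```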
@@ -26,51 +26,13 @@ struct entry {
ir_node *copyb;
};
/**
* Every CopyB is assigned a size category as follows:
* - 'small' iff size <= max_small_size,
* - 'medium' iff max_small_size < size < min_large_size,
* - 'large' iff size >= min_large_size.
*
* The idea is that each backend can apply different optimizations in each
* of the three categories.
*
* For small CopyBs, the x86 backend could, e.g., emit a single SSE
* instruction to copy 16 bytes. Other backends might just go with a series
* of Load/Stores. Therefore, x86 would like to keep the small CopyB nodes
* around whereas other backends would not.
* For medium-sized CopyBs, the x86 backend might generate a rep-prefixed mov
* instruction. Hence, it also wants to keep the CopyBs in these cases. Other
* backends might handle this differently.
* For large CopyBs, a call to memcpy is worth the call overhead, so large
* CopyBs should always be lowered to memcpy calls.
*
* The lowerer performs the following actions if the CopyB is
* - 'small': Replace it with a series of Loads/Stores
* - 'medium': Nothing.
* - 'large': Replace it with a call to memcpy.
*
* max_small_size and min_large_size allow for a flexible configuration.
* For example, one backend could specify max_small_size == 0 and
* min_large_size == 8192 to keep all CopyB nodes smaller than 8192 and get
* memcpy Calls for all others. Here, the set of small CopyBs is empty.
* Another backend could specify max_small_size == 63 and min_large_size == 64
* to lower all small CopyBs to Loads/Stores and all big CopyBs to memcpy.
* Hence, the set of medium-sized CopyBs is empty and this backend never
* sees a CopyB node at all.
* If memcpy is not available, min_large_size can be set to UINT_MAX to prevent
* the creation of calls to memcpy. Note that CopyBs whose size is UINT_MAX
* will still be lowered to memcpy calls because we check if the size is greater
* *or equal* to min_large_size. However, this should never occur in practice.
*/
static unsigned max_small_size; /**< The maximum size of a CopyB node
so that it is regarded as 'small'. */
static unsigned min_large_size; /**< The minimum size of a CopyB node
so that it is regarded as 'large'. */
static unsigned native_mode_bytes; /**< The size of the native mode in bytes. */
static int allow_misalignments; /**< Whether backend can handle misaligned
loads and stores. */
static bool allow_misalignments; /**< Whether backend can handle misaligned
loads and stores. */
typedef struct walk_env {
struct obstack obst; /**< the obstack where data is allocated
@@ -107,32 +69,26 @@ static void lower_small_copyb_node(ir_node *irn)
allow_misalignments ? native_mode_bytes : get_type_alignment_bytes(tp);
unsigned size = get_type_size_bytes(tp);
unsigned offset = 0;
ir_mode *mode;
while (offset < size) {
mode = get_ir_mode(mode_bytes);
ir_mode *mode = get_ir_mode(mode_bytes);
for (; offset + mode_bytes <= size; offset += mode_bytes) {
/* construct offset */
ir_node *addr_const;
ir_node *add;
ir_node *load;
ir_node *load_res;
ir_node *load_mem;
ir_node *store;
ir_node *store_mem;
addr_const = new_r_Const_long(irg, mode_Iu, offset);
add = new_r_Add(block, addr_src, addr_const, addr_mode);
ir_node *addr_const = new_r_Const_long(irg, mode_Iu, offset);
ir_node *add = new_r_Add(block, addr_src, addr_const,
addr_mode);
load = new_r_Load(block, mem, add, mode, cons_none);
load_res = new_r_Proj(load, mode, pn_Load_res);
load_mem = new_r_Proj(load, mode_M, pn_Load_M);
ir_node *load = new_r_Load(block, mem, add, mode, cons_none);
ir_node *load_res = new_r_Proj(load, mode, pn_Load_res);
ir_node *load_mem = new_r_Proj(load, mode_M, pn_Load_M);
addr_const = new_r_Const_long(irg, mode_Iu, offset);
add = new_r_Add(block, addr_dst, addr_const, addr_mode);
ir_node *addr_const2 = new_r_Const_long(irg, mode_Iu, offset);
ir_node *add2 = new_r_Add(block, addr_dst, addr_const2,
addr_mode);
store = new_r_Store(block, load_mem, add, load_res, cons_none);
store_mem = new_r_Proj(store, mode_M, pn_Store_M);
ir_node *store = new_r_Store(block, load_mem, add2, load_res,
cons_none);
ir_node *store_mem = new_r_Proj(store, mode_M, pn_Store_M);
mem = store_mem;
}
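In plain C, the Load/Store chain that lower_small_copyb_node builds corresponds roughly to copying the block in chunks, starting at native_mode_bytes (or the type alignment when misaligned accesses are not allowed) and, presumably, falling back to smaller chunk sizes for the remainder. The following is a minimal sketch of that expansion strategy under those assumptions; `small_copy` is an illustrative helper, not libfirm code.

```c
#include <stddef.h>
#include <string.h>

/* Illustrative only: copy `size` bytes in chunks, starting with the
 * largest chunk size and falling back to smaller ones for the tail,
 * mirroring the Load/Store sequence the lowering builds in the graph. */
static void small_copy(void *dst, const void *src, size_t size,
                       size_t max_chunk /* e.g. native_mode_bytes */)
{
	size_t offset = 0;
	for (size_t chunk = max_chunk; chunk > 0; chunk /= 2) {
		for (; offset + chunk <= size; offset += chunk) {
			/* Each chunk-sized copy stands for one Load/Store pair of
			 * the corresponding mode in the lowered graph. */
			memcpy((char *)dst + offset, (const char *)src + offset, chunk);
		}
	}
}
```

With max_chunk == 4, a 7-byte block would be copied as one 4-byte, one 2-byte and one 1-byte access, matching the sequence of Load/Store pairs threaded through the memory edge in the graph.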
@@ -184,15 +140,11 @@ static void lower_large_copyb_node(ir_node *irn)
ir_node *symconst = get_memcpy_symconst(irg);
ir_type *call_tp = get_memcpy_methodtype();
ir_mode *mode_size_t = get_ir_mode(native_mode_bytes);
ir_node *in[3];
ir_node *call;
ir_node *call_mem;
in[0] = addr_dst;
in[1] = addr_src;
in[2] = new_r_Const_long(irg, mode_size_t, size);
call = new_rd_Call(dbgi, block, mem, symconst, 3, in, call_tp);
call_mem = new_r_Proj(call, mode_M, pn_Call_M);
ir_node *size_cnst = new_r_Const_long(irg, mode_size_t, size);
ir_node *in[] = { addr_dst, addr_src, size_cnst };
ir_node *call = new_rd_Call(dbgi, block, mem, symconst,
ARRAY_SIZE(in), in, call_tp);
ir_node *call_mem = new_r_Proj(call, mode_M, pn_Call_M);
exchange(irn, call_mem);
}
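The rewritten argument setup uses a C99 initializer together with ARRAY_SIZE, so the arity passed to new_rd_Call cannot drift out of sync with the `in` array. A small sketch of that idiom, assuming the usual sizeof-based definition (the macro libfirm actually provides may differ in detail):

```c
#include <stdio.h>

/* Typical definition: number of elements of a statically sized array. */
#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

int main(void)
{
	const char *in[] = { "addr_dst", "addr_src", "size_cnst" };
	/* Adding or removing an element automatically adjusts the count,
	 * which is what keeps the Call's arity in sync with the array. */
	printf("%zu arguments\n", ARRAY_SIZE(in));
	return 0;
}
```

The lowered graph then computes the IR equivalent of memcpy(addr_dst, addr_src, size), and the Call's memory Proj replaces the original CopyB via exchange().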
@@ -215,26 +167,21 @@ static void lower_copyb_node(ir_node *irn)
*/
static void find_copyb_nodes(ir_node *irn, void *ctx)
{
walk_env_t *env = (walk_env_t*)ctx;
ir_type *tp;
unsigned size;
entry_t *entry;
bool medium_sized;
if (! is_CopyB(irn))
if (!is_CopyB(irn))
return;
tp = get_CopyB_type(irn);
ir_type *tp = get_CopyB_type(irn);
if (get_type_state(tp) != layout_fixed)
return;
size = get_type_size_bytes(tp);
medium_sized = max_small_size < size && size < min_large_size;
unsigned size = get_type_size_bytes(tp);
bool medium_sized = max_small_size < size && size < min_large_size;
if (medium_sized)
return; /* Nothing to do for medium-sized CopyBs. */
/* Okay, either small or large CopyB, so link it in and lower it later. */
entry = OALLOC(&env->obst, entry_t);
walk_env_t *env = (walk_env_t*)ctx;
entry_t *entry = OALLOC(&env->obst, entry_t);
entry->copyb = irn;
INIT_LIST_HEAD(&entry->list);
set_irn_link(irn, entry);
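find_copyb_nodes only records the small and large CopyBs (via OALLOC and set_irn_link) and defers the actual rewriting to a later pass over the collected list. The same collect-then-transform pattern in generic C, as a sketch only; `node`, `collect_interesting` and `transform_all` are illustrative names, not libfirm API:

```c
#include <stdbool.h>
#include <stdlib.h>

typedef struct node node;
struct node {
	int   payload;
	node *next;
};

typedef struct worklist_entry {
	node                  *n;
	struct worklist_entry *next;
} worklist_entry;

/* Phase 1: traverse the structure and remember the interesting nodes,
 * without modifying anything yet. */
static worklist_entry *collect_interesting(node *head, bool (*pred)(const node*))
{
	worklist_entry *list = NULL;
	for (node *n = head; n != NULL; n = n->next) {
		if (!pred(n))
			continue;
		worklist_entry *entry = malloc(sizeof(*entry));
		entry->n    = n;
		entry->next = list;
		list        = entry;
	}
	return list;
}

/* Phase 2: rewrite the collected nodes; the traversal is already over,
 * so mutating them cannot confuse the walker. */
static void transform_all(worklist_entry *list, void (*rewrite)(node*))
{
	while (list != NULL) {
		worklist_entry *next = list->next;
		rewrite(list->n);
		free(list);
		list = next;
	}
}
```

Deferring the rewrite means the graph walk never visits nodes that are being exchanged underneath it, which is presumably why lower_CopyB first runs irg_walk_graph with find_copyb_nodes and only afterwards lowers the entries collected in env.list.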
@@ -245,7 +192,6 @@ void lower_CopyB(ir_graph *irg, unsigned max_small_sz, unsigned min_large_sz,
int allow_misaligns)
{
const backend_params *bparams = be_get_backend_param();
walk_env_t env;
assert(max_small_sz < min_large_sz && "CopyB size ranges must not overlap");
@@ -254,6 +200,7 @@ void lower_CopyB(ir_graph *irg, unsigned max_small_sz, unsigned min_large_sz,
native_mode_bytes = bparams->machine_size / 8;
allow_misalignments = allow_misaligns;
walk_env_t env;
obstack_init(&env.obst);
INIT_LIST_HEAD(&env.list);
irg_walk_graph(irg, NULL, find_copyb_nodes, &env);
......
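Finally, a hedged usage sketch of the public entry point whose signature appears in this hunk. The header name and the concrete thresholds are assumptions chosen for illustration; a real backend would derive them from its own cost model.

```c
#include "lowering.h"  /* assumed location of the lower_CopyB declaration */

static void lower_copybs_for_my_backend(ir_graph *irg)
{
	/* Hypothetical thresholds: CopyBs of up to 31 bytes become
	 * Loads/Stores, CopyBs of 64 bytes or more become memcpy calls,
	 * and everything in between is kept for the backend. The last
	 * argument states that misaligned Loads/Stores are acceptable. */
	lower_CopyB(irg, 31, 64, /*allow_misaligns=*/1);
}
```

Note that the thresholds satisfy the assert in lower_CopyB: max_small_sz must be strictly smaller than min_large_sz so the size ranges do not overlap.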