Commit 2bae44dd authored by Marvin Damschen
Browse files

Device self-scheduling using atomic counting

parent ec5699f8
......@@ -1870,7 +1870,7 @@ main( int argc,
cl_context context = clCreateContext(NULL, num, devices, NULL, NULL, &error);
#ifdef SELF_SCHEDULE
initOnDeviceCommandQueues(context);
initSelfScheduling(context);
#endif
......@@ -2432,7 +2432,7 @@ main( int argc,
free(mem);
#ifdef SELF_SCHEDULE
releaseOnDeviceCommandQueues(context);
cleanupSelfScheduling(context);
#endif
clReleaseContext(context);
return EXIT_SUCCESS;
......
......@@ -81,11 +81,11 @@ main( int argc, char** argv)
context = clCreateContext(NULL, num_devices, device_list, NULL, NULL, &err);
#ifdef SELF_SCHEDULE
initOnDeviceCommandQueues(context);
initSelfScheduling(context);
#endif
setup(context, argc, argv);
#ifdef SELF_SCHEDULE
releaseOnDeviceCommandQueues(context);
cleanupSelfScheduling(context);
#endif
clReleaseContext(context);
}
......
......@@ -199,7 +199,7 @@ int main(int argc, char * argv[])
cl_context context = clCreateContext(NULL, num, devices, NULL, NULL, &err);
#ifdef SELF_SCHEDULE
initOnDeviceCommandQueues(context);
initSelfScheduling(context);
#endif
int no_of_nodes;
......@@ -308,7 +308,7 @@ int main(int argc, char * argv[])
clSVMFree(context, h_graph_visited);
#ifdef SELF_SCHEDULE
releaseOnDeviceCommandQueues(context);
cleanupSelfScheduling(context);
#endif
}
......
......@@ -247,7 +247,7 @@ int main(int argc, char** argv){
exit(1);
}
#ifdef SELF_SCHEDULE
initOnDeviceCommandQueues(context);
initSelfScheduling(context);
#endif
int iCPU, iGPU;
......@@ -417,7 +417,7 @@ int main(int argc, char** argv){
std::cout << "Cleaning up..." << std::endl;
// TODO
#ifdef SELF_SCHEDULE
releaseOnDeviceCommandQueues(context);
cleanupSelfScheduling(context);
#endif
std::cout << "Done..." << std::endl;
clSVMFree(context, h_ff_variable);
......
......@@ -331,7 +331,7 @@ void horizontalStep (__local struct TransformBuffer *buffer,
const int prevOffset,
const int midOffset,
const int nextOffset,
int flag)
int flag, const uint done_local, global_work_state *__globalWorkState)
{
const int STEPS = count / buffer->SIZE_X;
const int finalCount = count % buffer->SIZE_X;
......@@ -370,14 +370,14 @@ void horizontalStep (__local struct TransformBuffer *buffer,
void forEachHorizontalOdd(__local struct TransformBuffer *buffer,
const int firstLine,
const int numLines,
int flag)
int flag, const uint done_local, global_work_state *__globalWorkState)
{
const int count = numLines * buffer->VERTICAL_STRIDE - 1 ;
const int prevOffset = firstLine * buffer->VERTICAL_STRIDE ;
const int centerOffset = prevOffset + buffer->ODD_OFFSET ;
const int nextOffset = prevOffset + 1;
horizontalStep (buffer, count, prevOffset, centerOffset, nextOffset, flag);
horizontalStep (buffer, count, prevOffset, centerOffset, nextOffset, flag, done_local, __globalWorkState);
}
......@@ -385,14 +385,14 @@ void forEachHorizontalOdd(__local struct TransformBuffer *buffer,
void forEachHorizontalEven(__local struct TransformBuffer *buffer,
const int firstLine,
const int numLines,
int flag)
int flag, const uint done_local, global_work_state *__globalWorkState)
{
const int count = numLines * buffer->VERTICAL_STRIDE - 1 ;
const int centerOffset = firstLine * buffer->VERTICAL_STRIDE + 1;
const int prevOffset = firstLine * buffer->VERTICAL_STRIDE + buffer->ODD_OFFSET;
const int nextOffset = prevOffset + 1;
horizontalStep (buffer, count, prevOffset, centerOffset, nextOffset, flag);
horizontalStep (buffer, count, prevOffset, centerOffset, nextOffset, flag, done_local, __globalWorkState);
}
......@@ -502,7 +502,7 @@ void initColumn(__local struct FDWT53 * fdwt53,
const int sizeY,
const int colIndex,
const int firstY,
struct VerticalDWTPixelIO *pIO)
struct VerticalDWTPixelIO *pIO, const uint done_local, global_work_state *__globalWorkState)
{
column->CHECKED_LOADER = CHECKED;
column->offset = getColumnOffset(colIndex, &fdwt53->buffer);
......@@ -566,7 +566,7 @@ void transform(__local struct FDWT53 *fdwt53,
__global int * out,
const int sizeX,
const int sizeY,
const int winSteps)
const int winSteps, const uint done_local, global_work_state *__globalWorkState)
{
// info about one main and one boundary columns processed by this thread
struct FDWT53Column column; column.CHECKED_LOADER = CHECK_LOADS;
......@@ -577,7 +577,7 @@ void transform(__local struct FDWT53 *fdwt53,
// Initialize all column info: initialize loaders, compute offset of
// column in shared buffer and initialize loader of column.
const int firstY = get_group_id(1) * fdwt53->WIN_SIZE_Y * winSteps;
initColumn(fdwt53, &column, CHECK_LOADS, in, sizeX, sizeY, get_local_id(0), firstY, &pIO);
initColumn(fdwt53, &column, CHECK_LOADS, in, sizeX, sizeY, get_local_id(0), firstY, &pIO, done_local, __globalWorkState);
// first 3 threads initialize boundary columns, others do not use them
......@@ -587,7 +587,7 @@ void transform(__local struct FDWT53 *fdwt53,
const int colId = get_local_id(0) + ((get_local_id(0)== 0) ? fdwt53->WIN_SIZE_X : -3);
// initialize the column
initColumn (fdwt53, &boundaryColumn, CHECK_LOADS, in, sizeX, sizeY, colId, firstY, &pIO_b);
initColumn (fdwt53, &boundaryColumn, CHECK_LOADS, in, sizeX, sizeY, colId, firstY, &pIO_b, done_local, __globalWorkState);
}
// index of column which will be written into output by this thread
......@@ -620,11 +620,11 @@ void transform(__local struct FDWT53 *fdwt53,
int flag = 0; //flag = 0 execute Forward53Predict, flag = 1 execute Forward53Update
forEachHorizontalOdd(&(fdwt53->buffer), 2, fdwt53->WIN_SIZE_Y, flag);
forEachHorizontalOdd(&(fdwt53->buffer), 2, fdwt53->WIN_SIZE_Y, flag, done_local, __globalWorkState);
barrier(CLK_LOCAL_MEM_FENCE);
flag = 1;
forEachHorizontalEven(&(fdwt53->buffer), 2, fdwt53->WIN_SIZE_Y, flag);
forEachHorizontalEven(&(fdwt53->buffer), 2, fdwt53->WIN_SIZE_Y, flag, done_local, __globalWorkState);
barrier(CLK_LOCAL_MEM_FENCE);
......@@ -695,17 +695,17 @@ __kernel void cl_fdwt53Kernel(__global const int * const in,
if(atBottomBoudary)
{
// near bottom boundary => check both writing and reading
transform(&fdwt53, true, true, in, out, sx, sy, steps);
transform(&fdwt53, true, true, in, out, sx, sy, steps, done_local, __globalWorkState);
}
else if(atRightBoudary)
{
// near right boundary only => check writing only
transform(&fdwt53, false, true, in, out, sx, sy, steps);
transform(&fdwt53, false, true, in, out, sx, sy, steps, done_local, __globalWorkState);
}
else
{
// no nearby boundary => check nothing
transform(&fdwt53, false, false, in, out, sx, sy, steps);
transform(&fdwt53, false, false, in, out, sx, sy, steps, done_local, __globalWorkState);
}
SCHEDULE_CHILD_ND(cl_fdwt53Kernel(in, out, sx, sy, steps, WIN_SIZE_X, WIN_SIZE_Y SELF_SCHEDULE_CHILD_ARGS))
......
......@@ -770,7 +770,7 @@ int main(int argc, char **argv)
context = clCreateContext(NULL, num, devices, NULL, NULL, &errNum);
#ifdef SELF_SCHEDULE
initOnDeviceCommandQueues(context);
initSelfScheduling(context);
#endif
if (errNum != CL_SUCCESS)
{
......@@ -869,7 +869,7 @@ int main(int argc, char **argv)
clReleaseKernel(c_CopySrcToComponents);
clReleaseKernel(c_CopySrcToComponent);
#ifdef SELF_SCHEDULE
releaseOnDeviceCommandQueues(context);
cleanupSelfScheduling(context);
#endif
clSVMFree(context, d->srcImg);
......
......@@ -104,7 +104,7 @@ int main(int argc, char *argv[]) {
context = clCreateContext(NULL, num, devices, NULL, NULL, &err);
#ifdef SELF_SCHEDULE
initOnDeviceCommandQueues(context);
initSelfScheduling(context);
#endif
if(size < 1)
......@@ -180,7 +180,7 @@ int main(int argc, char *argv[]) {
clSVMFree(context, b);
free(finalVec);
#ifdef SELF_SCHEDULE
releaseOnDeviceCommandQueues(context);
cleanupSelfScheduling(context);
#endif
cl_cleanup();
//OpenClGaussianElimination(context,timing);
......
......@@ -84,7 +84,7 @@ main( int argc,
cl_context context = clCreateContext(NULL, num, devices, NULL, NULL, &err);
#ifdef SELF_SCHEDULE
initOnDeviceCommandQueues(context);
initSelfScheduling(context);
#endif
......@@ -273,7 +273,7 @@ main( int argc,
//====================================================================================================100
#ifdef SELF_SCHEDULE
releaseOnDeviceCommandQueues(context);
cleanupSelfScheduling(context);
#endif
err = clReleaseContext(context);
if (err != CL_SUCCESS)
......
......@@ -159,7 +159,7 @@ int main(int argc, char** argv) {
context = clCreateContext(NULL, num, devices, NULL, NULL, &err);
#ifdef SELF_SCHEDULE
initOnDeviceCommandQueues(context);
initSelfScheduling(context);
#endif
int iCPU, iGPU;
......@@ -249,7 +249,7 @@ int main(int argc, char** argv) {
clSVMFree(context, MatrixPower);
#ifdef SELF_SCHEDULE
releaseOnDeviceCommandQueues(context);
cleanupSelfScheduling(context);
#endif
clReleaseContext(context);
......
......@@ -186,7 +186,7 @@ int main(int argc, char** argv)
context = clCreateContext(NULL, num, devices, NULL, NULL, &err);
#ifdef SELF_SCHEDULE
initOnDeviceCommandQueues(context);
initSelfScheduling(context);
#endif
int iCPU, iGPU;
......@@ -314,7 +314,7 @@ int main(int argc, char** argv)
clReleaseCommandQueue(commandsCPU);
clReleaseCommandQueue(commandsGPU);
#ifdef SELF_SCHEDULE
releaseOnDeviceCommandQueues(context);
cleanupSelfScheduling(context);
#endif
clReleaseContext(context);
......
......@@ -51,7 +51,7 @@ int main(int argc, char** argv)
cl_context context = clCreateContext(NULL, num, devices, NULL, NULL, &err);
#ifdef SELF_SCHEDULE
initOnDeviceCommandQueues(context);
initSelfScheduling(context);
#endif
// Fill our data set with random float values
......@@ -212,7 +212,7 @@ int main(int argc, char** argv)
clSVMFree(context, nullElements);
clSVMFree(context, origOffsets);
#ifdef SELF_SCHEDULE
releaseOnDeviceCommandQueues(context);
cleanupSelfScheduling(context);
#endif
clReleaseContext(context);
......
......@@ -73,7 +73,7 @@ static int initialize()
context = clCreateContext(NULL, num, devices, NULL, NULL, &err);
#ifdef SELF_SCHEDULE
initOnDeviceCommandQueues(context);
initSelfScheduling(context);
#endif
int iCPU, iGPU;
......@@ -92,7 +92,7 @@ static int shutdown()
if( commandsCPU ) clReleaseCommandQueue( commandsCPU );
if( commandsGPU ) clReleaseCommandQueue( commandsGPU );
#ifdef SELF_SCHEDULE
releaseOnDeviceCommandQueues(context);
cleanupSelfScheduling(context);
#endif
if( context ) clReleaseContext( context );
if( devices ) delete devices;
......
......@@ -118,7 +118,7 @@ main( int argc,
cl_context context = clCreateContext(NULL, num, devices, NULL, NULL, &error);
#ifdef SELF_SCHEDULE
initOnDeviceCommandQueues(context);
initSelfScheduling(context);
#endif
printf("WG size of kernel = %d \n", NUMBER_THREADS);
......@@ -370,7 +370,7 @@ main( int argc,
// RETURN
//======================================================================================================================================================150
#ifdef SELF_SCHEDULE
releaseOnDeviceCommandQueues(context);
cleanupSelfScheduling(context);
#endif
return 0.0; // always returns 0.0
......
......@@ -156,7 +156,7 @@ void select_device() {
context = clCreateContext(NULL, num, devices, NULL, NULL, &err);
#ifdef SELF_SCHEDULE
initOnDeviceCommandQueues(context);
initSelfScheduling(context);
#endif
int iCPU, iGPU;
......
......@@ -135,7 +135,7 @@ static int initialize()
context = clCreateContext(NULL, num, devices, NULL, NULL, &err);
#ifdef SELF_SCHEDULE
initOnDeviceCommandQueues(context);
initSelfScheduling(context);
#endif
int iCPU, iGPU;
......@@ -154,7 +154,7 @@ static int shutdown()
if( commandsCPU ) clReleaseCommandQueue( commandsCPU );
if( commandsGPU ) clReleaseCommandQueue( commandsGPU );
#ifdef SELF_SCHEDULE
releaseOnDeviceCommandQueues(context);
cleanupSelfScheduling(context);
#endif
if( context ) clReleaseContext( context );
if( devices ) delete devices;
......
......@@ -208,7 +208,7 @@ main( int argc,
cl_context context = clCreateContext(NULL, num, devices, NULL, NULL, &error);
#ifdef SELF_SCHEDULE
initOnDeviceCommandQueues(context);
initSelfScheduling(context);
#endif
//======================================================================================================================================================150
......@@ -349,7 +349,7 @@ main( int argc,
free(com);
#ifdef SELF_SCHEDULE
releaseOnDeviceCommandQueues(context);
cleanupSelfScheduling(context);
#endif
//====================================================================================================100
......
......@@ -42,7 +42,7 @@ int main(int argc, char *argv[]) {
context = clCreateContext(NULL, num, devices, NULL, NULL, &error);
#ifdef SELF_SCHEDULE
initOnDeviceCommandQueues(context);
initSelfScheduling(context);
#endif
int numRecords = loadData(filename,records,locations);
......@@ -72,7 +72,7 @@ int main(int argc, char *argv[]) {
clSVMFree(context, records);
clSVMFree(context, locations);
#ifdef SELF_SCHEDULE
releaseOnDeviceCommandQueues(context);
cleanupSelfScheduling(context);
#endif
return 0;
}
......
......@@ -76,7 +76,7 @@ static int initialize()
context = clCreateContext(NULL, num, devices, NULL, NULL, &err);
#ifdef SELF_SCHEDULE
initOnDeviceCommandQueues(context);
initSelfScheduling(context);
#endif
int iCPU, iGPU;
......@@ -95,7 +95,7 @@ static int shutdown()
if( commandsCPU ) clReleaseCommandQueue( commandsCPU );
if( commandsGPU ) clReleaseCommandQueue( commandsGPU );
#ifdef SELF_SCHEDULE
releaseOnDeviceCommandQueues(context);
cleanupSelfScheduling(context);
#endif
if( context ) clReleaseContext( context );
if( devices ) delete devices;
......
......@@ -85,7 +85,7 @@ static int initialize()
context = clCreateContext(NULL, num, devices, NULL, NULL, &err);
#ifdef SELF_SCHEDULE
initOnDeviceCommandQueues(context);
initSelfScheduling(context);
#endif
threads_per_block = 128;
......@@ -109,7 +109,7 @@ static int shutdown()
if( commandsCPU ) clReleaseCommandQueue( commandsCPU );
if( commandsGPU ) clReleaseCommandQueue( commandsGPU );
#ifdef SELF_SCHEDULE
releaseOnDeviceCommandQueues(context);
cleanupSelfScheduling(context);
#endif
if( context ) clReleaseContext( context );
if( devices ) delete devices;
......
......@@ -89,7 +89,7 @@ static int initialize() {
context = clCreateContext(NULL, num, devices, NULL, NULL, &err);
#ifdef SELF_SCHEDULE
initOnDeviceCommandQueues(context);
initSelfScheduling(context);
#endif
int iCPU, iGPU;
......@@ -108,7 +108,7 @@ static int shutdown()
if( commandsCPU ) clReleaseCommandQueue( commandsCPU );
if( commandsGPU ) clReleaseCommandQueue( commandsGPU );
#ifdef SELF_SCHEDULE
releaseOnDeviceCommandQueues(context);
cleanupSelfScheduling(context);
#endif
if( context ) clReleaseContext( context );
if( devices ) delete devices;
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment