Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
CES
Rodinia-SVM
Commits
ec5699f8
Commit
ec5699f8
authored
Jul 11, 2018
by
Marvin Damschen
Browse files
Device self-scheduling using device-side queueing
parent
285a847d
Changes
58
Show whitespace changes
Inline
Side-by-side
Rodinia-SVM/b+tree-svm/kernel/kernel_gpu_opencl.cl
View file @
ec5699f8
...
...
@@ -62,7 +62,7 @@ findK( long height,
__global
long
*currKnodeD,
__global
long
*offsetD,
__global
int
*keysD,
__global
record
*ansD
)
__global
record
*ansD
SELF_SCHEDULE_ARGS
)
{
//
private
thread
IDs
...
...
@@ -99,7 +99,7 @@ findK( long height,
ansD[bid].value
=
recordsD[knodesD[currKnodeD[bid]].indices[thid]].value
;
}
SCHEDULE_CHILD_1D
(
findK
(
height,
knodesD,
knodes_elem,
recordsD,
currKnodeD,
offsetD,
keysD,
ansD
SELF_SCHEDULE_CHILD_ARGS
))
}
//========================================================================================================================================================================================================200
...
...
Rodinia-SVM/b+tree-svm/kernel/kernel_gpu_opencl_2.cl
View file @
ec5699f8
...
...
@@ -55,7 +55,7 @@ findRangeK( long height,
__global
int
*startD,
__global
int
*endD,
__global
int
*RecstartD,
__global
int
*ReclenD
)
__global
int
*ReclenD
SELF_SCHEDULE_ARGS
)
{
//
private
thread
IDs
...
...
@@ -105,7 +105,7 @@ findRangeK( long height,
ReclenD[bid]
=
knodesD[lastKnodeD[bid]].indices[thid]
-
RecstartD[bid]+1
;
}
SCHEDULE_CHILD_1D
(
findRangeK
(
height,
knodesD,
knodes_elem,
currKnodeD,
offsetD,
lastKnodeD,
offset_2D,
startD,
endD,
RecstartD,
ReclenD
SELF_SCHEDULE_CHILD_ARGS
))
}
//========================================================================================================================================================================================================200
...
...
Rodinia-SVM/b+tree-svm/main.c
View file @
ec5699f8
...
...
@@ -1869,6 +1869,9 @@ main( int argc,
//====================================================================================================100
cl_context
context
=
clCreateContext
(
NULL
,
num
,
devices
,
NULL
,
NULL
,
&
error
);
#ifdef SELF_SCHEDULE
initOnDeviceCommandQueues
(
context
);
#endif
printf
(
"WG size of kernel 1 = %d WG size of kernel 2 = %d
\n
"
,
DEFAULT_ORDER
,
DEFAULT_ORDER_2
);
...
...
@@ -2428,6 +2431,9 @@ main( int argc,
// ------------------------------------------------------------60
free
(
mem
);
#ifdef SELF_SCHEDULE
releaseOnDeviceCommandQueues
(
context
);
#endif
clReleaseContext
(
context
);
return
EXIT_SUCCESS
;
...
...
Rodinia-SVM/backprop-svm/backprop_kernel.cl
View file @
ec5699f8
...
...
@@ -18,7 +18,7 @@ bpnn_layerforward_ocl(__global float *input_cuda,
__local
float
*input_node,
__local
float
*weight_matrix,
int
in,
int
hid
)
int
hid
SELF_SCHEDULE_ARGS
)
{
int
by
=
get_group_id
(
1
)
;
...
...
@@ -61,8 +61,8 @@ bpnn_layerforward_ocl(__global float *input_cuda,
hidden_partial_sum[by
*
hid
+
ty]
=
weight_matrix[tx*
WIDTH
+
ty]
;
}
SCHEDULE_CHILD_WITH_LOCAL_PTRS_ND
(
^
(
local
void
*input_node,
local
void
*weight_matrix
)
{bpnn_layerforward_ocl
(
input_cuda,
output_hidden_cuda,
input_hidden_cuda,
hidden_partial_sum,
input_node,
weight_matrix,
in,
hid
SELF_SCHEDULE_CHILD_ARGS
)
;}\
COMMA
(
uint
)(
HEIGHT*sizeof
(
float
))
COMMA
(
uint
)(
HEIGHT*WIDTH*sizeof
(
float
)))
}
...
...
@@ -71,7 +71,7 @@ __kernel void bpnn_adjust_weights_ocl( __global float * delta,
__global
float
*
ly,
int
in,
__global
float
*
w,
__global
float
*
oldw
)
__global
float
*
oldw
SELF_SCHEDULE_ARGS
)
{
int
by
=
get_group_id
(
1
)
;
...
...
@@ -92,6 +92,6 @@ __kernel void bpnn_adjust_weights_ocl( __global float * delta,
oldw[index_x]
=
((
ETA
*
delta[index_x]
)
+
(
MOMENTUM
*
oldw[index_x]
))
;
}
SCHEDULE_CHILD_ND
(
bpnn_adjust_weights_ocl
(
delta,
hid,
ly,
in,
w,
oldw
SELF_SCHEDULE_CHILD_ARGS
))
}
#
endif
Rodinia-SVM/backprop-svm/backprop_ocl.cpp
View file @
ec5699f8
...
...
@@ -80,7 +80,13 @@ main( int argc, char** argv)
}
context
=
clCreateContext
(
NULL
,
num_devices
,
device_list
,
NULL
,
NULL
,
&
err
);
#ifdef SELF_SCHEDULE
initOnDeviceCommandQueues
(
context
);
#endif
setup
(
context
,
argc
,
argv
);
#ifdef SELF_SCHEDULE
releaseOnDeviceCommandQueues
(
context
);
#endif
clReleaseContext
(
context
);
}
...
...
Rodinia-SVM/bfs-svm/Kernels.cl
View file @
ec5699f8
...
...
@@ -18,7 +18,7 @@ __kernel void BFS_1( const __global Node* g_graph_nodes,
__global
char*
g_updating_graph_mask,
__global
char*
g_graph_visited,
__global
int*
g_cost,
const
int
no_of_nodes
)
{
const
int
no_of_nodes
SELF_SCHEDULE_ARGS
)
{
int
tid
=
get_global_id
(
0
)
;
if
(
tid<no_of_nodes
&&
g_graph_mask[tid]
)
{
g_graph_mask[tid]=false
;
...
...
@@ -31,7 +31,7 @@ __kernel void BFS_1( const __global Node* g_graph_nodes,
}
}
SCHEDULE_CHILD_1D
(
BFS_1
(
g_graph_nodes,
g_graph_edges,
g_graph_mask,
g_updating_graph_mask,
g_graph_visited,
g_cost,
no_of_nodes
SELF_SCHEDULE_CHILD_ARGS
))
}
//--5
parameters
...
...
@@ -40,7 +40,7 @@ __kernel void BFS_2(__global char* g_graph_mask,
__global
char*
g_graph_visited,
__global
char*
g_over,
const
int
no_of_nodes
)
{
SELF_SCHEDULE_ARGS
)
{
int
tid
=
get_global_id
(
0
)
;
if
(
tid<no_of_nodes
&&
g_updating_graph_mask[tid]
)
{
...
...
@@ -50,7 +50,7 @@ __kernel void BFS_2(__global char* g_graph_mask,
g_updating_graph_mask[tid]=false
;
}
SCHEDULE_CHILD_1D
(
BFS_2
(
g_graph_mask,
g_updating_graph_mask,
g_graph_visited,
g_over,
no_of_nodes
SELF_SCHEDULE_CHILD_ARGS
))
}
Rodinia-SVM/bfs-svm/bfs.cpp
View file @
ec5699f8
...
...
@@ -198,6 +198,9 @@ int main(int argc, char * argv[])
}
cl_context
context
=
clCreateContext
(
NULL
,
num
,
devices
,
NULL
,
NULL
,
&
err
);
#ifdef SELF_SCHEDULE
initOnDeviceCommandQueues
(
context
);
#endif
int
no_of_nodes
;
int
edge_list_size
;
...
...
@@ -304,6 +307,9 @@ int main(int argc, char * argv[])
clSVMFree
(
context
,
h_updating_graph_mask
);
clSVMFree
(
context
,
h_graph_visited
);
#ifdef SELF_SCHEDULE
releaseOnDeviceCommandQueues
(
context
);
#endif
}
catch
(
std
::
string
msg
){
...
...
Rodinia-SVM/cfd-svm/Kernels.cl
View file @
ec5699f8
...
...
@@ -38,11 +38,14 @@ typedef struct{
num_bytes:
the
number
of
bytes
all
together
@return:
through
mem_d
------------------------------------------------------------*/
__kernel
void
memset_kernel
(
__global
char
*
mem_d,
short
val,
int
ct
)
{
__kernel
void
memset_kernel
(
__global
char
*
mem_d,
short
val,
int
ct
SELF_SCHEDULE_ARGS
)
{
const
int
thread_id
=
get_global_id
(
0
)
;
if
(
thread_id
>=
ct
)
return
;
if
(
thread_id
>=
ct
)
goto
self_schedule
;
mem_d[thread_id]
=
val
;
self_schedule:
SCHEDULE_CHILD_1D
(
memset_kernel
(
mem_d,
val,
ct
SELF_SCHEDULE_CHILD_ARGS
))
return
;
}
//--cambine:
omit
&
...
...
@@ -83,22 +86,25 @@ inline void compute_flux_contribution(float density, FLOAT3 momentum, float dens
fc_density_energy->y
=
velocity.y*de_p
;
fc_density_energy->z
=
velocity.z*de_p
;
}
__kernel
void
initialize_variables
(
__global
float*
variables,
__constant
float*
ff_variable,
int
nelr
)
{
__kernel
void
initialize_variables
(
__global
float*
variables,
__constant
float*
ff_variable,
int
nelr
SELF_SCHEDULE_ARGS
)
{
//const
int
i
=
(
blockDim.x*blockIdx.x
+
threadIdx.x
)
;
const
int
i
=
get_global_id
(
0
)
;
if
(
i
>=
nelr
)
return
;
if
(
i
>=
nelr
)
goto
self_schedule
;
for
(
int
j
=
0
; j < NVAR; j++)
variables[i
+
j*nelr]
=
ff_variable[j]
;
self_schedule:
SCHEDULE_CHILD_1D
(
initialize_variables
(
variables,
ff_variable,
nelr
SELF_SCHEDULE_CHILD_ARGS
))
return
;
}
__kernel
void
compute_step_factor
(
__global
float*
variables,
__global
float*
areas,
__global
float*
step_factors,
int
nelr
)
{
int
nelr
SELF_SCHEDULE_ARGS
)
{
//const
int
i
=
(
blockDim.x*blockIdx.x
+
threadIdx.x
)
;
const
int
i
=
get_global_id
(
0
)
;
if
(
i
>=
nelr
)
return
;
if
(
i
>=
nelr
)
goto
self_schedule
;
float
density
=
variables[i
+
VAR_DENSITY*nelr]
;
FLOAT3
momentum
;
...
...
@@ -119,6 +125,9 @@ __kernel void compute_step_factor(__global float* variables,
//step_factors[i]
=
(
float
)(
0.5f
)
/
(
sqrtf
(
areas[i]
)
*
(
sqrtf
(
speed_sqd
)
+
speed_of_sound
))
;
step_factors[i]
=
(
float
)(
0.5f
)
/
(
sqrt
(
areas[i]
)
*
(
sqrt
(
speed_sqd
)
+
speed_of_sound
))
;
self_schedule:
SCHEDULE_CHILD_1D
(
compute_step_factor
(
variables,
areas,
step_factors,
nelr
SELF_SCHEDULE_CHILD_ARGS
))
return
;
}
__kernel
void
compute_flux
(
...
...
@@ -131,11 +140,11 @@ __kernel void compute_flux(
__constant
FLOAT3*
ff_flux_contribution_momentum_x,
__constant
FLOAT3*
ff_flux_contribution_momentum_y,
__constant
FLOAT3*
ff_flux_contribution_momentum_z,
int
nelr
)
{
int
nelr
SELF_SCHEDULE_ARGS
)
{
const
float
smoothing_coefficient
=
(
float
)(
0.2f
)
;
//const
int
i
=
(
blockDim.x*blockIdx.x
+
threadIdx.x
)
;
const
int
i
=
get_global_id
(
0
)
;
if
(
i
>=
nelr
)
return
;
if
(
i
>=
nelr
)
goto
self_schedule
;
int
j,
nb
;
FLOAT3
normal
; float normal_len;
float
factor
;
...
...
@@ -266,7 +275,7 @@ __kernel void compute_flux(
fluxes[i
+
VAR_DENSITY_ENERGY*nelr]
=
flux_i_density_energy
;
self_schedule:
SCHEDULE_CHILD_1D
(
compute_flux
(
elements_surrounding_elements,
normals,
variables,
ff_variable,
fluxes,
ff_flux_contribution_density_energy,
ff_flux_contribution_momentum_x,
ff_flux_contribution_momentum_y,
ff_flux_contribution_momentum_z,
nelr
SELF_SCHEDULE_CHILD_ARGS
))
return
;
}
...
...
@@ -274,10 +283,10 @@ __kernel void time_step(int j, int nelr,
__global
float*
old_variables,
__global
float*
variables,
__global
float*
step_factors,
__global
float*
fluxes
)
{
__global
float*
fluxes
SELF_SCHEDULE_ARGS
)
{
//const
int
i
=
(
blockDim.x*blockIdx.x
+
threadIdx.x
)
;
const
int
i
=
get_global_id
(
0
)
;
if
(
i
>=
nelr
)
return
;
if
(
i
>=
nelr
)
goto
self_schedule
;
float
factor
=
step_factors[i]/
(
float
)(
RK+1-j
)
;
...
...
@@ -287,6 +296,9 @@ __kernel void time_step(int j, int nelr,
variables[i
+
(
VAR_MOMENTUM+1
)
*nelr]
=
old_variables[i
+
(
VAR_MOMENTUM+1
)
*nelr]
+
factor*fluxes[i
+
(
VAR_MOMENTUM+1
)
*nelr]
;
variables[i
+
(
VAR_MOMENTUM+2
)
*nelr]
=
old_variables[i
+
(
VAR_MOMENTUM+2
)
*nelr]
+
factor*fluxes[i
+
(
VAR_MOMENTUM+2
)
*nelr]
;
self_schedule:
SCHEDULE_CHILD_1D
(
time_step
(
j,
nelr,
old_variables,
variables,
step_factors,
fluxes
SELF_SCHEDULE_CHILD_ARGS
))
return
;
}
#
endif
Rodinia-SVM/cfd-svm/euler3d.cpp
View file @
ec5699f8
...
...
@@ -246,6 +246,9 @@ int main(int argc, char** argv){
printf
(
"Error: Failed to create context (%d)!
\n
"
,
err
);
exit
(
1
);
}
#ifdef SELF_SCHEDULE
initOnDeviceCommandQueues
(
context
);
#endif
int
iCPU
,
iGPU
;
getCPUGPUIds
(
&
iCPU
,
&
iGPU
,
devices
,
num
);
...
...
@@ -413,6 +416,9 @@ int main(int argc, char** argv){
std
::
cout
<<
"Saved solution..."
<<
std
::
endl
;
std
::
cout
<<
"Cleaning up..."
<<
std
::
endl
;
// TODO
#ifdef SELF_SCHEDULE
releaseOnDeviceCommandQueues
(
context
);
#endif
std
::
cout
<<
"Done..."
<<
std
::
endl
;
clSVMFree
(
context
,
h_ff_variable
);
clSVMFree
(
context
,
h_ff_flux_contribution_momentum_x
);
...
...
Rodinia-SVM/dwt2d-svm/com_dwt.cl
View file @
ec5699f8
...
...
@@ -57,7 +57,7 @@ __kernel void c_CopySrcToComponents (__global int *d_r,
__global
int
*d_g,
__global
int
*d_b,
__global
unsigned
char
*
cl_d_src,
int
pixels
)
int
pixels
SELF_SCHEDULE_ARGS
)
{
int
x
=
get_local_id
(
0
)
;
int
gX=
get_local_size
(
0
)
*
get_group_id
(
0
)
;
...
...
@@ -85,14 +85,14 @@ __kernel void c_CopySrcToComponents (__global int *d_r,
storeComponents
(
d_r,
d_g,
d_b,
r,
g,
b,
globalOutputPosition
)
;
}
SCHEDULE_CHILD_1D
(
c_CopySrcToComponents
(
d_r,
d_g,
d_b,
cl_d_src,
pixels
SELF_SCHEDULE_CHILD_ARGS
))
}
//
Copy
img
src
data
into
three
separated
component
buffers
__kernel
void
c_CopySrcToComponent
(
__global
int
*d_c,
__global
unsigned
char
*
cl_d_src,
int
pixels
)
int
pixels
SELF_SCHEDULE_ARGS
)
{
int
x
=
get_local_id
(
0
)
;
int
gX
=
get_local_size
(
0
)
*
get_group_id
(
0
)
;
...
...
@@ -113,7 +113,7 @@ __kernel void c_CopySrcToComponent (__global int *d_c,
storeComponent
(
d_c,
c,
globalOutputPosition
)
;
}
SCHEDULE_CHILD_1D
(
c_CopySrcToComponent
(
d_c,
cl_d_src,
pixels
SELF_SCHEDULE_CHILD_ARGS
))
}
...
...
@@ -662,7 +662,7 @@ __kernel void cl_fdwt53Kernel(__global const int * const in,
const
int
sy,
const
int
steps,
int
WIN_SIZE_X,
int
WIN_SIZE_Y
)
int
WIN_SIZE_Y
SELF_SCHEDULE_ARGS
)
{
__local
struct
FDWT53
fdwt53
;
fdwt53.WIN_SIZE_X
=
WIN_SIZE_X
;
...
...
@@ -708,5 +708,5 @@ __kernel void cl_fdwt53Kernel(__global const int * const in,
transform
(
&fdwt53,
false,
false,
in,
out,
sx,
sy,
steps
)
;
}
SCHEDULE_CHILD_ND
(
cl_fdwt53Kernel
(
in,
out,
sx,
sy,
steps,
WIN_SIZE_X,
WIN_SIZE_Y
SELF_SCHEDULE_CHILD_ARGS
))
}
Rodinia-SVM/dwt2d-svm/main.cpp
View file @
ec5699f8
...
...
@@ -769,6 +769,9 @@ int main(int argc, char **argv)
}
context
=
clCreateContext
(
NULL
,
num
,
devices
,
NULL
,
NULL
,
&
errNum
);
#ifdef SELF_SCHEDULE
initOnDeviceCommandQueues
(
context
);
#endif
if
(
errNum
!=
CL_SUCCESS
)
{
std
::
cerr
<<
"Failed to create OpenCL context."
<<
std
::
endl
;
...
...
@@ -865,6 +868,9 @@ int main(int argc, char **argv)
clReleaseKernel
(
c_CopySrcToComponents
);
clReleaseKernel
(
c_CopySrcToComponent
);
#ifdef SELF_SCHEDULE
releaseOnDeviceCommandQueues
(
context
);
#endif
clSVMFree
(
context
,
d
->
srcImg
);
return
0
;
...
...
Rodinia-SVM/gaussian-svm/gaussianElim.cpp
View file @
ec5699f8
...
...
@@ -103,6 +103,9 @@ int main(int argc, char *argv[]) {
}
context
=
clCreateContext
(
NULL
,
num
,
devices
,
NULL
,
NULL
,
&
err
);
#ifdef SELF_SCHEDULE
initOnDeviceCommandQueues
(
context
);
#endif
if
(
size
<
1
)
{
...
...
@@ -176,6 +179,9 @@ int main(int argc, char *argv[]) {
clSVMFree
(
context
,
a
);
clSVMFree
(
context
,
b
);
free
(
finalVec
);
#ifdef SELF_SCHEDULE
releaseOnDeviceCommandQueues
(
context
);
#endif
cl_cleanup
();
//OpenClGaussianElimination(context,timing);
...
...
Rodinia-SVM/gaussian-svm/gaussianElim_kernels.cl
View file @
ec5699f8
...
...
@@ -11,14 +11,14 @@ __kernel void Fan1(__global float *m_dev,
__global
float
*a_dev,
__global
float
*b_dev,
const
int
size,
const
int
t
)
{
const
int
t
SELF_SCHEDULE_ARGS
)
{
int
globalId
=
get_global_id
(
0
)
;
if
(
globalId
<
size-1-t
)
{
*
(
m_dev
+
size
*
(
globalId
+
t
+
1
)
+t
)
=
*
(
a_dev
+
size
*
(
globalId
+
t
+
1
)
+
t
)
/
*
(
a_dev
+
size
*
t
+
t
)
;
}
SCHEDULE_CHILD_1D
(
Fan1
(
m_dev,
a_dev,
b_dev,
size,
t
SELF_SCHEDULE_CHILD_ARGS
))
}
...
...
@@ -26,7 +26,7 @@ __kernel void Fan2(__global float *m_dev,
__global
float
*a_dev,
__global
float
*b_dev,
const
int
size,
const
int
t
)
{
const
int
t
SELF_SCHEDULE_ARGS
)
{
int
globalId
=
get_global_id
(
0
)
;
int
globalIdx
=
get_global_id
(
0
)
;
...
...
@@ -49,5 +49,5 @@ __kernel void Fan2(__global float *m_dev,
//
b_dev[globalIdx+1+t]
-=
m_dev[size*
(
globalIdx+1+t
)
+
(
globalIdy+t
)
]
*
b_dev[t]
;
//
}
SCHEDULE_CHILD_ND
(
Fan2
(
m_dev,
a_dev,
b_dev,
size,
t
SELF_SCHEDULE_CHILD_ARGS
))
}
Rodinia-SVM/heartwall-svm/kernel/kernel_gpu_opencl.cl
View file @
ec5699f8
...
...
@@ -69,7 +69,7 @@ kernel_gpu_opencl( // structures
__global
fp*
in_sqr_final_sum_all,
//
31
OUTPUT
common.allPoints
__global
fp*
denomT_all,
//
32
OUTPUT
common.allPoints
__global
fp*
checksum
)
//
33
OUTPUT
100
__global
fp*
checksum
SELF_SCHEDULE_ARGS
)
//
33
OUTPUT
100
{
...
...
@@ -2229,6 +2229,9 @@ kernel_gpu_opencl( // structures
//
End
//======================================================================================================================================================150
SCHEDULE_CHILD_1D
(
kernel_gpu_opencl
(
d_common,
d_frame,
d_frame_no,
d_endoRow,
d_endoCol,
d_tEndoRowLoc,
d_tEndoColLoc,
d_epiRow,
d_epiCol,
d_tEpiRowLoc,
d_tEpiColLoc,
d_endoT,
d_epiT,
d_in2_all,
d_conv_all,
d_in2_pad_cumv_all,
d_in2_pad_cumv_sel_all,
d_in2_sub_cumh_all,
d_in2_sub_cumh_sel_all,
d_in2_sub2_all,
d_in2_sqr_all,
d_in2_sqr_sub2_all,
d_in_sqr_all,
d_tMask_all,
d_mask_conv_all,
d_in_mod_temp_all,
in_partial_sum_all,
in_sqr_partial_sum_all,
par_max_val_all,
par_max_coo_all,
in_final_sum_all,
in_sqr_final_sum_all,
denomT_all,
checksum
SELF_SCHEDULE_CHILD_ARGS
))
}
//========================================================================================================================================================================================================200
...
...
Rodinia-SVM/heartwall-svm/main.c
View file @
ec5699f8
...
...
@@ -83,6 +83,9 @@ main( int argc,
}
cl_context
context
=
clCreateContext
(
NULL
,
num
,
devices
,
NULL
,
NULL
,
&
err
);
#ifdef SELF_SCHEDULE
initOnDeviceCommandQueues
(
context
);
#endif
//======================================================================================================================================================150
...
...
@@ -269,6 +272,9 @@ main( int argc,
// End
//====================================================================================================100
#ifdef SELF_SCHEDULE
releaseOnDeviceCommandQueues
(
context
);
#endif
err
=
clReleaseContext
(
context
);
if
(
err
!=
CL_SUCCESS
)
fatal_CL
(
err
,
__LINE__
);
...
...
Rodinia-SVM/hotspot-svm/hotspot.c
View file @
ec5699f8
...
...
@@ -158,6 +158,9 @@ int main(int argc, char** argv) {
}
context
=
clCreateContext
(
NULL
,
num
,
devices
,
NULL
,
NULL
,
&
err
);
#ifdef SELF_SCHEDULE
initOnDeviceCommandQueues
(
context
);
#endif
int
iCPU
,
iGPU
;
getCPUGPUIds
(
&
iCPU
,
&
iGPU
,
devices
,
num
);
...
...
@@ -245,6 +248,9 @@ int main(int argc, char** argv) {
clSVMFree
(
context
,
MatrixTemp
[
1
]);
clSVMFree
(
context
,
MatrixPower
);
#ifdef SELF_SCHEDULE
releaseOnDeviceCommandQueues
(
context
);
#endif
clReleaseContext
(
context
);
...
...
Rodinia-SVM/hotspot-svm/hotspot_kernel.cl
View file @
ec5699f8
...
...
@@ -13,7 +13,7 @@ __kernel void hotspot( int iteration, //number of iteration
float
Rx,
float
Ry,
float
Rz,
float
step
)
{
float
step
SELF_SCHEDULE_ARGS
)
{
local
float
temp_on_cuda[BLOCK_SIZE][BLOCK_SIZE]
;
local
float
power_on_cuda[BLOCK_SIZE][BLOCK_SIZE]
;
...
...
@@ -114,5 +114,5 @@ __kernel void hotspot( int iteration, //number of iteration
temp_dst[index]=
temp_t[ty][tx]
;
}
SCHEDULE_CHILD_ND
(
hotspot
(
iteration,
power,
temp_src,
temp_dst,
grid_cols,
grid_rows,
border_cols,
border_rows,
Cap,
Rx,
Ry,
Rz,
step
SELF_SCHEDULE_CHILD_ARGS
))
}
Rodinia-SVM/hotspot3D-svm/3D.c
View file @
ec5699f8
...
...
@@ -185,6 +185,9 @@ int main(int argc, char** argv)
}
context
=
clCreateContext
(
NULL
,
num
,
devices
,
NULL
,
NULL
,
&
err
);
#ifdef SELF_SCHEDULE
initOnDeviceCommandQueues
(
context
);
#endif
int
iCPU
,
iGPU
;
getCPUGPUIds
(
&
iCPU
,
&
iGPU
,
devices
,
num
);
...
...
@@ -310,6 +313,9 @@ int main(int argc, char** argv)
clReleaseKernel
(
ko_vadd
);
clReleaseCommandQueue
(
commandsCPU
);
clReleaseCommandQueue
(
commandsGPU
);
#ifdef SELF_SCHEDULE
releaseOnDeviceCommandQueues
(
context
);
#endif
clReleaseContext
(
context
);
return
0
;
...
...
Rodinia-SVM/hotspot3D-svm/hotspotKernel.cl
View file @
ec5699f8
...
...
@@ -5,7 +5,7 @@ __kernel void hotspotOpt1(__global float *p, __global float* tIn, __global float
float
ce,
float
cw,
float
cn,
float
cs,
float
ct,
float
cb,
float
cc
)
float
cc
SELF_SCHEDULE_ARGS
)
{
float
amb_temp
=
80.0
;
...
...
@@ -47,7 +47,7 @@ __kernel void hotspotOpt1(__global float *p, __global float* tIn, __global float
tOut[c]
=
cc
*
temp2
+
cw
*
tIn[W]
+
ce
*
tIn[E]
+
cs
*
tIn[S]
+
cn
*
tIn[N]
+
cb
*
temp1
+
ct
*
temp3
+
sdc
*
p[c]
+
ct
*
amb_temp
;
SCHEDULE_CHILD_ND
(
hotspotOpt1
(
p,
tIn,
tOut,
sdc,
nx,
ny,
nz,
ce,
cw,
cn,
cs,
ct,
cb,
cc
SELF_SCHEDULE_CHILD_ARGS
))
}
Rodinia-SVM/hybridsort-svm/Makefile
View file @
ec5699f8
CC
=
gcc
CC_FLAGS
=
-std
=
gnu11
OPENCL_INC
=
/opt/intel/opencl/include
OPENCL_LIB
=
/opt/intel/opencl
include
../../common/make.config
ifdef
VERIFY
override
VERIFY
=
-DVERIFY
...
...
@@ -12,10 +11,6 @@ ifdef DEBUG
override
DEBUG
=
-DDEBUG
endif
ifdef
SELF_SCHEDULE
override
SELF_SCHEDULE
=
-DSELF_SCHEDULE
endif
ifdef
OUTPUT
override
OUTPUT
=
-DOUTPUT
endif
...
...
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment