Commit 8d859168 authored by Tilman Steinweg

optimized allocation and exchange between PEs

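Exchanging the wavefields between PEs requires a wrapping layer of only FDORDER/2 grid points.
Previously, however, a wrapping layer of FDORDER/2 + 1 was implemented because of undetected errors in the exchange functions; this commit reduces the layer to FDORDER/2 and adjusts the exchange loops accordingly.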
parent f2a67970
...@@ -44,7 +44,7 @@ void model_acoustic(float **rho, float **pi){ ...@@ -44,7 +44,7 @@ void model_acoustic(float **rho, float **pi){
float *fldepth, *flrho, *flvp, *fltaper; float *fldepth, *flrho, *flvp, *fltaper;
nd = FDORDER/2 + 1; nd = FDORDER/2;
/*read FL nodes from File*/ /*read FL nodes from File*/
nodes=5; nodes=5;
......
...@@ -39,7 +39,7 @@ MPI_Request *req_send, MPI_Request *req_rec){ ...@@ -39,7 +39,7 @@ MPI_Request *req_send, MPI_Request *req_rec){
/* communication initialisation for persistent communication */ /* communication initialisation for persistent communication */
fdo2 = 2*(FDORDER/2 + 1); fdo2 = 2*(FDORDER/2);
/* buffer arrays are copied into local buffers using buffered send (bsend), /* buffer arrays are copied into local buffers using buffered send (bsend),
......
...@@ -78,7 +78,7 @@ int main(int argc, char **argv){ ...@@ -78,7 +78,7 @@ int main(int argc, char **argv){
/*int ** tracekill=NULL, TRKILL, DTRKILL;*/ /*int ** tracekill=NULL, TRKILL, DTRKILL;*/
int * DTINV_help; int * DTINV_help;
float ** bufferlef_to_rig, ** bufferrig_to_lef, ** buffertop_to_bot, ** bufferbot_to_top; float ** bufferlef_to_rig, ** bufferrig_to_lef, ** buffertop_to_bot, ** bufferbot_to_top, ** p_bufferlef_to_rig, ** p_bufferrig_to_lef, ** p_buffertop_to_bot, ** p_bufferbot_to_top;
/* PML variables */ /* PML variables */
float * d_x, * K_x, * alpha_prime_x, * a_x, * b_x, * d_x_half, * K_x_half, * alpha_prime_x_half, * a_x_half, * b_x_half, * d_y, * K_y, * alpha_prime_y, * a_y, * b_y, * d_y_half, * K_y_half, * alpha_prime_y_half, * a_y_half, * b_y_half; float * d_x, * K_x, * alpha_prime_x, * a_x, * b_x, * d_x_half, * K_x_half, * alpha_prime_x_half, * a_x_half, * b_x_half, * d_y, * K_y, * alpha_prime_y, * a_y, * b_y, * d_y_half, * K_y_half, * alpha_prime_y_half, * a_y_half, * b_y_half;
...@@ -251,7 +251,7 @@ int main(int argc, char **argv){ ...@@ -251,7 +251,7 @@ int main(int argc, char **argv){
fac1=(NX+FDORDER)*(NY+FDORDER); fac1=(NX+FDORDER)*(NY+FDORDER);
fac2=sizeof(float)*pow(2.0,-20.0); fac2=sizeof(float)*pow(2.0,-20.0);
nd = FDORDER/2 + 1; nd = FDORDER/2;
fdo3 = 2*nd; fdo3 = 2*nd;
if (L){ if (L){
...@@ -521,6 +521,15 @@ int main(int argc, char **argv){ ...@@ -521,6 +521,15 @@ int main(int argc, char **argv){
bufferrig_to_lef = matrix(1,NY,1,fdo3); bufferrig_to_lef = matrix(1,NY,1,fdo3);
buffertop_to_bot = matrix(1,NX,1,fdo3); buffertop_to_bot = matrix(1,NX,1,fdo3);
bufferbot_to_top = matrix(1,NX,1,fdo3); bufferbot_to_top = matrix(1,NX,1,fdo3);
/* In exchange_p for acoustic simulation only one variable (sp) is exchanged,
therefore a buffer width of 1*nd instead of fdo3=2*nd is sufficient */
if (ACOUSTIC) {
p_bufferlef_to_rig = matrix(1,NY,1,nd);
p_bufferrig_to_lef = matrix(1,NY,1,nd);
p_buffertop_to_bot = matrix(1,NX,1,nd);
p_bufferbot_to_top = matrix(1,NX,1,nd);
}
switch (SEISMO){ switch (SEISMO){
case 1 : /* particle velocities only */ case 1 : /* particle velocities only */
...@@ -1041,7 +1050,7 @@ int main(int argc, char **argv){ ...@@ -1041,7 +1050,7 @@ int main(int argc, char **argv){
if(!ACOUSTIC) if(!ACOUSTIC)
exchange_s(psxx,psyy,psxy,bufferlef_to_rig, bufferrig_to_lef,buffertop_to_bot, bufferbot_to_top,req_send, req_rec); exchange_s(psxx,psyy,psxy,bufferlef_to_rig, bufferrig_to_lef,buffertop_to_bot, bufferbot_to_top,req_send, req_rec);
else else
exchange_p(psp,bufferlef_to_rig, bufferrig_to_lef,buffertop_to_bot, bufferbot_to_top,req_send, req_rec); exchange_p(psp,p_bufferlef_to_rig, p_bufferrig_to_lef,p_buffertop_to_bot,p_bufferbot_to_top,req_send, req_rec);
if (MYID==0){ if (MYID==0){
...@@ -1485,7 +1494,7 @@ int main(int argc, char **argv){ ...@@ -1485,7 +1494,7 @@ int main(int argc, char **argv){
if(!ACOUSTIC) if(!ACOUSTIC)
exchange_s(psxx,psyy,psxy, bufferlef_to_rig, bufferrig_to_lef, buffertop_to_bot, bufferbot_to_top, req_send, req_rec); exchange_s(psxx,psyy,psxy, bufferlef_to_rig, bufferrig_to_lef, buffertop_to_bot, bufferbot_to_top, req_send, req_rec);
else else
exchange_p(psp,bufferlef_to_rig, bufferrig_to_lef,buffertop_to_bot, bufferbot_to_top,req_send, req_rec); exchange_p(psp,p_bufferlef_to_rig, p_bufferrig_to_lef,p_buffertop_to_bot, p_bufferbot_to_top,req_send, req_rec);
if (MYID==0){ if (MYID==0){
...@@ -1913,7 +1922,7 @@ int main(int argc, char **argv){ ...@@ -1913,7 +1922,7 @@ int main(int argc, char **argv){
if(!ACOUSTIC) if(!ACOUSTIC)
exchange_s(psxx,psyy,psxy, bufferlef_to_rig, bufferrig_to_lef, buffertop_to_bot, bufferbot_to_top, req_send, req_rec); exchange_s(psxx,psyy,psxy, bufferlef_to_rig, bufferrig_to_lef, buffertop_to_bot, bufferbot_to_top, req_send, req_rec);
else else
exchange_p(psp,bufferlef_to_rig, bufferrig_to_lef,buffertop_to_bot, bufferbot_to_top,req_send, req_rec); exchange_p(psp,p_bufferlef_to_rig, p_bufferrig_to_lef,p_buffertop_to_bot, p_bufferbot_to_top,req_send, req_rec);
if (MYID==0){ if (MYID==0){
...@@ -2607,7 +2616,7 @@ int main(int argc, char **argv){ ...@@ -2607,7 +2616,7 @@ int main(int argc, char **argv){
if(!ACOUSTIC) if(!ACOUSTIC)
exchange_s(psxx,psyy,psxy, bufferlef_to_rig, bufferrig_to_lef, buffertop_to_bot, bufferbot_to_top, req_send, req_rec); exchange_s(psxx,psyy,psxy, bufferlef_to_rig, bufferrig_to_lef, buffertop_to_bot, bufferbot_to_top, req_send, req_rec);
else else
exchange_p(psp,bufferlef_to_rig, bufferrig_to_lef,buffertop_to_bot, bufferbot_to_top,req_send, req_rec); exchange_p(psp,p_bufferlef_to_rig, p_bufferrig_to_lef,p_buffertop_to_bot, p_bufferbot_to_top,req_send, req_rec);
if (MYID==0){ if (MYID==0){
time7=MPI_Wtime(); time7=MPI_Wtime();
...@@ -3188,6 +3197,13 @@ int main(int argc, char **argv){ ...@@ -3188,6 +3197,13 @@ int main(int argc, char **argv){
free_matrix(bufferrig_to_lef,1,NY,1,fdo3); free_matrix(bufferrig_to_lef,1,NY,1,fdo3);
free_matrix(buffertop_to_bot,1,NX,1,fdo3); free_matrix(buffertop_to_bot,1,NX,1,fdo3);
free_matrix(bufferbot_to_top,1,NX,1,fdo3); free_matrix(bufferbot_to_top,1,NX,1,fdo3);
if (ACOUSTIC) {
free_matrix(p_bufferlef_to_rig,1,NY,1,nd);
free_matrix(p_bufferrig_to_lef,1,NY,1,nd);
free_matrix(p_buffertop_to_bot,1,NX,1,nd);
free_matrix(p_bufferbot_to_top,1,NX,1,nd);
}
switch (SEISMO){ switch (SEISMO){
case 1 : /* particle velocities only */ case 1 : /* particle velocities only */
......
...@@ -38,8 +38,8 @@ void exchange_p(float ** sp, float ** bufferlef_to_rig, float ** bufferrig_to_le ...@@ -38,8 +38,8 @@ void exchange_p(float ** sp, float ** bufferlef_to_rig, float ** bufferrig_to_le
int i, j, fdo, fdo3, n, l; int i, j, fdo, fdo3, n, l;
fdo = FDORDER/2 + 1; fdo = FDORDER/2;
fdo3 = 2*fdo; fdo3 = 1*fdo;
/* top - bottom */ /* top - bottom */
...@@ -94,7 +94,7 @@ void exchange_p(float ** sp, float ** bufferlef_to_rig, float ** bufferrig_to_le ...@@ -94,7 +94,7 @@ void exchange_p(float ** sp, float ** bufferlef_to_rig, float ** bufferrig_to_le
for (j=1;j<=NY;j++){ for (j=1;j<=NY;j++){
/* storage of left edge of local volume into buffer */ /* storage of left edge of local volume into buffer */
n = 1; n = 1;
for (l=1;l<fdo;l++) { for (l=1;l<=fdo;l++) {
bufferlef_to_rig[j][n++] = sp[j][l]; bufferlef_to_rig[j][n++] = sp[j][l];
} }
} }
...@@ -104,7 +104,7 @@ void exchange_p(float ** sp, float ** bufferlef_to_rig, float ** bufferrig_to_le ...@@ -104,7 +104,7 @@ void exchange_p(float ** sp, float ** bufferlef_to_rig, float ** bufferrig_to_le
for (j=1;j<=NY;j++){ for (j=1;j<=NY;j++){
/* storage of right edge of local volume into buffer */ /* storage of right edge of local volume into buffer */
n = 1; n = 1;
for (l=1;l<fdo-1;l++) { for (l=1;l<=fdo-1;l++) {
bufferrig_to_lef[j][n++] = sp[j][NX-l+1]; bufferrig_to_lef[j][n++] = sp[j][NX-l+1];
} }
} }
...@@ -119,7 +119,7 @@ void exchange_p(float ** sp, float ** bufferlef_to_rig, float ** bufferrig_to_le ...@@ -119,7 +119,7 @@ void exchange_p(float ** sp, float ** bufferlef_to_rig, float ** bufferrig_to_le
if ((BOUNDARY) || (POS[1]!=NPROCX-1)) /* no boundary exchange at right edge of global grid */ if ((BOUNDARY) || (POS[1]!=NPROCX-1)) /* no boundary exchange at right edge of global grid */
for (j=1;j<=NY;j++){ for (j=1;j<=NY;j++){
n = 1; n = 1;
for (l=1;l<fdo;l++) { for (l=1;l<=fdo;l++) {
sp[j][NX+l] = bufferlef_to_rig[j][n++]; sp[j][NX+l] = bufferlef_to_rig[j][n++];
} }
} }
...@@ -127,7 +127,7 @@ void exchange_p(float ** sp, float ** bufferlef_to_rig, float ** bufferrig_to_le ...@@ -127,7 +127,7 @@ void exchange_p(float ** sp, float ** bufferlef_to_rig, float ** bufferrig_to_le
if ((BOUNDARY) || (POS[1]!=0)) /* no boundary exchange at left edge of global grid */ if ((BOUNDARY) || (POS[1]!=0)) /* no boundary exchange at left edge of global grid */
for (j=1;j<=NY;j++){ for (j=1;j<=NY;j++){
n = 1; n = 1;
for (l=1;l<fdo-1;l++) { for (l=1;l<=fdo-1;l++) {
sp[j][1-l] = bufferrig_to_lef[j][n++]; sp[j][1-l] = bufferrig_to_lef[j][n++];
} }
} }
......
...@@ -39,7 +39,7 @@ void exchange_s(float ** sxx, float ** syy, ...@@ -39,7 +39,7 @@ void exchange_s(float ** sxx, float ** syy,
int i, j, fdo, fdo3, n, l; int i, j, fdo, fdo3, n, l;
fdo = FDORDER/2 + 1; fdo = FDORDER/2;
fdo3 = 2*fdo; fdo3 = 2*fdo;
...@@ -125,10 +125,10 @@ void exchange_s(float ** sxx, float ** syy, ...@@ -125,10 +125,10 @@ void exchange_s(float ** sxx, float ** syy,
for (j=1;j<=NY;j++){ for (j=1;j<=NY;j++){
/* storage of left edge of local volume into buffer */ /* storage of left edge of local volume into buffer */
n = 1; n = 1;
for (l=1;l<fdo-1;l++) { for (l=1;l<=fdo-1;l++) {
bufferlef_to_rig[j][n++] = sxy[j][l]; bufferlef_to_rig[j][n++] = sxy[j][l];
} }
for (l=1;l<fdo;l++) { for (l=1;l<=fdo;l++) {
bufferlef_to_rig[j][n++] = sxx[j][l]; bufferlef_to_rig[j][n++] = sxx[j][l];
} }
} }
...@@ -138,10 +138,10 @@ void exchange_s(float ** sxx, float ** syy, ...@@ -138,10 +138,10 @@ void exchange_s(float ** sxx, float ** syy,
for (j=1;j<=NY;j++){ for (j=1;j<=NY;j++){
/* storage of right edge of local volume into buffer */ /* storage of right edge of local volume into buffer */
n = 1; n = 1;
for (l=1;l<fdo;l++) { for (l=1;l<=fdo;l++) {
bufferrig_to_lef[j][n++] = sxy[j][NX-l+1]; bufferrig_to_lef[j][n++] = sxy[j][NX-l+1];
} }
for (l=1;l<fdo-1;l++) { for (l=1;l<=fdo-1;l++) {
bufferrig_to_lef[j][n++] = sxx[j][NX-l+1]; bufferrig_to_lef[j][n++] = sxx[j][NX-l+1];
} }
} }
...@@ -178,10 +178,10 @@ void exchange_s(float ** sxx, float ** syy, ...@@ -178,10 +178,10 @@ void exchange_s(float ** sxx, float ** syy,
if ((BOUNDARY) || (POS[1]!=NPROCX-1)) /* no boundary exchange at right edge of global grid */ if ((BOUNDARY) || (POS[1]!=NPROCX-1)) /* no boundary exchange at right edge of global grid */
for (j=1;j<=NY;j++){ for (j=1;j<=NY;j++){
n = 1; n = 1;
for (l=1;l<fdo-1;l++) { for (l=1;l<=fdo-1;l++) {
sxy[j][NX+l] = bufferlef_to_rig[j][n++]; sxy[j][NX+l] = bufferlef_to_rig[j][n++];
} }
for (l=1;l<fdo;l++) { for (l=1;l<=fdo;l++) {
sxx[j][NX+l] = bufferlef_to_rig[j][n++]; sxx[j][NX+l] = bufferlef_to_rig[j][n++];
} }
} }
...@@ -189,10 +189,10 @@ void exchange_s(float ** sxx, float ** syy, ...@@ -189,10 +189,10 @@ void exchange_s(float ** sxx, float ** syy,
if ((BOUNDARY) || (POS[1]!=0)) /* no boundary exchange at left edge of global grid */ if ((BOUNDARY) || (POS[1]!=0)) /* no boundary exchange at left edge of global grid */
for (j=1;j<=NY;j++){ for (j=1;j<=NY;j++){
n = 1; n = 1;
for (l=1;l<fdo;l++) { for (l=1;l<=fdo;l++) {
sxy[j][1-l] = bufferrig_to_lef[j][n++]; sxy[j][1-l] = bufferrig_to_lef[j][n++];
} }
for (l=1;l<fdo-1;l++) { for (l=1;l<=fdo-1;l++) {
sxx[j][1-l] = bufferrig_to_lef[j][n++]; sxx[j][1-l] = bufferrig_to_lef[j][n++];
} }
} }
......
...@@ -38,7 +38,7 @@ void exchange_v(float ** vx, float ** vy, ...@@ -38,7 +38,7 @@ void exchange_v(float ** vx, float ** vy,
MPI_Status status; MPI_Status status;
int i, j, fdo, fdo3, n, l; int i, j, fdo, fdo3, n, l;
fdo = FDORDER/2 + 1; fdo = FDORDER/2;
fdo3 = 2*fdo; fdo3 = 2*fdo;
/* top - bottom */ /* top - bottom */
...@@ -127,10 +127,10 @@ void exchange_v(float ** vx, float ** vy, ...@@ -127,10 +127,10 @@ void exchange_v(float ** vx, float ** vy,
for (j=1;j<=NY;j++){ for (j=1;j<=NY;j++){
/* storage of left edge of local volume into buffer */ /* storage of left edge of local volume into buffer */
n = 1; n = 1;
for (l=1;l<fdo;l++) { for (l=1;l<=fdo;l++) {
bufferlef_to_rig[j][n++] = vy[j][l]; bufferlef_to_rig[j][n++] = vy[j][l];
} }
for (l=1;l<fdo-1;l++) { for (l=1;l<=fdo-1;l++) {
bufferlef_to_rig[j][n++] = vx[j][l]; bufferlef_to_rig[j][n++] = vx[j][l];
} }
} }
...@@ -141,10 +141,10 @@ void exchange_v(float ** vx, float ** vy, ...@@ -141,10 +141,10 @@ void exchange_v(float ** vx, float ** vy,
for (j=1;j<=NY;j++){ for (j=1;j<=NY;j++){
/* storage of right edge of local volume into buffer */ /* storage of right edge of local volume into buffer */
n = 1; n = 1;
for (l=1;l<fdo-1;l++) { for (l=1;l<=fdo-1;l++) {
bufferrig_to_lef[j][n++] = vy[j][NX-l+1]; bufferrig_to_lef[j][n++] = vy[j][NX-l+1];
} }
for (l=1;l<fdo;l++) { for (l=1;l<=fdo;l++) {
bufferrig_to_lef[j][n++] = vx[j][NX-l+1]; bufferrig_to_lef[j][n++] = vx[j][NX-l+1];
} }
} }
...@@ -179,10 +179,10 @@ void exchange_v(float ** vx, float ** vy, ...@@ -179,10 +179,10 @@ void exchange_v(float ** vx, float ** vy,
if ((BOUNDARY) || (POS[1]!=NPROCX-1)) /* no boundary exchange at right edge of global grid */ if ((BOUNDARY) || (POS[1]!=NPROCX-1)) /* no boundary exchange at right edge of global grid */
for (j=1;j<=NY;j++){ for (j=1;j<=NY;j++){
n = 1; n = 1;
for (l=1;l<fdo;l++) { for (l=1;l<=fdo;l++) {
vy[j][NX+l] = bufferlef_to_rig[j][n++]; vy[j][NX+l] = bufferlef_to_rig[j][n++];
} }
for (l=1;l<fdo-1;l++) { for (l=1;l<=fdo-1;l++) {
vx[j][NX+l] = bufferlef_to_rig[j][n++]; vx[j][NX+l] = bufferlef_to_rig[j][n++];
} }
} }
...@@ -191,10 +191,10 @@ void exchange_v(float ** vx, float ** vy, ...@@ -191,10 +191,10 @@ void exchange_v(float ** vx, float ** vy,
if ((BOUNDARY) || (POS[1]!=0)) /* no boundary exchange at left edge of global grid */ if ((BOUNDARY) || (POS[1]!=0)) /* no boundary exchange at left edge of global grid */
for (j=1;j<=NY;j++){ for (j=1;j<=NY;j++){
n = 1; n = 1;
for (l=1;l<fdo-1;l++) { for (l=1;l<=fdo-1;l++) {
vy[j][1-l] = bufferrig_to_lef[j][n++]; vy[j][1-l] = bufferrig_to_lef[j][n++];
} }
for (l=1;l<fdo;l++) { for (l=1;l<=fdo;l++) {
vx[j][1-l] = bufferrig_to_lef[j][n++]; vx[j][1-l] = bufferrig_to_lef[j][n++];
} }
} }
......
...@@ -154,7 +154,7 @@ void snap(FILE *fp,int nt, int nsnap, float **vx, float **vy, float **sxx, ...@@ -154,7 +154,7 @@ void snap(FILE *fp,int nt, int nsnap, float **vx, float **vy, float **sxx,
fpx2=fopen(snapfile_div,wm); fpx2=fopen(snapfile_div,wm);
fpy2=fopen(snapfile_rot,wm); fpy2=fopen(snapfile_rot,wm);
nd = FDORDER/2+1; nd = FDORDER/2;
curlfield = matrix(-nd+1,NY+nd,-nd+1,NX+nd); curlfield = matrix(-nd+1,NY+nd,-nd+1,NX+nd);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment