Skip to content

Commit

Permalink
Clean up of DoFilter for GPU
Browse files: browse the repository at this point in the history
  • Loading branch information
dpgrote committed Sep 10, 2024
1 parent d863188 commit 8a6e0e5
Showing 1 changed file with 2 additions and 42 deletions.
44 changes: 2 additions & 42 deletions Source/Filter/Filter.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -101,7 +101,6 @@ void Filter::DoFilter (const Box& tbx,
)
Dim3 slen_local = slen;

#if AMREX_SPACEDIM == 3
AMREX_PARALLEL_FOR_4D ( tbx, ncomp, i, j, k, n,
{
Real d = 0.0;
Expand All @@ -116,6 +115,7 @@ void Filter::DoFilter (const Box& tbx,
for (int iz=0; iz < slen_local.z; ++iz){
for (int iy=0; iy < slen_local.y; ++iy){
for (int ix=0; ix < slen_local.x; ++ix){
#if AMREX_SPACEDIM == 3
Real sss = sx[ix]*sy[iy]*sz[iz];
d += sss*( src_zeropad(i-ix,j-iy,k-iz,scomp+n)
+src_zeropad(i+ix,j-iy,k-iz,scomp+n)
Expand All @@ -125,63 +125,23 @@ void Filter::DoFilter (const Box& tbx,
+src_zeropad(i+ix,j-iy,k+iz,scomp+n)
+src_zeropad(i-ix,j+iy,k+iz,scomp+n)
+src_zeropad(i+ix,j+iy,k+iz,scomp+n));
}
}
}

dst(i,j,k,dcomp+n) = d;
});
#elif AMREX_SPACEDIM == 2
AMREX_PARALLEL_FOR_4D ( tbx, ncomp, i, j, k, n,
{
Real d = 0.0;

// Pad source array with zeros beyond ghost cells
// for out-of-bound accesses due to large-stencil operations
const auto src_zeropad = [src] (const int jj, const int kk, const int ll, const int nn) noexcept
{
return src.contains(jj,kk,ll) ? src(jj,kk,ll,nn) : 0.0_rt;
};

for (int iz=0; iz < slen_local.z; ++iz){
for (int iy=0; iy < slen_local.y; ++iy){
for (int ix=0; ix < slen_local.x; ++ix){
Real sss = sx[ix]*sy[iy];
d += sss*( src_zeropad(i-ix,j-iy,k,scomp+n)
+src_zeropad(i+ix,j-iy,k,scomp+n)
+src_zeropad(i-ix,j+iy,k,scomp+n)
+src_zeropad(i+ix,j+iy,k,scomp+n));
}
}
}

dst(i,j,k,dcomp+n) = d;
});
#elif AMREX_SPACEDIM == 1
AMREX_PARALLEL_FOR_4D ( tbx, ncomp, i, j, k, n,
{
Real d = 0.0;

// Pad source array with zeros beyond ghost cells
// for out-of-bound accesses due to large-stencil operations
const auto src_zeropad = [src] (const int jj, const int kk, const int ll, const int nn) noexcept
{
return src.contains(jj,kk,ll) ? src(jj,kk,ll,nn) : 0.0_rt;
};

for (int iz=0; iz < slen_local.z; ++iz){
for (int iy=0; iy < slen_local.y; ++iy){
for (int ix=0; ix < slen_local.x; ++ix){
Real sss = sx[ix];
d += sss*( src_zeropad(i-ix,j,k,scomp+n)
+src_zeropad(i+ix,j,k,scomp+n));
#endif
}
}
}

dst(i,j,k,dcomp+n) = d;
});
#endif
}

#else
Expand Down

0 comments on commit 8a6e0e5

Please sign in to comment.