Skip to content

Commit

Permalink
Change (x,y,z) to (0,1,2) to be more general
Browse files Browse the repository at this point in the history
  • Loading branch information
dpgrote committed Sep 11, 2024
1 parent 2063761 commit ec08c22
Show file tree
Hide file tree
Showing 4 changed files with 66 additions and 66 deletions.
12 changes: 6 additions & 6 deletions Source/Filter/BilinearFilter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,15 +67,15 @@ void BilinearFilter::ComputeStencils(){
stencil_length_each_dir[i++] = static_cast<int>(el) + 1;
}

stencil_x.resize( 1u + npass_each_dir[0] );
compute_stencil(stencil_x, npass_each_dir[0]);
m_stencil_0.resize( 1u + npass_each_dir[0] );
compute_stencil(m_stencil_0, npass_each_dir[0]);
#if defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ) || defined(WARPX_DIM_3D)
stencil_y.resize( 1u + npass_each_dir[1] );
compute_stencil(stencil_y, npass_each_dir[1]);
m_stencil_1.resize( 1u + npass_each_dir[1] );
compute_stencil(m_stencil_1, npass_each_dir[1]);
#endif
#if defined(WARPX_DIM_3D)
stencil_z.resize( 1u + npass_each_dir[2] );
compute_stencil(stencil_z, npass_each_dir[2]);
m_stencil_2.resize( 1u + npass_each_dir[2] );
compute_stencil(m_stencil_2, npass_each_dir[2]);
#endif

slen = stencil_length_each_dir.dim3();
Expand Down
2 changes: 1 addition & 1 deletion Source/Filter/Filter.H
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ public:

protected:
// Stencil along each direction.
amrex::Gpu::DeviceVector<amrex::Real> stencil_x, stencil_y, stencil_z;
amrex::Gpu::DeviceVector<amrex::Real> m_stencil_0, m_stencil_1, m_stencil_2;
// Length of each stencil, 1 for dimensions not included
amrex::Dim3 slen;

Expand Down
100 changes: 50 additions & 50 deletions Source/Filter/Filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,9 @@ void Filter::DoFilter (const Box& tbx,
int scomp, int dcomp, int ncomp)
{
AMREX_D_TERM(
amrex::Real const* AMREX_RESTRICT sx = stencil_x.data();,
amrex::Real const* AMREX_RESTRICT sy = stencil_y.data();,
amrex::Real const* AMREX_RESTRICT sz = stencil_z.data();
amrex::Real const* AMREX_RESTRICT s0 = m_stencil_0.data();,
amrex::Real const* AMREX_RESTRICT s1 = m_stencil_1.data();,
amrex::Real const* AMREX_RESTRICT s2 = m_stencil_2.data();
)
Dim3 slen_local = slen;

Expand All @@ -113,18 +113,18 @@ void Filter::DoFilter (const Box& tbx,
return src.contains(jj,kk,ll) ? src(jj,kk,ll,nn) : 0.0_rt;
};

for (int iz=0; iz < slen_local.z; ++iz){
for (int iy=0; iy < slen_local.y; ++iy){
for (int ix=0; ix < slen_local.x; ++ix){
Real sss = sx[ix]*sy[iy]*sz[iz];
d += sss*( src_zeropad(i-ix,j-iy,k-iz,scomp+n)
+src_zeropad(i+ix,j-iy,k-iz,scomp+n)
+src_zeropad(i-ix,j+iy,k-iz,scomp+n)
+src_zeropad(i+ix,j+iy,k-iz,scomp+n)
+src_zeropad(i-ix,j-iy,k+iz,scomp+n)
+src_zeropad(i+ix,j-iy,k+iz,scomp+n)
+src_zeropad(i-ix,j+iy,k+iz,scomp+n)
+src_zeropad(i+ix,j+iy,k+iz,scomp+n));
for (int i2=0; i2 < slen_local.z; ++i2){
for (int i1=0; i1 < slen_local.y; ++i1){
for (int i0=0; i0 < slen_local.x; ++i0){
Real sss = s0[i0]*s1[i1]*s2[i2];
d += sss*( src_zeropad(i-i0,j-i1,k-i2,scomp+n)
+src_zeropad(i+i0,j-i1,k-i2,scomp+n)
+src_zeropad(i-i0,j+i1,k-i2,scomp+n)
+src_zeropad(i+i0,j+i1,k-i2,scomp+n)
+src_zeropad(i-i0,j-i1,k+i2,scomp+n)
+src_zeropad(i+i0,j-i1,k+i2,scomp+n)
+src_zeropad(i-i0,j+i1,k+i2,scomp+n)
+src_zeropad(i+i0,j+i1,k+i2,scomp+n));
}
}
}
Expand All @@ -143,14 +143,14 @@ void Filter::DoFilter (const Box& tbx,
return src.contains(jj,kk,ll) ? src(jj,kk,ll,nn) : 0.0_rt;
};

for (int iz=0; iz < slen_local.z; ++iz){
for (int iy=0; iy < slen_local.y; ++iy){
for (int ix=0; ix < slen_local.x; ++ix){
Real sss = sx[ix]*sy[iy];
d += sss*( src_zeropad(i-ix,j-iy,k,scomp+n)
+src_zeropad(i+ix,j-iy,k,scomp+n)
+src_zeropad(i-ix,j+iy,k,scomp+n)
+src_zeropad(i+ix,j+iy,k,scomp+n));
for (int i2=0; i2 < slen_local.z; ++i2){
for (int i1=0; i1 < slen_local.y; ++i1){
for (int i0=0; i0 < slen_local.x; ++i0){
Real sss = s0[i0]*s1[i1];
d += sss*( src_zeropad(i-i0,j-i1,k,scomp+n)
+src_zeropad(i+i0,j-i1,k,scomp+n)
+src_zeropad(i-i0,j+i1,k,scomp+n)
+src_zeropad(i+i0,j+i1,k,scomp+n));
}
}
}
Expand All @@ -169,12 +169,12 @@ void Filter::DoFilter (const Box& tbx,
return src.contains(jj,kk,ll) ? src(jj,kk,ll,nn) : 0.0_rt;
};

for (int iz=0; iz < slen_local.z; ++iz){
for (int iy=0; iy < slen_local.y; ++iy){
for (int ix=0; ix < slen_local.x; ++ix){
Real sss = sx[ix];
d += sss*( src_zeropad(i-ix,j,k,scomp+n)
+src_zeropad(i+ix,j,k,scomp+n));
for (int i2=0; i2 < slen_local.z; ++i2){
for (int i1=0; i1 < slen_local.y; ++i1){
for (int i0=0; i0 < slen_local.x; ++i0){
Real sss = s0[i0];
d += sss*( src_zeropad(i-i0,j,k,scomp+n)
+src_zeropad(i+i0,j,k,scomp+n));
}
}
}
Expand Down Expand Up @@ -274,9 +274,9 @@ void Filter::DoFilter (const Box& tbx,
const auto hi = amrex::ubound(tbx);
// tmp and dst are of type Array4 (Fortran ordering)
AMREX_D_TERM(
amrex::Real const* AMREX_RESTRICT sx = stencil_x.data();,
amrex::Real const* AMREX_RESTRICT sy = stencil_y.data();,
amrex::Real const* AMREX_RESTRICT sz = stencil_z.data();
amrex::Real const* AMREX_RESTRICT s0 = m_stencil_0.data();,
amrex::Real const* AMREX_RESTRICT s1 = m_stencil_1.data();,
amrex::Real const* AMREX_RESTRICT s2 = m_stencil_2.data();
)
for (int n = 0; n < ncomp; ++n) {
// Set dst value to 0.
Expand All @@ -288,32 +288,32 @@ void Filter::DoFilter (const Box& tbx,
}
}
// 3 nested loops over the 3D stencil
for (int iz=0; iz < slen.z; ++iz){
for (int iy=0; iy < slen.y; ++iy){
for (int ix=0; ix < slen.x; ++ix){
const Real sss = AMREX_D_TERM(sx[ix], *sy[iy], *sz[iz]);
for (int i2=0; i2 < slen.z; ++i2){
for (int i1=0; i1 < slen.y; ++i1){
for (int i0=0; i0 < slen.x; ++i0){
const Real sss = AMREX_D_TERM(s0[i0], *s1[i1], *s2[i2]);
// 3 nested loops over the 3D array
for (int k = lo.z; k <= hi.z; ++k) {
for (int j = lo.y; j <= hi.y; ++j) {
AMREX_PRAGMA_SIMD
for (int i = lo.x; i <= hi.x; ++i) {
#if AMREX_SPACEDIM == 3
dst(i,j,k,dcomp+n) += sss*(tmp(i-ix,j-iy,k-iz,scomp+n)
+tmp(i+ix,j-iy,k-iz,scomp+n)
+tmp(i-ix,j+iy,k-iz,scomp+n)
+tmp(i+ix,j+iy,k-iz,scomp+n)
+tmp(i-ix,j-iy,k+iz,scomp+n)
+tmp(i+ix,j-iy,k+iz,scomp+n)
+tmp(i-ix,j+iy,k+iz,scomp+n)
+tmp(i+ix,j+iy,k+iz,scomp+n));
dst(i,j,k,dcomp+n) += sss*(tmp(i-i0,j-i1,k-i2,scomp+n)
+tmp(i+i0,j-i1,k-i2,scomp+n)
+tmp(i-i0,j+i1,k-i2,scomp+n)
+tmp(i+i0,j+i1,k-i2,scomp+n)
+tmp(i-i0,j-i1,k+i2,scomp+n)
+tmp(i+i0,j-i1,k+i2,scomp+n)
+tmp(i-i0,j+i1,k+i2,scomp+n)
+tmp(i+i0,j+i1,k+i2,scomp+n));
#elif AMREX_SPACEDIM == 2
dst(i,j,k,dcomp+n) += sss*(tmp(i-ix,j-iy,k,scomp+n)
+tmp(i+ix,j-iy,k,scomp+n)
+tmp(i-ix,j+iy,k,scomp+n)
+tmp(i+ix,j+iy,k,scomp+n));
dst(i,j,k,dcomp+n) += sss*(tmp(i-i0,j-i1,k,scomp+n)
+tmp(i+i0,j-i1,k,scomp+n)
+tmp(i-i0,j+i1,k,scomp+n)
+tmp(i+i0,j+i1,k,scomp+n));
#elif AMREX_SPACEDIM == 1
dst(i,j,k,dcomp+n) += sss*(tmp(i-ix,j,k,scomp+n)
+tmp(i+ix,j,k,scomp+n));
dst(i,j,k,dcomp+n) += sss*(tmp(i-i0,j,k,scomp+n)
+tmp(i+i0,j,k,scomp+n));
#endif
}
}
Expand Down
18 changes: 9 additions & 9 deletions Source/Filter/NCIGodfreyFilter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,17 +121,17 @@ void NCIGodfreyFilter::ComputeStencils()
# endif
h_stencil_z[0] /= 2._rt;

stencil_x.resize(h_stencil_x.size());
Gpu::copyAsync(Gpu::hostToDevice,h_stencil_x.begin(),h_stencil_x.end(),stencil_x.begin());
m_stencil_0.resize(h_stencil_x.size());
Gpu::copyAsync(Gpu::hostToDevice,h_stencil_x.begin(),h_stencil_x.end(),m_stencil_0.begin());
# if defined(WARPX_DIM_3D)
stencil_y.resize(h_stencil_y.size());
stencil_z.resize(h_stencil_z.size());
Gpu::copyAsync(Gpu::hostToDevice,h_stencil_y.begin(),h_stencil_y.end(),stencil_y.begin());
Gpu::copyAsync(Gpu::hostToDevice,h_stencil_z.begin(),h_stencil_z.end(),stencil_z.begin());
m_stencil_1.resize(h_stencil_y.size());
m_stencil_2.resize(h_stencil_z.size());
Gpu::copyAsync(Gpu::hostToDevice,h_stencil_y.begin(),h_stencil_y.end(),m_stencil_1.begin());
Gpu::copyAsync(Gpu::hostToDevice,h_stencil_z.begin(),h_stencil_z.end(),m_stencil_2.begin());
# elif (AMREX_SPACEDIM == 2)
// In 2D, the filter applies stencil_y to the 2nd dimension
stencil_y.resize(h_stencil_z.size());
Gpu::copyAsync(Gpu::hostToDevice,h_stencil_z.begin(),h_stencil_z.end(),stencil_y.begin());
// In 2D, the filter applies m_stencil_1 to the 2nd dimension
m_stencil_1.resize(h_stencil_z.size());
Gpu::copyAsync(Gpu::hostToDevice,h_stencil_z.begin(),h_stencil_z.end(),m_stencil_1.begin());
# endif

Gpu::synchronize();
Expand Down

0 comments on commit ec08c22

Please sign in to comment.