diff --git a/Source/Filter/BilinearFilter.cpp b/Source/Filter/BilinearFilter.cpp index 470d43cf415..66976045943 100644 --- a/Source/Filter/BilinearFilter.cpp +++ b/Source/Filter/BilinearFilter.cpp @@ -67,15 +67,15 @@ void BilinearFilter::ComputeStencils(){ stencil_length_each_dir[i++] = static_cast(el) + 1; } - stencil_x.resize( 1u + npass_each_dir[0] ); - compute_stencil(stencil_x, npass_each_dir[0]); + m_stencil_0.resize( 1u + npass_each_dir[0] ); + compute_stencil(m_stencil_0, npass_each_dir[0]); #if defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ) || defined(WARPX_DIM_3D) - stencil_y.resize( 1u + npass_each_dir[1] ); - compute_stencil(stencil_y, npass_each_dir[1]); + m_stencil_1.resize( 1u + npass_each_dir[1] ); + compute_stencil(m_stencil_1, npass_each_dir[1]); #endif #if defined(WARPX_DIM_3D) - stencil_z.resize( 1u + npass_each_dir[2] ); - compute_stencil(stencil_z, npass_each_dir[2]); + m_stencil_2.resize( 1u + npass_each_dir[2] ); + compute_stencil(m_stencil_2, npass_each_dir[2]); #endif slen = stencil_length_each_dir.dim3(); diff --git a/Source/Filter/Filter.H b/Source/Filter/Filter.H index 90afbcae11d..584f6b151d7 100644 --- a/Source/Filter/Filter.H +++ b/Source/Filter/Filter.H @@ -41,7 +41,7 @@ public: protected: // Stencil along each direction. 
- amrex::Gpu::DeviceVector stencil_x, stencil_y, stencil_z; + amrex::Gpu::DeviceVector m_stencil_0, m_stencil_1, m_stencil_2; // Length of each stencil, 1 for dimensions not included amrex::Dim3 slen; diff --git a/Source/Filter/Filter.cpp b/Source/Filter/Filter.cpp index 7ea0003e291..40dfc54f8cb 100644 --- a/Source/Filter/Filter.cpp +++ b/Source/Filter/Filter.cpp @@ -95,9 +95,9 @@ void Filter::DoFilter (const Box& tbx, int scomp, int dcomp, int ncomp) { AMREX_D_TERM( - amrex::Real const* AMREX_RESTRICT sx = stencil_x.data();, - amrex::Real const* AMREX_RESTRICT sy = stencil_y.data();, - amrex::Real const* AMREX_RESTRICT sz = stencil_z.data(); + amrex::Real const* AMREX_RESTRICT s0 = m_stencil_0.data();, + amrex::Real const* AMREX_RESTRICT s1 = m_stencil_1.data();, + amrex::Real const* AMREX_RESTRICT s2 = m_stencil_2.data(); ) Dim3 slen_local = slen; @@ -113,18 +113,18 @@ void Filter::DoFilter (const Box& tbx, return src.contains(jj,kk,ll) ? src(jj,kk,ll,nn) : 0.0_rt; }; - for (int iz=0; iz < slen_local.z; ++iz){ - for (int iy=0; iy < slen_local.y; ++iy){ - for (int ix=0; ix < slen_local.x; ++ix){ - Real sss = sx[ix]*sy[iy]*sz[iz]; - d += sss*( src_zeropad(i-ix,j-iy,k-iz,scomp+n) - +src_zeropad(i+ix,j-iy,k-iz,scomp+n) - +src_zeropad(i-ix,j+iy,k-iz,scomp+n) - +src_zeropad(i+ix,j+iy,k-iz,scomp+n) - +src_zeropad(i-ix,j-iy,k+iz,scomp+n) - +src_zeropad(i+ix,j-iy,k+iz,scomp+n) - +src_zeropad(i-ix,j+iy,k+iz,scomp+n) - +src_zeropad(i+ix,j+iy,k+iz,scomp+n)); + for (int i2=0; i2 < slen_local.z; ++i2){ + for (int i1=0; i1 < slen_local.y; ++i1){ + for (int i0=0; i0 < slen_local.x; ++i0){ + Real sss = s0[i0]*s1[i1]*s2[i2]; + d += sss*( src_zeropad(i-i0,j-i1,k-i2,scomp+n) + +src_zeropad(i+i0,j-i1,k-i2,scomp+n) + +src_zeropad(i-i0,j+i1,k-i2,scomp+n) + +src_zeropad(i+i0,j+i1,k-i2,scomp+n) + +src_zeropad(i-i0,j-i1,k+i2,scomp+n) + +src_zeropad(i+i0,j-i1,k+i2,scomp+n) + +src_zeropad(i-i0,j+i1,k+i2,scomp+n) + +src_zeropad(i+i0,j+i1,k+i2,scomp+n)); } } } @@ -143,14 +143,14 @@ void 
Filter::DoFilter (const Box& tbx, return src.contains(jj,kk,ll) ? src(jj,kk,ll,nn) : 0.0_rt; }; - for (int iz=0; iz < slen_local.z; ++iz){ - for (int iy=0; iy < slen_local.y; ++iy){ - for (int ix=0; ix < slen_local.x; ++ix){ - Real sss = sx[ix]*sy[iy]; - d += sss*( src_zeropad(i-ix,j-iy,k,scomp+n) - +src_zeropad(i+ix,j-iy,k,scomp+n) - +src_zeropad(i-ix,j+iy,k,scomp+n) - +src_zeropad(i+ix,j+iy,k,scomp+n)); + for (int i2=0; i2 < slen_local.z; ++i2){ + for (int i1=0; i1 < slen_local.y; ++i1){ + for (int i0=0; i0 < slen_local.x; ++i0){ + Real sss = s0[i0]*s1[i1]; + d += sss*( src_zeropad(i-i0,j-i1,k,scomp+n) + +src_zeropad(i+i0,j-i1,k,scomp+n) + +src_zeropad(i-i0,j+i1,k,scomp+n) + +src_zeropad(i+i0,j+i1,k,scomp+n)); } } } @@ -169,12 +169,12 @@ void Filter::DoFilter (const Box& tbx, return src.contains(jj,kk,ll) ? src(jj,kk,ll,nn) : 0.0_rt; }; - for (int iz=0; iz < slen_local.z; ++iz){ - for (int iy=0; iy < slen_local.y; ++iy){ - for (int ix=0; ix < slen_local.x; ++ix){ - Real sss = sx[ix]; - d += sss*( src_zeropad(i-ix,j,k,scomp+n) - +src_zeropad(i+ix,j,k,scomp+n)); + for (int i2=0; i2 < slen_local.z; ++i2){ + for (int i1=0; i1 < slen_local.y; ++i1){ + for (int i0=0; i0 < slen_local.x; ++i0){ + Real sss = s0[i0]; + d += sss*( src_zeropad(i-i0,j,k,scomp+n) + +src_zeropad(i+i0,j,k,scomp+n)); } } } @@ -274,9 +274,9 @@ void Filter::DoFilter (const Box& tbx, const auto hi = amrex::ubound(tbx); // tmp and dst are of type Array4 (Fortran ordering) AMREX_D_TERM( - amrex::Real const* AMREX_RESTRICT sx = stencil_x.data();, - amrex::Real const* AMREX_RESTRICT sy = stencil_y.data();, - amrex::Real const* AMREX_RESTRICT sz = stencil_z.data(); + amrex::Real const* AMREX_RESTRICT s0 = m_stencil_0.data();, + amrex::Real const* AMREX_RESTRICT s1 = m_stencil_1.data();, + amrex::Real const* AMREX_RESTRICT s2 = m_stencil_2.data(); ) for (int n = 0; n < ncomp; ++n) { // Set dst value to 0. 
@@ -288,32 +288,32 @@ void Filter::DoFilter (const Box& tbx, } } // 3 nested loop on 3D stencil - for (int iz=0; iz < slen.z; ++iz){ - for (int iy=0; iy < slen.y; ++iy){ - for (int ix=0; ix < slen.x; ++ix){ - const Real sss = AMREX_D_TERM(sx[ix], *sy[iy], *sz[iz]); + for (int i2=0; i2 < slen.z; ++i2){ + for (int i1=0; i1 < slen.y; ++i1){ + for (int i0=0; i0 < slen.x; ++i0){ + const Real sss = AMREX_D_TERM(s0[i0], *s1[i1], *s2[i2]); // 3 nested loop on 3D array for (int k = lo.z; k <= hi.z; ++k) { for (int j = lo.y; j <= hi.y; ++j) { AMREX_PRAGMA_SIMD for (int i = lo.x; i <= hi.x; ++i) { #if AMREX_SPACEDIM == 3 - dst(i,j,k,dcomp+n) += sss*(tmp(i-ix,j-iy,k-iz,scomp+n) - +tmp(i+ix,j-iy,k-iz,scomp+n) - +tmp(i-ix,j+iy,k-iz,scomp+n) - +tmp(i+ix,j+iy,k-iz,scomp+n) - +tmp(i-ix,j-iy,k+iz,scomp+n) - +tmp(i+ix,j-iy,k+iz,scomp+n) - +tmp(i-ix,j+iy,k+iz,scomp+n) - +tmp(i+ix,j+iy,k+iz,scomp+n)); + dst(i,j,k,dcomp+n) += sss*(tmp(i-i0,j-i1,k-i2,scomp+n) + +tmp(i+i0,j-i1,k-i2,scomp+n) + +tmp(i-i0,j+i1,k-i2,scomp+n) + +tmp(i+i0,j+i1,k-i2,scomp+n) + +tmp(i-i0,j-i1,k+i2,scomp+n) + +tmp(i+i0,j-i1,k+i2,scomp+n) + +tmp(i-i0,j+i1,k+i2,scomp+n) + +tmp(i+i0,j+i1,k+i2,scomp+n)); #elif AMREX_SPACEDIM == 2 - dst(i,j,k,dcomp+n) += sss*(tmp(i-ix,j-iy,k,scomp+n) - +tmp(i+ix,j-iy,k,scomp+n) - +tmp(i-ix,j+iy,k,scomp+n) - +tmp(i+ix,j+iy,k,scomp+n)); + dst(i,j,k,dcomp+n) += sss*(tmp(i-i0,j-i1,k,scomp+n) + +tmp(i+i0,j-i1,k,scomp+n) + +tmp(i-i0,j+i1,k,scomp+n) + +tmp(i+i0,j+i1,k,scomp+n)); #elif AMREX_SPACEDIM == 1 - dst(i,j,k,dcomp+n) += sss*(tmp(i-ix,j,k,scomp+n) - +tmp(i+ix,j,k,scomp+n)); + dst(i,j,k,dcomp+n) += sss*(tmp(i-i0,j,k,scomp+n) + +tmp(i+i0,j,k,scomp+n)); #endif } } diff --git a/Source/Filter/NCIGodfreyFilter.cpp b/Source/Filter/NCIGodfreyFilter.cpp index ad298202775..a73efb0ec64 100644 --- a/Source/Filter/NCIGodfreyFilter.cpp +++ b/Source/Filter/NCIGodfreyFilter.cpp @@ -121,17 +121,17 @@ void NCIGodfreyFilter::ComputeStencils() # endif h_stencil_z[0] /= 2._rt; - 
stencil_x.resize(h_stencil_x.size()); - Gpu::copyAsync(Gpu::hostToDevice,h_stencil_x.begin(),h_stencil_x.end(),stencil_x.begin()); + m_stencil_0.resize(h_stencil_x.size()); + Gpu::copyAsync(Gpu::hostToDevice,h_stencil_x.begin(),h_stencil_x.end(),m_stencil_0.begin()); # if defined(WARPX_DIM_3D) - stencil_y.resize(h_stencil_y.size()); - stencil_z.resize(h_stencil_z.size()); - Gpu::copyAsync(Gpu::hostToDevice,h_stencil_y.begin(),h_stencil_y.end(),stencil_y.begin()); - Gpu::copyAsync(Gpu::hostToDevice,h_stencil_z.begin(),h_stencil_z.end(),stencil_z.begin()); + m_stencil_1.resize(h_stencil_y.size()); + m_stencil_2.resize(h_stencil_z.size()); + Gpu::copyAsync(Gpu::hostToDevice,h_stencil_y.begin(),h_stencil_y.end(),m_stencil_1.begin()); + Gpu::copyAsync(Gpu::hostToDevice,h_stencil_z.begin(),h_stencil_z.end(),m_stencil_2.begin()); # elif (AMREX_SPACEDIM == 2) - // In 2D, the filter applies stencil_y to the 2nd dimension - stencil_y.resize(h_stencil_z.size()); - Gpu::copyAsync(Gpu::hostToDevice,h_stencil_z.begin(),h_stencil_z.end(),stencil_y.begin()); + // In 2D, the filter applies m_stencil_1 to the 2nd dimension + m_stencil_1.resize(h_stencil_z.size()); + Gpu::copyAsync(Gpu::hostToDevice,h_stencil_z.begin(),h_stencil_z.end(),m_stencil_1.begin()); # endif Gpu::synchronize();