From e88b832fac30a12aa9596c3085fe29935375c4c2 Mon Sep 17 00:00:00 2001 From: PhilipDeegan Date: Sat, 2 Nov 2024 15:26:46 +0100 Subject: [PATCH] ++ --- mkn.sh | 10 +-- .../particles/arrays/particle_array_pc.hpp | 81 +++++++++++++------ .../particles/arrays/particle_array_soa.hpp | 8 +- .../arrays/particle_array_soa_thrust.hpp | 36 +++++---- src/core/def/thrust.hpp | 11 ++- .../numerics/interpolator/interpolating.hpp | 4 +- src/core/numerics/pusher/boris_simpler.hpp | 5 +- src/core/numerics/pusher/multi_boris.hpp | 44 ++-------- ...{multi_boris.ok.hpp => multi_boris.no.hpp} | 52 ++++++++++-- .../particles/test_particles_selecting.cpp | 3 +- .../ion_updater/test_multi_updater.cpp | 2 +- .../ion_updater/test_updater_pp_main.cpp | 58 +++++++------ tools/mkn_profile.py | 26 +++--- 13 files changed, 194 insertions(+), 146 deletions(-) rename src/core/numerics/pusher/{multi_boris.ok.hpp => multi_boris.no.hpp} (80%) diff --git a/mkn.sh b/mkn.sh index fbc7771f6..a5e935a84 100755 --- a/mkn.sh +++ b/mkn.sh @@ -8,8 +8,11 @@ cls set -e TEST="-M tests/core/numerics/ion_updater/test_multi_updater.cpp" -ARGS="${TEST} -P mkn.base=gpu_ -x " -[ -d /opt/rocm/bin ] && ARGS+="res/mkn/hip" || ARGS+="res/mkn/clang_cuda " +XFILE="${XFILE:-res/mkn/clang_cuda}" +[ -d /opt/rocm/bin ] && XFILE="res/mkn/hip" + +ARGS="${TEST} -P mkn.base=gpu_" +[ -n "XFILE" ] && ARGS+=" -x ${XFILE}" set -x @@ -22,6 +25,3 @@ set -x mkn clean build -p test_core ${ARGS} $@ ) #1> >(tee $CWD/.mkn.sh.out ) 2> >(tee $CWD/.mkn.sh.err >&2 ) - -exit 0 # comment out to do soak test - diff --git a/src/core/data/particles/arrays/particle_array_pc.hpp b/src/core/data/particles/arrays/particle_array_pc.hpp index 1058a2f6e..77b9870f5 100644 --- a/src/core/data/particles/arrays/particle_array_pc.hpp +++ b/src/core/data/particles/arrays/particle_array_pc.hpp @@ -432,6 +432,10 @@ class PerCellVector { on_box_list(local_box().remove(shrink(local_box(box()), 1)), fn); }; + void on_ghost_layer_plus_2_domain(auto&& fn) const + { + on_box_list(local_box().remove(shrink(local_box(box()), 2)), fn); + }; }; // PerCellVector @@ -514,7 +518,7 @@ auto& PerCellVector::reserve_ppc(std::size_t const& ppc) if constexpr (type == ParticleType::Ghost) { - on_ghost_layer_plus_1_domain([&](auto const& bix) { + on_ghost_layer_plus_2_domain([&](auto const& bix) { particles_(bix).reserve(additional); reserve(gaps_(bix), additional); }); @@ -683,21 +687,31 @@ void PerCellVector::reset_index_wrapper_map() { resize(p2c_, total_size); + auto const fill = [](auto p, auto o, auto s, auto b) { std::fill(p + o, p + o + s, *b); }; std::size_t offset = 0; for (auto const& bix : local_box()) { auto const& cs = cell_size_(bix); - if (cs == 0) - continue; - resize(gaps_(bix), cs); off_sets_(bix) = offset; - if constexpr (alloc_mode == AllocatorMode::GPU_UNIFIED) - thrust::fill(thrust::device, p2c_.begin() + offset, p2c_.begin() + offset + cs, *bix); - else - std::fill(p2c_.begin() + offset, p2c_.begin() + offset + cs, *bix); + if (cs) + { + if constexpr (alloc_mode == AllocatorMode::GPU_UNIFIED) + { + PHARE_WITH_THRUST( // + thrust::fill(thrust::device, p2c_.begin() + offset, p2c_.begin() + offset + cs, + *bix)); + PHARE_WITH_THRUST_ELSE( + PHARE_LOG_LINE_SS("Thrust not found for PerCellVector::reset_index_wrapper_map"); // + fill(p2c_.begin(), offset, cs, bix); // + ) + } + else + fill(p2c_.begin(), offset, cs, bix); + } offset += cs; cap_(bix) = particles_(bix).capacity(); @@ -867,11 +881,12 @@ struct PerCellParticles : public Super_ using Op = Operators; // printf("L:%d i %llu ic %u,%u change \n", __LINE__, idx, cell[0], cell[1]); + Super::gaps_(cell)[Op{Super::gap_idx_(cell)}.increment_return_old()] = idx; + if (isIn(newcell, Super::ghost_box())) { auto const nc = Super::local_cell(newcell); Op{Super::add_into_(nc)}.increment_return_old(); - Super::gaps_(cell)[Op{Super::gap_idx_(cell)}.increment_return_old()] = idx; } } else @@ -1005,25 +1020,30 @@ struct PerCellParticles::iterator_impl template struct index_wrapper_storage; +#if PHARE_HAVE_THRUST template struct index_wrapper_storage { bool static constexpr is_const = std::is_const_v>; using per_cell_particles = typename std::decay_t::per_cell_particles; - using Particle_t = detail::SoAZipParticle; + using Particle_t = typename SoAZipParticle_t::value_type; - index_wrapper_storage(per_cell_particles* p, std::size_t const i) _PHARE_ALL_FN_ - : particles{p}, + template + index_wrapper_storage(PerCellParticles_t p, std::size_t const i) _PHARE_ALL_FN_ + : /*particles{p},*/ particle{*p, i} { } - auto& deref() _PHARE_ALL_FN_ { return particle; } - // auto& reset(std::size_t const i) { return particle = detail::SoAZipParticle(*particles, i); } + auto& operator*() _PHARE_ALL_FN_ { return particle; } + auto& operator*() const _PHARE_ALL_FN_ { return particle; } per_cell_particles* particles; Particle_t particle; }; +#else + +#endif // PHARE_HAVE_THRUST template struct index_wrapper_storage @@ -1033,16 +1053,17 @@ struct index_wrapper_storage using Particle_t = typename Particles::Particle_t; using Particle_p = std::conditional_t; - index_wrapper_storage(per_cell_particles* p, std::size_t const i) _PHARE_ALL_FN_ - : particles{p}, + template + index_wrapper_storage(PerCellParticles_t p, std::size_t const i) _PHARE_ALL_FN_ + : /*particles{p},*/ particle{&p->data()[i]} { } - auto& deref() _PHARE_ALL_FN_ { return *particle; } - // auto& reset(std::size_t const i) { return particle = p.data()[i]; } + auto& operator*() _PHARE_ALL_FN_ { return *particle; } + auto& operator*() const _PHARE_ALL_FN_ { return *particle; } - per_cell_particles* particles; + // per_cell_particles* particles; Particle_p particle; }; @@ -1065,7 +1086,6 @@ struct PerCellParticles::index_wrapper : public index_wrapper_su { using outer_t = std::decay_t; using Super = typename index_wrapper_super::value_type; - using Super::deref; auto static constexpr dimension = ParticlesSuper::dimension; bool static constexpr is_const = std::is_const_v>; @@ -1078,11 +1098,11 @@ struct PerCellParticles::index_wrapper : public index_wrapper_su pc_particles_ptr{pc_particles}, idx{idx_} { - PHARE_ASSERT(deref().iCell()[0] > -10 and deref().iCell()[0] < 1000); // bad memory + PHARE_ASSERT((**this).iCell()[0] > -10 and (**this).iCell()[0] < 1000); // bad memory if constexpr (dimension > 1) - PHARE_ASSERT(deref().iCell()[1] > -10 and deref().iCell()[1] < 1000); // bad memory + PHARE_ASSERT((**this).iCell()[1] > -10 and (**this).iCell()[1] < 1000); // bad memory if constexpr (dimension > 2) - PHARE_ASSERT(deref().iCell()[2] > -10 and deref().iCell()[2] < 1000); // bad memory + PHARE_ASSERT((**this).iCell()[2] > -10 and (**this).iCell()[2] < 1000); // bad memory } auto& c() const _PHARE_ALL_FN_ { return cell(pc_particles_ptr, idx); } @@ -1096,7 +1116,20 @@ struct PerCellParticles::index_wrapper : public index_wrapper_su auto icell_changer(std::array const& newcell) _PHARE_ALL_FN_ { - pc_particles_ptr->icell_changer(deref(), c(), i(), newcell); + pc_particles_ptr->icell_changer(**this, c(), i(), newcell); + } + + + Super& super() _PHARE_ALL_FN_ { return *this; } + Super const& super() const _PHARE_ALL_FN_ { return *this; } + + auto& operator*() _PHARE_ALL_FN_ { return *super(); } + auto& operator*() const _PHARE_ALL_FN_ { return *super(); } + + Particle copy() const _PHARE_ALL_FN_ + { + return {(**this).weight(), (**this).charge(), (**this).iCell(), (**this).delta(), + (**this).v()}; } diff --git a/src/core/data/particles/arrays/particle_array_soa.hpp b/src/core/data/particles/arrays/particle_array_soa.hpp index 8963c0b56..da53dd40f 100644 --- a/src/core/data/particles/arrays/particle_array_soa.hpp +++ b/src/core/data/particles/arrays/particle_array_soa.hpp @@ -518,11 +518,11 @@ class SoAParticles : public Super_ auto operator[](std::size_t const& s) const _PHARE_ALL_FN_ { -#if __has_include() - return detail::SoAZipConstParticle(*this, s); -#else + // #if __has_include() + // return detail::SoAZipConstParticle(*this, s); + // #else return copy(s); -#endif // __has_include() + // #endif // __has_include() } }; diff --git a/src/core/data/particles/arrays/particle_array_soa_thrust.hpp b/src/core/data/particles/arrays/particle_array_soa_thrust.hpp index 07d01d73c..f891a1ca0 100644 --- a/src/core/data/particles/arrays/particle_array_soa_thrust.hpp +++ b/src/core/data/particles/arrays/particle_array_soa_thrust.hpp @@ -156,40 +156,48 @@ struct SoAZipConstParticle std::declval()))>; SoAZipConstParticle(SoAParticles_t& ps, std::size_t const& i) _PHARE_ALL_FN_ - : it{SoAIteratorAdaptor::make(ps, i)}, - ref{weight(), charge(), iCell(), delta(), v()} + : it{SoAIteratorAdaptor::make(ps, i)} /*, + ref{weight(), charge(), iCell(), delta(), v()}*/ { } - auto& charge() _PHARE_ALL_FN_ { return SoAIteratorAdaptor::charge(*it); } + auto& charge() const _PHARE_ALL_FN_ { return SoAIteratorAdaptor::charge(*it); } - auto& weight() _PHARE_ALL_FN_ { return SoAIteratorAdaptor::weight(*it); } auto& weight() const _PHARE_ALL_FN_ { return SoAIteratorAdaptor::weight(*it); } - - auto& iCell() _PHARE_ALL_FN_ { return SoAIteratorAdaptor::iCell(*it); } auto& iCell() const _PHARE_ALL_FN_ { return SoAIteratorAdaptor::iCell(*it); } - auto& delta() _PHARE_ALL_FN_ { return SoAIteratorAdaptor::delta(*it); } auto& delta() const _PHARE_ALL_FN_ { return SoAIteratorAdaptor::delta(*it); } - - - auto& v() _PHARE_ALL_FN_ { return SoAIteratorAdaptor::v(*it); } auto& v() const _PHARE_ALL_FN_ { return SoAIteratorAdaptor::v(*it); } - auto& operator*() _PHARE_ALL_FN_ { return ref; } - auto& operator*() const _PHARE_ALL_FN_ { return ref; } + // auto& operator*() _PHARE_ALL_FN_ { return ref; } + // auto& operator*() const _PHARE_ALL_FN_ { return ref; } Iterator it; - SoAParticle_crt ref; + // SoAParticle_crt ref; }; - } // namespace PHARE::core::detail namespace PHARE::core { +template +struct SoAZipParticle_t +{ + bool static constexpr is_const + = _is_const || std::is_const_v>; + + using value_type = std::conditional_t, + detail::SoAZipParticle>; +}; + +template +auto particle_zip_iterator(Particles& ps, std::size_t const i) +{ + return typename SoAZipParticle_t::value_type{ps, i}; +} + template auto partitionner(detail::SoAIteratorAdaptor& begin, detail::SoAIteratorAdaptor& end, Box const& box) diff --git a/src/core/def/thrust.hpp b/src/core/def/thrust.hpp index d2b95e4a9..b15828ebc 100644 --- a/src/core/def/thrust.hpp +++ b/src/core/def/thrust.hpp @@ -3,23 +3,28 @@ #if __has_include() -#include #define PHARE_HAVE_THRUST 1 #define PHARE_WITH_THRUST(...) __VA_ARGS__ +#define PHARE_WITH_THRUST_ELSE(...) #define PHARE_WITH_THRUST_ELSE_THROW(...) __VA_ARGS__ -#else // !__has_include(...) +#else // !__has_include() #define PHARE_HAVE_THRUST 0 #define PHARE_WITH_THRUST(...) +#define PHARE_WITH_THRUST_ELSE(...) __VA_ARGS__ #define PHARE_WITH_THRUST_ELSE_THROW(...) throw std::runtime_error("Thrust not found!"); -#endif // __has_include(...) +#endif // __has_include() + #if PHARE_HAVE_THRUST + #include #include +#include + #endif // PHARE_HAVE_THRUST #endif /* PHARE_CORE_DEF_THRUST_HPP */ diff --git a/src/core/numerics/interpolator/interpolating.hpp b/src/core/numerics/interpolator/interpolating.hpp index 64e59db10..d06296b77 100644 --- a/src/core/numerics/interpolator/interpolating.hpp +++ b/src/core/numerics/interpolator/interpolating.hpp @@ -49,8 +49,8 @@ class Interpolating static_assert(atomic_ops, "GPU must be atomic"); PHARE_WITH_MKN_GPU( mkn::gpu::GDLauncher{particles.size()}([=] _PHARE_ALL_FN_() mutable { - Interpolator_t{}.particleToMesh(particles[mkn::gpu::idx()], density, flux, - layout, coef); + auto it = particles[mkn::gpu::idx()]; + Interpolator_t{}.particleToMesh(*it, density, flux, layout, coef); }); // ) } diff --git a/src/core/numerics/pusher/boris_simpler.hpp b/src/core/numerics/pusher/boris_simpler.hpp index 054051054..2f9d4da3f 100644 --- a/src/core/numerics/pusher/boris_simpler.hpp +++ b/src/core/numerics/pusher/boris_simpler.hpp @@ -276,13 +276,14 @@ class SimpleBorisPusher auto per_particle = [=] _PHARE_ALL_FN_() mutable { Interpolator interp; - auto particle = view[mkn::gpu::idx()]; + auto it = view[mkn::gpu::idx()]; + auto& particle = *it; if constexpr (accelerate) boris_accelerate(particle, interp.m2p(particle, em, layout), dto2m_); auto const& newCell = advancePosition_(particle, halfDtOverDl); if (!array_equals(newCell, particle.iCell())) { - particle.icell_changer(newCell); + it.icell_changer(newCell); particle.iCell() = newCell; } }; diff --git a/src/core/numerics/pusher/multi_boris.hpp b/src/core/numerics/pusher/multi_boris.hpp index ec63cc2ba..c8b397c07 100644 --- a/src/core/numerics/pusher/multi_boris.hpp +++ b/src/core/numerics/pusher/multi_boris.hpp @@ -29,6 +29,7 @@ #include "core/data/particles/arrays/particle_array_soa.hpp" + namespace PHARE::core::detail { auto static const multi_boris_threads = get_env_as("PHARE_ASYNC_THREADS", std::size_t{5}); @@ -54,11 +55,9 @@ struct MultiBoris using ParticleArray_v = typename ParticleArray_t::view_t; using Box_t = Box; using Boxes_t = std::vector; - // = decltype(*(*views[0].ions).getRunTimeResourcesViewList()[0].domainParticles()); - using Particles_ptrs = std::vector; - // using StreamLauncher = gpu::BoxStreamLauncher; - using StreamLauncher = gpu::ThreadedBoxStreamLauncher; - // using StreamLauncher = mkn::gpu::StreamLauncher<>; + using Particles_ptrs = std::vector; + using StreamLauncher = gpu::ThreadedBoxStreamLauncher; + static auto _particles(ModelViews& views) { @@ -212,42 +211,11 @@ class MultiBorisPusher } }; - auto per_ghost_particle = [=] _PHARE_DEV_FN_(auto const& i) mutable { - auto const& dto2m = dto2mspp[i]; - auto const& layout = layoutps[i]; - auto& view = pps[i]; - auto particle_iterator = view[mkn::gpu::idx()]; - auto& particle = particle_iterator.deref(); - auto const og_iCell = particle.iCell(); - - particle.iCell() = advancePosition_(particle, halfDtOverDl[i]); - { - Interpolator interp; - boris_accelerate(particle, interp.m2p(particle, emps[i], layout), dto2m); - } - particle.iCell() = advancePosition_(particle, halfDtOverDl[i]); - - if (!array_equals(particle.iCell(), og_iCell)) - view.icell_changer(particle, particle_iterator.c(), particle_iterator.i(), - particle.iCell()); - }; - auto& streamer = in.streamer; auto ip = ∈ // used in lambdas, copy address! NO REF! - // if constexpr (any_in(ParticleArray_v::layout_mode, LayoutMode::AoSPC, LayoutMode::SoAPC)) - streamer.host([ip = ip](auto const i) mutable { - if (ip->particles[i]->size() == 0 || ip->particle_type[i] == 0) - return; - ip->particles[i]->reset_index_wrapper_map(); - ip->particles[i]->reset_p2c(ip->pviews[i]); - }); - - streamer.async_dev_idx(3, 0, [=] _PHARE_DEV_FN_(auto const i) mutable { per_particle(i); }); - streamer.template async_dev_not_idx<2>( - 3, 0, [=] _PHARE_DEV_FN_(auto const i) mutable { per_ghost_particle(i); }); - + streamer.async_dev([=] _PHARE_DEV_FN_(auto const i) mutable { per_particle(i); }); streamer.host([ip = ip](auto const i) mutable { constexpr static std::uint32_t PHASE = 1; @@ -279,6 +247,4 @@ class MultiBorisPusher } // namespace PHARE::core - - #endif /* PHARE_CORE_PUSHER_MULTI_BORIS_2_HPP */ diff --git a/src/core/numerics/pusher/multi_boris.ok.hpp b/src/core/numerics/pusher/multi_boris.no.hpp similarity index 80% rename from src/core/numerics/pusher/multi_boris.ok.hpp rename to src/core/numerics/pusher/multi_boris.no.hpp index 70b1768b8..acb3e37ac 100644 --- a/src/core/numerics/pusher/multi_boris.ok.hpp +++ b/src/core/numerics/pusher/multi_boris.no.hpp @@ -28,6 +28,13 @@ #include "core/data/particles/arrays/particle_array_soa.hpp" + +namespace PHARE::core::detail +{ +auto static const multi_boris_threads = get_env_as("PHARE_ASYNC_THREADS", std::size_t{5}); + +} // namespace PHARE::core::detail + namespace PHARE::core { @@ -47,11 +54,9 @@ struct MultiBoris using ParticleArray_v = typename ParticleArray_t::view_t; using Box_t = Box; using Boxes_t = std::vector; - // = decltype(*(*views[0].ions).getRunTimeResourcesViewList()[0].domainParticles()); - using Particles_ptrs = std::vector; - // using StreamLauncher = gpu::BoxStreamLauncher; - using StreamLauncher = gpu::ThreadedBoxStreamLauncher; - // using StreamLauncher = mkn::gpu::StreamLauncher<>; + using Particles_ptrs = std::vector; + using StreamLauncher = gpu::ThreadedBoxStreamLauncher; + static auto _particles(ModelViews& views) { @@ -117,7 +122,7 @@ struct MultiBoris gpu::Vec_t> halfdt; gpu::Vec_t dto2ms; - StreamLauncher streamer{particles, boxes, 5}; + StreamLauncher streamer{particles, boxes, detail::multi_boris_threads}; auto static mesh(std::array const& ms, double const& ts) { @@ -205,11 +210,42 @@ class MultiBorisPusher } }; + auto per_ghost_particle = [=] _PHARE_DEV_FN_(auto const& i) mutable { + auto const& dto2m = dto2mspp[i]; + auto const& layout = layoutps[i]; + auto& view = pps[i]; + auto particle_iterator = view[mkn::gpu::idx()]; + auto& particle = *particle_iterator; + auto const og_iCell = particle.iCell(); + + particle.iCell() = advancePosition_(particle, halfDtOverDl[i]); + { + Interpolator interp; + boris_accelerate(particle, interp.m2p(particle, emps[i], layout), dto2m); + } + particle.iCell() = advancePosition_(particle, halfDtOverDl[i]); + + if (!array_equals(particle.iCell(), og_iCell)) + view.icell_changer(particle, particle_iterator.c(), particle_iterator.i(), + particle.iCell()); + }; + auto& streamer = in.streamer; auto ip = ∈ // used in lambdas, copy address! NO REF! - streamer.async_dev([=] _PHARE_DEV_FN_(auto const i) mutable { per_particle(i); }); + // if constexpr (any_in(ParticleArray_v::layout_mode, LayoutMode::AoSPC, LayoutMode::SoAPC)) + streamer.host([ip = ip](auto const i) mutable { + if (ip->particles[i]->size() == 0 || ip->particle_type[i] == 0) + return; + ip->particles[i]->reset_index_wrapper_map(); + ip->particles[i]->reset_p2c(ip->pviews[i]); + }); + + streamer.async_dev_idx(3, 0, [=] _PHARE_DEV_FN_(auto const i) mutable { per_particle(i); }); + streamer.template async_dev_not_idx<2>( + 3, 0, [=] _PHARE_DEV_FN_(auto const i) mutable { per_ghost_particle(i); }); + streamer.host([ip = ip](auto const i) mutable { constexpr static std::uint32_t PHASE = 1; @@ -241,4 +277,6 @@ class MultiBorisPusher } // namespace PHARE::core + + #endif /* PHARE_CORE_PUSHER_MULTI_BORIS_2_HPP */ diff --git a/tests/core/data/particles/test_particles_selecting.cpp b/tests/core/data/particles/test_particles_selecting.cpp index 5a953aefb..eb59bb17e 100644 --- a/tests/core/data/particles/test_particles_selecting.cpp +++ b/tests/core/data/particles/test_particles_selecting.cpp @@ -16,7 +16,7 @@ namespace PHARE::core { -auto static const bytes = get_env_as("PHARE_GPU_BYTES", std::uint64_t{8000000000}); +auto static const bytes = get_env_as("PHARE_GPU_BYTES", std::uint64_t{500000000}); auto static const cells = get_env_as("PHARE_CELLS", std::uint32_t{3}); auto static const ppc = get_env_as("PHARE_PPC", std::size_t{1}); bool static const premain = []() { @@ -30,7 +30,6 @@ bool static const premain = []() { PHARE_WITH_PHLOP( // PHARE_LOG_LINE_STR("cells: " << cells); // PHARE_LOG_LINE_STR("ppc : " << ppc); // - PHARE_LOG_LINE_STR("seed : " << seed); using namespace PHARE; // using namespace std::literals; diff --git a/tests/core/numerics/ion_updater/test_multi_updater.cpp b/tests/core/numerics/ion_updater/test_multi_updater.cpp index 901c23295..46973cebb 100644 --- a/tests/core/numerics/ion_updater/test_multi_updater.cpp +++ b/tests/core/numerics/ion_updater/test_multi_updater.cpp @@ -9,7 +9,7 @@ #include "core/numerics/ion_updater/ion_updater_def.hpp" -#define PHARE_UNDEF_ASSERT +// #define PHARE_UNDEF_ASSERT #define PHARE_SKIP_MPI_IN_CORE #include diff --git a/tests/core/numerics/ion_updater/test_updater_pp_main.cpp b/tests/core/numerics/ion_updater/test_updater_pp_main.cpp index 73ce13c40..f525620e8 100644 --- a/tests/core/numerics/ion_updater/test_updater_pp_main.cpp +++ b/tests/core/numerics/ion_updater/test_updater_pp_main.cpp @@ -28,7 +28,7 @@ void PrintTo(ParticleArray const& arr, std::ostream* os) // assert(arr.size()); *os << arr; } -auto static const bytes = 1024ull * 1024ull * 1024ull * 10; // == 10GB +auto static const bytes = get_env_as("PHARE_GPU_BYTES", std::uint64_t{500000000}); auto static const cells = get_env_as("PHARE_CELLS", std::uint32_t{3}); auto static const ppc = get_env_as("PHARE_PPC", std::size_t{3}); auto static const seed = get_env_as("PHARE_SEED", std::size_t{1012}); @@ -46,7 +46,7 @@ bool static const premain = []() { using namespace PHARE; // using namespace std::literals; if (auto e = core::get_env("PHARE_SCOPE_TIMING", "false"); e == "1" || e == "true") - phlop::ScopeTimerMan::INSTANCE() + phlop::threaded::ScopeTimerMan::INSTANCE() .file_name(".phare_times.0.txt") // .force_strings() // .headers("fn"s, "dim"s, "layout"s, "alloc"s, "storage"s, "time"s) @@ -143,8 +143,7 @@ auto from_ions(GridLayout_t const& layout, Ions const& from) auto& ions = *ions_p; EXPECT_EQ(ions.populations[0].particles.domain_particles.size(), 0); - auto _add_particles_from = [&](auto& src, auto& dst) - { + auto _add_particles_from = [&](auto& src, auto& dst) { ParticleArrayService::reserve_ppc_in(dst, ppc); add_particles_from(src, dst); }; @@ -275,35 +274,35 @@ struct IonUpdaterPPTest : public ::testing::Test // clang-format off using Permutations_t = testing::Types< // ! notice commas ! - // TestParam<1, LayoutMode::AoS> // 0 - // ,TestParam<1, LayoutMode::AoSPC> // 1 + TestParam<1, LayoutMode::AoS> // 0 + ,TestParam<1, LayoutMode::AoSPC> // 1 PHARE_WITH_MKN_GPU( - // ,TestParam<1, LayoutMode::SoA> // 2 - // ,TestParam<1, LayoutMode::AoS, AllocatorMode::GPU_UNIFIED> // 3 - // ,TestParam<1, LayoutMode::AoSPC, AllocatorMode::GPU_UNIFIED> // 4 - // ,TestParam<1, LayoutMode::AoSPC, AllocatorMode::GPU_UNIFIED, /*impl=*/1> // 5 - // ,TestParam<1, LayoutMode::AoSPC, AllocatorMode::GPU_UNIFIED, /*impl=*/2> - // ,TestParam<1, LayoutMode::SoA, AllocatorMode::GPU_UNIFIED> + ,TestParam<1, LayoutMode::SoA> // 2 + ,TestParam<1, LayoutMode::AoS, AllocatorMode::GPU_UNIFIED> // 3 + ,TestParam<1, LayoutMode::AoSPC, AllocatorMode::GPU_UNIFIED> // 4 + ,TestParam<1, LayoutMode::AoSPC, AllocatorMode::GPU_UNIFIED, /*impl=*/1> // 5 + ,TestParam<1, LayoutMode::AoSPC, AllocatorMode::GPU_UNIFIED, /*impl=*/2> + ,TestParam<1, LayoutMode::SoA, AllocatorMode::GPU_UNIFIED> ) - // ,TestParam<2, LayoutMode::AoS> - // ,TestParam<2, LayoutMode::AoSPC> + ,TestParam<2, LayoutMode::AoS> + ,TestParam<2, LayoutMode::AoSPC> PHARE_WITH_MKN_GPU( - // ,TestParam<2, LayoutMode::SoA> - // ,TestParam<2, LayoutMode::AoS, AllocatorMode::GPU_UNIFIED> - // ,TestParam<2, LayoutMode::AoSPC, AllocatorMode::GPU_UNIFIED> - // ,TestParam<2, LayoutMode::AoSPC, AllocatorMode::GPU_UNIFIED, /*impl=*/1> // 13 - // ,TestParam<2, LayoutMode::AoSPC, AllocatorMode::GPU_UNIFIED, /*impl=*/2> // 14 - // ,TestParam<2, LayoutMode::SoA, AllocatorMode::GPU_UNIFIED> + ,TestParam<2, LayoutMode::SoA> + ,TestParam<2, LayoutMode::AoS, AllocatorMode::GPU_UNIFIED> + ,TestParam<2, LayoutMode::AoSPC, AllocatorMode::GPU_UNIFIED> + ,TestParam<2, LayoutMode::AoSPC, AllocatorMode::GPU_UNIFIED, /*impl=*/1> // 13 + ,TestParam<2, LayoutMode::AoSPC, AllocatorMode::GPU_UNIFIED, /*impl=*/2> // 14 + ,TestParam<2, LayoutMode::SoA, AllocatorMode::GPU_UNIFIED> ) - // ,TestParam<3, LayoutMode::AoS> - // ,TestParam<3, LayoutMode::AoSPC> + ,TestParam<3, LayoutMode::AoS> + ,TestParam<3, LayoutMode::AoSPC> PHARE_WITH_MKN_GPU( - // ,TestParam<3, LayoutMode::SoA> - // ,TestParam<3, LayoutMode::AoS, AllocatorMode::GPU_UNIFIED> - // ,TestParam<3, LayoutMode::AoSPC, AllocatorMode::GPU_UNIFIED> - // ,TestParam<3, LayoutMode::AoSPC, AllocatorMode::GPU_UNIFIED, /*impl=*/1> // - /*,*/TestParam<3, LayoutMode::AoSPC, AllocatorMode::GPU_UNIFIED, /*impl=*/2> // 22 - // ,TestParam<3, LayoutMode::SoA, AllocatorMode::GPU_UNIFIED> + ,TestParam<3, LayoutMode::SoA> + ,TestParam<3, LayoutMode::AoS, AllocatorMode::GPU_UNIFIED> + ,TestParam<3, LayoutMode::AoSPC, AllocatorMode::GPU_UNIFIED> + ,TestParam<3, LayoutMode::AoSPC, AllocatorMode::GPU_UNIFIED, /*impl=*/1> // + ,TestParam<3, LayoutMode::AoSPC, AllocatorMode::GPU_UNIFIED, /*impl=*/2> // 22 + ,TestParam<3, LayoutMode::SoA, AllocatorMode::GPU_UNIFIED> ) >; // clang-format on @@ -324,9 +323,8 @@ TYPED_TEST(IonUpdaterPPTest, updater) int main(int argc, char** argv) { - // assert(phlop::ScopeTimerMan::INSTANCE().active); ::testing::InitGoogleTest(&argc, argv); auto r = RUN_ALL_TESTS(); - PHARE_WITH_PHLOP(phlop::ScopeTimerMan::reset()); + PHARE_WITH_PHLOP(phlop::threaded::ScopeTimerMan::reset()); return r; } diff --git a/tools/mkn_profile.py b/tools/mkn_profile.py index 6ca42ba8e..bdf02b56c 100644 --- a/tools/mkn_profile.py +++ b/tools/mkn_profile.py @@ -6,14 +6,10 @@ from pathlib import Path import shutil -PATCHES = [1, 2, 3, 4, 5, 10] -CELLS = [5, 6, 7, 8, 9, 10, 12, 15, 20] -PPC = [10, 20, 50, 100] - -PATCHES = [10] -CELLS = [5] -PPC = [5, 10] +PATCHES = [1] +CELLS = [10] +PPC = [50] permutables = [ ("patches", PATCHES), @@ -34,6 +30,15 @@ 7: "Deposit", } +fn_strings = { + 0: "Boris::move_domain", + 1: "sync", + 2: "Group_barrier", + 3: "Domain_insert", + 4: "Group_barrier", + 5: "Deposit", +} + def run_permutation(patches, cells, ppc): times_dir = f"{PHARE_ASYNC_TIMES}/{patches}/{cells}/{ppc}" @@ -77,11 +82,8 @@ def plot_fn(times, fn): import matplotlib.pyplot as plt x_axis = [0.1, 0.2, 0.3] - fig, ax = plt.subplots(figsize=(8.0, 8.0)) - fn_0_times = [] - for times_dir, times_per_type_tuple in times.items(): times_per_type, patches, cells, ppc = times_per_type_tuple for type_id, bits_list in times_per_type.items(): @@ -110,13 +112,11 @@ def plot_fn(times, fn): shadow=True, ) ax.set_title(fn_strings[fn]) - - plt.ylabel("nanoseconds") - ax.set_xticks(x_axis) ax.set_xticklabels( ["domain", "patchghost", "levelghost"], rotation="vertical", fontsize=18 ) + plt.ylabel("nanoseconds") fig.savefig(f"profile_plot.{fn}.png")