From 2ec9e350fb923f9939bb3932695d79f7ff0070aa Mon Sep 17 00:00:00 2001 From: haykh Date: Thu, 19 Feb 2026 17:12:45 -0500 Subject: [PATCH 1/3] mpi unit tests now include all other tests + fixed some MPI tests --- cmake/tests.cmake | 15 +- src/framework/tests/CMakeLists.txt | 22 ++- .../tests/{comm_mpi.cpp => comm-mpi.cpp} | 0 src/framework/tests/comm-nompi.cpp | 148 ++++++++++++++ src/framework/tests/parameters.cpp | 9 +- src/framework/tests/particles.cpp | 21 +- src/global/tests/CMakeLists.txt | 1 - src/global/tests/param_container.cpp | 8 +- src/metrics/tests/minkowski.cpp | 39 ++-- src/output/tests/CMakeLists.txt | 2 +- src/output/tests/writer-mpi.cpp | 183 +++++++++--------- 11 files changed, 301 insertions(+), 147 deletions(-) rename src/framework/tests/{comm_mpi.cpp => comm-mpi.cpp} (100%) create mode 100644 src/framework/tests/comm-nompi.cpp diff --git a/cmake/tests.cmake b/cmake/tests.cmake index 44bbce44..bc397ea5 100644 --- a/cmake/tests.cmake +++ b/cmake/tests.cmake @@ -5,16 +5,11 @@ set(SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src) set(TEST_DIRECTORIES "") -if(NOT ${mpi}) - list(APPEND TEST_DIRECTORIES global) - list(APPEND TEST_DIRECTORIES metrics) - list(APPEND TEST_DIRECTORIES kernels) - list(APPEND TEST_DIRECTORIES archetypes) - list(APPEND TEST_DIRECTORIES framework) -elseif(${mpi} AND ${output}) - list(APPEND TEST_DIRECTORIES framework) -endif() - +list(APPEND TEST_DIRECTORIES global) +list(APPEND TEST_DIRECTORIES metrics) +list(APPEND TEST_DIRECTORIES kernels) +list(APPEND TEST_DIRECTORIES archetypes) +list(APPEND TEST_DIRECTORIES framework) list(APPEND TEST_DIRECTORIES output) foreach(test_dir IN LISTS TEST_DIRECTORIES) diff --git a/src/framework/tests/CMakeLists.txt b/src/framework/tests/CMakeLists.txt index 92e327d8..3c6be0f6 100644 --- a/src/framework/tests/CMakeLists.txt +++ b/src/framework/tests/CMakeLists.txt @@ -35,17 +35,23 @@ function(gen_test title is_parallel) endif() endfunction() +gen_test(parameters false) if(${mpi}) - gen_test(comm_mpi true) + gen_test(particles true) else() - gen_test(parameters false) gen_test(particles false) - gen_test(fields false) - gen_test(grid_mesh false) - if(${DEBUG}) - gen_test(metadomain false) - endif() - gen_test(comm_nompi false) +endif() + +gen_test(fields false) +gen_test(grid_mesh false) +if(${DEBUG}) + gen_test(metadomain false) +endif() + +if(${mpi}) + gen_test(comm-mpi true) +else() + gen_test(comm-nompi false) endif() # this test is only run manually to ensure ... diff --git a/src/framework/tests/comm_mpi.cpp b/src/framework/tests/comm-mpi.cpp similarity index 100% rename from src/framework/tests/comm_mpi.cpp rename to src/framework/tests/comm-mpi.cpp diff --git a/src/framework/tests/comm-nompi.cpp b/src/framework/tests/comm-nompi.cpp new file mode 100644 index 00000000..c7646ef0 --- /dev/null +++ b/src/framework/tests/comm-nompi.cpp @@ -0,0 +1,148 @@ +#include "framework/domain/comm_nompi.hpp" + +#include "enums.h" +#include "global.h" + +#include "arch/directions.h" +#include "arch/kokkos_aliases.h" +#include "utils/numeric.h" + +#include +#include + +using namespace ntt; + +auto main(int argc, char* argv[]) -> int { + Kokkos::initialize(argc, argv); + + try { + const std::size_t nx1 = 15, nx2 = 15; + ndfield_t fld { "fld", nx1 + 2 * N_GHOSTS, nx2 + 2 * N_GHOSTS }; + ndfield_t buff { "buff", nx1 + 2 * N_GHOSTS, nx2 + 2 * N_GHOSTS }; + + Kokkos::parallel_for( + "Fill", + CreateRangePolicy({ 0, 0 }, + { nx1 + 2 * N_GHOSTS, nx2 + 2 * N_GHOSTS }), + Lambda(index_t i1, index_t i2) { + if ((i1 >= 2 * N_GHOSTS) and (i1 < nx1) and (i2 >= 2 * N_GHOSTS) and + (i2 < nx2)) { + fld(i1, i2, 0) = 4.0; + fld(i1, i2, 1) = 12.0; + fld(i1, i2, 2) = 20.0; + } else if ( + ((i1 < 2 * N_GHOSTS or i1 >= nx1) and (i2 >= 2 * N_GHOSTS and i2 < nx2)) or + ((i2 < 2 * N_GHOSTS or i2 >= nx2) and (i1 >= 2 * N_GHOSTS and i1 < nx1))) { + fld(i1, i2, 0) = 2.0; + fld(i1, i2, 1) = 6.0; + fld(i1, i2, 2) = 10.0; + } else { + fld(i1, i2, 0) = 1.0; + fld(i1, i2, 1) = 3.0; + fld(i1, i2, 2) = 5.0; + } + }); + Kokkos::deep_copy(buff, ZERO); + + const auto send_slice = std::vector { + { nx1 + N_GHOSTS, nx1 + 2 * N_GHOSTS }, + { nx2 + N_GHOSTS, nx2 + 2 * N_GHOSTS } + }; + const auto recv_slice = std::vector { + { N_GHOSTS, 2 * N_GHOSTS }, + { N_GHOSTS, 2 * N_GHOSTS } + }; + const auto comp_slice = range_tuple_t(cur::jx1, cur::jx3 + 1); + + const auto i_min = [](in c) { + switch (c) { + case in::x1: + return (std::size_t)N_GHOSTS; + case in::x2: + return (std::size_t)N_GHOSTS; + case in::x3: + return (std::size_t)N_GHOSTS; + } + return (std::size_t)0; + }; + const auto i_max = [](in c) { + switch (c) { + case in::x1: + return nx1 + N_GHOSTS; + case in::x2: + return nx2 + N_GHOSTS; + case in::x3: + return (std::size_t)0; + } + return (std::size_t)0; + }; + const in components[] = { in::x1, in::x2, in::x3 }; + for (auto& direction : dir::Directions::all) { + auto send_slice = std::vector {}; + auto recv_slice = std::vector {}; + for (std::size_t d { 0 }; d < direction.size(); ++d) { + const auto c = components[d]; + const auto dir = direction[d]; + if (dir == 0) { + send_slice.emplace_back(i_min(c) - N_GHOSTS, i_max(c) + N_GHOSTS); + } else if (dir == 1) { + send_slice.emplace_back(i_max(c) - N_GHOSTS, i_max(c) + N_GHOSTS); + } else { + send_slice.emplace_back(i_min(c) - N_GHOSTS, i_min(c) + N_GHOSTS); + } + if (-dir == 0) { + recv_slice.emplace_back(i_min(c) - N_GHOSTS, i_max(c) + N_GHOSTS); + } else if (-dir == 1) { + recv_slice.emplace_back(i_max(c) - N_GHOSTS, i_max(c) + N_GHOSTS); + } else { + recv_slice.emplace_back(i_min(c) - N_GHOSTS, i_min(c) + N_GHOSTS); + } + } + comm::CommunicateField((unsigned int)0, + fld, + buff, + 0, + 0, + 0, + 0, + send_slice, + recv_slice, + comp_slice, + true); + } + // add buffers + Kokkos::parallel_for( + "Fill", + CreateRangePolicy({ 0, 0 }, + { nx1 + 2 * N_GHOSTS, nx2 + 2 * N_GHOSTS }), + Lambda(index_t i1, index_t i2) { + for (auto k { 0 }; k < 3; ++k) { + fld(i1, i2, k) += buff(i1, i2, k); + } + }); + + // check + Kokkos::parallel_for( + "Fill", + CreateRangePolicy({ 0, 0 }, + { nx1 + 2 * N_GHOSTS, nx2 + 2 * N_GHOSTS }), + Lambda(index_t i1, index_t i2) { + if (fld(i1, i2, 0) != 4.0) { + raise::KernelError(HERE, "fld0 wrong after comm"); + } + if (fld(i1, i2, 1) != 12.0) { + raise::KernelError(HERE, "fld1 wrong after comm"); + } + if (fld(i1, i2, 2) != 20.0) { + raise::KernelError(HERE, "fld2 wrong after comm"); + } + }); + } catch (std::exception& e) { + std::cerr << "Exception: " << e.what() << std::endl; + Kokkos::finalize(); + return 1; + } + + Kokkos::finalize(); + return 0; +} diff --git a/src/framework/tests/parameters.cpp b/src/framework/tests/parameters.cpp index 3b59a332..238db11e 100644 --- a/src/framework/tests/parameters.cpp +++ b/src/framework/tests/parameters.cpp @@ -2,14 +2,15 @@ #include "defaults.h" #include "enums.h" +#include "global.h" #include "utils/comparators.h" #include "utils/error.h" -#include #include "framework/containers/species.h" #include +#include #include @@ -262,7 +263,7 @@ void assert_equal(const T& a, const T& b, const std::string& msg) { } auto main(int argc, char* argv[]) -> int { - Kokkos::initialize(argc, argv); + ntt::GlobalInitialize(argc, argv); try { using namespace ntt; @@ -656,11 +657,11 @@ auto main(int argc, char* argv[]) -> int { } catch (std::exception& err) { std::cerr << err.what() << std::endl; - Kokkos::finalize(); + ntt::GlobalFinalize(); return -1; } - Kokkos::finalize(); + ntt::GlobalFinalize(); return 0; } diff --git a/src/framework/tests/particles.cpp b/src/framework/tests/particles.cpp index cce6af28..20253b1c 100644 --- a/src/framework/tests/particles.cpp +++ b/src/framework/tests/particles.cpp @@ -15,6 +15,7 @@ void testParticles( float m, float ch, std::size_t maxnpart, + timestep_t spatial_sorting_interval, ntt::ParticlePusherFlags pusher, bool use_tracking, ntt::RadiativeDragFlags radiative_drag_flags, @@ -27,6 +28,7 @@ void testParticles( m, ch, maxnpart, + spatial_sorting_interval, pusher, use_tracking, radiative_drag_flags, @@ -38,7 +40,11 @@ void testParticles( raise::ErrorIf(p.mass() != m, "Mass mismatch", HERE); raise::ErrorIf(p.charge() != ch, "Charge mismatch", HERE); raise::ErrorIf(p.maxnpart() != maxnpart, "Max number of particles mismatch", HERE); + raise::ErrorIf(p.spatial_sorting_interval() != spatial_sorting_interval, + "Spatial sorting interval mismatch", + HERE); raise::ErrorIf(p.pusher() != pusher, "Pusher mismatch", HERE); + raise::ErrorIf(p.use_tracking() != use_tracking, "Use Tracking mismatch", HERE); raise::ErrorIf(p.radiative_drag_flags() != radiative_drag_flags, "Radiative drag mismatch", HERE); @@ -114,7 +120,7 @@ void testParticles( } auto main(int argc, char** argv) -> int { - Kokkos::initialize(argc, argv); + ntt::GlobalInitialize(argc, argv); try { using namespace ntt; testParticles(1, @@ -122,6 +128,7 @@ auto main(int argc, char** argv) -> int { 1.0, -1.0, 100, + 0u, ParticlePusher::BORIS, false, RadiativeDrag::SYNCHROTRON); @@ -129,19 +136,21 @@ auto main(int argc, char** argv) -> int { "p+", 100.0, -1.0, - 1000, + 100, + 1u, ParticlePusher::VAY, true, RadiativeDrag::SYNCHROTRON | RadiativeDrag::COMPTON, EmissionType::SYNCHROTRON, 2, - 1); + 2); testParticles(3, "ph", 0.0, 0.0, 100, + 12u, ParticlePusher::PHOTON, false, RadiativeDrag::NONE, @@ -152,6 +161,7 @@ auto main(int argc, char** argv) -> int { 1.0, 1.0, 100, + 123u, ParticlePusher::BORIS, true, RadiativeDrag::NONE, @@ -163,6 +173,7 @@ auto main(int argc, char** argv) -> int { 1.0, 1.0, 100, + 1234u, ParticlePusher::BORIS, false, RadiativeDrag::NONE, @@ -171,9 +182,9 @@ auto main(int argc, char** argv) -> int { 2); } catch (const std::exception& e) { std::cerr << "Error: " << e.what() << std::endl; - Kokkos::finalize(); + ntt::GlobalFinalize(); return 1; } - Kokkos::finalize(); + ntt::GlobalFinalize(); return 0; } diff --git a/src/global/tests/CMakeLists.txt b/src/global/tests/CMakeLists.txt index c9b47940..c206f85b 100644 --- a/src/global/tests/CMakeLists.txt +++ b/src/global/tests/CMakeLists.txt @@ -35,4 +35,3 @@ gen_test(comparators) gen_test(numeric) gen_test(param_container) gen_test(sorting) -# gen_test(traits) diff --git a/src/global/tests/param_container.cpp b/src/global/tests/param_container.cpp index 60c5495c..2ad75e9e 100644 --- a/src/global/tests/param_container.cpp +++ b/src/global/tests/param_container.cpp @@ -1,6 +1,7 @@ #include "utils/param_container.h" #include "enums.h" +#include "global.h" #include #include @@ -13,8 +14,9 @@ void errorIf(bool condition, const std::string& message) { } } -auto main() -> int { +auto main(int argc, char* argv[]) -> int { using namespace ntt; + GlobalInitialize(argc, argv); auto p = prm::Parameters(); const auto d_vec = std::vector { true, true, false }; @@ -84,7 +86,9 @@ auto main() -> int { } catch (std::exception& exc) { std::cerr << exc.what() << "\n"; + GlobalFinalize(); return 1; } + GlobalFinalize(); return 0; -} \ No newline at end of file +} diff --git a/src/metrics/tests/minkowski.cpp b/src/metrics/tests/minkowski.cpp index 2386d65d..d225919c 100644 --- a/src/metrics/tests/minkowski.cpp +++ b/src/metrics/tests/minkowski.cpp @@ -19,7 +19,8 @@ void errorIf(bool condition, const std::string& message) { inline static constexpr auto epsilon = std::numeric_limits::epsilon(); template -Inline auto equal(const coord_t& a, const coord_t& b, real_t acc = ONE) -> bool { +Inline auto equal(const coord_t& a, const coord_t& b, real_t acc = ONE) + -> bool { for (auto d { 0u }; d < D; ++d) { if (not cmp::AlmostEqual(a[d], b[d], epsilon * acc)) { Kokkos::printf("%d : %.12f != %.12f\n", d, a[d], b[d]); @@ -30,27 +31,11 @@ Inline auto equal(const coord_t& a, const coord_t& b, real_t acc = ONE) -> } auto main(int argc, char* argv[]) -> int { - Kokkos::initialize(argc, argv); + ntt::GlobalInitialize(argc, argv); try { using namespace ntt; using namespace metric; - { - // catch unequal dx error - try { - Minkowski( - { - 256, - 128, - 64 - }, - { { -2.0, 2.0 }, { -1.0, 1.0 }, { -1.0, 1.0 } }); - } catch (const std::exception& e) { - errorIf(std::string(e.what()).find("dx3 must be equal to dx1 in 3D") == - std::string::npos, - "unequal dx error not caught"); - } - } constexpr unsigned int nx = 128; constexpr unsigned int ny = 64; @@ -64,20 +49,20 @@ auto main(int argc, char* argv[]) -> int { { { -2.0, 2.0 }, { -1.0, 1.0 }, { -0.5, 0.5 } }); const auto dx = (real_t)(4.0 / nx); coord_t dummy { ZERO, ZERO, ZERO }; - errorIf(not cmp::AlmostEqual(M.dxMin(), dx / (real_t)math::sqrt(3.0)), + errorIf(not cmp::AlmostEqual_host(M.dxMin(), dx / (real_t)math::sqrt(3.0)), "dxMin not set correctly"); - errorIf(not cmp::AlmostEqual(M.h_<1, 1>(dummy), SQR(dx)), + errorIf(not cmp::AlmostEqual_host(M.h_<1, 1>(dummy), SQR(dx)), "h_11 not set correctly"); - errorIf(not cmp::AlmostEqual(M.h_<2, 2>(dummy), SQR(dx)), + errorIf(not cmp::AlmostEqual_host(M.h_<2, 2>(dummy), SQR(dx)), "h_22 not set correctly"); - errorIf(not cmp::AlmostEqual(M.h_<3, 3>(dummy), SQR(dx)), + errorIf(not cmp::AlmostEqual_host(M.h_<3, 3>(dummy), SQR(dx)), "h_33 not set correctly"); - errorIf(not cmp::AlmostEqual(M.sqrt_det_h(dummy), CUBE(dx)), + errorIf(not cmp::AlmostEqual_host(M.sqrt_det_h(dummy), CUBE(dx)), "sqrt_det_h not set correctly"); // coord transformations // code <-> phys <-> sph - unsigned long all_wrongs = 0; + unsigned long all_wrongs = 0u; Kokkos::parallel_reduce( "code-phys", CreateRangePolicy({ N_GHOSTS, N_GHOSTS, N_GHOSTS }, @@ -102,12 +87,12 @@ auto main(int argc, char* argv[]) -> int { math::atan2(x_Phys[1], -x_Phys[0]) }); }, all_wrongs); - errorIf(all_wrongs != 0, "code-phys-sph transformations failed"); + errorIf(all_wrongs != 0u, "code-phys-sph transformations failed"); } catch (std::exception& e) { std::cerr << e.what() << std::endl; - Kokkos::finalize(); + ntt::GlobalFinalize(); return 1; } - Kokkos::finalize(); + ntt::GlobalFinalize(); return 0; } diff --git a/src/output/tests/CMakeLists.txt b/src/output/tests/CMakeLists.txt index f6f460ae..aa138813 100644 --- a/src/output/tests/CMakeLists.txt +++ b/src/output/tests/CMakeLists.txt @@ -32,8 +32,8 @@ endfunction() gen_test(stats false) if(${output}) + gen_test(fields false) if(NOT ${mpi}) - gen_test(fields false) gen_test(writer-nompi false) else() gen_test(writer-mpi true) diff --git a/src/output/tests/writer-mpi.cpp b/src/output/tests/writer-mpi.cpp index 382f880c..61f9daaa 100644 --- a/src/output/tests/writer-mpi.cpp +++ b/src/output/tests/writer-mpi.cpp @@ -17,7 +17,7 @@ void cleanup() { namespace fs = std::filesystem; - fs::path tempfile_path { "test.bp" }; + fs::path tempfile_path { "test" }; fs::remove_all(tempfile_path); } @@ -26,8 +26,7 @@ void cleanup() { math::ceil(static_cast(a) / static_cast(b)))) auto main(int argc, char* argv[]) -> int { - Kokkos::initialize(argc, argv); - MPI_Init(&argc, &argv); + ntt::GlobalInitialize(argc, argv); int mpi_rank, mpi_size; MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank); MPI_Comm_size(MPI_COMM_WORLD, &mpi_size); @@ -61,7 +60,7 @@ auto main(int argc, char* argv[]) -> int { { // write auto writer = out::Writer(); - writer.init(&adios, "BPFile", "test", false); + writer.init(&adios, "BPFile", "test"); writer.defineMeshLayout({ static_cast(mpi_size) * nx1 }, { static_cast(mpi_rank) * nx1 }, { nx1 }, @@ -92,90 +91,98 @@ auto main(int argc, char* argv[]) -> int { // read adios2::IO io = adios.DeclareIO("read-test"); io.SetEngine("BPFile"); - adios2::Engine reader = io.Open("test.bp", adios2::Mode::Read); - for (auto step = 0u; reader.BeginStep() == adios2::StepStatus::OK; ++step) { - raise::ErrorIf(io.InquireAttribute("NGhosts").Data()[0] != 0, - "NGhosts is not correct", - HERE); - raise::ErrorIf(io.InquireAttribute("Dimension").Data()[0] != 1, - "Dimension is not correct", - HERE); - - timestep_t step_read; - simtime_t time_read; - - reader.Get(io.InquireVariable("Step"), - &step_read, - adios2::Mode::Sync); - reader.Get(io.InquireVariable("Time"), - &time_read, - adios2::Mode::Sync); - raise::ErrorIf(step_read != step, "Step is not correct", HERE); - raise::ErrorIf((float)time_read != (float)step * 0.1f, - "Time is not correct", - HERE); - - const auto l_size = nx1; - const auto l_offset = nx1 * mpi_rank; - - const double n = l_size; - const double d = dwn1; - const double l = l_offset; - const double f = math::ceil(l / d) * d - l; - - const auto first_cell = static_cast(f); - const auto l_size_dwn = static_cast(math::ceil((n - f) / d)); - const auto l_corner_dwn = static_cast(math::ceil(l / d)); - - array_t field_read {}; - int cntr = 0; - for (const auto& name : field_names) { - auto fieldVar = io.InquireVariable(name); - if (fieldVar) { - raise::ErrorIf(fieldVar.Shape().size() != 1, - fmt::format("%s is not 1D", name.c_str()), - HERE); - auto dims = fieldVar.Shape(); - std::size_t nx1_r = dims[0]; - raise::ErrorIf((nx1_r != CEILDIV(nx1 * mpi_size, dwn1)), - fmt::format("%s = %ld is not %d", - name.c_str(), - nx1_r, - CEILDIV(nx1 * mpi_size, dwn1)), - HERE); - - fieldVar.SetSelection( - adios2::Box({ l_corner_dwn }, { l_size_dwn })); - field_read = array_t(name, l_size_dwn); - auto field_read_h = Kokkos::create_mirror_view(field_read); - reader.Get(fieldVar, field_read_h.data(), adios2::Mode::Sync); - Kokkos::deep_copy(field_read, field_read_h); - - Kokkos::parallel_for( - "check", - CreateRangePolicy({ 0 }, { l_size_dwn }), - Lambda(index_t i1) { - if (not cmp::AlmostEqual( - field_read(i1), - field(i1 * dwn1 + first_cell + i1min, cntr))) { - Kokkos::printf("\n:::::::::::::::\nfield_read(%ld) = %f != " - "field(%ld, %d) = %f\n:::::::::::::::\n", - i1, - field_read(i1), - i1 * dwn1 + first_cell + i1min, - cntr, - field(i1 * dwn1 + first_cell + i1min, cntr)); - raise::KernelError(HERE, "Field is not read correctly"); - } - }); - } else { - raise::Error("Field not found", HERE); + + for (const auto time : { 0u, 1u }) { + namespace fs = std::filesystem; + adios2::Engine reader = io.Open( + fs::path("test") / fs::path("fields") / + fs::path(fmt::format("fields.%08u.bp", time)), + adios2::Mode::Read); + for (auto st = 0u; reader.BeginStep() == adios2::StepStatus::OK; ++st) { + raise::ErrorIf(io.InquireAttribute("NGhosts").Data()[0] != 0, + "NGhosts is not correct", + HERE); + raise::ErrorIf( + io.InquireAttribute("Dimension").Data()[0] != 1, + "Dimension is not correct", + HERE); + + timestep_t step_read; + simtime_t time_read; + + reader.Get(io.InquireVariable("Step"), + &step_read, + adios2::Mode::Sync); + reader.Get(io.InquireVariable("Time"), + &time_read, + adios2::Mode::Sync); + raise::ErrorIf(step_read != time, "Step is not correct", HERE); + raise::ErrorIf((float)time_read != (float)time * 0.1f, + "Time is not correct", + HERE); + + const auto l_size = nx1; + const auto l_offset = nx1 * mpi_rank; + + const double n = l_size; + const double d = dwn1; + const double l = l_offset; + const double f = math::ceil(l / d) * d - l; + + const auto first_cell = static_cast(f); + const auto l_size_dwn = static_cast(math::ceil((n - f) / d)); + const auto l_corner_dwn = static_cast(math::ceil(l / d)); + + array_t field_read {}; + int cntr = 0; + for (const auto& name : field_names) { + auto fieldVar = io.InquireVariable(name); + if (fieldVar) { + raise::ErrorIf(fieldVar.Shape().size() != 1, + fmt::format("%s is not 1D", name.c_str()), + HERE); + auto dims = fieldVar.Shape(); + std::size_t nx1_r = dims[0]; + raise::ErrorIf((nx1_r != CEILDIV(nx1 * mpi_size, dwn1)), + fmt::format("%s = %ld is not %d", + name.c_str(), + nx1_r, + CEILDIV(nx1 * mpi_size, dwn1)), + HERE); + + fieldVar.SetSelection( + adios2::Box({ l_corner_dwn }, { l_size_dwn })); + field_read = array_t(name, l_size_dwn); + auto field_read_h = Kokkos::create_mirror_view(field_read); + reader.Get(fieldVar, field_read_h.data(), adios2::Mode::Sync); + Kokkos::deep_copy(field_read, field_read_h); + + Kokkos::parallel_for( + "check", + CreateRangePolicy({ 0 }, { l_size_dwn }), + Lambda(index_t i1) { + if (not cmp::AlmostEqual( + field_read(i1), + field(i1 * dwn1 + first_cell + i1min, cntr))) { + Kokkos::printf("\n:::::::::::::::\nfield_read(%ld) = %f != " + "field(%ld, %d) = %f\n:::::::::::::::\n", + i1, + field_read(i1), + i1 * dwn1 + first_cell + i1min, + cntr, + field(i1 * dwn1 + first_cell + i1min, cntr)); + raise::KernelError(HERE, "Field is not read correctly"); + } + }); + } else { + raise::Error("Field not found", HERE); + } + ++cntr; } - ++cntr; + reader.EndStep(); } - reader.EndStep(); + reader.Close(); } - reader.Close(); } } catch (std::exception& e) { @@ -183,15 +190,13 @@ auto main(int argc, char* argv[]) -> int { CallOnce([]() { cleanup(); }); - MPI_Finalize(); - Kokkos::finalize(); + ntt::GlobalFinalize(); return 1; } CallOnce([]() { cleanup(); }); - MPI_Finalize(); - Kokkos::finalize(); + ntt::GlobalFinalize(); return 0; } From 576dcd794aa871e62e0e9cb4660d15a229dd979d Mon Sep 17 00:00:00 2001 From: haykh Date: Thu, 19 Feb 2026 17:15:21 -0500 Subject: [PATCH 2/3] new particle_sort interval parameter (global & per species) + refactor metadomain/particles cpp files --- input.example.toml | 13 +- src/engines/engine.hpp | 5 +- src/engines/grpic.hpp | 10 +- src/engines/srpic/srpic.hpp | 10 +- src/framework/CMakeLists.txt | 22 +- src/framework/containers/particles.cpp | 191 ++--------------- src/framework/containers/particles.h | 6 + src/framework/containers/particles_io.cpp | 9 +- src/framework/containers/particles_sort.cpp | 196 ++++++++++++++++++ src/framework/containers/species.h | 17 ++ src/framework/domain/metadomain.h | 23 +- .../{checkpoint.cpp => metadomain_chckpt.cpp} | 0 ...communications.cpp => metadomain_comm.cpp} | 53 ++--- .../domain/{output.cpp => metadomain_io.cpp} | 0 src/framework/domain/metadomain_sort.cpp | 43 ++++ .../{stats.cpp => metadomain_stats.cpp} | 0 src/framework/parameters/parameters.cpp | 17 +- src/framework/parameters/parameters.h | 1 + src/framework/parameters/particles.cpp | 22 +- src/framework/parameters/particles.h | 7 +- src/framework/tests/comm_nompi.cpp | 148 ------------- src/global/global.h | 18 +- src/global/utils/diag.cpp | 7 +- src/global/utils/diag.h | 2 +- src/global/utils/timer.cpp | 11 +- 25 files changed, 408 insertions(+), 423 deletions(-) create mode 100644 src/framework/containers/particles_sort.cpp rename src/framework/domain/{checkpoint.cpp => metadomain_chckpt.cpp} (100%) rename src/framework/domain/{communications.cpp => metadomain_comm.cpp} (95%) rename src/framework/domain/{output.cpp => metadomain_io.cpp} (100%) create mode 100644 src/framework/domain/metadomain_sort.cpp rename src/framework/domain/{stats.cpp => metadomain_stats.cpp} (100%) delete mode 100644 src/framework/tests/comm_nompi.cpp diff --git a/input.example.toml b/input.example.toml index 3a511249..f235425d 100644 --- a/input.example.toml +++ b/input.example.toml @@ -390,11 +390,16 @@ # @type: bool # @default: false use_weights = "" - # Timesteps between particle re-sorting (removing dead particles) + # Timesteps between particle re-sorting by tags (removing dead particles) # @type: uint # @default: 100 # @note: Set to 0 to disable re-sorting clear_interval = "" + # Timesteps between spatial sorting of particles (for better cache performance) + # @type: uint + # @default: 0 + # @note: Set to 0 to disable spatial sorting + spatial_sorting_interval = "" # @inferred: # - nspec @@ -453,6 +458,12 @@ # @note: Only one emission mechanism allowed # @note: Appropriate radiation drag flag will be applied automatically (unless explicitly set to "None") emission = "" + # Timesteps between spatial sorting of particles for given species + # @type: uint + # @default: 0 + # @note: Set to 0 to disable spatial sorting + # @note: Overrides `particles.spatial_sorting_interval` for the given species + spatial_sorting_interval = "" # Parameters for specific problem generators and setups [setup] diff --git a/src/engines/engine.hpp b/src/engines/engine.hpp index f232c798..597de517 100644 --- a/src/engines/engine.hpp +++ b/src/engines/engine.hpp @@ -22,7 +22,6 @@ #include "utils/diag.h" #include "utils/reporter.h" #include "utils/timer.h" -#include #include "archetypes/field_setter.h" #include "archetypes/traits.h" @@ -35,6 +34,8 @@ #include "pgen.hpp" +#include + #if defined(OUTPUT_ENABLED) #include #endif @@ -248,7 +249,7 @@ namespace ntt { "ParticlePusher", "FieldBoundaries", "ParticleBoundaries", "Communications", "Injector", "Custom", - "PrtlClear", "Output", + "ParticleSort", "Output", "Checkpoint" }, []() { Kokkos::fence(); diff --git a/src/engines/grpic.hpp b/src/engines/grpic.hpp index 78415e26..d204ad74 100644 --- a/src/engines/grpic.hpp +++ b/src/engines/grpic.hpp @@ -19,7 +19,6 @@ #include "utils/log.h" #include "utils/numeric.h" #include "utils/timer.h" -#include #include "framework/domain/domain.h" #include "framework/parameters/parameters.h" @@ -36,6 +35,7 @@ #include #include +#include #include #include @@ -532,11 +532,9 @@ namespace ntt { timers.stop("FieldBoundaries"); } - if (clear_interval > 0 and step % clear_interval == 0 and step > 0) { - timers.start("PrtlClear"); - m_metadomain.RemoveDeadParticles(dom); - timers.stop("PrtlClear"); - } + timers.start("ParticleSort"); + m_metadomain.SortParticles(time, step, m_params, dom); + timers.stop("ParticleSort"); /** * Finally: em0::B at n-1/2 diff --git a/src/engines/srpic/srpic.hpp b/src/engines/srpic/srpic.hpp index e09e07a1..3d39951f 100644 --- a/src/engines/srpic/srpic.hpp +++ b/src/engines/srpic/srpic.hpp @@ -18,7 +18,6 @@ #include "utils/numeric.h" #include "utils/timer.h" -#include #include "engines/srpic/currents.h" #include "engines/srpic/fields_bcs.h" @@ -34,6 +33,7 @@ #include #include +#include namespace ntt { @@ -186,11 +186,9 @@ namespace ntt { timers.stop("Injector"); } - if (clear_interval > 0 and step % clear_interval == 0 and step > 0) { - timers.start("PrtlClear"); - m_metadomain.RemoveDeadParticles(dom); - timers.stop("PrtlClear"); - } + timers.start("ParticleSort"); + m_metadomain.SortParticles(time, step, m_params, dom); + timers.stop("ParticleSort"); } }; diff --git a/src/framework/CMakeLists.txt b/src/framework/CMakeLists.txt index 907b04f2..d1c6b886 100644 --- a/src/framework/CMakeLists.txt +++ b/src/framework/CMakeLists.txt @@ -13,12 +13,16 @@ # * simulation.cpp # * domain/grid.cpp # * domain/metadomain.cpp -# * domain/communications.cpp -# * domain/checkpoint.cpp +# * domain/metadomain_sort.cpp +# * domain/metadomain_comm.cpp +# * domain/metadomain_chckpt.cpp +# * domain/metadomain_stats.cpp +# * domain/metadomain_io.cpp # * containers/particles.cpp +# * containers/particles_comm.cpp +# * containers/particles_io.cpp +# * containers/particles_sort.cpp # * containers/fields.cpp -# * domain/stats.cpp -# * domain/output.cpp # # @includes: # @@ -50,13 +54,15 @@ set(SOURCES ${SRC_DIR}/parameters/extra.cpp ${SRC_DIR}/domain/grid.cpp ${SRC_DIR}/domain/metadomain.cpp - ${SRC_DIR}/domain/communications.cpp - ${SRC_DIR}/domain/stats.cpp + ${SRC_DIR}/domain/metadomain_comm.cpp + ${SRC_DIR}/domain/metadomain_sort.cpp + ${SRC_DIR}/domain/metadomain_stats.cpp ${SRC_DIR}/containers/particles.cpp + ${SRC_DIR}/containers/particles_sort.cpp ${SRC_DIR}/containers/fields.cpp) if(${output}) - list(APPEND SOURCES ${SRC_DIR}/domain/output.cpp) - list(APPEND SOURCES ${SRC_DIR}/domain/checkpoint.cpp) + list(APPEND SOURCES ${SRC_DIR}/domain/metadomain_io.cpp) + list(APPEND SOURCES ${SRC_DIR}/domain/metadomain_chckpt.cpp) list(APPEND SOURCES ${SRC_DIR}/containers/fields_io.cpp) list(APPEND SOURCES ${SRC_DIR}/containers/particles_io.cpp) endif() diff --git a/src/framework/containers/particles.cpp b/src/framework/containers/particles.cpp index b1b77497..a210a650 100644 --- a/src/framework/containers/particles.cpp +++ b/src/framework/containers/particles.cpp @@ -12,32 +12,34 @@ #include #include -#include namespace ntt { + template Particles::Particles(spidx_t index, const std::string& label, float m, float ch, npart_t maxnpart, + timestep_t spatial_sorting_interval, ParticlePusherFlags particle_pusher_flags, bool use_tracking, RadiativeDragFlags radiative_drag_flags, EmissionTypeFlag emission_policy_flag, unsigned short npld_r, unsigned short npld_i) - : ParticleSpecies(index, - label, - m, - ch, - maxnpart, - particle_pusher_flags, - use_tracking, - radiative_drag_flags, - emission_policy_flag, - npld_r, - npld_i) { + : ParticleSpecies { index, + label, + m, + ch, + maxnpart, + spatial_sorting_interval, + particle_pusher_flags, + use_tracking, + radiative_drag_flags, + emission_policy_flag, + npld_r, + npld_i } { if constexpr (D == Dim::_1D or D == Dim::_2D or D == Dim::_3D) { i1 = array_t { label + "_i1", maxnpart }; @@ -80,171 +82,6 @@ namespace ntt { } } - template - auto Particles::NpartsPerTagAndOffsets() const - -> std::pair, array_t> { - auto this_tag = tag; - const auto num_tags = ntags(); - array_t npptag { "nparts_per_tag", ntags() }; - - // count # of particles per each tag - auto npptag_scat = Kokkos::Experimental::create_scatter_view(npptag); - Kokkos::parallel_for( - "NpartPerTag", - rangeActiveParticles(), - Lambda(index_t p) { - auto npptag_acc = npptag_scat.access(); - if (this_tag(p) < 0 || this_tag(p) >= static_cast(num_tags)) { - raise::KernelError(HERE, "Invalid tag value"); - } - npptag_acc(this_tag(p)) += 1; - }); - Kokkos::Experimental::contribute(npptag, npptag_scat); - - // copy the count to a vector on the host - auto npptag_h = Kokkos::create_mirror_view(npptag); - Kokkos::deep_copy(npptag_h, npptag); - std::vector npptag_vec(num_tags); - for (auto t { 0u }; t < num_tags; ++t) { - npptag_vec[t] = npptag_h(t); - } - - // count the offsets on the host and copy to device - array_t tag_offsets("tag_offsets", num_tags - 3); - auto tag_offsets_h = Kokkos::create_mirror_view(tag_offsets); - - tag_offsets_h(0) = npptag_vec[2]; // offset for tag = 3 - for (auto t { 1u }; t < num_tags - 3; ++t) { - tag_offsets_h(t) = npptag_vec[t + 2] + tag_offsets_h(t - 1); - } - Kokkos::deep_copy(tag_offsets, tag_offsets_h); - - return { npptag_vec, tag_offsets }; - } - - template - void RemoveDeadInArray(array_t& arr, const array_t& indices_alive) { - npart_t n_alive = indices_alive.extent(0); - auto buffer = Kokkos::View("buffer", n_alive); - Kokkos::parallel_for( - "PopulateBufferAlive", - n_alive, - Lambda(index_t p) { buffer(p) = arr(indices_alive(p)); }); - - Kokkos::deep_copy( - Kokkos::subview(arr, std::make_pair(static_cast(0), n_alive)), - buffer); - } - - template - void RemoveDeadInArray(array_t& arr, const array_t& indices_alive) { - npart_t n_alive = indices_alive.extent(0); - auto buffer = array_t { "buffer", n_alive, arr.extent(1) }; - Kokkos::parallel_for( - "PopulateBufferAlive", - CreateRangePolicy({ 0, 0 }, { n_alive, arr.extent(1) }), - Lambda(index_t p, index_t l) { buffer(p, l) = arr(indices_alive(p), l); }); - - Kokkos::deep_copy( - Kokkos::subview(arr, - std::make_pair(static_cast(0), n_alive), - Kokkos::ALL), - buffer); - } - - template - void Particles::RemoveDead() { - npart_t n_alive = 0, n_dead = 0; - auto& this_tag = tag; - - Kokkos::parallel_reduce( - "CountDeadAlive", - rangeActiveParticles(), - Lambda(index_t p, npart_t & nalive, npart_t & ndead) { - nalive += (this_tag(p) == ParticleTag::alive); - ndead += (this_tag(p) == ParticleTag::dead); - if (this_tag(p) != ParticleTag::alive and this_tag(p) != ParticleTag::dead) { - raise::KernelError(HERE, "wrong particle tag"); - } - }, - n_alive, - n_dead); - - array_t indices_alive { "indices_alive", n_alive }; - array_t alive_counter { "counter_alive", 1 }; - - Kokkos::parallel_for( - "AliveIndices", - rangeActiveParticles(), - Lambda(index_t p) { - if (this_tag(p) == ParticleTag::alive) { - const auto idx = Kokkos::atomic_fetch_add(&alive_counter(0), 1); - indices_alive(idx) = p; - } - }); - - { - auto alive_counter_h = Kokkos::create_mirror_view(alive_counter); - Kokkos::deep_copy(alive_counter_h, alive_counter); - raise::ErrorIf(alive_counter_h(0) != n_alive, - "error in finding alive particle indices", - HERE); - } - - if constexpr (D == Dim::_1D or D == Dim::_2D or D == Dim::_3D) { - RemoveDeadInArray(i1, indices_alive); - RemoveDeadInArray(i1_prev, indices_alive); - RemoveDeadInArray(dx1, indices_alive); - RemoveDeadInArray(dx1_prev, indices_alive); - } - - if constexpr (D == Dim::_2D or D == Dim::_3D) { - RemoveDeadInArray(i2, indices_alive); - RemoveDeadInArray(i2_prev, indices_alive); - RemoveDeadInArray(dx2, indices_alive); - RemoveDeadInArray(dx2_prev, indices_alive); - } - - if constexpr (D == Dim::_3D) { - RemoveDeadInArray(i3, indices_alive); - RemoveDeadInArray(i3_prev, indices_alive); - RemoveDeadInArray(dx3, indices_alive); - RemoveDeadInArray(dx3_prev, indices_alive); - } - - RemoveDeadInArray(ux1, indices_alive); - RemoveDeadInArray(ux2, indices_alive); - RemoveDeadInArray(ux3, indices_alive); - RemoveDeadInArray(weight, indices_alive); - - if constexpr (D == Dim::_2D && C != Coord::Cart) { - RemoveDeadInArray(phi, indices_alive); - } - - if (npld_r() > 0) { - RemoveDeadInArray(pld_r, indices_alive); - } - - if (npld_i() > 0) { - RemoveDeadInArray(pld_i, indices_alive); - } - - Kokkos::Experimental::fill( - "TagAliveParticles", - Kokkos::DefaultExecutionSpace(), - Kokkos::subview(this_tag, std::make_pair(static_cast(0), n_alive)), - ParticleTag::alive); - - Kokkos::Experimental::fill( - "TagDeadParticles", - Kokkos::DefaultExecutionSpace(), - Kokkos::subview(this_tag, std::make_pair(n_alive, n_alive + n_dead)), - ParticleTag::dead); - - set_npart(n_alive); - m_is_sorted = true; - } - template struct Particles; template struct Particles; template struct Particles; diff --git a/src/framework/containers/particles.h b/src/framework/containers/particles.h index f265425e..8c38fbf3 100644 --- a/src/framework/containers/particles.h +++ b/src/framework/containers/particles.h @@ -5,6 +5,9 @@ * - ntt::Particles<> : ntt::ParticleSpecies * @cpp: * - particles.cpp + * - particles_io.cpp + * - particles_comm.cpp + * - particles_sort.cpp * @macros: * - MPI_ENABLED */ @@ -83,6 +86,7 @@ namespace ntt { * @param m The mass of the species * @param ch The charge of the species * @param maxnpart The maximum number of allocated particles for the species + * @param spatial_sorting_interval The interval for spatial sorting of the particles * @param particle_pusher_flags The pusher(s) assigned for the species * @param use_tracking Use particle tracking for the species * @param radiative_drag_flags The radiative drag mechanism(s) assigned for the species @@ -95,6 +99,7 @@ namespace ntt { float m, float ch, npart_t maxnpart, + timestep_t spatial_sorting_interval, ParticlePusherFlags particle_pusher_flags, bool use_tracking, RadiativeDragFlags radiative_drag_flags, @@ -113,6 +118,7 @@ namespace ntt { spec.mass(), spec.charge(), spec.maxnpart(), + spec.spatial_sorting_interval(), spec.pusher(), spec.use_tracking(), spec.radiative_drag_flags(), diff --git a/src/framework/containers/particles_io.cpp b/src/framework/containers/particles_io.cpp index 2dc1bf21..d35ec59f 100644 --- a/src/framework/containers/particles_io.cpp +++ b/src/framework/containers/particles_io.cpp @@ -5,13 +5,6 @@ #include "utils/formatting.h" #include "utils/log.h" -#include "metrics/kerr_schild.h" -#include "metrics/kerr_schild_0.h" -#include "metrics/minkowski.h" -#include "metrics/qkerr_schild.h" -#include "metrics/qspherical.h" -#include "metrics/spherical.h" - #include "framework/containers/particles.h" #include "framework/specialization_registry.h" #include "output/utils/readers.h" @@ -814,6 +807,8 @@ namespace ntt { PARTICLES_OUTPUT_DECLARE(Dim::_3D, Coord::Cart) PARTICLES_OUTPUT_DECLARE(Dim::_2D, Coord::Sph) PARTICLES_OUTPUT_DECLARE(Dim::_2D, Coord::Qsph) + PARTICLES_OUTPUT_DECLARE(Dim::_3D, Coord::Sph) + PARTICLES_OUTPUT_DECLARE(Dim::_3D, Coord::Qsph) #undef PARTICLES_OUTPUT_DECLARE #define PARTICLES_OUTPUT_WRITE(S, M, D) \ diff --git a/src/framework/containers/particles_sort.cpp b/src/framework/containers/particles_sort.cpp new file mode 100644 index 00000000..081b9c4d --- /dev/null +++ b/src/framework/containers/particles_sort.cpp @@ -0,0 +1,196 @@ +#include "enums.h" +#include "global.h" + +#include "arch/kokkos_aliases.h" + +#include "framework/containers/particles.h" + +#include +#include +#include + +#include +#include + +namespace ntt { + + template + auto Particles::NpartsPerTagAndOffsets() const + -> std::pair, array_t> { + auto this_tag = tag; + const auto num_tags = ntags(); + array_t npptag { "nparts_per_tag", ntags() }; + + // count # of particles per each tag + auto npptag_scat = Kokkos::Experimental::create_scatter_view(npptag); + Kokkos::parallel_for( + "NpartPerTag", + rangeActiveParticles(), + Lambda(index_t p) { + auto npptag_acc = npptag_scat.access(); + if (this_tag(p) < 0 || this_tag(p) >= static_cast(num_tags)) { + raise::KernelError(HERE, "Invalid tag value"); + } + npptag_acc(this_tag(p)) += 1; + }); + Kokkos::Experimental::contribute(npptag, npptag_scat); + + // copy the count to a vector on the host + auto npptag_h = Kokkos::create_mirror_view(npptag); + Kokkos::deep_copy(npptag_h, npptag); + std::vector npptag_vec(num_tags); + for (auto t { 0u }; t < num_tags; ++t) { + npptag_vec[t] = npptag_h(t); + } + + // count the offsets on the host and copy to device + array_t tag_offsets("tag_offsets", num_tags - 3); + auto tag_offsets_h = Kokkos::create_mirror_view(tag_offsets); + + tag_offsets_h(0) = npptag_vec[2]; // offset for tag = 3 + for (auto t { 1u }; t < num_tags - 3; ++t) { + tag_offsets_h(t) = npptag_vec[t + 2] + tag_offsets_h(t - 1); + } + Kokkos::deep_copy(tag_offsets, tag_offsets_h); + + return { npptag_vec, tag_offsets }; + } + + template + void RemoveDeadInArray(array_t& arr, const array_t& indices_alive) { + npart_t n_alive = indices_alive.extent(0); + auto buffer = Kokkos::View("buffer", n_alive); + Kokkos::parallel_for( + "PopulateBufferAlive", + n_alive, + Lambda(index_t p) { buffer(p) = arr(indices_alive(p)); }); + + Kokkos::deep_copy( + Kokkos::subview(arr, std::make_pair(static_cast(0), n_alive)), + buffer); + } + + template + void RemoveDeadInArray(array_t& arr, const array_t& indices_alive) { + npart_t n_alive = indices_alive.extent(0); + auto buffer = array_t { "buffer", n_alive, arr.extent(1) }; + Kokkos::parallel_for( + "PopulateBufferAlive", + CreateRangePolicy({ 0, 0 }, { n_alive, arr.extent(1) }), + Lambda(index_t p, index_t l) { buffer(p, l) = arr(indices_alive(p), l); }); + + Kokkos::deep_copy( + Kokkos::subview(arr, + std::make_pair(static_cast(0), n_alive), + Kokkos::ALL), + buffer); + } + + template + void Particles::RemoveDead() { + npart_t n_alive = 0, n_dead = 0; + auto& this_tag = tag; + + Kokkos::parallel_reduce( + "CountDeadAlive", + rangeActiveParticles(), + Lambda(index_t p, npart_t & nalive, npart_t & ndead) { + nalive += (this_tag(p) == ParticleTag::alive); + ndead += (this_tag(p) == ParticleTag::dead); + if (this_tag(p) != ParticleTag::alive and this_tag(p) != ParticleTag::dead) { + raise::KernelError(HERE, "wrong particle tag"); + } + }, + n_alive, + n_dead); + + array_t indices_alive { "indices_alive", n_alive }; + array_t alive_counter { "counter_alive", 1 }; + + Kokkos::parallel_for( + "AliveIndices", + rangeActiveParticles(), + Lambda(index_t p) { + if (this_tag(p) == ParticleTag::alive) { + const auto idx = Kokkos::atomic_fetch_add(&alive_counter(0), 1); + indices_alive(idx) = p; + } + }); + + { + auto alive_counter_h = Kokkos::create_mirror_view(alive_counter); + Kokkos::deep_copy(alive_counter_h, alive_counter); + raise::ErrorIf(alive_counter_h(0) != n_alive, + "error in finding alive particle indices", + HERE); + } + + if constexpr (D == Dim::_1D or D == Dim::_2D or D == Dim::_3D) { + RemoveDeadInArray(i1, indices_alive); + RemoveDeadInArray(i1_prev, indices_alive); + RemoveDeadInArray(dx1, indices_alive); + RemoveDeadInArray(dx1_prev, indices_alive); + } + + if constexpr (D == Dim::_2D or D == Dim::_3D) { + RemoveDeadInArray(i2, indices_alive); + RemoveDeadInArray(i2_prev, indices_alive); + RemoveDeadInArray(dx2, indices_alive); + RemoveDeadInArray(dx2_prev, indices_alive); + } + + if constexpr (D == Dim::_3D) { + RemoveDeadInArray(i3, indices_alive); + RemoveDeadInArray(i3_prev, indices_alive); + RemoveDeadInArray(dx3, indices_alive); + RemoveDeadInArray(dx3_prev, indices_alive); + } + + RemoveDeadInArray(ux1, indices_alive); + RemoveDeadInArray(ux2, indices_alive); + RemoveDeadInArray(ux3, indices_alive); + RemoveDeadInArray(weight, indices_alive); + + if constexpr (D == Dim::_2D && C != Coord::Cart) { + RemoveDeadInArray(phi, indices_alive); + } + + if (npld_r() > 0) { + RemoveDeadInArray(pld_r, indices_alive); + } + + if (npld_i() > 0) { + RemoveDeadInArray(pld_i, indices_alive); + } + + Kokkos::Experimental::fill( + "TagAliveParticles", + Kokkos::DefaultExecutionSpace(), + Kokkos::subview(this_tag, std::make_pair(static_cast(0), n_alive)), + ParticleTag::alive); + + Kokkos::Experimental::fill( + "TagDeadParticles", + Kokkos::DefaultExecutionSpace(), + Kokkos::subview(this_tag, std::make_pair(n_alive, n_alive + n_dead)), + ParticleTag::dead); + + set_npart(n_alive); + m_is_sorted = true; + } + +#define PARTICLES_SORT(D, C) \ + template auto Particles::NpartsPerTagAndOffsets() const \ + -> std::pair, array_t>; \ + template void Particles::RemoveDead(); + + PARTICLES_SORT(Dim::_1D, Coord::Cart) + PARTICLES_SORT(Dim::_2D, Coord::Cart) + PARTICLES_SORT(Dim::_3D, Coord::Cart) + PARTICLES_SORT(Dim::_2D, Coord::Sph) + PARTICLES_SORT(Dim::_2D, Coord::Qsph) + PARTICLES_SORT(Dim::_3D, Coord::Sph) + PARTICLES_SORT(Dim::_3D, Coord::Qsph) +#undef PARTICLES_SORT + +} // namespace ntt diff --git a/src/framework/containers/species.h b/src/framework/containers/species.h index 8297dcdb..a537f260 100644 --- a/src/framework/containers/species.h +++ b/src/framework/containers/species.h @@ -32,6 +32,8 @@ namespace ntt { const float m_charge; // Max number of allocated particles for the species npart_t m_maxnpart; + // Toggle for spatial sorting + const timestep_t m_spatial_sorting_interval; // Pusher assigned for the species const ParticlePusherFlags m_particle_pusher_flags; @@ -56,6 +58,7 @@ namespace ntt { , m_mass { 0.0 } , m_charge { 0.0 } , m_maxnpart { 0 } + , m_spatial_sorting_interval { 0u } , m_particle_pusher_flags { ParticlePusher::NONE } , m_use_tracking { false } , m_radiative_drag_flags { RadiativeDrag::NONE } @@ -83,6 +86,7 @@ namespace ntt { float m, float ch, npart_t maxnpart, + timestep_t spatial_sorting_interval, ParticlePusherFlags particle_pusher_flags, bool use_tracking, RadiativeDragFlags radiative_drag_flags, @@ -94,6 +98,7 @@ namespace ntt { , m_mass { m } , m_charge { ch } , m_maxnpart { maxnpart } + , m_spatial_sorting_interval { spatial_sorting_interval } , m_particle_pusher_flags { particle_pusher_flags } , m_use_tracking { use_tracking } , m_radiative_drag_flags { radiative_drag_flags } @@ -146,6 +151,11 @@ namespace ntt { return m_maxnpart; } + [[nodiscard]] + auto spatial_sorting_interval() const -> timestep_t { + return m_spatial_sorting_interval; + } + [[nodiscard]] auto pusher() const -> ParticlePusherFlags { return m_particle_pusher_flags; @@ -186,6 +196,13 @@ namespace ntt { reporter::AddParam(report, 6, "Mass", "%.1f", mass()); reporter::AddParam(report, 6, "Charge", "%.1f", charge()); reporter::AddParam(report, 6, "Max #", "%d [per domain]", maxnpart()); + reporter::AddParam(report, + 6, + "Spatial sorting interval", + "%s", + spatial_sorting_interval() == 0u + ? "OFF" + : fmt::format("%d", spatial_sorting_interval()).c_str()); reporter::AddParam(report, 6, "Pusher", diff --git a/src/framework/domain/metadomain.h b/src/framework/domain/metadomain.h index 3319cafd..d04433fc 100644 --- a/src/framework/domain/metadomain.h +++ b/src/framework/domain/metadomain.h @@ -5,6 +5,10 @@ * - ntt::Metadomain<> * @cpp: * - metadomain.cpp + * - metadomain_comm.cpp + * - metadomain_chckpt.cpp + * - metadomain_io.cpp + * - metadomain_stats.cpp * @namespaces: * - ntt:: * @macros: @@ -92,10 +96,15 @@ namespace ntt { } } - void CommunicateFields(Domain&, CommTags); - void SynchronizeFields(Domain&, CommTags, const range_tuple_t& = { 0, 0 }); - void CommunicateParticles(Domain&); - void RemoveDeadParticles(Domain&); + void CommunicateFields(Domain&, CommTags) const; + void SynchronizeFields(Domain&, + CommTags, + const range_tuple_t& = { 0, 0 }) const; + void CommunicateParticles(Domain&) const; + void SortParticles(simtime_t, + timestep_t, + const SimulationParams&, + Domain&) const; /** * @param global_ndomains total number of domains @@ -181,6 +190,12 @@ namespace ntt { return &g_subdomains[idx]; } + [[nodiscard]] + auto subdomain_ptr(unsigned int idx) const -> const Domain* { + raise::ErrorIf(idx >= g_subdomains.size(), "subdomain_ptr() failed", HERE); + return &g_subdomains[idx]; + } + [[nodiscard]] auto mesh() const -> const Mesh& { return g_mesh; diff --git a/src/framework/domain/checkpoint.cpp b/src/framework/domain/metadomain_chckpt.cpp similarity index 100% rename from src/framework/domain/checkpoint.cpp rename to src/framework/domain/metadomain_chckpt.cpp diff --git a/src/framework/domain/communications.cpp b/src/framework/domain/metadomain_comm.cpp similarity index 95% rename from src/framework/domain/communications.cpp rename to src/framework/domain/metadomain_comm.cpp index 66ee4e06..45bf6101 100644 --- a/src/framework/domain/communications.cpp +++ b/src/framework/domain/metadomain_comm.cpp @@ -28,12 +28,12 @@ namespace ntt { using comm_params_t = std::pair>; template - auto GetSendRecvRanks( - Metadomain* metadomain, - Domain& domain, - dir::direction_t direction) -> std::pair { - Domain* send_to_nghbr_ptr = nullptr; - Domain* recv_from_nghbr_ptr = nullptr; + auto GetSendRecvRanks(const Metadomain* const metadomain, + Domain& domain, + dir::direction_t direction) + -> std::pair { + const Domain* send_to_nghbr_ptr = nullptr; + const Domain* recv_from_nghbr_ptr = nullptr; // set pointers to the correct send/recv domains // can coincide with the current domain if periodic if (domain.mesh.flds_bc_in(direction) == FldsBC::PERIODIC) { @@ -111,11 +111,11 @@ namespace ntt { } template - auto GetSendRecvParams( - Metadomain* metadomain, - Domain& domain, - dir::direction_t direction, - bool synchronize) -> std::pair { + auto GetSendRecvParams(const Metadomain* const metadomain, + Domain& domain, + dir::direction_t direction, + bool synchronize) + -> std::pair { const auto [send_indrank, recv_indrank] = GetSendRecvRanks(metadomain, domain, direction); const auto [send_ind, send_rank] = send_indrank; @@ -198,11 +198,8 @@ namespace ntt { template requires IsCompatibleWithMetadomain - void Metadomain::CommunicateFields(Domain& domain, CommTags tags) { - // const auto comm_fields = (tags & Comm::E) or (tags & Comm::B) or - // (tags & Comm::J) or (tags & Comm::D) or - // (tags & Comm::D0) or (tags & Comm::B0) or - // (tags & Comm::H); + void Metadomain::CommunicateFields(Domain& domain, + CommTags tags) const { const auto comm_em = ((S == SimEngine::SRPIC) and ((tags & Comm::E) or (tags & Comm::B))) or ((S == SimEngine::GRPIC) and @@ -419,9 +416,9 @@ namespace ntt { template requires IsCompatibleWithMetadomain - void Metadomain::SynchronizeFields(Domain& domain, - CommTags tags, - const range_tuple_t& components) { + void Metadomain::SynchronizeFields(Domain& domain, + CommTags tags, + const range_tuple_t& components) const { const bool comm_j = (tags & Comm::J); const bool comm_bckp = (tags & Comm::Bckp); const bool comm_buff = (tags & Comm::Buff); @@ -576,7 +573,7 @@ namespace ntt { template requires IsCompatibleWithMetadomain - void Metadomain::CommunicateParticles(Domain& domain) { + void Metadomain::CommunicateParticles(Domain& domain) const { #if defined(MPI_ENABLED) logger::Checkpoint("Communicating particles\n", HERE); for (auto& species : domain.species) { @@ -666,22 +663,14 @@ namespace ntt { #endif } - template - requires IsCompatibleWithMetadomain - void Metadomain::RemoveDeadParticles(Domain& domain) { - for (auto& species : domain.species) { - species.RemoveDead(); - } - } - #define METADOMAIN_COMM(S, M, D) \ template void Metadomain>::CommunicateFields(Domain>&, \ - CommTags); \ + CommTags) const; \ template void Metadomain>::SynchronizeFields(Domain>&, \ CommTags, \ - const range_tuple_t&); \ - template void Metadomain>::CommunicateParticles(Domain>&); \ - template void Metadomain>::RemoveDeadParticles(Domain>&); + const range_tuple_t&) \ + const; \ + template void Metadomain>::CommunicateParticles(Domain>&) const; NTT_FOREACH_SPECIALIZATION(METADOMAIN_COMM) diff --git a/src/framework/domain/output.cpp b/src/framework/domain/metadomain_io.cpp similarity index 100% rename from src/framework/domain/output.cpp rename to src/framework/domain/metadomain_io.cpp diff --git a/src/framework/domain/metadomain_sort.cpp b/src/framework/domain/metadomain_sort.cpp new file mode 100644 index 00000000..fcd81484 --- /dev/null +++ b/src/framework/domain/metadomain_sort.cpp @@ -0,0 +1,43 @@ +#include "enums.h" + +#include "framework/domain/metadomain.h" +#include "framework/parameters/parameters.h" +#include "framework/specialization_registry.h" + +#include + +namespace ntt { + + template + requires IsCompatibleWithMetadomain + void Metadomain::SortParticles(simtime_t, + timestep_t step, + const SimulationParams& params, + Domain& domain) const { + const auto clear_interval = params.template get( + "particles.clear_interval"); + if ((clear_interval > 0u) and (step % clear_interval == 0u) and (step > 0u)) { + for (auto& species : domain.species) { + species.RemoveDead(); + } + } + for (auto& species : domain.species) { + const auto spatial_sorting_interval = species.spatial_sorting_interval(); + if ((spatial_sorting_interval > 0u) and + (step % spatial_sorting_interval == 0u)) { + // ... + } + } + } + +#define METADOMAIN_COMM(S, M, D) \ + template void Metadomain>::SortParticles(simtime_t, \ + timestep_t, \ + const SimulationParams&, \ + Domain>&) const; + + NTT_FOREACH_SPECIALIZATION(METADOMAIN_COMM) + +#undef METADOMAIN_COMM + +} // namespace ntt diff --git a/src/framework/domain/stats.cpp b/src/framework/domain/metadomain_stats.cpp similarity index 100% rename from src/framework/domain/stats.cpp rename to src/framework/domain/metadomain_stats.cpp diff --git a/src/framework/parameters/parameters.cpp b/src/framework/parameters/parameters.cpp index 71f5d72e..abb8ecc5 100644 --- a/src/framework/parameters/parameters.cpp +++ b/src/framework/parameters/parameters.cpp @@ -7,7 +7,6 @@ #include "utils/error.h" #include "utils/formatting.h" #include "utils/numeric.h" -#include #include "framework/containers/species.h" #include "framework/parameters/algorithms.h" @@ -16,6 +15,8 @@ #include "framework/parameters/output.h" #include "framework/parameters/particles.h" +#include + #if defined(MPI_ENABLED) #include #endif @@ -62,6 +63,12 @@ namespace ntt { raise::ErrorIf(ppc0 <= 0.0, "ppc0 must be positive", HERE); set("particles.use_weights", toml::find_or(toml_data, "particles", "use_weights", false)); + const auto global_spatial_sorting_interval = toml::find_or( + toml_data, + "particles", + "spatial_sorting_interval", + 0u); + set("particles.spatial_sorting_interval", global_spatial_sorting_interval); set("scales.n0", ppc0 / get("scales.V0")); set("scales.q0", get("scales.V0") / (ppc0 * SQR(skindepth0))); @@ -76,7 +83,12 @@ namespace ntt { spidx_t idx = 1; for (const auto& sp : species_tab) { - species.emplace_back(params::GetParticleSpecies(this, engine_enum, idx, sp)); + species.emplace_back( + params::GetParticleSpecies(this, + engine_enum, + idx, + sp, + global_spatial_sorting_interval)); idx += 1; } set("particles.species", species); @@ -133,6 +145,7 @@ namespace ntt { particle_species.mass(), particle_species.charge(), maxnpart, + particle_species.spatial_sorting_interval(), particle_species.pusher(), particle_species.use_tracking(), particle_species.radiative_drag_flags(), diff --git a/src/framework/parameters/parameters.h b/src/framework/parameters/parameters.h index 175f3025..a9eadc04 100644 --- a/src/framework/parameters/parameters.h +++ b/src/framework/parameters/parameters.h @@ -18,6 +18,7 @@ #define FRAMEWORK_PARAMETERS_PARAMETERS_H #include "utils/param_container.h" + #include #include diff --git a/src/framework/parameters/particles.cpp b/src/framework/parameters/particles.cpp index 09572754..856955bc 100644 --- a/src/framework/parameters/particles.cpp +++ b/src/framework/parameters/particles.cpp @@ -6,11 +6,12 @@ #include "utils/error.h" #include "utils/formatting.h" -#include #include "framework/containers/species.h" #include "framework/parameters/parameters.h" +#include + #include namespace ntt { @@ -96,7 +97,9 @@ namespace ntt { auto GetParticleSpecies(SimulationParams* params, const SimEngine& engine_enum, spidx_t idx, - const toml::value& sp) -> ParticleSpecies { + const toml::value& sp, + timestep_t global_spatial_sorting_interval) + -> ParticleSpecies { const auto label = toml::find_or(sp, "label", "s" + std::to_string(idx)); @@ -105,11 +108,15 @@ namespace ntt { raise::ErrorIf((charge != 0.0f) && (mass == 0.0f), "mass of the charged species must be non-zero", HERE); - const auto is_massless = (mass == 0.0f) && (charge == 0.0f); - const auto def_pusher = (is_massless ? defaults::ph_pusher - : defaults::em_pusher); - const auto maxnpart_real = toml::find(sp, "maxnpart"); - const auto maxnpart = static_cast(maxnpart_real); + const auto is_massless = (mass == 0.0f) && (charge == 0.0f); + const auto def_pusher = (is_massless ? defaults::ph_pusher + : defaults::em_pusher); + const auto maxnpart_real = toml::find(sp, "maxnpart"); + const auto maxnpart = static_cast(maxnpart_real); + const auto spatial_sorting_interval = toml::find_or( + sp, + "spatial_sorting_interval", + global_spatial_sorting_interval); auto pusher_str = toml::find_or(sp, "pusher", std::string(def_pusher)); const auto npayloads_real = toml::find_or(sp, "n_payloads_real", @@ -199,6 +206,7 @@ namespace ntt { mass, charge, maxnpart, + spatial_sorting_interval, particle_pusher_flags, use_tracking, radiative_drag_flags, diff --git a/src/framework/parameters/particles.h b/src/framework/parameters/particles.h index 8f5880e5..a6f0ec6b 100644 --- a/src/framework/parameters/particles.h +++ b/src/framework/parameters/particles.h @@ -13,11 +13,11 @@ #include "enums.h" -#include - #include "framework/containers/species.h" #include "framework/parameters/parameters.h" +#include + namespace ntt { namespace params { @@ -25,7 +25,8 @@ namespace ntt { auto GetParticleSpecies(SimulationParams*, const SimEngine&, spidx_t, - const toml::value&) -> ParticleSpecies; + const toml::value&, + timestep_t) -> ParticleSpecies; } // namespace params diff --git a/src/framework/tests/comm_nompi.cpp b/src/framework/tests/comm_nompi.cpp deleted file mode 100644 index c7646ef0..00000000 --- a/src/framework/tests/comm_nompi.cpp +++ /dev/null @@ -1,148 +0,0 @@ -#include "framework/domain/comm_nompi.hpp" - -#include "enums.h" -#include "global.h" - -#include "arch/directions.h" -#include "arch/kokkos_aliases.h" -#include "utils/numeric.h" - -#include -#include - -using namespace ntt; - -auto main(int argc, char* argv[]) -> int { - Kokkos::initialize(argc, argv); - - try { - const std::size_t nx1 = 15, nx2 = 15; - ndfield_t fld { "fld", nx1 + 2 * N_GHOSTS, nx2 + 2 * N_GHOSTS }; - ndfield_t buff { "buff", nx1 + 2 * N_GHOSTS, nx2 + 2 * N_GHOSTS }; - - Kokkos::parallel_for( - "Fill", - CreateRangePolicy({ 0, 0 }, - { nx1 + 2 * N_GHOSTS, nx2 + 2 * N_GHOSTS }), - Lambda(index_t i1, index_t i2) { - if ((i1 >= 2 * N_GHOSTS) and (i1 < nx1) and (i2 >= 2 * N_GHOSTS) and - (i2 < nx2)) { - fld(i1, i2, 0) = 4.0; - fld(i1, i2, 1) = 12.0; - fld(i1, i2, 2) = 20.0; - } else if ( - ((i1 < 2 * N_GHOSTS or i1 >= nx1) and (i2 >= 2 * N_GHOSTS and i2 < nx2)) or - ((i2 < 2 * N_GHOSTS or i2 >= nx2) and (i1 >= 2 * N_GHOSTS and i1 < nx1))) { - fld(i1, i2, 0) = 2.0; - fld(i1, i2, 1) = 6.0; - fld(i1, i2, 2) = 10.0; - } else { - fld(i1, i2, 0) = 1.0; - fld(i1, i2, 1) = 3.0; - fld(i1, i2, 2) = 5.0; - } - }); - Kokkos::deep_copy(buff, ZERO); - - const auto send_slice = std::vector { - { nx1 + N_GHOSTS, nx1 + 2 * N_GHOSTS }, - { nx2 + N_GHOSTS, nx2 + 2 * N_GHOSTS } - }; - const auto recv_slice = std::vector { - { N_GHOSTS, 2 * N_GHOSTS }, - { N_GHOSTS, 2 * N_GHOSTS } - }; - const auto comp_slice = range_tuple_t(cur::jx1, cur::jx3 + 1); - - const auto i_min = [](in c) { - switch (c) { - case in::x1: - return (std::size_t)N_GHOSTS; - case in::x2: - return (std::size_t)N_GHOSTS; - case in::x3: - return (std::size_t)N_GHOSTS; - } - return (std::size_t)0; - }; - const auto i_max = [](in c) { - switch (c) { - case in::x1: - return nx1 + N_GHOSTS; - case in::x2: - return nx2 + N_GHOSTS; - case in::x3: - return (std::size_t)0; - } - return (std::size_t)0; - }; - const in components[] = { in::x1, in::x2, in::x3 }; - for (auto& direction : dir::Directions::all) { - auto send_slice = std::vector {}; - auto recv_slice = std::vector {}; - for (std::size_t d { 0 }; d < direction.size(); ++d) { - const auto c = components[d]; - const auto dir = direction[d]; - if (dir == 0) { - send_slice.emplace_back(i_min(c) - N_GHOSTS, i_max(c) + N_GHOSTS); - } else if (dir == 1) { - send_slice.emplace_back(i_max(c) - N_GHOSTS, i_max(c) + N_GHOSTS); - } else { - send_slice.emplace_back(i_min(c) - N_GHOSTS, i_min(c) + N_GHOSTS); - } - if (-dir == 0) { - recv_slice.emplace_back(i_min(c) - N_GHOSTS, i_max(c) + N_GHOSTS); - } else if (-dir == 1) { - recv_slice.emplace_back(i_max(c) - N_GHOSTS, i_max(c) + N_GHOSTS); - } else { - recv_slice.emplace_back(i_min(c) - N_GHOSTS, i_min(c) + N_GHOSTS); - } - } - comm::CommunicateField((unsigned int)0, - fld, - buff, - 0, - 0, - 0, - 0, - send_slice, - recv_slice, - comp_slice, - true); - } - // add buffers - Kokkos::parallel_for( - "Fill", - CreateRangePolicy({ 0, 0 }, - { nx1 + 2 * N_GHOSTS, nx2 + 2 * N_GHOSTS }), - Lambda(index_t i1, index_t i2) { - for (auto k { 0 }; k < 3; ++k) { - fld(i1, i2, k) += buff(i1, i2, k); - } - }); - - // check - Kokkos::parallel_for( - "Fill", - CreateRangePolicy({ 0, 0 }, - { nx1 + 2 * N_GHOSTS, nx2 + 2 * N_GHOSTS }), - Lambda(index_t i1, index_t i2) { - if (fld(i1, i2, 0) != 4.0) { - raise::KernelError(HERE, "fld0 wrong after comm"); - } - if (fld(i1, i2, 1) != 12.0) { - raise::KernelError(HERE, "fld1 wrong after comm"); - } - if (fld(i1, i2, 2) != 20.0) { - raise::KernelError(HERE, "fld2 wrong after comm"); - } - }); - } catch (std::exception& e) { - std::cerr << "Exception: " << e.what() << std::endl; - Kokkos::finalize(); - return 1; - } - - Kokkos::finalize(); - return 0; -} diff --git a/src/global/global.h b/src/global/global.h index 45da4f04..4aa98f5f 100644 --- a/src/global/global.h +++ b/src/global/global.h @@ -232,15 +232,15 @@ typedef int PrepareOutputFlags; namespace Timer { enum TimerFlags_ { - None = 0, - PrintTotal = 1 << 0, - PrintTitle = 1 << 1, - AutoConvert = 1 << 2, - PrintOutput = 1 << 3, - PrintPrtlClear = 1 << 4, - PrintCheckpoint = 1 << 5, - PrintNormed = 1 << 6, - Default = PrintNormed | PrintTotal | PrintTitle | AutoConvert, + None = 0, + PrintTotal = 1 << 0, + PrintTitle = 1 << 1, + AutoConvert = 1 << 2, + PrintOutput = 1 << 3, + PrintParticleSort = 1 << 4, + PrintCheckpoint = 1 << 5, + PrintNormed = 1 << 6, + Default = PrintNormed | PrintTotal | PrintTitle | AutoConvert, }; } // namespace Timer diff --git a/src/global/utils/diag.cpp b/src/global/utils/diag.cpp index f5f9338d..173584bb 100644 --- a/src/global/utils/diag.cpp +++ b/src/global/utils/diag.cpp @@ -22,9 +22,8 @@ #include namespace diag { - auto npart_stats( - npart_t npart, - npart_t maxnpart) -> std::vector> { + auto npart_stats(npart_t npart, npart_t maxnpart) + -> std::vector> { auto stats = std::vector>(); const auto percentage = [](npart_t part, npart_t maxpart) -> unsigned short { return static_cast( @@ -96,7 +95,7 @@ namespace diag { diag_flags ^= Diag::Species; } if (print_prtl_clear) { - timer_flags |= Timer::PrintPrtlClear; + timer_flags |= Timer::PrintParticleSort; } if (print_output) { timer_flags |= Timer::PrintOutput; diff --git a/src/global/utils/diag.h b/src/global/utils/diag.h index 6d3c5937..266c9902 100644 --- a/src/global/utils/diag.h +++ b/src/global/utils/diag.h @@ -34,7 +34,7 @@ namespace diag { * @param species_labels (vector of particle labels) * @param npart (per each species) * @param maxnpart (per each species) - * @param prtlclear (if true, dead particles were removed) + * @param particlesort (if true, dead particles were removed) * @param output (if true, output was written) * @param checkpoint (if true, checkpoint was written) * @param colorful_print (if true, print with colors) diff --git a/src/global/utils/timer.cpp b/src/global/utils/timer.cpp index 19c7c914..89538084 100644 --- a/src/global/utils/timer.cpp +++ b/src/global/utils/timer.cpp @@ -130,10 +130,9 @@ namespace timer { return timer_stats; } - auto Timers::printAll(TimerFlags flags, - npart_t npart, - ncells_t ncells) const -> std::string { - const std::vector extras { "PrtlClear", "Output", "Checkpoint" }; + auto Timers::printAll(TimerFlags flags, npart_t npart, ncells_t ncells) const + -> std::string { + const std::vector extras { "ParticleSort", "Output", "Checkpoint" }; const auto stats = gather(extras, npart, ncells); if (stats.empty()) { return ""; @@ -256,8 +255,8 @@ namespace timer { } } - // print extra timers for output/checkpoint/prtlClear - const std::vector extras_f { Timer::PrintPrtlClear, + // print extra timers for output/checkpoint/particleSort + const std::vector extras_f { Timer::PrintParticleSort, Timer::PrintOutput, Timer::PrintCheckpoint }; for (auto i { 0u }; i < extras.size(); ++i) { From 4de11eb9b346f6852629dd538c9eb5571b1cc827 Mon Sep 17 00:00:00 2001 From: haykh Date: Tue, 24 Feb 2026 17:37:02 -0500 Subject: [PATCH 3/3] particle spatial sorting implemented + unit test --- src/framework/containers/particles.h | 7 + src/framework/containers/particles_sort.cpp | 160 ++++++++++- src/framework/domain/metadomain_sort.cpp | 2 +- src/framework/tests/CMakeLists.txt | 2 + src/framework/tests/parameters.cpp | 279 +++++++++++--------- src/framework/tests/particles_sort.cpp | 204 ++++++++++++++ src/global/arch/kokkos_aliases.h | 39 +-- 7 files changed, 544 insertions(+), 149 deletions(-) create mode 100644 src/framework/tests/particles_sort.cpp diff --git a/src/framework/containers/particles.h b/src/framework/containers/particles.h index 8c38fbf3..33607eac 100644 --- a/src/framework/containers/particles.h +++ b/src/framework/containers/particles.h @@ -24,6 +24,7 @@ #include "utils/formatting.h" #include "framework/containers/species.h" +#include "framework/domain/grid.h" #include @@ -255,6 +256,12 @@ namespace ntt { */ void RemoveDead(); + /** + * @brief Sort particles spatially by their cell indices + * @param grid The grid object to get the cell information for sorting + */ + void SortSpatially(const Grid&); + /** * @brief Copy particle data from device to host. */ diff --git a/src/framework/containers/particles_sort.cpp b/src/framework/containers/particles_sort.cpp index 081b9c4d..84ec2f7d 100644 --- a/src/framework/containers/particles_sort.cpp +++ b/src/framework/containers/particles_sort.cpp @@ -4,8 +4,10 @@ #include "arch/kokkos_aliases.h" #include "framework/containers/particles.h" +#include "framework/domain/grid.h" #include +#include #include #include @@ -179,10 +181,86 @@ namespace ntt { m_is_sorted = true; } + template + void Particles::SortSpatially(const Grid& grid) { + const auto& tag_p = tag; + const auto& i1_p = i1; + const auto& i2_p = i2; + const auto& i3_p = i3; + + const auto nx1 = grid.n_active(in::x1); + const auto nx2 = grid.n_active(in::x2); + const auto nx3 = grid.n_active(in::x3); + const auto total_cells = grid.num_active(); + + array_t cell_indices { "cell_indices", npart() }; + + Kokkos::parallel_for( + "FillCellIndices", + rangeActiveParticles(), + Lambda(index_t p) { + if (tag_p(p) != ParticleTag::alive) { + cell_indices(p) = total_cells + 1u; + } else { + if constexpr (D == Dim::_1D) { + cell_indices(p) = i1_p(p); + } else if constexpr (D == Dim::_2D) { + cell_indices(p) = i1_p(p) * nx2 + i2_p(p); + } else if constexpr (D == Dim::_3D) { + cell_indices(p) = (i1_p(p) * nx2 + i2_p(p)) * nx3 + i3_p(p); + } else { + raise::KernelError(HERE, "Wrong D in SortSpatially"); + } + } + }); + const auto slice = range_tuple_t(0, npart()); + + using sorter_op_t = Kokkos::BinOp1D; + using sorter_t = Kokkos::BinSort; + auto bin_op = sorter_op_t { static_cast(total_cells + 1u), + 0u, + total_cells + 1u }; + auto sorter = sorter_t { cell_indices, bin_op, false }; + sorter.create_permute_vector(); + if constexpr (D == Dim::_1D or D == Dim::_2D or D == Dim::_3D) { + sorter.sort(Kokkos::subview(i1, slice)); + sorter.sort(Kokkos::subview(i1_prev, slice)); + sorter.sort(Kokkos::subview(dx1, slice)); + sorter.sort(Kokkos::subview(dx1_prev, slice)); + } + if constexpr (D == Dim::_2D or D == Dim::_3D) { + sorter.sort(Kokkos::subview(i2, slice)); + sorter.sort(Kokkos::subview(i2_prev, slice)); + sorter.sort(Kokkos::subview(dx2, slice)); + sorter.sort(Kokkos::subview(dx2_prev, slice)); + } + if constexpr (D == Dim::_3D) { + sorter.sort(Kokkos::subview(i3, slice)); + sorter.sort(Kokkos::subview(i3_prev, slice)); + sorter.sort(Kokkos::subview(dx3, slice)); + sorter.sort(Kokkos::subview(dx3_prev, slice)); + } + sorter.sort(Kokkos::subview(ux1, slice)); + sorter.sort(Kokkos::subview(ux2, slice)); + sorter.sort(Kokkos::subview(ux3, slice)); + sorter.sort(Kokkos::subview(weight, slice)); + sorter.sort(Kokkos::subview(tag, slice)); + if constexpr (D == Dim::_2D and C != Coord::Cart) { + sorter.sort(Kokkos::subview(phi, slice)); + } + for (auto pldr { 0u }; pldr < npld_r(); ++pldr) { + sorter.sort(Kokkos::subview(pld_r, slice, pldr)); + } + for (auto pldi { 0u }; pldi < npld_i(); ++pldi) { + sorter.sort(Kokkos::subview(pld_i, slice, pldi)); + } + } + #define PARTICLES_SORT(D, C) \ template auto Particles::NpartsPerTagAndOffsets() const \ -> std::pair, array_t>; \ - template void Particles::RemoveDead(); + template void Particles::RemoveDead(); \ + template void Particles::SortSpatially(const Grid&); PARTICLES_SORT(Dim::_1D, Coord::Cart) PARTICLES_SORT(Dim::_2D, Coord::Cart) @@ -194,3 +272,83 @@ namespace ntt { #undef PARTICLES_SORT } // namespace ntt + +// template +// void AllocateArrayOnGrid(nddata_t& arr, +// const Grid& grid, +// const std::string& name) { +// if constexpr (D == Dim::_1D) { +// arr = nddata_t { name, grid.n_active(in::x1) }; +// } else if constexpr (D == Dim::_2D) { +// arr = nddata_t { name, grid.n_active(in::x1), grid.n_active(in::x2) }; +// } else if constexpr (D == Dim::_3D) { +// arr = nddata_t { name, +// grid.n_active(in::x1), +// grid.n_active(in::x2), +// grid.n_active(in::x3) }; +// } else { +// raise::Error("Unsupported dimension for array allocation", HERE); +// } +// } +// +// +// array_t cell_idx { "cell_indices", npart() }; +// const auto num_cells = grid.num_active(); +// +// nddata_t num_ppc; +// nddata_t disp_map; +// AllocateArrayOnGrid(num_ppc, grid, "num_ppc"); +// AllocateArrayOnGrid(disp_map, grid, "disp_map"); +// auto num_ppc_scatter = +// Kokkos::Experimental::create_scatter_view(num_ppc); Kokkos::parallel_for( +// "ComputeNumPPC", +// rangeActiveParticles(), +// Lambda(index_t p) { +// if (tag_p(p) != ParticleTag::alive) { +// return; +// } +// auto num_ppc_acc = num_ppc_scatter.access(); +// if constexpr (D == Dim::_1D) { +// num_ppc_acc(i1_p(p)) += 1u; +// } else if constexpr (D == Dim::_2D) { +// num_ppc_acc(i1_p(p), i2_p(p)) += 1u; +// } else { +// num_ppc_acc(i1_p(p), i2_p(p), i3_p(p)) += 1u; +// } +// }); +// Kokkos::Experimental::contribute(num_ppc, num_ppc_scatter); +// +// npart_t total_sum = 0u; +// Kokkos::parallel_scan( +// "ComputeDisplacementMap", +// total_cells, +// Lambda(index_t cell, npart_t & cumulative_sum, bool is_final) { +// ncells_t i1, i2, i3; +// if constexpr (D == Dim::_1D) { +// i1 = cell; +// } else if constexpr (D == Dim::_2D) { +// i1 = cell / nx2; +// i2 = cell % nx2; +// } else { +// i1 = cell / (nx2 * nx3); +// i2 = (cell % (nx2 * nx3)) / nx3; +// i3 = cell % nx3; +// } +// if (is_final) { +// if constexpr (D == Dim::_1D) { +// disp_map(i1) = cumulative_sum; +// } else if constexpr (D == Dim::_2D) { +// disp_map(i1, i2) = cumulative_sum; +// } else { +// disp_map(i1, i2, i3) = cumulative_sum; +// } +// } +// if constexpr (D == Dim::_1D) { +// cumulative_sum += num_ppc(i1); +// } else if constexpr (D == Dim::_2D) { +// cumulative_sum += num_ppc(i1, i2); +// } else { +// cumulative_sum += num_ppc(i1, i2, i3); +// } +// }, +// total_sum); diff --git a/src/framework/domain/metadomain_sort.cpp b/src/framework/domain/metadomain_sort.cpp index fcd81484..c4c829b9 100644 --- a/src/framework/domain/metadomain_sort.cpp +++ b/src/framework/domain/metadomain_sort.cpp @@ -25,7 +25,7 @@ namespace ntt { const auto spatial_sorting_interval = species.spatial_sorting_interval(); if ((spatial_sorting_interval > 0u) and (step % spatial_sorting_interval == 0u)) { - // ... + species.SortSpatially(domain.mesh); } } } diff --git a/src/framework/tests/CMakeLists.txt b/src/framework/tests/CMakeLists.txt index 3c6be0f6..090985e1 100644 --- a/src/framework/tests/CMakeLists.txt +++ b/src/framework/tests/CMakeLists.txt @@ -38,8 +38,10 @@ endfunction() gen_test(parameters false) if(${mpi}) gen_test(particles true) + gen_test(particles_sort true) else() gen_test(particles false) + gen_test(particles_sort false) endif() gen_test(fields false) diff --git a/src/framework/tests/parameters.cpp b/src/framework/tests/parameters.cpp index 238db11e..1acf4262 100644 --- a/src/framework/tests/parameters.cpp +++ b/src/framework/tests/parameters.cpp @@ -59,6 +59,7 @@ const auto mink_1d = u8R"( [particles] ppc0 = 10.0 clear_interval = 100 + spatial_sorting_interval = 10 [[particles.species]] label = "e-" @@ -68,6 +69,7 @@ const auto mink_1d = u8R"( pusher = "boris" n_payloads_real = 3 tracking = true + spatial_sorting_interval = 100 [[particles.species]] label = "p+" @@ -175,6 +177,7 @@ const auto sph_2d = u8R"( pusher = "boris,gca" radiative_drag = "synchrotron" n_payloads_int = 2 + spatial_sorting_interval = 123 [[particles.species]] label = "ph" @@ -268,135 +271,146 @@ auto main(int argc, char* argv[]) -> int { try { using namespace ntt; - // { - // auto params_mink_1d = SimulationParams(); - // params_mink_1d.setImmutableParams(mink_1d); - // params_mink_1d.setMutableParams(mink_1d); - // params_mink_1d.setSetupParams(mink_1d); - // params_mink_1d.checkPromises(); - // - // assert_equal(params_mink_1d.get("grid.metric.metric"), - // Metric::Minkowski, - // "grid.metric.metric"); - // // engine - // assert_equal( - // params_mink_1d.get("simulation.engine"), - // SimEngine::SRPIC, - // "simulation.engine"); - // - // assert_equal(params_mink_1d.get("scales.dx0"), - // (real_t)0.0078125, - // "scales.dx0"); - // assert_equal(params_mink_1d.get("scales.V0"), - // (real_t)0.0078125, - // "scales.V0"); - // boundaries_t fbc = { - // { FldsBC::MATCH, FldsBC::MATCH } - // }; - // assert_equal( - // params_mink_1d.get>("grid.boundaries.fields")[0].first, - // fbc[0].first, - // "grid.boundaries.fields[0].first"); - // assert_equal( - // params_mink_1d.get>("grid.boundaries.fields")[0].second, - // fbc[0].second, - // "grid.boundaries.fields[0].second"); - // assert_equal( - // params_mink_1d.get>("grid.boundaries.fields").size(), - // fbc.size(), - // "grid.boundaries.fields.size()"); - // assert_equal( - // params_mink_1d.get>("grid.boundaries.match.ds")[0].first, - // (real_t)0.025, - // "grid.boundaries.match.ds[0].first"); - // assert_equal( - // params_mink_1d.get>("grid.boundaries.match.ds")[0].second, - // (real_t)0.1, - // "grid.boundaries.match.ds[0].first"); - // - // const auto species = params_mink_1d.get>( - // "particles.species"); - // assert_equal(species[0].label(), "e-", "species[0].label"); - // assert_equal(species[0].mass(), 1.0f, "species[0].mass"); - // assert_equal(species[0].charge(), -1.0f, "species[0].charge"); - // assert_equal(species[0].maxnpart(), 100, "species[0].maxnpart"); - // assert_equal(species[0].pusher(), - // ParticlePusher::BORIS, - // "species[0].pusher"); - // assert_equal(species[0].npld_r(), 3, "species[0].npld_r"); - // assert_equal(species[0].npld_i(), 1, "species[0].npld_i"); - // assert_equal(species[0].use_tracking(), true, "species[0].tracking"); - // - // assert_equal(species[1].label(), "p+", "species[1].label"); - // assert_equal(species[1].mass(), 1.0f, "species[1].mass"); - // assert_equal(species[1].charge(), 200.0f, "species[1].charge"); - // assert_equal(species[1].maxnpart(), 100, "species[1].maxnpart"); - // assert_equal(species[1].pusher(), - // ParticlePusher::VAY, - // "species[1].pusher"); - // assert_equal(species[1].npld_r(), 0, "species[1].npld_r"); - // - // assert_equal(params_mink_1d.get("setup.myfloat"), - // (real_t)(1e-2), - // "setup.myfloat"); - // assert_equal(params_mink_1d.get("setup.myint"), - // (int)(123), - // "setup.myint"); - // assert_equal(params_mink_1d.get("setup.mybool"), - // true, - // "setup.mybool"); - // const auto myarr = params_mink_1d.get>("setup.myarr"); - // assert_equal(myarr[0], 1.0, "setup.myarr[0]"); - // assert_equal(myarr[1], 2.0, "setup.myarr[1]"); - // assert_equal(myarr[2], 3.0, "setup.myarr[2]"); - // assert_equal(params_mink_1d.get("setup.mystr"), - // "hi", - // "setup.mystr"); - // - // const auto output_stride = params_mink_1d.get>( - // "output.fields.downsampling"); - // assert_equal(output_stride.size(), - // 1, - // "output.fields.downsampling.size()"); - // assert_equal(output_stride[0], 4, "output.fields.downsampling[0]"); - // - // assert_equal( - // params_mink_1d.get("algorithms.fieldsolver.delta_x"), - // (real_t)(1.0), - // "algorithms.fieldsolver.delta_x"); - // assert_equal( - // params_mink_1d.get("algorithms.fieldsolver.delta_y"), - // (real_t)(2.0), - // "algorithms.fieldsolver.delta_y"); - // assert_equal( - // params_mink_1d.get("algorithms.fieldsolver.delta_z"), - // (real_t)(3.0), - // "algorithms.fieldsolver.delta_z"); - // assert_equal( - // params_mink_1d.get("algorithms.fieldsolver.beta_xy"), - // (real_t)(4.0), - // "algorithms.fieldsolver.beta_xy"); - // assert_equal( - // params_mink_1d.get("algorithms.fieldsolver.beta_yx"), - // (real_t)(5.0), - // "algorithms.fieldsolver.beta_yx"); - // assert_equal( - // params_mink_1d.get("algorithms.fieldsolver.beta_xz"), - // (real_t)(6.0), - // "algorithms.fieldsolver.beta_xz"); - // assert_equal( - // params_mink_1d.get("algorithms.fieldsolver.beta_zx"), - // (real_t)(7.0), - // "algorithms.fieldsolver.beta_zx"); - // assert_equal( - // params_mink_1d.get("algorithms.fieldsolver.beta_yz"), - // (real_t)(8.0), - // "algorithms.fieldsolver.beta_yz"); - // assert_equal( - // params_mink_1d.get("algorithms.fieldsolver.beta_zy"), - // (real_t)(9.0), - // "algorithms.fieldsolver.beta_zy"); - // } + { + auto params_mink_1d = SimulationParams(); + params_mink_1d.setImmutableParams(mink_1d); + params_mink_1d.setMutableParams(mink_1d); + params_mink_1d.setSetupParams(mink_1d); + params_mink_1d.checkPromises(); + + assert_equal(params_mink_1d.get("grid.metric.metric"), + Metric::Minkowski, + "grid.metric.metric"); + // engine + assert_equal( + params_mink_1d.get("simulation.engine"), + SimEngine::SRPIC, + "simulation.engine"); + + assert_equal(params_mink_1d.get("scales.dx0"), + (real_t)0.0078125, + "scales.dx0"); + assert_equal(params_mink_1d.get("scales.V0"), + (real_t)0.0078125, + "scales.V0"); + boundaries_t fbc = { + { FldsBC::MATCH, FldsBC::MATCH } + }; + assert_equal( + params_mink_1d.get>("grid.boundaries.fields")[0].first, + fbc[0].first, + "grid.boundaries.fields[0].first"); + assert_equal( + params_mink_1d.get>("grid.boundaries.fields")[0].second, + fbc[0].second, + "grid.boundaries.fields[0].second"); + assert_equal( + params_mink_1d.get>("grid.boundaries.fields").size(), + fbc.size(), + "grid.boundaries.fields.size()"); + assert_equal( + params_mink_1d.get>("grid.boundaries.match.ds")[0].first, + (real_t)0.025, + "grid.boundaries.match.ds[0].first"); + assert_equal( + params_mink_1d.get>("grid.boundaries.match.ds")[0].second, + (real_t)0.1, + "grid.boundaries.match.ds[0].first"); + + const auto species = params_mink_1d.get>( + "particles.species"); + assert_equal(species[0].label(), "e-", "species[0].label"); + assert_equal(species[0].mass(), 1.0f, "species[0].mass"); + assert_equal(species[0].charge(), -1.0f, "species[0].charge"); + assert_equal(species[0].maxnpart(), 100, "species[0].maxnpart"); + assert_equal(species[0].spatial_sorting_interval(), + static_cast(100u), + "species[0].spatial_sorting_interval"); + assert_equal(species[0].pusher(), + ParticlePusher::BORIS, + "species[0].pusher"); + assert_equal(species[0].npld_r(), 3, "species[0].npld_r"); +#if defined(MPI_ENABLED) + const auto npld_i = 2u; +#else + const auto npld_i = 1u; +#endif + assert_equal(species[0].npld_i(), npld_i, "species[0].npld_i"); + assert_equal(species[0].use_tracking(), true, "species[0].tracking"); + + assert_equal(species[1].label(), "p+", "species[1].label"); + assert_equal(species[1].mass(), 1.0f, "species[1].mass"); + assert_equal(species[1].charge(), 200.0f, "species[1].charge"); + assert_equal(species[1].maxnpart(), 100, "species[1].maxnpart"); + assert_equal(species[1].spatial_sorting_interval(), + static_cast(10u), + "species[1].spatial_sorting_interval"); + assert_equal(species[1].pusher(), + ParticlePusher::VAY, + "species[1].pusher"); + assert_equal(species[1].npld_r(), 0, "species[1].npld_r"); + + assert_equal(params_mink_1d.get("setup.myfloat"), + (real_t)(1e-2), + "setup.myfloat"); + assert_equal(params_mink_1d.get("setup.myint"), + (int)(123), + "setup.myint"); + assert_equal(params_mink_1d.get("setup.mybool"), + true, + "setup.mybool"); + const auto myarr = params_mink_1d.get>("setup.myarr"); + assert_equal(myarr[0], 1.0, "setup.myarr[0]"); + assert_equal(myarr[1], 2.0, "setup.myarr[1]"); + assert_equal(myarr[2], 3.0, "setup.myarr[2]"); + assert_equal(params_mink_1d.get("setup.mystr"), + "hi", + "setup.mystr"); + + const auto output_stride = params_mink_1d.get>( + "output.fields.downsampling"); + assert_equal(output_stride.size(), + 1, + "output.fields.downsampling.size()"); + assert_equal(output_stride[0], 4, "output.fields.downsampling[0]"); + + assert_equal( + params_mink_1d.get("algorithms.fieldsolver.delta_x"), + (real_t)(1.0), + "algorithms.fieldsolver.delta_x"); + assert_equal( + params_mink_1d.get("algorithms.fieldsolver.delta_y"), + (real_t)(2.0), + "algorithms.fieldsolver.delta_y"); + assert_equal( + params_mink_1d.get("algorithms.fieldsolver.delta_z"), + (real_t)(3.0), + "algorithms.fieldsolver.delta_z"); + assert_equal( + params_mink_1d.get("algorithms.fieldsolver.beta_xy"), + (real_t)(4.0), + "algorithms.fieldsolver.beta_xy"); + assert_equal( + params_mink_1d.get("algorithms.fieldsolver.beta_yx"), + (real_t)(5.0), + "algorithms.fieldsolver.beta_yx"); + assert_equal( + params_mink_1d.get("algorithms.fieldsolver.beta_xz"), + (real_t)(6.0), + "algorithms.fieldsolver.beta_xz"); + assert_equal( + params_mink_1d.get("algorithms.fieldsolver.beta_zx"), + (real_t)(7.0), + "algorithms.fieldsolver.beta_zx"); + assert_equal( + params_mink_1d.get("algorithms.fieldsolver.beta_yz"), + (real_t)(8.0), + "algorithms.fieldsolver.beta_yz"); + assert_equal( + params_mink_1d.get("algorithms.fieldsolver.beta_zy"), + (real_t)(9.0), + "algorithms.fieldsolver.beta_zy"); + } { auto params_sph_2d = SimulationParams(); @@ -500,6 +514,10 @@ auto main(int argc, char* argv[]) -> int { assert_equal(species[0].mass(), 1.0f, "species[0].mass"); assert_equal(species[0].charge(), -1.0f, "species[0].charge"); assert_equal(species[0].maxnpart(), 100, "species[0].maxnpart"); + assert_equal(species[0].spatial_sorting_interval(), + static_cast(0u), + "species[0].spatial_sorting_interval"); + assert_equal(species[0].pusher(), ParticlePusher::BORIS | ParticlePusher::GCA, "species[0].pusher"); @@ -517,6 +535,9 @@ auto main(int argc, char* argv[]) -> int { assert_equal(species[1].mass(), 1.0f, "species[1].mass"); assert_equal(species[1].charge(), 1.0f, "species[1].charge"); assert_equal(species[1].maxnpart(), 100, "species[1].maxnpart"); + assert_equal(species[1].spatial_sorting_interval(), + static_cast(123u), + "species[1].spatial_sorting_interval"); assert_equal(species[1].pusher(), ParticlePusher::BORIS | ParticlePusher::GCA, "species[1].pusher"); diff --git a/src/framework/tests/particles_sort.cpp b/src/framework/tests/particles_sort.cpp new file mode 100644 index 00000000..917c806a --- /dev/null +++ b/src/framework/tests/particles_sort.cpp @@ -0,0 +1,204 @@ +#include "enums.h" +#include "global.h" + +#include "utils/error.h" + +#include "framework/containers/particles.h" +#include "framework/domain/grid.h" + +#include + +auto main(int argc, char* argv[]) -> int { + ntt::GlobalInitialize(argc, argv); + try { + { + // 2D + auto grid = ntt::Grid { + { 10u, 30u }, + { { -5.0, 5.0 }, { -15.0, 15.0 } } + }; + auto prtls = ntt::Particles( + 1, + "test", + 1.0f, + 1.0f, + 100u, + 0u, + ntt::ParticlePusher::BORIS, + false, + ntt::RadiativeDrag::NONE, + ntt::EmissionType::NONE, + 2u, + 1u); + auto& i1_p = prtls.i1; + auto& i2_p = prtls.i2; + auto& tag_p = prtls.tag; + auto& weight_p = prtls.weight; + auto& pld_r = prtls.pld_r; + auto& pld_i = prtls.pld_i; + Kokkos::parallel_for( + "InitParticles", + prtls.maxnpart(), + Lambda(index_t p) { + if (p < 66u) { + tag_p(p) = (p % 10u == 0u) ? ntt::ParticleTag::dead + : ntt::ParticleTag::alive; + if (p % 4u == 0u) { + i1_p(p) = 8u; + i2_p(p) = 2u; + weight_p(p) = 0.0; + } else if (p % 4u == 1u) { + i1_p(p) = 2u; + i2_p(p) = 8u; + weight_p(p) = 1.0; + } else if (p % 4u == 2u) { + i1_p(p) = 5u; + i2_p(p) = 15u; + weight_p(p) = 2.0; + } else { + i1_p(p) = 0u; + i2_p(p) = 23u; + weight_p(p) = 3.0; + } + pld_r(p, 0) = weight_p(p) + 0.5; + pld_r(p, 1) = weight_p(p) + 10.5; + pld_i(p, 0) = static_cast(weight_p(p) + 10.0); + } else { + tag_p(p) = ntt::ParticleTag::dead; + } + if (tag_p(p) == ntt::ParticleTag::dead) { + weight_p(p) = -1.0; + } + }); + prtls.set_npart(66u); + + prtls.SortSpatially(grid); + + auto weight_h = Kokkos::create_mirror_view(prtls.weight); + auto pld_r_h = Kokkos::create_mirror_view(prtls.pld_r); + auto pld_i_h = Kokkos::create_mirror_view(prtls.pld_i); + Kokkos::deep_copy(weight_h, prtls.weight); + Kokkos::deep_copy(pld_r_h, prtls.pld_r); + Kokkos::deep_copy(pld_i_h, prtls.pld_i); + + for (auto p { 0u }; p < 75u; ++p) { + if (p < 16u) { + raise::ErrorIf(weight_h(p) != 3.0, "error in sorting particles", HERE); + } else if (p < 33u) { + raise::ErrorIf(weight_h(p) != 1.0, "error in sorting particles", HERE); + } else if (p < 46u) { + raise::ErrorIf(weight_h(p) != 2.0, "error in sorting particles", HERE); + } else if (p < 59u) { + raise::ErrorIf(weight_h(p) != 0.0, "error in sorting particles", HERE); + } else { + raise::ErrorIf(weight_h(p) != -1.0, "error in sorting particles", HERE); + } + if (p < 59u) { + raise::ErrorIf(pld_r_h(p, 0) != weight_h(p) + 0.5, + "error in sorting particle real payload 0", + HERE); + raise::ErrorIf(pld_r_h(p, 1) != weight_h(p) + 10.5, + "error in sorting particle real payload 1", + HERE); + raise::ErrorIf(pld_i_h(p, 0) != static_cast(weight_h(p) + 10.0), + "error in sorting particle integer payload 0", + HERE); + } + } + } + { + // 3D + auto grid = ntt::Grid { + { 6u, 7u, 8u }, + { { -3.0, 3.0 }, { -3.5, 3.5 }, { -4.0, 4.0 } } + }; + auto prtls = ntt::Particles( + 1, + "test", + 1.0f, + 1.0f, + 100u, + 0u, + ntt::ParticlePusher::BORIS, + false, + ntt::RadiativeDrag::NONE, + ntt::EmissionType::NONE, + 0u, + 0u); + auto& i1_p = prtls.i1; + auto& i2_p = prtls.i2; + auto& i3_p = prtls.i3; + auto& tag_p = prtls.tag; + auto& weight_p = prtls.weight; + Kokkos::parallel_for( + "InitParticles", + prtls.maxnpart(), + Lambda(index_t p) { + if (p < 66u) { + tag_p(p) = (p % 10u == 0u) ? ntt::ParticleTag::dead + : ntt::ParticleTag::alive; + if (p % 5u == 0u) { + i1_p(p) = 3u; + i2_p(p) = 2u; + i3_p(p) = 7u; + weight_p(p) = 0.0; + } else if (p % 5u == 1u) { + i1_p(p) = 2u; + i2_p(p) = 4u; + i3_p(p) = 3u; + weight_p(p) = 1.0; + } else if (p % 5u == 2u) { + i1_p(p) = 2u; + i2_p(p) = 6u; + i3_p(p) = 6u; + weight_p(p) = 2.0; + } else if (p % 5u == 3u) { + i1_p(p) = 3u; + i2_p(p) = 6u; + i3_p(p) = 6u; + weight_p(p) = 3.0; + } else { + i1_p(p) = 0u; + i2_p(p) = 6u; + i3_p(p) = 7u; + weight_p(p) = 4.0; + } + } else { + tag_p(p) = ntt::ParticleTag::dead; + } + if (tag_p(p) == ntt::ParticleTag::dead) { + weight_p(p) = -1.0; + } + }); + prtls.set_npart(66u); + + prtls.SortSpatially(grid); + + auto weight_h = Kokkos::create_mirror_view(prtls.weight); + Kokkos::deep_copy(weight_h, prtls.weight); + for (auto p { 0u }; p < 75u; ++p) { + if (p < 13u) { + raise::ErrorIf(weight_h(p) != 4.0, "error in sorting particles", HERE); + } else if (p < 26u) { + raise::ErrorIf(weight_h(p) != 1.0, "error in sorting particles", HERE); + } else if (p < 39u) { + raise::ErrorIf(weight_h(p) != 2.0, "error in sorting particles", HERE); + } else if (p < 46u) { + raise::ErrorIf(weight_h(p) != 0.0, "error in sorting particles", HERE); + } else if (p < 59u) { + raise::ErrorIf(weight_h(p) != 3.0, "error in sorting particles", HERE); + } else { + raise::ErrorIf(weight_h(p) != -1.0, "error in sorting particles", HERE); + } + } + std::cout << std::endl; + } + + } catch (const std::exception& e) { + std::cerr << "Error: " << e.what() << std::endl; + ntt::GlobalFinalize(); + return 1; + } + ntt::GlobalFinalize(); + return 0; +} diff --git a/src/global/arch/kokkos_aliases.h b/src/global/arch/kokkos_aliases.h index ea10ea4e..f7b57218 100644 --- a/src/global/arch/kokkos_aliases.h +++ b/src/global/arch/kokkos_aliases.h @@ -47,37 +47,40 @@ using array_mirror_t = typename array_t::host_mirror_type; template using scatter_array_t = Kokkos::Experimental::ScatterView; -// Array aliases of arbitrary type and dimensions (up to 3) +// Array aliases of arbitrary type and dimensions (up to 4) namespace kokkos_aliases_hidden { // c++ magic - template - struct ndarray_impl { + template + struct nddata_impl { using type = void; }; - template <> - struct ndarray_impl<1> { - using type = array_t; + template + struct nddata_impl<1, T> { + using type = array_t; }; - template <> - struct ndarray_impl<2> { - using type = array_t; + template + struct nddata_impl<2, T> { + using type = array_t; }; - template <> - struct ndarray_impl<3> { - using type = array_t; + template + struct nddata_impl<3, T> { + using type = array_t; }; - template <> - struct ndarray_impl<4> { - using type = array_t; + template + struct nddata_impl<4, T> { + using type = array_t; }; } // namespace kokkos_aliases_hidden +template +using nddata_t = typename kokkos_aliases_hidden::nddata_impl::type; + template -using ndarray_t = typename kokkos_aliases_hidden::ndarray_impl::type; +using ndarray_t = typename kokkos_aliases_hidden::nddata_impl::type; namespace kokkos_aliases_hidden { // c++ magic @@ -237,8 +240,8 @@ auto CreateParticleRangePolicy(npart_t, npart_t) -> range_t; * @returns Kokkos::RangePolicy or Kokkos::MDRangePolicy in the accelerator execution space. */ template -auto CreateRangePolicy(const tuple_t&, - const tuple_t&) -> range_t; +auto CreateRangePolicy(const tuple_t&, const tuple_t&) + -> range_t; /** * @brief Function template for generating ND Kokkos range policy on the host.