From 07409edc368ef59995c8b981e6c06b52ad51f823 Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Mon, 16 Feb 2026 23:34:55 +0100 Subject: [PATCH 01/13] Add a Validation check for LandauDamping, also fix merge queue dashboard labels --- alpine/CMakeLists.txt | 10 +- alpine/validation/CMakeLists.txt | 23 ++ .../validation/FieldLandau_valid_result.csv | 27 ++ .../validation/LandauDampingCorrectness.cpp | 333 ++++++++++++++++++ ci/cscs/common.yml | 4 +- ci/cscs/cuda/build_sm90.yml | 17 +- ci/cscs/dashboard-configure-build.cmake | 7 +- ci/cscs/openmp/build_openmp.yml | 18 +- ci/cscs/rocm/build_rocm-6.3.yml | 16 +- 9 files changed, 426 insertions(+), 29 deletions(-) create mode 100644 alpine/validation/CMakeLists.txt create mode 100644 alpine/validation/FieldLandau_valid_result.csv create mode 100644 alpine/validation/LandauDampingCorrectness.cpp diff --git a/alpine/CMakeLists.txt b/alpine/CMakeLists.txt index 4db043d4f..ff9856ba3 100644 --- a/alpine/CMakeLists.txt +++ b/alpine/CMakeLists.txt @@ -16,9 +16,13 @@ endfunction() if(IPPL_ENABLE_TESTS) # cmake-format: off + # Landau will write a CSV file to the data directory that we will validate later + make_directory("${PROJECT_BINARY_DIR}/alpine/data") + # Add the test add_ippl_integration_test(LandauDamping - ARGS 16 16 16 10000000 10 FFT 0.01 LeapFrog --overallocate 2.0 --info 10 - LABELS alpine integration) + ARGS "16" "16" "16" "10000000" "25" "FFT" "0.01" "LeapFrog" "--overallocate" "2.0" "--info" "10" + LABELS alpine integration + WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/alpine/") # cmake-format: on else() add_alpine_example(LandauDamping) @@ -26,3 +30,5 @@ endif() add_alpine_example(PenningTrap) add_alpine_example(BumponTailInstability) + +add_subdirectory(validation) diff --git a/alpine/validation/CMakeLists.txt b/alpine/validation/CMakeLists.txt new file mode 100644 index 000000000..5236a9ba0 --- /dev/null +++ b/alpine/validation/CMakeLists.txt @@ -0,0 +1,23 @@ +# ----------------------------------------------------------------------------- +# validation check for LandauDamping test +# ----------------------------------------------------------------------------- + +if(BUILD_TESTING) + # Build the C++ correctness validation test + add_executable(LandauDampingCorrectness LandauDampingCorrectness.cpp) + + message("Adding test: LandauDampingCorrectness") + + # command line params are : output, reference, tolerance + add_test( + NAME LandauDampingCorrectnessValidation + COMMAND LandauDampingCorrectness "${PROJECT_BINARY_DIR}/alpine/data/FieldLandau_2_manager.csv" + "${PROJECT_SOURCE_DIR}/alpine/validation/FieldLandau_valid_result.csv" "4E-1" + WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/alpine/") + + set_tests_properties( + LandauDampingCorrectnessValidation + PROPERTIES LABELS "validation;alpine" TIMEOUT 30 + # This test should only run if the LandauDamping integration test passed + DEPENDS LandauDamping) +endif() diff --git a/alpine/validation/FieldLandau_valid_result.csv b/alpine/validation/FieldLandau_valid_result.csv new file mode 100644 index 000000000..9eb047189 --- /dev/null +++ b/alpine/validation/FieldLandau_valid_result.csv @@ -0,0 +1,27 @@ +time, Ex_field_energy, Ex_max_norm +0.0000000000000000e+00 9.6742715140721582e+00 1.0825163128256524e-01 +5.0000000000000003e-02 9.6368948973459112e+00 1.0808994906148438e-01 +1.0000000000000001e-01 9.5415075224669970e+00 1.0740779743767520e-01 +1.5000000000000002e-01 9.3882773328014082e+00 1.0630178072727425e-01 +2.0000000000000001e-01 9.1791183371635334e+00 1.0482602738140372e-01 +2.5000000000000000e-01 8.9172134334449193e+00 1.0306560288960222e-01 +2.9999999999999999e-01 8.6061662103776140e+00 1.0113820775231776e-01 +3.4999999999999998e-01 8.2505504653792148e+00 9.8691447720593309e-02 +3.9999999999999997e-01 7.8550417931569703e+00 9.5598706257118735e-02 +4.4999999999999996e-01 7.4249700904468465e+00 9.2872937917417214e-02 +4.9999999999999994e-01 6.9653510910124385e+00 9.0033318617501135e-02 +5.4999999999999993e-01 6.4822476651919327e+00 8.7257335757243495e-02 +5.9999999999999998e-01 5.9816257151911403e+00 8.4644036660629890e-02 +6.5000000000000002e-01 5.4696405485153790e+00 8.2154906259950061e-02 +7.0000000000000007e-01 4.9535573676676501e+00 7.9019365369956629e-02 +7.5000000000000011e-01 4.4392121571874581e+00 7.5190519676643197e-02 +8.0000000000000016e-01 3.9321278953403862e+00 7.1054552970285123e-02 +8.5000000000000020e-01 3.4384135389096344e+00 6.6356647790906387e-02 +9.0000000000000024e-01 2.9638108812800437e+00 6.1694793644303261e-02 +9.5000000000000029e-01 2.5131142881540285e+00 5.7498634682739851e-02 +1.0000000000000002e+00 2.0909400590908556e+00 5.3113907328247389e-02 +1.0500000000000003e+00 1.7017166009804880e+00 4.8965793799131234e-02 +1.1000000000000003e+00 1.3486230947669464e+00 4.4410412852319857e-02 +1.1500000000000004e+00 1.0343505136217537e+00 3.9926242617971765e-02 +1.2000000000000004e+00 7.6105675926529781e-01 3.5819185614705762e-02 +1.2500000000000004e+00 5.3020078549605387e-01 3.1521337179656390e-02 diff --git a/alpine/validation/LandauDampingCorrectness.cpp b/alpine/validation/LandauDampingCorrectness.cpp new file mode 100644 index 000000000..065c6c477 --- /dev/null +++ b/alpine/validation/LandauDampingCorrectness.cpp @@ -0,0 +1,333 @@ +// LandauDamping Correctness Validation Test +// +// This test compares the CSV output generated by the LandauDamping simulation +// with a reference (valid) result file. All numerical values must be within +// a tolerance of 1E-7 for the test to pass. +// +// Usage: +// ./LandauDampingCorrectness [output_csv] [reference_csv] [tolerance] +// +// Where: +// output_csv : Path to the generated FieldLandau_*.csv file +// (default: data/FieldLandau_*_manager.csv - finds first match) +// reference_csv : Path to the reference file +// (default: ../../alpine/validation/FieldLandau_valid_result.csv) +// tolerance : Absolute tolerance for comparison (default: 1e-7) + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace fs = std::filesystem; + +// Structure to hold a row of CSV data +struct CSVRow { + double time; + double Ex_field_energy; + double Ex_max_norm; + + CSVRow() + : time(0.0) + , Ex_field_energy(0.0) + , Ex_max_norm(0.0) {} + + CSVRow(double t, double e, double n) + : time(t) + , Ex_field_energy(e) + , Ex_max_norm(n) {} +}; + +// Trim whitespace from string +std::string trim(const std::string& str) { + size_t first = str.find_first_not_of(" \t\r\n"); + if (first == std::string::npos) + return ""; + size_t last = str.find_last_not_of(" \t\r\n"); + return str.substr(first, (last - first + 1)); +} + +// Find first CSV file matching pattern in directory +std::string findCSVFile(const std::string& pattern) { + // If pattern is a file that exists, return it + if (fs::exists(pattern) && fs::is_regular_file(pattern)) { + return pattern; + } + + // Try to find a matching file in data/ directory + std::string dataDir = "data"; + if (fs::exists(dataDir) && fs::is_directory(dataDir)) { + for (const auto& entry : fs::directory_iterator(dataDir)) { + if (entry.is_regular_file() && entry.path().extension() == ".csv") { + std::string filename = entry.path().filename().string(); + if (filename.find("FieldLandau_") == 0 + && filename.find("_manager.csv") != std::string::npos) { + return entry.path().string(); + } + } + } + } + + return pattern; // Return original pattern if not found +} + +// Load CSV file into vector of CSVRow +bool loadCSV(const std::string& filepath, std::vector& data, bool& hasMaxNorm, + std::string& errorMsg) { + std::ifstream file(filepath); + if (!file.is_open()) { + errorMsg = "Could not open file: " + filepath; + return false; + } + + std::string line; + bool isFirstLine = true; + int lineNum = 0; + hasMaxNorm = false; + + while (std::getline(file, line)) { + lineNum++; + line = trim(line); + + // Skip empty lines + if (line.empty()) + continue; + + // Check header line + if (isFirstLine) { + isFirstLine = false; + // Check if we have Ex_max_norm column + hasMaxNorm = (line.find("Ex_max_norm") != std::string::npos); + continue; + } + + // Parse data line + std::istringstream iss(line); + double time, energy, maxNorm = 0.0; + + if (!(iss >> time >> energy)) { + errorMsg = "Failed to parse line " + std::to_string(lineNum) + ": " + line; + return false; + } + + // Try to read third column if present + if (hasMaxNorm) { + if (!(iss >> maxNorm)) { + // If header says we have max_norm but can't read it, it's an error + errorMsg = "Expected Ex_max_norm column but failed to parse at line " + + std::to_string(lineNum); + return false; + } + } + + data.emplace_back(time, energy, maxNorm); + } + + file.close(); + + if (data.empty()) { + errorMsg = "No data rows found in file: " + filepath; + return false; + } + + return true; +} + +// Compare two CSV datasets +bool compareCSVData(const std::vector& output, const std::vector& reference, + bool outputHasMaxNorm, bool refHasMaxNorm, double tolerance, + std::vector& errors) { + // Check row count + if (output.size() != reference.size()) { + std::ostringstream oss; + oss << "Row count mismatch: output has " << output.size() << " rows, reference has " + << reference.size() << " rows"; + errors.push_back(oss.str()); + return false; + } + + // Compare each row + std::vector timeErrors, energyErrors, normErrors; + double maxTimeDiff = 0.0, maxEnergyDiff = 0.0, maxNormDiff = 0.0; + + for (size_t i = 0; i < output.size(); ++i) { + // Compare time + double timeDiff = std::abs(output[i].time - reference[i].time); + if (timeDiff > tolerance) { + timeErrors.push_back(static_cast(i)); + maxTimeDiff = std::max(maxTimeDiff, timeDiff); + } + + // Compare Ex_field_energy + double energyDiff = std::abs(output[i].Ex_field_energy - reference[i].Ex_field_energy); + if (energyDiff > tolerance) { + energyErrors.push_back(static_cast(i)); + maxEnergyDiff = std::max(maxEnergyDiff, energyDiff); + } + + // Compare Ex_max_norm if both have it + if (outputHasMaxNorm && refHasMaxNorm) { + double normDiff = std::abs(output[i].Ex_max_norm - reference[i].Ex_max_norm); + if (normDiff > tolerance) { + normErrors.push_back(static_cast(i)); + maxNormDiff = std::max(maxNormDiff, normDiff); + } + } + } + + // Report errors + bool hasErrors = false; + + if (!timeErrors.empty()) { + hasErrors = true; + std::ostringstream oss; + oss << "Column 'time': " << timeErrors.size() << " value(s) outside tolerance " + << "(tolerance=" << std::scientific << tolerance << ", max_diff=" << maxTimeDiff + << ") at rows ["; + for (size_t i = 0; i < std::min(timeErrors.size(), size_t(10)); ++i) { + if (i > 0) + oss << ", "; + oss << timeErrors[i]; + } + if (timeErrors.size() > 10) + oss << ", ..."; + oss << "]"; + errors.push_back(oss.str()); + } + + if (!energyErrors.empty()) { + hasErrors = true; + std::ostringstream oss; + oss << "Column 'Ex_field_energy': " << energyErrors.size() << " value(s) outside tolerance " + << "(tolerance=" << std::scientific << tolerance << ", max_diff=" << maxEnergyDiff + << ") at rows ["; + for (size_t i = 0; i < std::min(energyErrors.size(), size_t(10)); ++i) { + if (i > 0) + oss << ", "; + oss << energyErrors[i]; + } + if (energyErrors.size() > 10) + oss << ", ..."; + oss << "]"; + errors.push_back(oss.str()); + } + + if (!normErrors.empty()) { + hasErrors = true; + std::ostringstream oss; + oss << "Column 'Ex_max_norm': " << normErrors.size() << " value(s) outside tolerance " + << "(tolerance=" << std::scientific << tolerance << ", max_diff=" << maxNormDiff + << ") at rows ["; + for (size_t i = 0; i < std::min(normErrors.size(), size_t(10)); ++i) { + if (i > 0) + oss << ", "; + oss << normErrors[i]; + } + if (normErrors.size() > 10) + oss << ", ..."; + oss << "]"; + errors.push_back(oss.str()); + } + + return !hasErrors; +} + +int main(int argc, char* argv[]) { + // Default values + std::string outputCSV = "data/FieldLandau_*_manager.csv"; + std::string referenceCSV = "../../alpine/validation/FieldLandau_valid_result.csv"; + double tolerance = 1e-7; + + // Parse command line arguments + if (argc > 1) { + if (std::string(argv[1]) == "--help" || std::string(argv[1]) == "-h") { + std::cout << "LandauDamping Correctness Validation Test\n\n" + << "Usage: " << argv[0] << " [output_csv] [reference_csv] [tolerance]\n\n" + << "Arguments:\n" + << " output_csv : Path to generated CSV file (default: " + "data/FieldLandau_*_manager.csv)\n" + << " reference_csv : Path to reference CSV file (default: " + "../../alpine/validation/FieldLandau_valid_result.csv)\n" + << " tolerance : Absolute tolerance for comparison (default: 1e-7)\n\n" + << "Exit codes:\n" + << " 0 : PASS - Output matches reference within tolerance\n" + << " 1 : FAIL - Output does not match reference\n" + << " 2 : ERROR - File I/O or parsing error\n\n" + << "Examples:\n" + << " " << argv[0] << "\n" + << " " << argv[0] << " data/FieldLandau_2_manager.csv\n" + << " " << argv[0] << " output.csv reference.csv 1e-6\n"; + return 0; + } + outputCSV = argv[1]; + } + if (argc > 2) { + referenceCSV = argv[2]; + } + if (argc > 3) { + tolerance = std::atof(argv[3]); + } + + // Find output file if pattern given + outputCSV = findCSVFile(outputCSV); + + std::cout << "LandauDamping Correctness Validation\n"; + std::cout << "=====================================\n"; + std::cout << "Output file: " << outputCSV << "\n"; + std::cout << "Reference file: " << referenceCSV << "\n"; + std::cout << "Tolerance: " << std::scientific << tolerance << "\n"; + std::cout << std::endl; + + // Load output CSV + std::vector outputData; + bool outputHasMaxNorm = false; + std::string errorMsg; + + if (!loadCSV(outputCSV, outputData, outputHasMaxNorm, errorMsg)) { + std::cerr << "✗ ERROR: Failed to load output CSV\n"; + std::cerr << " " << errorMsg << std::endl; + return 2; + } + + std::cout << "Loaded output: " << outputData.size() << " rows"; + if (outputHasMaxNorm) + std::cout << " (with Ex_max_norm column)"; + std::cout << std::endl; + + // Load reference CSV + std::vector referenceData; + bool refHasMaxNorm = false; + + if (!loadCSV(referenceCSV, referenceData, refHasMaxNorm, errorMsg)) { + std::cerr << "✗ ERROR: Failed to load reference CSV\n"; + std::cerr << " " << errorMsg << std::endl; + return 2; + } + + std::cout << "Loaded reference: " << referenceData.size() << " rows"; + if (refHasMaxNorm) + std::cout << " (with Ex_max_norm column)"; + std::cout << std::endl; + std::cout << std::endl; + + // Compare data + std::vector errors; + bool passed = compareCSVData(outputData, referenceData, outputHasMaxNorm, refHasMaxNorm, + tolerance, errors); + + if (passed) { + std::cout << "✓ PASS: Output matches reference within tolerance" << std::endl; + return 0; + } else { + std::cout << "✗ FAIL: Output does not match reference" << std::endl; + for (const auto& error : errors) { + std::cout << " - " << error << std::endl; + } + return 1; + } +} diff --git a/ci/cscs/common.yml b/ci/cscs/common.yml index 8439b1afd..aab7f3bef 100644 --- a/ci/cscs/common.yml +++ b/ci/cscs/common.yml @@ -6,7 +6,9 @@ include: - echo "CI_PROJECT_URL=$CI_PROJECT_URL" - echo "CI_COMMIT_SHORT_SHA=$CI_COMMIT_SHORT_SHA" - | - if [[ "$CI_COMMIT_REF_NAME" =~ pr([0-9]+)$ ]]; then + if [[ "$CI_COMMIT_REF_NAME" =~ gh-readonly-queue.*-pr-([0-9]+)- ]]; then + export CDASH_LABEL="Merge-Queue-PR-${BASH_REMATCH[1]}" + elif [[ "$CI_COMMIT_REF_NAME" =~ pr([0-9]+)$ ]]; then export CDASH_LABEL="PR-${BASH_REMATCH[1]}" else export CDASH_LABEL="$CI_COMMIT_REF_SLUG" diff --git a/ci/cscs/cuda/build_sm90.yml b/ci/cscs/cuda/build_sm90.yml index b54ffdbb4..6175f92c5 100644 --- a/ci/cscs/cuda/build_sm90.yml +++ b/ci/cscs/cuda/build_sm90.yml @@ -27,17 +27,18 @@ variables: - env | sort | grep CI script: - >- - ctest -V -S $CI_PROJECT_DIR/ci/cscs/dashboard-configure-build.cmake + ctest -V -S $CI_PROJECT_DIR/ci/cscs/dashboard-configure-build.cmake -DCTEST_SITE="$CTEST_SITE" - -DPRESET=alps-gh200 - -DCDASH_LABEL=$CDASH_LABEL + -DPRESET=alps-gh200 + -DCDASH_LABEL=$CDASH_LABEL -DBUILD_TYPE=$BUILD_TYPE -DBUILD_DIR=$BUILD_PATH - -DBUILD_ARCH=$BUILD_ARCH - -DKokkos_VERSION=git.4.7.02 - -DKokkos_ARCH_FLAG=Kokkos_ARCH_HOPPER90 - -DMPIEXEC_EXECUTABLE=/usr/bin/srun - -DMPIEXEC_PREFLAGS="$SRUN_FLAGS" + -DBUILD_ARCH=$BUILD_ARCH + -DHeffte_VERSION=git.v2.4.1 + -DKokkos_VERSION=git.4.7.02 + -DKokkos_ARCH_FLAG=Kokkos_ARCH_HOPPER90 + -DMPIEXEC_EXECUTABLE=/usr/bin/srun + -DMPIEXEC_PREFLAGS="$SRUN_FLAGS" -DMPIEXEC_MAX_NUMPROCS=4 - echo "Build directory size (before cleanup):" $(du -sh $BUILD_PATH | cut -f1) - find $BUILD_PATH -name \*.o -delete diff --git a/ci/cscs/dashboard-configure-build.cmake b/ci/cscs/dashboard-configure-build.cmake index b12f67b12..ed460f165 100644 --- a/ci/cscs/dashboard-configure-build.cmake +++ b/ci/cscs/dashboard-configure-build.cmake @@ -50,7 +50,12 @@ set(CTEST_CONFIGURE_COMMAND "${CTEST_CONFIGURE_COMMAND} -DMPIEXEC_EXECUTABLE=${M set(CTEST_CONFIGURE_COMMAND "${CTEST_CONFIGURE_COMMAND} -DMPIEXEC_PREFLAGS=${MPIEXEC_PREFLAGS}") set(CTEST_CONFIGURE_COMMAND "${CTEST_CONFIGURE_COMMAND} -DMPIEXEC_MAX_NUMPROCS=${MPIEXEC_MAX_NUMPROCS}") -set(CTEST_CONFIGURE_COMMAND "${CTEST_CONFIGURE_COMMAND} -DKokkos_VERSION=${Kokkos_VERSION}") +if(DEFINED Heffte_VERSION) + set(CTEST_CONFIGURE_COMMAND "${CTEST_CONFIGURE_COMMAND} -DHeffte_VERSION=${Heffte_VERSION}") +endif() +if(DEFINED Kokkos_VERSION) + set(CTEST_CONFIGURE_COMMAND "${CTEST_CONFIGURE_COMMAND} -DKokkos_VERSION=${Kokkos_VERSION}") +endif() set(CTEST_CONFIGURE_COMMAND "${CTEST_CONFIGURE_COMMAND} -D${Kokkos_ARCH_FLAG}=ON") # --- configure & build --- diff --git a/ci/cscs/openmp/build_openmp.yml b/ci/cscs/openmp/build_openmp.yml index ff4896956..4387273f2 100644 --- a/ci/cscs/openmp/build_openmp.yml +++ b/ci/cscs/openmp/build_openmp.yml @@ -28,19 +28,19 @@ variables: - export CXX=g++ script: - >- - ctest -V -S $CI_PROJECT_DIR/ci/cscs/dashboard-configure-build.cmake + ctest -V -S $CI_PROJECT_DIR/ci/cscs/dashboard-configure-build.cmake -DCTEST_SITE="$CTEST_SITE" - -DPRESET=release-testing - -DCDASH_LABEL=$CDASH_LABEL - -DBUILD_TYPE=$BUILD_TYPE + -DPRESET=release-testing + -DCDASH_LABEL=$CDASH_LABEL + -DBUILD_TYPE=$BUILD_TYPE -DBUILD_DIR=$BUILD_PATH - -DBUILD_ARCH=$BUILD_ARCH + -DBUILD_ARCH=$BUILD_ARCH -DIPPL_PLATFORMS=OPENMP -DIPPL_OPENMP_THREADS=32 - -DKokkos_VERSION=git.4.7.02 - -DKokkos_ARCH_FLAG=Kokkos_ARCH_AMD_GFX942_APU - -DMPIEXEC_EXECUTABLE=/usr/bin/srun - -DMPIEXEC_PREFLAGS="$SRUN_FLAGS" + -DKokkos_VERSION=git.4.7.02 + -DKokkos_ARCH_FLAG=Kokkos_ARCH_AMD_GFX942_APU + -DMPIEXEC_EXECUTABLE=/usr/bin/srun + -DMPIEXEC_PREFLAGS="$SRUN_FLAGS" -DMPIEXEC_MAX_NUMPROCS=4 - echo "Build directory size (before cleanup):" $(du -sh $BUILD_PATH | cut -f1) - find $BUILD_PATH -name \*.o -delete diff --git a/ci/cscs/rocm/build_rocm-6.3.yml b/ci/cscs/rocm/build_rocm-6.3.yml index d09a7de6f..4735f8fa8 100644 --- a/ci/cscs/rocm/build_rocm-6.3.yml +++ b/ci/cscs/rocm/build_rocm-6.3.yml @@ -32,17 +32,17 @@ variables: - export CMAKE_PREFIX_PATH=$HSA_PATH:$CMAKE_PREFIX_PATH script: - >- - ctest -V -S $CI_PROJECT_DIR/ci/cscs/dashboard-configure-build.cmake + ctest -V -S $CI_PROJECT_DIR/ci/cscs/dashboard-configure-build.cmake -DCTEST_SITE="$CTEST_SITE" - -DPRESET=alps-mi300 - -DCDASH_LABEL=$CDASH_LABEL + -DPRESET=alps-mi300 + -DCDASH_LABEL=$CDASH_LABEL -DBUILD_TYPE=$BUILD_TYPE -DBUILD_DIR=$BUILD_PATH - -DBUILD_ARCH=$BUILD_ARCH - -DKokkos_VERSION=git.4.7.02 - -DKokkos_ARCH_FLAG=Kokkos_ARCH_AMD_GFX942_APU - -DMPIEXEC_EXECUTABLE=/usr/bin/srun - -DMPIEXEC_PREFLAGS="$SRUN_FLAGS" + -DBUILD_ARCH=$BUILD_ARCH + -DKokkos_VERSION=git.4.7.02 + -DKokkos_ARCH_FLAG=Kokkos_ARCH_AMD_GFX942_APU + -DMPIEXEC_EXECUTABLE=/usr/bin/srun + -DMPIEXEC_PREFLAGS="$SRUN_FLAGS" -DMPIEXEC_MAX_NUMPROCS=4 # -DHeffte_ENABLE_GPU_AWARE_MPI=OFF - echo "Build directory size (before cleanup):" $(du -sh $BUILD_PATH | cut -f1) From 212997fc11bd14e35ddac6cb34492192652d1acf Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Wed, 12 Nov 2025 15:37:05 +0000 Subject: [PATCH 02/13] Automatically set UENV in scripts if using alps --- scripts/CMakeLists.txt | 14 ++++++++++++++ .../strong-scaling-alps/jobscript-gh200.slurm | 4 ++-- .../strong-scaling-alps/jobscript-mi300.slurm | 4 ++-- 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/scripts/CMakeLists.txt b/scripts/CMakeLists.txt index 4731c2f76..cad844a9d 100644 --- a/scripts/CMakeLists.txt +++ b/scripts/CMakeLists.txt @@ -9,6 +9,20 @@ message(STATUS "IPPL_MACHINENAME for script generation is: ${IPPL_MACHINENAME}") # ------------------------------------------------------------------------------ set(IPPL_JOB_SUBMISSION_ACCOUNT "c41" CACHE STRING "Account to use for job submission templates") +# populate IPPL_SCRIPTS_UENV from environment variable UENV_MOUNT_LIST (if present) +if(DEFINED ENV{UENV_MOUNT_LIST}) + set(IPPL_SCRIPTS_UENV "$ENV{UENV_MOUNT_LIST}" + CACHE STRING "UENV to use on alps when running scripts mount") + set(IPPL_SCRIPTS_UENV_VIEW "default" CACHE STRING "View to set in uenv") + colour_message(STATUS ${LightBlue} + "IPPL_SCRIPTS_UENV set from UENV_MOUNT_LIST: ${IPPL_SCRIPTS_UENV}") +else() + # Remove any cached and normal definitions so the variable is completely unset + unset(IPPL_SCRIPTS_UENV CACHE) + unset(IPPL_SCRIPTS_UENV_VIEW CACHE) + colour_message(STATUS ${LightBlue} "No UENV detected") +endif() + # ------------------------------------------------------------------------------ # utility function to get target path/name since we can't use generator expressions to set variables # directly diff --git a/scripts/landau/strong-scaling-alps/jobscript-gh200.slurm b/scripts/landau/strong-scaling-alps/jobscript-gh200.slurm index 4c40a5bc2..14542f1f3 100644 --- a/scripts/landau/strong-scaling-alps/jobscript-gh200.slurm +++ b/scripts/landau/strong-scaling-alps/jobscript-gh200.slurm @@ -12,8 +12,8 @@ #SBATCH --cpus-per-task=72 #SBATCH --exclusive -#SBATCH --uenv=/capstor/store/cscs/cscs/public/uenvs/opal-x-gh200-mpich-gcc-2025-09-28.squashfs -#SBATCH --view=develop +#SBATCH --uenv=@IPPL_SCRIPTS_UENV@ +#SBATCH --view=@IPPL_SCRIPTS_UENV_VIEW@ #SBATCH --output=landau__n_.out #SBATCH --error=landau__n_.error diff --git a/scripts/landau/strong-scaling-alps/jobscript-mi300.slurm b/scripts/landau/strong-scaling-alps/jobscript-mi300.slurm index de29cc9dc..1bd459192 100644 --- a/scripts/landau/strong-scaling-alps/jobscript-mi300.slurm +++ b/scripts/landau/strong-scaling-alps/jobscript-mi300.slurm @@ -13,8 +13,8 @@ #SBATCH --cpus-per-task=48 #SBATCH --exclusive -#SBATCH --uenv=/capstor/scratch/cscs/biddisco/opal-x-mi300-mpich-gcc-2025-10-27.squashfs -#SBATCH --view=default +#SBATCH --uenv=@IPPL_SCRIPTS_UENV@ +#SBATCH --view=@IPPL_SCRIPTS_UENV_VIEW@ #SBATCH --output=landau__n_.out #SBATCH --error=landau__n_.error From ee748fdcab364042514917e0f09230fe71b1196c Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Fri, 12 Dec 2025 15:52:24 +0100 Subject: [PATCH 03/13] Add option for IPC ON/OFF in job script for gh200 testing --- .../strong-scaling-alps/jobscript-gh200.slurm | 15 ++++++++++++--- .../landau/strong-scaling-alps/wrapper-gh200.sh | 10 ++++++++-- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/scripts/landau/strong-scaling-alps/jobscript-gh200.slurm b/scripts/landau/strong-scaling-alps/jobscript-gh200.slurm index 14542f1f3..338c7f682 100644 --- a/scripts/landau/strong-scaling-alps/jobscript-gh200.slurm +++ b/scripts/landau/strong-scaling-alps/jobscript-gh200.slurm @@ -41,10 +41,9 @@ export JOB_DIR=job_dir export EXE=@LANDAU_BINARY@ export WRAPPER=@PROJECT_BINARY_DIR@/@JOB_SCRIPTS_PATH@/@JOB_WRAPPER_SCRIPT@ -mkdir -p data export OMP_PROC_BIND=spread export OMP_PLACES=threads -export OMP_NUM_THREADS=72 +export OMP_NUM_THREADS=2 # -------------------- # cray-mpich debug putput @@ -62,4 +61,14 @@ echo "N Particles: $nparticles" echo "Iterations: $iterations" # -------------------- -srun $WRAPPER $EXE $cubesize $cubesize $cubesize $nparticles $iterations FFT 1.0 LeapFrog --overallocate 2.0 --info 10 +mkdir -p ipc1 +pushd ipc1 +mkdir -p data +IPC_ON=1 srun $WRAPPER $EXE $cubesize $cubesize $cubesize $nparticles $iterations FFT 1.0 LeapFrog --overallocate 2.0 --info 10 +popd + +mkdir -p ipc0 +pushd ipc0 +mkdir -p data +IPC_ON=0 srun $WRAPPER $EXE $cubesize $cubesize $cubesize $nparticles $iterations FFT 1.0 LeapFrog --overallocate 2.0 --info 10 +popd diff --git a/scripts/landau/strong-scaling-alps/wrapper-gh200.sh b/scripts/landau/strong-scaling-alps/wrapper-gh200.sh index b8a02caab..6522117af 100755 --- a/scripts/landau/strong-scaling-alps/wrapper-gh200.sh +++ b/scripts/landau/strong-scaling-alps/wrapper-gh200.sh @@ -88,9 +88,15 @@ export CUDA_VISIBLE_DEVICES=$gpu # --------------- # cray-mpich : see https://cpe.ext.hpe.com/docs/24.03/mpt/mpich/intro_mpi.html#general-mpich-environment-variables # --------------- -export MPICH_GPU_SUPPORT_ENABLED=1 +# profiling and debugging options #export MPICH_OFI_CXI_COUNTER_REPORT=1 -export MPICH_GPU_IPC_ENABLED=0 + +# gpu support +export MPICH_GPU_IPC_ENABLED=1 +export MPICH_GPU_IPC_CACHE_MAX_SIZE=256 +if [ "$IPC_ON" -eq "0" ]; then + export MPICH_GPU_IPC_ENABLED=0 +fi # --------------- # OpenMPI mappings for MCA variables From 9fc8683e71f2dd8cc61c56bf312b7021c4e1d9f1 Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Wed, 12 Nov 2025 15:48:49 +0000 Subject: [PATCH 04/13] Remove use of "-ffile-prefix-map" in CMake as it interferes with gdb --- CMakeLists.txt | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 737c30386..54c104885 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -99,21 +99,6 @@ if(DEFINED USE_ALTERNATIVE_VARIANT) set(IPPL_USE_ALTERNATIVE_VARIANT ${USE_ALTERNATIVE_VARIANT} CACHE BOOL "" FORCE) endif() -# ------------------------------------------------------------------------------ -# Debug: This tells the compiler to replace occurrences of ${} with in debug info and error -# messages. -# ------------------------------------------------------------------------------ -add_compile_options( - $<$:-ffile-prefix-map=${CMAKE_SOURCE_DIR}=.> - $<$:-ffile-prefix-map=${CMAKE_SOURCE_DIR}=.>) - -if(DEFINED FETCHCONTENT_BASE_DIR) - add_compile_options( - $<$:-ffile-prefix-map=${FETCHCONTENT_BASE_DIR}=.3p> - $<$:-ffile-prefix-map=${FETCHCONTENT_BASE_DIR}=.3p> - ) -endif() - # ------------------------------------------------------------------------------ # Define sources for project # ------------------------------------------------------------------------------ From a1db0515bc72273463b725a161f7901d8f956ad7 Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Tue, 28 Oct 2025 13:18:22 +0100 Subject: [PATCH 05/13] Add startup "attach debugger" option to LandauDamping test --- src/Ippl.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/Ippl.cpp b/src/Ippl.cpp index 3892e4ae9..fc9563535 100644 --- a/src/Ippl.cpp +++ b/src/Ippl.cpp @@ -71,6 +71,14 @@ namespace ippl { } auto factor = detail::getNumericalOption(argv[nargs]); Comm->setDefaultOverallocation(factor); + } else if (detail::checkOption(argv[nargs], "--debug", "-g")) { + ++nargs; + if (Comm->rank() == 0) { + std::cout << "Please attach debugger and hit return" << std::endl; + char c; + std::cin >> c; + } + Comm->barrier(); } else if (nargs > 0 && std::strstr(argv[nargs], "--kokkos") == nullptr) { notparsed.push_back(argv[nargs]); } From 17e126ac4751ea883860b60e1c8749b45e7e7fd2 Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Sat, 6 Dec 2025 22:52:45 +0100 Subject: [PATCH 06/13] Add support for spdlog logging (and fmt formatting) --- CMakeLists.txt | 13 ++++++++ alpine/LandauDamping.cpp | 9 ++++-- cmake/Dependencies.cmake | 17 ++++++++++ src/CMakeLists.txt | 8 +++++ src/Utility/Logging.h | 39 ++++++++++++++++++++++ src/Utility/PrintType.h | 70 ++++++++++++++++++++++++++++++++++++++++ 6 files changed, 154 insertions(+), 2 deletions(-) create mode 100644 src/Utility/Logging.h create mode 100644 src/Utility/PrintType.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 54c104885..8a7d1a73c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -51,6 +51,19 @@ option(IPPL_MARK_FAILING_TESTS "Prefix names of tests that are known to fail with 'known_fail' for filtering with ctest" OFF) option(IPPL_ENABLE_SCRIPTS "Generate job script templates for some benchmarks/tests" OFF) +option(IPPL_GPU_AWARE_MPI "Allow MPI to/from from device memory buffers" OFF) +# logging options +set(IPPL_LOG_LEVEL "off" CACHE STRING "Enable logging for messages of >=level") +set_property( + CACHE IPPL_LOG_LEVEL + PROPERTY STRINGS + off + trace + debug + info + warn + error + critical) # "Build IPPL as a shared library (ON) or static library (OFF)" OFF) if(IPPL_DYL) # set(BUILD_SHARED_LIBS ON CACHE BOOL "" FORCE) message(WARNING "IPPL_DYL is deprecated; use diff --git a/alpine/LandauDamping.cpp b/alpine/LandauDamping.cpp index 718672513..dc8cadfed 100644 --- a/alpine/LandauDamping.cpp +++ b/alpine/LandauDamping.cpp @@ -37,17 +37,22 @@ const char* TestName = "LandauDamping"; #include "Manager/datatypes.h" #include "Utility/IpplTimings.h" +#include "Utility/Logging.h" #include "LandauDampingManager.h" #include "Manager/PicManager.h" int main(int argc, char* argv[]) { +#if defined(SPDLOG_ACTIVE_LEVEL) && (SPDLOG_ACTIVE_LEVEL != SPDLOG_LEVEL_OFF) + spdlog::set_pattern("[%^%-8l%$]%t| %v"); + spdlog::set_level(spdlog::level::trace); +#endif ippl::initialize(argc, argv); { Inform msg(TestName); Inform msg2all(TestName, INFORM_ALL_NODES); - static IpplTimings::TimerRef mainTimer = IpplTimings::getTimer("total"); + static IpplTimings::TimerRef mainTimer = IpplTimings::getTimer("total"); static IpplTimings::TimerRef initializeTimer = IpplTimings::getTimer("initialize"); IpplTimings::startTimer(mainTimer); IpplTimings::startTimer(initializeTimer); @@ -82,7 +87,7 @@ int main(int argc, char* argv[]) { manager.pre_run(); IpplTimings::stopTimer(initializeTimer); - + manager.setTime(0.0); msg << "Starting iterations ..." << endl; diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index 70bb39314..704d87a8b 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -44,6 +44,23 @@ if("OPENMP" IN_LIST IPPL_PLATFORMS) colour_message(STATUS ${Green} "✅ OpenMP platform requested OpenMP found ${OPENMP_VERSION}") endif() +# ------------------------------------------------------------------------------ +# spdlog logging library +# ------------------------------------------------------------------------------ +string(TOUPPER ${IPPL_LOG_LEVEL} IPPL_LOG_LEVEL_UPPERCASE) +if(NOT "${IPPL_LOG_LEVEL_UPPERCASE}" MATCHES "OFF") + find_package(spdlog REQUIRED) + colour_message(STATUS ${Green} "✅ spdlog found ${spdlog_VERSION}") +endif() + +# ------------------------------------------------------------------------------ +# fmt library (for formatting nice log messages) +# ------------------------------------------------------------------------------ +if(NOT "${IPPL_LOG_LEVEL_UPPERCASE}" MATCHES "OFF") + find_package(fmt REQUIRED) + colour_message(STATUS ${Green} "✅ fmt found ${fmt_VERSION}") +endif() + # ------------------------------------------------------------------------------ # Utility function to clear a list of vars one by one # ------------------------------------------------------------------------------ diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c4778b174..51cfb879b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -35,6 +35,14 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/IpplVersions.h.in add_library(ippl) +if(NOT "${IPPL_LOG_LEVEL_UPPERCASE}" MATCHES "OFF") + target_compile_definitions(ippl PUBLIC IPPL_LOGGING_ENABLED=1) + target_compile_definitions(ippl + PUBLIC SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_${IPPL_LOG_LEVEL_UPPERCASE}) + target_link_libraries(ippl PUBLIC spdlog::spdlog $<$:ws2_32>) + target_link_libraries(ippl PUBLIC fmt::fmt) +endif() + target_compile_features(ippl PUBLIC cxx_std_20) target_compile_options( diff --git a/src/Utility/Logging.h b/src/Utility/Logging.h new file mode 100644 index 000000000..99bc2d08b --- /dev/null +++ b/src/Utility/Logging.h @@ -0,0 +1,39 @@ +#pragma once + +#if defined(IPPL_LOGGING_ENABLED) + +#include +#include +#include +#include +#include +#include "Utility/PrintType.h" + +template +struct scoped_var { + // capture tuple elements by reference - no temp vars in constructor please + std::tuple const message_; + // + explicit scoped_var(Args const&... args) + : message_(args...) // + { + SPDLOG_CRITICAL("SCOPE >> enter << {}", message_); + } + + ~scoped_var() { SPDLOG_CRITICAL("SCOPE << leave >> {}", message_); } +}; +#define SPDLOG_SCOPE(...) scoped_var scope(__VA_ARGS__); + +#else + +// In increasing level +#define SPDLOG_TRACE(...) +#define SPDLOG_DEBUG(...) +#define SPDLOG_INFO(...) +#define SPDLOG_WARN(...) +#define SPDLOG_ERROR(...) +#define SPDLOG_CRITICAL(...) +// +#define SPDLOG_SCOPE(...) + +#endif diff --git a/src/Utility/PrintType.h b/src/Utility/PrintType.h new file mode 100644 index 000000000..c250c3d8a --- /dev/null +++ b/src/Utility/PrintType.h @@ -0,0 +1,70 @@ +#pragma once + +#include +#include +#include +#include +#include + +// gcc and clang both provide this heaader +#if __has_include() +#include +using cxxabi_supported__ = std::true_type; +#else +using cxxabi_supported__ = std::false_type; +// create some dummmy function to make the compiler happy in the true_type instantiation +namespace abi { + template + char* __cxa_demangle(Ts... ts) { + return nullptr; + } +} // namespace abi +#endif + +// -------------------------------------------------------------------- +namespace ippl::debug::detail { + // default : use built-in typeid to get the best info we can + template + struct demangle_helper { + char const* type_id() const { return typeid(T).name(); } + }; + + // if available : demangle an arbitrary c++ type using gnu utility + template + struct demangle_helper { + demangle_helper() + : demangled_{abi::__cxa_demangle(typeid(T).name(), nullptr, nullptr, nullptr), + std::free} {} + + char const* type_id() const { return demangled_ ? demangled_.get() : typeid(T).name(); } + + private: + std::unique_ptr demangled_; + }; + + template + using cxx_type_id = demangle_helper; +} // namespace ippl::debug::detail + +// -------------------------------------------------------------------- +// print type information +// usage : std::cout << debug::print_type("separator") +// separator is appended if the number of types > 1 +// -------------------------------------------------------------------- +namespace ippl::debug { + template // print a single type + inline std::string print_type(char const* = "") { + return std::string(detail::cxx_type_id().type_id()); + } + + template <> // fallback for an empty type + inline std::string print_type<>(char const*) { + return "<>"; + } + + template // print a list of types + inline std::enable_if_t print_type(char const* delim = "") { + std::string temp(print_type()); + return temp + delim + print_type(delim); + } +} // namespace ippl::debug From 0df60b4de3621f9d4222d8b0d3845e6907038cc8 Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Wed, 10 Dec 2025 14:51:17 +0100 Subject: [PATCH 07/13] Add logging to Timer start/stop --- src/Utility/IpplTimings.cpp | 44 ++++++++++++++++++++++--------------- src/Utility/IpplTimings.h | 16 ++++++++++++-- 2 files changed, 40 insertions(+), 20 deletions(-) diff --git a/src/Utility/IpplTimings.cpp b/src/Utility/IpplTimings.cpp index 2648fe50f..8d75823fc 100644 --- a/src/Utility/IpplTimings.cpp +++ b/src/Utility/IpplTimings.cpp @@ -35,20 +35,22 @@ #ifdef IPPL_ENABLE_NSYS_PROFILER #include "nvtx3/nvToolsExt.h" -const uint32_t colors[] = { 0xff00ff00, 0xff0000ff, 0xffffff00, 0xffff00ff, 0xff00ffff, 0xffff0000, 0xffffffff }; -const int num_colors = sizeof(colors)/sizeof(uint32_t); -#define PUSH_RANGE(name,cid) { \ - int color_id = cid; \ - color_id = color_id%num_colors;\ - nvtxEventAttributes_t eventAttrib = {0}; \ - eventAttrib.version = NVTX_VERSION; \ - eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; \ - eventAttrib.colorType = NVTX_COLOR_ARGB; \ - eventAttrib.color = colors[color_id]; \ - eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII; \ - eventAttrib.message.ascii = name; \ - nvtxRangePushEx(&eventAttrib); \ -} +const uint32_t colors[] = {0xff00ff00, 0xff0000ff, 0xffffff00, 0xffff00ff, + 0xff00ffff, 0xffff0000, 0xffffffff}; +const int num_colors = sizeof(colors) / sizeof(uint32_t); +#define PUSH_RANGE(name, cid) \ + { \ + int color_id = cid; \ + color_id = color_id % num_colors; \ + nvtxEventAttributes_t eventAttrib = {0}; \ + eventAttrib.version = NVTX_VERSION; \ + eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; \ + eventAttrib.colorType = NVTX_COLOR_ARGB; \ + eventAttrib.color = colors[color_id]; \ + eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII; \ + eventAttrib.message.ascii = name; \ + nvtxRangePushEx(&eventAttrib); \ + } #endif Timing* IpplTimings::instance = new Timing(); @@ -88,9 +90,9 @@ Timing::TimerRef Timing::getTimer(const char* nm) { void Timing::startTimer(TimerRef t) { if (t >= TimerList.size()) return; - #ifdef IPPL_ENABLE_NSYS_PROFILER +#ifdef IPPL_ENABLE_NSYS_PROFILER PUSH_RANGE(TimerList[t]->name.c_str(), (int)t); - #endif +#endif TimerList[t]->start(); } @@ -98,9 +100,9 @@ void Timing::startTimer(TimerRef t) { void Timing::stopTimer(TimerRef t) { if (t >= TimerList.size()) return; - #ifdef IPPL_ENABLE_NSYS_PROFILER +#ifdef IPPL_ENABLE_NSYS_PROFILER nvtxRangePop(); - #endif +#endif TimerList[t]->stop(); } @@ -111,6 +113,12 @@ void Timing::clearTimer(TimerRef t) { TimerList[t]->clear(); } +const std::string& Timing::timerName(TimerRef t) { + if (t >= TimerList.size()) + return EmptyName; + return TimerList[t]->name; +} + // print out the timing results void Timing::print() { if (TimerList.size() < 1) diff --git a/src/Utility/IpplTimings.h b/src/Utility/IpplTimings.h index aea1332cd..e2ea93c5b 100644 --- a/src/Utility/IpplTimings.h +++ b/src/Utility/IpplTimings.h @@ -31,6 +31,7 @@ #include #include +#include "Utility/Logging.h" #include "Utility/PAssert.h" #include "Utility/Timer.h" #include "Utility/my_auto_ptr.h" @@ -117,6 +118,9 @@ struct Timing { // clear a timer, by turning it off and throwing away its time void clearTimer(TimerRef); + // access the timer's name + const std::string& timerName(TimerRef); + // return a TimerInfo struct by asking for the name TimerInfo* infoTimer(const char* nm) { return TimerMap[std::string(nm)]; } @@ -136,6 +140,8 @@ struct Timing { // a map of timers, keyed by string TimerMap_t TimerMap; + + std::string EmptyName; }; class IpplTimings { @@ -150,10 +156,16 @@ class IpplTimings { static TimerRef getTimer(const char* nm) { return instance->getTimer(nm); } // start a timer - static void startTimer(TimerRef t) { instance->startTimer(t); } + static void startTimer(TimerRef t) { + SPDLOG_TRACE("Starting timer [{}]", instance->timerName(t)); + instance->startTimer(t); + } // stop a timer, and accumulate it's values - static void stopTimer(TimerRef t) { instance->stopTimer(t); } + static void stopTimer(TimerRef t) { + SPDLOG_TRACE("Stopping timer [{}]", instance->timerName(t)); + instance->stopTimer(t); + } // clear a timer, by turning it off and throwing away its time static void clearTimer(TimerRef t) { instance->clearTimer(t); } From 45ea40267829aefe4d820d68089f2d8223164a54 Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Fri, 5 Dec 2025 17:09:45 +0100 Subject: [PATCH 08/13] Use a singleton BufferHandler to avoid duplication of buffers --- src/Communicate/Buffers.cpp | 4 +- src/Communicate/Buffers.hpp | 4 +- src/Communicate/Communicator.cpp | 88 ++++++++++++++----------- src/Communicate/Communicator.h | 7 +- src/Communicate/CommunicatorLogging.cpp | 2 +- 5 files changed, 60 insertions(+), 45 deletions(-) diff --git a/src/Communicate/Buffers.cpp b/src/Communicate/Buffers.cpp index 04cbb1808..026633fec 100644 --- a/src/Communicate/Buffers.cpp +++ b/src/Communicate/Buffers.cpp @@ -32,13 +32,13 @@ namespace ippl { } void Communicator::deleteAllBuffers() { - buffer_handlers_m.forAll([](BufferHandler&& bh) { + buffer_handlers_m->forAll([](BufferHandler&& bh) { bh.deleteAllBuffers(); }); } void Communicator::freeAllBuffers() { - buffer_handlers_m.forAll([](BufferHandler&& bh) { + buffer_handlers_m->forAll([](BufferHandler&& bh) { bh.freeAllBuffers(); }); } diff --git a/src/Communicate/Buffers.hpp b/src/Communicate/Buffers.hpp index c08dd2a69..98c0b513f 100644 --- a/src/Communicate/Buffers.hpp +++ b/src/Communicate/Buffers.hpp @@ -26,7 +26,7 @@ namespace ippl { template Communicator::buffer_type Communicator::getBuffer(size_type size, double overallocation) { - auto& buffer_handler = buffer_handlers_m.get(); + auto& buffer_handler = buffer_handlers_m->get(); return buffer_handler.getBuffer(size * sizeof(T), std::max(overallocation, defaultOveralloc_m)); @@ -34,7 +34,7 @@ namespace ippl { template void Communicator::freeBuffer(Communicator::buffer_type buffer) { - auto& buffer_handler = buffer_handlers_m.get(); + auto& buffer_handler = buffer_handlers_m->get(); buffer_handler.freeBuffer(buffer); } diff --git a/src/Communicate/Communicator.cpp b/src/Communicate/Communicator.cpp index 0ade37738..55d601715 100644 --- a/src/Communicate/Communicator.cpp +++ b/src/Communicate/Communicator.cpp @@ -1,42 +1,54 @@ #include "Communicate/Communicator.h" -namespace ippl { - namespace mpi { - - Communicator::Communicator() - : comm_m(new MPI_Comm(MPI_COMM_WORLD)) { - MPI_Comm_rank(*comm_m, &rank_m); - MPI_Comm_size(*comm_m, &size_m); - } - - Communicator::Communicator(MPI_Comm comm) { - comm_m = std::make_shared(comm); - MPI_Comm_rank(*comm_m, &rank_m); - MPI_Comm_size(*comm_m, &size_m); - } - - Communicator& Communicator::operator=(MPI_Comm comm) { - comm_m = std::make_shared(comm); - MPI_Comm_rank(*comm_m, &rank_m); - MPI_Comm_size(*comm_m, &size_m); - return *this; - } - - Communicator Communicator::Communicator::split(int color, int key) const { - MPI_Comm newcomm; - MPI_Comm_split(*comm_m, color, key, &newcomm); - return Communicator(newcomm); - } - - void Communicator::probe(int source, int tag, Status& status) { - MPI_Probe(source, tag, *comm_m, status); - } - - bool Communicator::iprobe(int source, int tag, Status& status) { - int flag = 0; - MPI_Iprobe(source, tag, *comm_m, &flag, status); - return (flag != 0); +namespace ippl::mpi { + + Communicator::Communicator() + : buffer_handlers_m(get_buffer_handler_instance()) + , comm_m(new MPI_Comm(MPI_COMM_WORLD)) { + MPI_Comm_rank(*comm_m, &rank_m); + MPI_Comm_size(*comm_m, &size_m); + } + + Communicator::Communicator(MPI_Comm comm) { + buffer_handlers_m = get_buffer_handler_instance(); + comm_m = std::make_shared(comm); + MPI_Comm_rank(*comm_m, &rank_m); + MPI_Comm_size(*comm_m, &size_m); + } + + Communicator& Communicator::operator=(MPI_Comm comm) { + buffer_handlers_m = get_buffer_handler_instance(); + comm_m = std::make_shared(comm); + MPI_Comm_rank(*comm_m, &rank_m); + MPI_Comm_size(*comm_m, &size_m); + return *this; + } + + Communicator Communicator::Communicator::split(int color, int key) const { + MPI_Comm newcomm; + MPI_Comm_split(*comm_m, color, key, &newcomm); + return Communicator(newcomm); + } + + void Communicator::probe(int source, int tag, Status& status) { + MPI_Probe(source, tag, *comm_m, status); + } + + bool Communicator::iprobe(int source, int tag, Status& status) { + int flag = 0; + MPI_Iprobe(source, tag, *comm_m, &flag, status); + return (flag != 0); + } + + // --------------------------------------- + // singleton access to buffer manager + // --------------------------------------- + std::shared_ptr Communicator::get_buffer_handler_instance() { + static std::shared_ptr comm_buff_handler_ptr{nullptr}; + if (comm_buff_handler_ptr == nullptr) { + comm_buff_handler_ptr = std::make_shared(); } - } // namespace mpi -} // namespace ippl + return comm_buff_handler_ptr; + } +} // namespace ippl::mpi diff --git a/src/Communicate/Communicator.h b/src/Communicate/Communicator.h index 63886368a..1b2490e62 100644 --- a/src/Communicate/Communicator.h +++ b/src/Communicate/Communicator.h @@ -201,8 +201,7 @@ namespace ippl { std::vector gatherLogsFromAllRanks(const std::vector& localLogs); void writeLogsToFile(const std::vector& allLogs, const std::string& filename); - buffer_handler_type buffer_handlers_m; - + std::shared_ptr buffer_handlers_m; double defaultOveralloc_m = 1.0; ///////////////////////////////////////////////////////////////////////////////////// @@ -211,7 +210,11 @@ namespace ippl { std::shared_ptr comm_m; int size_m; int rank_m; + + public: + std::shared_ptr get_buffer_handler_instance(); }; + } // namespace mpi } // namespace ippl diff --git a/src/Communicate/CommunicatorLogging.cpp b/src/Communicate/CommunicatorLogging.cpp index 92e821ebd..fbaf8357b 100644 --- a/src/Communicate/CommunicatorLogging.cpp +++ b/src/Communicate/CommunicatorLogging.cpp @@ -28,7 +28,7 @@ namespace ippl { std::vector Communicator::gatherLocalLogs() { std::vector localLogs; - buffer_handlers_m.forAll([&](auto& loggingHandler) { + buffer_handlers_m->forAll([&](auto& loggingHandler) { const auto& logs = loggingHandler.getLogs(); localLogs.insert(localLogs.end(), logs.begin(), logs.end()); }); From b21f1942ac4ba696735bf4bf95dac05f9046b444 Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Fri, 5 Dec 2025 23:53:57 +0100 Subject: [PATCH 09/13] Make DefaultBufferHandler default instead of LoggingBufferHandler --- src/Communicate/Communicator.h | 2 +- src/Communicate/CommunicatorLogging.cpp | 167 ++++++++++++------------ 2 files changed, 86 insertions(+), 83 deletions(-) diff --git a/src/Communicate/Communicator.h b/src/Communicate/Communicator.h index 1b2490e62..9819ab3bd 100644 --- a/src/Communicate/Communicator.h +++ b/src/Communicate/Communicator.h @@ -135,7 +135,7 @@ namespace ippl { private: template - using buffer_container_type = LoggingBufferHandler; + using buffer_container_type = DefaultBufferHandler; using buffer_handler_type = typename detail::ContainerForAllSpaces::type; diff --git a/src/Communicate/CommunicatorLogging.cpp b/src/Communicate/CommunicatorLogging.cpp index fbaf8357b..ff301e60c 100644 --- a/src/Communicate/CommunicatorLogging.cpp +++ b/src/Communicate/CommunicatorLogging.cpp @@ -8,117 +8,120 @@ #include "Communicate/Communicator.h" #include "Communicate/LogEntry.h" -namespace ippl { - namespace mpi { - void Communicator::printLogs(const std::string& filename) { - std::vector localLogs = gatherLocalLogs(); - - std::vector allLogs; - if (rank() == 0) { - allLogs = gatherLogsFromAllRanks(localLogs); - } else { - sendLogsToRank0(localLogs); - } +namespace ippl::mpi { + void Communicator::printLogs(const std::string& filename) { + std::vector localLogs = gatherLocalLogs(); + + std::vector allLogs; + if (rank() == 0) { + allLogs = gatherLogsFromAllRanks(localLogs); + } else { + sendLogsToRank0(localLogs); + } - if (rank() == 0) { - writeLogsToFile(allLogs, filename); - } + if (rank() == 0) { + writeLogsToFile(allLogs, filename); } + } + + template + struct is_a_logger : std::false_type {}; - std::vector Communicator::gatherLocalLogs() { - std::vector localLogs; + template + struct is_a_logger > : std::true_type {}; + std::vector Communicator::gatherLocalLogs() { + std::vector localLogs; + if constexpr (is_a_logger::value) { buffer_handlers_m->forAll([&](auto& loggingHandler) { const auto& logs = loggingHandler.getLogs(); localLogs.insert(localLogs.end(), logs.begin(), logs.end()); }); - - return localLogs; } + return localLogs; + } - void Communicator::sendLogsToRank0(const std::vector& localLogs) { - std::vector buffer = serializeLogs(localLogs); - - int logSize = buffer.size(); + void Communicator::sendLogsToRank0(const std::vector& localLogs) { + std::vector buffer = serializeLogs(localLogs); - this->send(logSize, 1, 0, 0); - this->send(buffer.data(), logSize, 0, 0); - } + int logSize = buffer.size(); - std::vector Communicator::gatherLogsFromAllRanks( - const std::vector& localLogs) { - std::vector allLogs = localLogs; + this->send(logSize, 1, 0, 0); + this->send(buffer.data(), logSize, 0, 0); + } - for (int rank = 1; rank < size_m; ++rank) { - int logSize; - Status status; + std::vector Communicator::gatherLogsFromAllRanks( + const std::vector& localLogs) { + std::vector allLogs = localLogs; - this->recv(logSize, 1, rank, 0, status); + for (int rank = 1; rank < size_m; ++rank) { + int logSize; + Status status; - std::vector buffer(logSize); - this->recv(buffer.data(), logSize, rank, 0, status); + this->recv(logSize, 1, rank, 0, status); - std::vector deserializedLogs = deserializeLogs(buffer); - allLogs.insert(allLogs.end(), deserializedLogs.begin(), deserializedLogs.end()); - } + std::vector buffer(logSize); + this->recv(buffer.data(), logSize, rank, 0, status); - return allLogs; + std::vector deserializedLogs = deserializeLogs(buffer); + allLogs.insert(allLogs.end(), deserializedLogs.begin(), deserializedLogs.end()); } - std::vector serializeLogs(const std::vector& logs) { - std::vector buffer; + return allLogs; + } - for (const auto& logEntry : logs) { - std::vector serializedEntry = logEntry.serialize(); - buffer.insert(buffer.end(), serializedEntry.begin(), serializedEntry.end()); - } + std::vector serializeLogs(const std::vector& logs) { + std::vector buffer; - return buffer; + for (const auto& logEntry : logs) { + std::vector serializedEntry = logEntry.serialize(); + buffer.insert(buffer.end(), serializedEntry.begin(), serializedEntry.end()); } - std::vector deserializeLogs(const std::vector& buffer) { - std::vector logs; - size_t offset = 0; + return buffer; + } - while (offset < buffer.size()) { - LogEntry logEntry = LogEntry::deserialize(buffer, offset); + std::vector deserializeLogs(const std::vector& buffer) { + std::vector logs; + size_t offset = 0; - logs.push_back(logEntry); + while (offset < buffer.size()) { + LogEntry logEntry = LogEntry::deserialize(buffer, offset); - offset += logEntry.serialize().size(); - } - return logs; + logs.push_back(logEntry); + + offset += logEntry.serialize().size(); } + return logs; + } + + void Communicator::writeLogsToFile(const std::vector& allLogs, + const std::string& filename) { + Inform logFile(0, filename.c_str(), Inform::OVERWRITE, 0); + logFile.setOutputLevel(1); - void Communicator::writeLogsToFile(const std::vector& allLogs, - const std::string& filename) { - Inform logFile(0, filename.c_str(), Inform::OVERWRITE, 0); - logFile.setOutputLevel(1); - - logFile << "Timestamp,Method,Rank,MemorySpace,usedSize,FreeSize,Parameters" << endl; - - for (const auto& log : allLogs) { - auto timestamp = std::chrono::duration_cast( - log.timestamp.time_since_epoch()) - .count(); - - logFile << timestamp << "," << log.methodName << "," << log.rank << "," - << log.memorySpace << "," << log.usedSize << "," << log.freeSize; - - logFile << ",\""; - bool first = true; - for (const auto& [key, value] : log.parameters) { - if (!first) { - logFile << "; "; - } - logFile << key << ": " << value; - first = false; + logFile << "Timestamp,Method,Rank,MemorySpace,usedSize,FreeSize,Parameters" << endl; + + for (const auto& log : allLogs) { + auto timestamp = std::chrono::duration_cast( + log.timestamp.time_since_epoch()) + .count(); + + logFile << timestamp << "," << log.methodName << "," << log.rank << "," + << log.memorySpace << "," << log.usedSize << "," << log.freeSize; + + logFile << ",\""; + bool first = true; + for (const auto& [key, value] : log.parameters) { + if (!first) { + logFile << "; "; } - logFile << "\"" << endl; + logFile << key << ": " << value; + first = false; } - - logFile.flush(); + logFile << "\"" << endl; } - } // namespace mpi -} // namespace ippl + logFile.flush(); + } +} // namespace ippl::mpi From 7bdb540e33df5acb8cf1ff0d363b46314e76f0e3 Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Mon, 8 Dec 2025 11:14:05 +0100 Subject: [PATCH 10/13] Abstract communication buffers away from plain kokkos::view's To improve messaging performance, we may wish to replace simple kokkos:views in buffer handling with an alternative implementation that allows for better alignment and caching. Additionally, code may leak communication buffers via direct use of kokkos views that are passed around from send/rec code into/out of fields or attributes. Placing buffers behind an API reduces the chance of this happening. --- src/Communicate/Archive.h | 12 +- src/Communicate/Archive.hpp | 28 +- src/Communicate/BufferHandler.h | 47 ++- src/Communicate/BufferHandler.hpp | 4 +- src/Communicate/Buffers.hpp | 31 +- src/Communicate/Collectives.hpp | 86 +++-- src/Communicate/Communicator.h | 296 +++++++++--------- src/Communicate/CommunicatorLogging.cpp | 4 +- src/Communicate/CommunicatorLogging.hpp | 10 +- src/Communicate/LoggingBufferHandler.h | 18 +- src/Communicate/LoggingBufferHandler.hpp | 6 +- src/Field/HaloCells.h | 5 +- src/Particle/ParticleAttrib.h | 15 +- src/Particle/ParticleAttribBase.h | 9 +- unit_tests/Communicate/BufferHandler.cpp | 15 +- .../Communicate/LoggingBufferHandler.cpp | 8 +- 16 files changed, 313 insertions(+), 281 deletions(-) diff --git a/src/Communicate/Archive.h b/src/Communicate/Archive.h index 6ec74f946..93f99f2e0 100644 --- a/src/Communicate/Archive.h +++ b/src/Communicate/Archive.h @@ -21,14 +21,14 @@ namespace ippl { namespace detail { /*! * @file Archive.h - * Serialize and desesrialize particle attributes. + * Serialize and deserialize particle attributes. * @tparam Properties variadic template for Kokkos::View */ - template + template class Archive { public: - using buffer_type = typename ViewType::view_type; + using buffer_type = BufferType; using pointer_type = typename buffer_type::pointer_type; Archive(size_type size = 0); @@ -84,7 +84,9 @@ namespace ippl { void resizeBuffer(size_type size) { Kokkos::resize(buffer_m, size); } - void reallocBuffer(size_type size) { Kokkos::realloc(buffer_m, size); } + void reallocBuffer(size_type size) { + Kokkos::realloc(buffer_m, size); /*buffer_m.reallocBuffer(size);*/ + } void resetWritePos() { writepos_m = 0; } void resetReadPos() { readpos_m = 0; } @@ -97,7 +99,7 @@ namespace ippl { //! read position for deserialization size_type readpos_m; //! serialized data - buffer_type buffer_m; + BufferType buffer_m; }; } // namespace detail } // namespace ippl diff --git a/src/Communicate/Archive.hpp b/src/Communicate/Archive.hpp index f1d097a69..d86d7ee8d 100644 --- a/src/Communicate/Archive.hpp +++ b/src/Communicate/Archive.hpp @@ -9,16 +9,16 @@ namespace ippl { namespace detail { - template - Archive::Archive(size_type size) + template + Archive::Archive(size_type size) : writepos_m(0) , readpos_m(0) , buffer_m("buffer", size) {} - template + template template - void Archive::serialize(const Kokkos::View& view, - size_type nsends) { + void Archive::serialize(const Kokkos::View& view, + size_type nsends) { using exec_space = typename Kokkos::View::execution_space; using policy_type = Kokkos::RangePolicy; @@ -32,10 +32,10 @@ namespace ippl { writepos_m += size * nsends; } - template + template template - void Archive::serialize( - const Kokkos::View*, ViewArgs...>& view, size_type nsends) { + void Archive::serialize(const Kokkos::View*, ViewArgs...>& view, + size_type nsends) { using exec_space = typename Kokkos::View::execution_space; size_t size = sizeof(T); @@ -58,10 +58,10 @@ namespace ippl { writepos_m += Dim * size * nsends; } - template + template template - void Archive::deserialize(Kokkos::View& view, - size_type nrecvs) { + void Archive::deserialize(Kokkos::View& view, + size_type nrecvs) { using exec_space = typename Kokkos::View::execution_space; using policy_type = Kokkos::RangePolicy; @@ -80,10 +80,10 @@ namespace ippl { readpos_m += size * nrecvs; } - template + template template - void Archive::deserialize(Kokkos::View*, ViewArgs...>& view, - size_type nrecvs) { + void Archive::deserialize(Kokkos::View*, ViewArgs...>& view, + size_type nrecvs) { using exec_space = typename Kokkos::View::execution_space; size_t size = sizeof(T); diff --git a/src/Communicate/BufferHandler.h b/src/Communicate/BufferHandler.h index 117c12f22..6ad36ff61 100644 --- a/src/Communicate/BufferHandler.h +++ b/src/Communicate/BufferHandler.h @@ -4,9 +4,33 @@ #include #include +#include "Types/IpplTypes.h" +#include "Types/ViewTypes.h" + +#include "Utility/TypeUtils.h" +#include "Utility/Logging.h" + #include "Communicate/Archive.h" -namespace ippl { +namespace ippl::comms { + + template + using communicator_storage = + ippl::detail::ViewType>::view_type; + + // --------------------------------------------- + // archive wrapper around some arbitrary buffer + template + struct rma_archive { + using type = detail::Archive; + }; + + template + using rma_archive_type = rma_archive::type; + + template + using archive_buffer = rma_archive_type>; /** * @brief Interface for memory buffer handling. @@ -17,11 +41,11 @@ namespace ippl { * * @tparam MemorySpace The memory space type used for buffer allocation. */ - template + template class BufferHandler { public: - using archive_type = ippl::detail::Archive; - using buffer_type = std::shared_ptr; + using archive_type = Buffer; + using buffer_type = std::shared_ptr; using size_type = ippl::detail::size_type; virtual ~BufferHandler() {} @@ -92,11 +116,12 @@ namespace ippl { * @tparam MemorySpace The memory space type for the buffer (e.g., `Kokkos::HostSpace`). */ template - class DefaultBufferHandler : public BufferHandler { + class DefaultBufferHandler : public BufferHandler, MemorySpace> { public: - using typename BufferHandler::archive_type; - using typename BufferHandler::buffer_type; - using typename BufferHandler::size_type; + using buffer_type = + typename BufferHandler, MemorySpace>::buffer_type; + using typename BufferHandler, MemorySpace>::archive_type; + using typename BufferHandler, MemorySpace>::size_type; ~DefaultBufferHandler() override; @@ -106,8 +131,8 @@ namespace ippl { * Requests a memory buffer of the specified size, with the option * to request a buffer larger than the base size by an overallocation * multiplier. If a sufficiently large buffer is available, it is returned. If not, the - * largest free buffer is reallocated. If there are no free buffers available, only then a - * new buffer is allocated. + * largest free buffer is reallocated. If there are no free buffers available, only then + * a new buffer is allocated. * * @param size The required buffer size. * @param overallocation A multiplier to allocate additional buffer space. @@ -163,7 +188,7 @@ namespace ippl { buffer_set_type free_buffers{ &DefaultBufferHandler::bufferSizeComparator}; ///< Set of free buffers }; -} // namespace ippl +} // namespace ippl::comms #include "Communicate/BufferHandler.hpp" diff --git a/src/Communicate/BufferHandler.hpp b/src/Communicate/BufferHandler.hpp index c6d57f0a3..6a585579d 100644 --- a/src/Communicate/BufferHandler.hpp +++ b/src/Communicate/BufferHandler.hpp @@ -1,7 +1,7 @@ #ifndef IPPL_BUFFER_HANDLER_HPP #define IPPL_BUFFER_HANDLER_HPP -namespace ippl { +namespace ippl::comms { template DefaultBufferHandler::~DefaultBufferHandler() {} @@ -143,6 +143,6 @@ namespace ippl { return newBuffer; } -} // namespace ippl +} // namespace ippl::comms #endif diff --git a/src/Communicate/Buffers.hpp b/src/Communicate/Buffers.hpp index 98c0b513f..9732f9619 100644 --- a/src/Communicate/Buffers.hpp +++ b/src/Communicate/Buffers.hpp @@ -20,22 +20,33 @@ // exchanging particle data between ranks. // +#include "Utility/Logging.h" +#include "Utility/TypeUtils.h" + namespace ippl { namespace mpi { - template - Communicator::buffer_type Communicator::getBuffer(size_type size, - double overallocation) { - auto& buffer_handler = buffer_handlers_m->get(); + // ----------------------------------- + template + Communicator::buffer_type Communicator::getBuffer(size_type size, + double overallocation) { + using memory_space = BufferType::memory_space; - return buffer_handler.getBuffer(size * sizeof(T), - std::max(overallocation, defaultOveralloc_m)); - } + auto& buffer_handler = buffer_handlers_m->get(); - template - void Communicator::freeBuffer(Communicator::buffer_type buffer) { - auto& buffer_handler = buffer_handlers_m->get(); + auto b = buffer_handler.getBuffer(size * sizeof(T), + std::max(overallocation, defaultOveralloc_m)); + SPDLOG_TRACE("{}, getBuffer {}, buf, {}, size {}", (void*)this, + ippl::debug::print_type(), (void*)(b->getBuffer()), + size * sizeof(T)); + return b; + } + template + void Communicator::freeBuffer(Communicator::buffer_type buffer) { + using memory_space = BufferType::memory_space; + auto& buffer_handler = buffer_handlers_m->get(); + SPDLOG_TRACE("freeBuffer buf, {}", (void*)(buffer->getBuffer())); buffer_handler.freeBuffer(buffer); } diff --git a/src/Communicate/Collectives.hpp b/src/Communicate/Collectives.hpp index d273242ab..0ed2aa69b 100644 --- a/src/Communicate/Collectives.hpp +++ b/src/Communicate/Collectives.hpp @@ -2,62 +2,60 @@ #include "Communicate/Operations.h" -namespace ippl { - namespace mpi { - template - void Communicator::gather(const T* input, T* output, int count, int root) { - MPI_Datatype type = get_mpi_datatype(*input); +namespace ippl::mpi { + template + void Communicator::gather(const T* input, T* output, int count, int root) { + MPI_Datatype type = get_mpi_datatype(*input); - MPI_Gather(const_cast(input), count, type, output, count, type, root, *comm_m); - } + MPI_Gather(const_cast(input), count, type, output, count, type, root, *comm_m); + } - template - void Communicator::scatter(const T* input, T* output, int count, int root) { - MPI_Datatype type = get_mpi_datatype(*input); + template + void Communicator::scatter(const T* input, T* output, int count, int root) { + MPI_Datatype type = get_mpi_datatype(*input); - MPI_Scatter(const_cast(input), count, type, output, count, type, root, *comm_m); - } + MPI_Scatter(const_cast(input), count, type, output, count, type, root, *comm_m); + } - template - void Communicator::reduce(const T* input, T* output, int count, Op, int root) { - MPI_Datatype type = get_mpi_datatype(*input); + template + void Communicator::reduce(const T* input, T* output, int count, Op, int root) { + MPI_Datatype type = get_mpi_datatype(*input); - MPI_Op mpiOp = get_mpi_op(); + MPI_Op mpiOp = get_mpi_op(); - MPI_Reduce(const_cast(input), output, count, type, mpiOp, root, *comm_m); - } + MPI_Reduce(const_cast(input), output, count, type, mpiOp, root, *comm_m); + } - template - void Communicator::reduce(const T& input, T& output, int count, Op op, int root) { - reduce(&input, &output, count, op, root); - } + template + void Communicator::reduce(const T& input, T& output, int count, Op op, int root) { + reduce(&input, &output, count, op, root); + } - template - void Communicator::allreduce(const T* input, T* output, int count, Op) { - MPI_Datatype type = get_mpi_datatype(*input); + template + void Communicator::allreduce(const T* input, T* output, int count, Op) { + MPI_Datatype type = get_mpi_datatype(*input); - MPI_Op mpiOp = get_mpi_op(); + MPI_Op mpiOp = get_mpi_op(); - MPI_Allreduce(const_cast(input), output, count, type, mpiOp, *comm_m); - } + MPI_Allreduce(const_cast(input), output, count, type, mpiOp, *comm_m); + } - template - void Communicator::allreduce(const T& input, T& output, int count, Op op) { - allreduce(&input, &output, count, op); - } + template + void Communicator::allreduce(const T& input, T& output, int count, Op op) { + allreduce(&input, &output, count, op); + } - template - void Communicator::allreduce(T* inout, int count, Op) { - MPI_Datatype type = get_mpi_datatype(*inout); + template + void Communicator::allreduce(T* inout, int count, Op) { + MPI_Datatype type = get_mpi_datatype(*inout); - MPI_Op mpiOp = get_mpi_op(); + MPI_Op mpiOp = get_mpi_op(); - MPI_Allreduce(MPI_IN_PLACE, inout, count, type, mpiOp, *comm_m); - } + MPI_Allreduce(MPI_IN_PLACE, inout, count, type, mpiOp, *comm_m); + } - template - void Communicator::allreduce(T& inout, int count, Op op) { - allreduce(&inout, count, op); - } - } // namespace mpi -} // namespace ippl + template + void Communicator::allreduce(T& inout, int count, Op op) { + allreduce(&inout, count, op); + } +} // namespace ippl::mpi diff --git a/src/Communicate/Communicator.h b/src/Communicate/Communicator.h index 9819ab3bd..6dc1ae506 100644 --- a/src/Communicate/Communicator.h +++ b/src/Communicate/Communicator.h @@ -5,218 +5,210 @@ #ifndef IPPL_MPI_COMMUNICATOR_H #define IPPL_MPI_COMMUNICATOR_H -#include -#include - -#include "Communicate/BufferHandler.h" -#include "Communicate/LoggingBufferHandler.h" -#include "Communicate/Request.h" -#include "Communicate/Status.h" - -//////////////////////////////////////////////// -// For message size check; see below #include #include +#include +#include #include "Utility/TypeUtils.h" #include "Communicate/Archive.h" +#include "Communicate/BufferHandler.h" +#include "Communicate/LogEntry.h" +#include "Communicate/Request.h" +#include "Communicate/Status.h" #include "Communicate/TagMaker.h" #include "Communicate/Tags.h" -//////////////////////////////////////////////////// -namespace ippl { - namespace mpi { - - class Communicator : public TagMaker { - public: - Communicator(); +//////////////////////////////////////////////////// - Communicator(MPI_Comm comm); +namespace ippl::mpi { - Communicator& operator=(MPI_Comm comm); + class Communicator : public TagMaker { + public: + Communicator(); - ~Communicator() = default; + Communicator(MPI_Comm comm); - Communicator split(int color, int key) const; + Communicator& operator=(MPI_Comm comm); - operator const MPI_Comm&() const noexcept { return *comm_m; } + ~Communicator() = default; - int size() const noexcept { return size_m; } + Communicator split(int color, int key) const; - int rank() const noexcept { return rank_m; } + operator const MPI_Comm&() const noexcept { return *comm_m; } - void barrier() { MPI_Barrier(*comm_m); } + int size() const noexcept { return size_m; } - void abort(int errorcode = -1) { MPI_Abort(*comm_m, errorcode); } + int rank() const noexcept { return rank_m; } - /* - * Blocking point-to-point communication - * - */ + void barrier() { MPI_Barrier(*comm_m); } - template - void send(const T& buffer, int count, int dest, int tag); + void abort(int errorcode = -1) { MPI_Abort(*comm_m, errorcode); } - template - void send(const T* buffer, int count, int dest, int tag); + /* + * Blocking point-to-point communication + * + */ - template - void recv(T& output, int count, int source, int tag, Status& status); + template + void send(const T& buffer, int count, int dest, int tag); - template - void recv(T* output, int count, int source, int tag, Status& status); + template + void send(const T* buffer, int count, int dest, int tag); - void probe(int source, int tag, Status& status); + template + void recv(T& output, int count, int source, int tag, Status& status); - /* - * Non-blocking point-to-point communication - * - */ + template + void recv(T* output, int count, int source, int tag, Status& status); - template - void isend(const T& buffer, int count, int dest, int tag, Request& request); + void probe(int source, int tag, Status& status); - template - void isend(const T* buffer, int count, int dest, int tag, Request& request); + /* + * Non-blocking point-to-point communication + * + */ - template - void irecv(T& buffer, int count, int source, int tag, Request& request); + template + void isend(const T& buffer, int count, int dest, int tag, Request& request); - template - void irecv(T* buffer, int count, int source, int tag, Request& request); + template + void isend(const T* buffer, int count, int dest, int tag, Request& request); - bool iprobe(int source, int tag, Status& status); + template + void irecv(T& buffer, int count, int source, int tag, Request& request); - /* - * Collective communication - */ + template + void irecv(T* buffer, int count, int source, int tag, Request& request); - /* Gather the data in the given source container from all other nodes to a - * specific node (default: 0). - */ - template - void gather(const T* input, T* output, int count, int root = 0); + bool iprobe(int source, int tag, Status& status); - /* Scatter the data from all other nodes to a - * specific node (default: 0). - */ - template - void scatter(const T* input, T* output, int count, int root = 0); + /* + * Collective communication + */ - /* Reduce data coming from all nodes to a specific node - * (default: 0). Apply certain operation - * - */ - template - void reduce(const T* input, T* output, int count, Op op, int root = 0); + /* Gather the data in the given source container from all other nodes to a + * specific node (default: 0). + */ + template + void gather(const T* input, T* output, int count, int root = 0); - template - void reduce(const T& input, T& output, int count, Op op, int root = 0); + /* Scatter the data from all other nodes to a + * specific node (default: 0). + */ + template + void scatter(const T* input, T* output, int count, int root = 0); - template - void allreduce(const T* input, T* output, int count, Op op); + /* Reduce data coming from all nodes to a specific node + * (default: 0). Apply certain operation + * + */ + template + void reduce(const T* input, T* output, int count, Op op, int root = 0); - template - void allreduce(const T& input, T& output, int count, Op op); + template + void reduce(const T& input, T& output, int count, Op op, int root = 0); - template - void allreduce(T* inout, int count, Op op); + template + void allreduce(const T* input, T* output, int count, Op op); - template - void allreduce(T& inout, int count, Op op); + template + void allreduce(const T& input, T& output, int count, Op op); - ///////////////////////////////////////////////////////////////////////////////////// - template - using archive_type = detail::Archive; + template + void allreduce(T* inout, int count, Op op); - template - using buffer_type = std::shared_ptr>; + template + void allreduce(T& inout, int count, Op op); - private: - template - using buffer_container_type = DefaultBufferHandler; + private: + template + using buffer_container_type = comms::DefaultBufferHandler; - using buffer_handler_type = - typename detail::ContainerForAllSpaces::type; + using buffer_handler_type = + typename detail::ContainerForAllSpaces::type; - public: - using size_type = detail::size_type; - double getDefaultOverallocation() const { return defaultOveralloc_m; } - void setDefaultOverallocation(double factor); + public: + template + using buffer_type = buffer_container_type::buffer_type; - template - buffer_type getBuffer(size_type size, double overallocation = 1.0); + public: + using size_type = detail::size_type; + double getDefaultOverallocation() const { return defaultOveralloc_m; } + void setDefaultOverallocation(double factor); - void deleteAllBuffers(); - void freeAllBuffers(); + template + buffer_type getBuffer(size_type size, double overallocation = 1.0); - template - void freeBuffer(buffer_type buffer); + void deleteAllBuffers(); + void freeAllBuffers(); - const MPI_Comm& getCommunicator() const noexcept { return *comm_m; } + template + void freeBuffer(buffer_type buffer); - template - void recv(int src, int tag, Buffer& buffer, Archive& ar, size_type msize, - size_type nrecvs) { - // Temporary fix. MPI communication seems to have problems when the - // count argument exceeds the range of int, so large messages should - // be split into smaller messages - if (msize > INT_MAX) { - std::cerr << "Message size exceeds range of int" << std::endl; - this->abort(); - } - MPI_Status status; - MPI_Recv(ar.getBuffer(), msize, MPI_BYTE, src, tag, *comm_m, &status); + const MPI_Comm& getCommunicator() const noexcept { return *comm_m; } - buffer.deserialize(ar, nrecvs); + template + void recv(int src, int tag, Buffer& buffer, Archive& ar, size_type msize, + size_type nrecvs) { + // Temporary fix. MPI communication seems to have problems when the + // count argument exceeds the range of int, so large messages should + // be split into smaller messages + if (msize > INT_MAX) { + std::cerr << "Message size exceeds range of int" << std::endl; + this->abort(); } - - template - void isend(int dest, int tag, Buffer& buffer, Archive& ar, MPI_Request& request, - size_type nsends) { - if (ar.getSize() > INT_MAX) { - std::cerr << "Message size exceeds range of int" << std::endl; - this->abort(); - } - buffer.serialize(ar, nsends); - MPI_Isend(ar.getBuffer(), ar.getSize(), MPI_BYTE, dest, tag, *comm_m, &request); + MPI_Status status; + MPI_Recv(ar.getBuffer(), msize, MPI_BYTE, src, tag, *comm_m, &status); + + buffer.deserialize(ar, nrecvs); + } + + template + void isend(int dest, int tag, Buffer& buffer, Archive& ar, MPI_Request& request, + size_type nsends) { + if (ar.getSize() > INT_MAX) { + std::cerr << "Message size exceeds range of int" << std::endl; + this->abort(); } - - template - void irecv(int src, int tag, Archive& ar, MPI_Request& request, size_type msize) { - if (msize > INT_MAX) { - std::cerr << "Message size exceeds range of int" << std::endl; - this->abort(); - } - MPI_Irecv(ar.getBuffer(), msize, MPI_BYTE, src, tag, *comm_m, &request); + buffer.serialize(ar, nsends); + MPI_Isend(ar.getBuffer(), ar.getSize(), MPI_BYTE, dest, tag, *comm_m, &request); + } + + template + void irecv(int src, int tag, Archive& ar, MPI_Request& request, size_type msize) { + if (msize > INT_MAX) { + std::cerr << "Message size exceeds range of int" << std::endl; + this->abort(); } + MPI_Irecv(ar.getBuffer(), msize, MPI_BYTE, src, tag, *comm_m, &request); + } - void printLogs(const std::string& filename); + void printLogs(const std::string& filename); - private: - std::vector gatherLocalLogs(); - void sendLogsToRank0(const std::vector& localLogs); - std::vector gatherLogsFromAllRanks(const std::vector& localLogs); - void writeLogsToFile(const std::vector& allLogs, const std::string& filename); + private: + std::vector gatherLocalLogs(); + void sendLogsToRank0(const std::vector& localLogs); + std::vector gatherLogsFromAllRanks(const std::vector& localLogs); + void writeLogsToFile(const std::vector& allLogs, const std::string& filename); - std::shared_ptr buffer_handlers_m; - double defaultOveralloc_m = 1.0; + std::shared_ptr buffer_handlers_m; + double defaultOveralloc_m = 1.0; - ///////////////////////////////////////////////////////////////////////////////////// + ///////////////////////////////////////////////////////////////////////////////////// - protected: - std::shared_ptr comm_m; - int size_m; - int rank_m; + protected: + std::shared_ptr comm_m; + int size_m; + int rank_m; - public: - std::shared_ptr get_buffer_handler_instance(); - }; + public: + std::shared_ptr get_buffer_handler_instance(); + }; - } // namespace mpi -} // namespace ippl +} // namespace ippl::mpi #include "Communicate/Collectives.hpp" #include "Communicate/PointToPoint.hpp" diff --git a/src/Communicate/CommunicatorLogging.cpp b/src/Communicate/CommunicatorLogging.cpp index ff301e60c..3c3000695 100644 --- a/src/Communicate/CommunicatorLogging.cpp +++ b/src/Communicate/CommunicatorLogging.cpp @@ -6,7 +6,7 @@ #include "Utility/Inform.h" #include "Communicate/Communicator.h" -#include "Communicate/LogEntry.h" +#include "Communicate/LoggingBufferHandler.h" namespace ippl::mpi { void Communicator::printLogs(const std::string& filename) { @@ -28,7 +28,7 @@ namespace ippl::mpi { struct is_a_logger : std::false_type {}; template - struct is_a_logger > : std::true_type {}; + struct is_a_logger > : std::true_type {}; std::vector Communicator::gatherLocalLogs() { std::vector localLogs; diff --git a/src/Communicate/CommunicatorLogging.hpp b/src/Communicate/CommunicatorLogging.hpp index 5746c3df9..12caaa4d9 100644 --- a/src/Communicate/CommunicatorLogging.hpp +++ b/src/Communicate/CommunicatorLogging.hpp @@ -5,11 +5,9 @@ #include "Communicate/LogEntry.h" -namespace ippl { - namespace mpi { - std::vector serializeLogs(const std::vector& logs); - std::vector deserializeLogs(const std::vector& buffer); - } // namespace mpi -} // namespace ippl +namespace ippl::mpi { + std::vector serializeLogs(const std::vector& logs); + std::vector deserializeLogs(const std::vector& buffer); +} // namespace ippl::mpi #endif diff --git a/src/Communicate/LoggingBufferHandler.h b/src/Communicate/LoggingBufferHandler.h index f6a0bd5d6..fc3ad2eba 100644 --- a/src/Communicate/LoggingBufferHandler.h +++ b/src/Communicate/LoggingBufferHandler.h @@ -10,7 +10,7 @@ #include "Communicate/BufferHandler.h" #include "Communicate/LogEntry.h" -namespace ippl { +namespace ippl::comms { /** * @class LoggingBufferHandler @@ -28,10 +28,12 @@ namespace ippl { * Instead, it adds logging for monitoring purposes. */ template - class LoggingBufferHandler : public BufferHandler { + class LoggingBufferHandler : public BufferHandler, MemorySpace> { public: - using buffer_type = typename BufferHandler::buffer_type; - using size_type = typename BufferHandler::size_type; + using buffer_type = + typename BufferHandler, MemorySpace>::buffer_type; + using size_type = + typename BufferHandler, MemorySpace>::size_type; /** * @brief Constructs a LoggingBufferHandler with an existing buffer handler. @@ -39,7 +41,9 @@ namespace ippl { * operations. * @param rank The MPI rank for logging purposes, used to identify the source of logs. */ - LoggingBufferHandler(std::shared_ptr> handler, int rank); + LoggingBufferHandler( + std::shared_ptr, MemorySpace>> handler, + int rank); /** * @brief Default constructor, creates an internal `BufferHandler` for managing buffers. @@ -104,7 +108,7 @@ namespace ippl { const std::vector& getLogs() const; private: - std::shared_ptr> + std::shared_ptr, MemorySpace>> handler_m; ///< Internal handler for buffer management. std::vector logEntries_m; ///< Log entries for buffer operations. int rank_m; ///< MPI rank for identifying log sources. @@ -122,7 +126,7 @@ namespace ippl { void logMethod(const std::string& methodName, const std::map& parameters); }; -} // namespace ippl +} // namespace ippl::comms #include "Communicate/LoggingBufferHandler.hpp" diff --git a/src/Communicate/LoggingBufferHandler.hpp b/src/Communicate/LoggingBufferHandler.hpp index 33f4269cd..9517e1de9 100644 --- a/src/Communicate/LoggingBufferHandler.hpp +++ b/src/Communicate/LoggingBufferHandler.hpp @@ -4,11 +4,11 @@ #include #include -namespace ippl { +namespace ippl::comms { template LoggingBufferHandler::LoggingBufferHandler( - std::shared_ptr> handler, int rank) + std::shared_ptr, MemorySpace>> handler, int rank) : handler_m(std::move(handler)) , rank_m(rank) {} @@ -70,6 +70,6 @@ namespace ippl { std::chrono::high_resolution_clock::now()}); } -} // namespace ippl +} // namespace ippl::comms #endif diff --git a/src/Field/HaloCells.h b/src/Field/HaloCells.h index c4d87b9bd..c056177dd 100644 --- a/src/Field/HaloCells.h +++ b/src/Field/HaloCells.h @@ -22,7 +22,7 @@ namespace ippl { template struct FieldBufferData { using view_type = typename detail::ViewType::view_type; - using archive_type = Archive; + using archive_type = comms::archive_buffer; void serialize(archive_type& ar, size_type nsends) { ar.serialize(buffer, nsends); } @@ -139,7 +139,8 @@ namespace ippl { * unpack function call */ template - void exchangeBoundaries(view_type& view, Layout_t* layout, SendOrder order, int nghost = 1); + void exchangeBoundaries(view_type& view, Layout_t* layout, SendOrder order, + int nghost = 1); /*! * Extract the subview of the original data. This does not copy. diff --git a/src/Particle/ParticleAttrib.h b/src/Particle/ParticleAttrib.h index ca55ca81f..9ec60ceff 100644 --- a/src/Particle/ParticleAttrib.h +++ b/src/Particle/ParticleAttrib.h @@ -45,6 +45,7 @@ namespace ippl { using memory_space = typename view_type::memory_space; using execution_space = typename view_type::execution_space; + using archive_type = comms::archive_buffer; using size_type = detail::size_type; @@ -66,24 +67,22 @@ namespace ippl { void unpack(size_type) override; - void serialize(detail::Archive& ar, size_type nsends) override { - ar.serialize(buf_m, nsends); - } + void serialize(archive_type& ar, size_type nsends) override { ar.serialize(buf_m, nsends); } - void deserialize(detail::Archive& ar, size_type nrecvs) override { + void deserialize(archive_type& ar, size_type nrecvs) override { ar.deserialize(buf_m, nrecvs); } virtual ~ParticleAttrib() = default; - + size_type size() const override { return dview_m.extent(0); } - + size_type packedSize(const size_type count) const override { return count * sizeof(value_type); } - + void resize(size_type n) { Kokkos::resize(dview_m, n); } - + void realloc(size_type n) { Kokkos::realloc(dview_m, n); } void print() { diff --git a/src/Particle/ParticleAttribBase.h b/src/Particle/ParticleAttribBase.h index b0e973e79..6172751f7 100644 --- a/src/Particle/ParticleAttribBase.h +++ b/src/Particle/ParticleAttribBase.h @@ -35,9 +35,10 @@ namespace ippl { }; public: - using hash_type = ippl::detail::hash_type; using memory_space = MemorySpace; using execution_space = typename memory_space::execution_space; + using hash_type = detail::hash_type; + using archive_type = comms::archive_buffer; template using with_properties = typename WithMemSpace::type; @@ -66,9 +67,9 @@ namespace ippl { virtual void unpack(size_type) = 0; - virtual void serialize(Archive& ar, size_type nsends) = 0; + virtual void serialize(archive_type& ar, size_type nsends) = 0; - virtual void deserialize(Archive& ar, size_type nrecvs) = 0; + virtual void deserialize(archive_type& ar, size_type nrecvs) = 0; virtual size_type size() const = 0; @@ -78,7 +79,7 @@ namespace ippl { size_type getParticleCount() const { return *localNum_mp; } virtual void applyPermutation(const hash_type&) = 0; - virtual void internalCopy(const hash_type&) = 0; + virtual void internalCopy(const hash_type&) = 0; protected: const size_type* localNum_mp; diff --git a/unit_tests/Communicate/BufferHandler.cpp b/unit_tests/Communicate/BufferHandler.cpp index eef387da9..15b1b6bf2 100644 --- a/unit_tests/Communicate/BufferHandler.cpp +++ b/unit_tests/Communicate/BufferHandler.cpp @@ -14,10 +14,10 @@ class TypedBufferHandlerTest : public ::testing::Test { protected: using memory_space = MemorySpace; - class TestableBufferHandler : public ippl::DefaultBufferHandler { + class TestableBufferHandler : public ippl::comms::DefaultBufferHandler { public: - using ippl::DefaultBufferHandler::deleteAllBuffers; - using ippl::DefaultBufferHandler::freeAllBuffers; + using ippl::comms::DefaultBufferHandler::deleteAllBuffers; + using ippl::comms::DefaultBufferHandler::freeAllBuffers; size_t usedBuffersSize() const { return this->used_buffers.size(); } @@ -109,9 +109,9 @@ TYPED_TEST(TypedBufferHandlerTest, GetBuffer_ExactSizeMatch) { // Test: Freeing a buffer that does not exist in the used pool has no effect TYPED_TEST(TypedBufferHandlerTest, FreeNonExistentBuffer) { - auto buffer = this->handler->getBuffer(100, 1.0); - auto newBuffer = - std::make_shared>(200); + using archive_type = ippl::comms::archive_buffer; + auto buffer = this->handler->getBuffer(100, 1.0); + auto newBuffer = std::make_shared(200); this->handler->freeBuffer(newBuffer); EXPECT_EQ(this->handler->usedBuffersSize(), 1); @@ -182,7 +182,8 @@ TYPED_TEST(TypedBufferHandlerTest, GetAllocatedAndFreeSize_AfterDeleteAllBuffers EXPECT_EQ(this->handler->getFreeSize(), 0); } -// Test: Buffer size is correctly accounted for if a free buffer is available but we request a larger one, thus reallocating this one +// Test: Buffer size is correctly accounted for if a free buffer is available but we request a +// larger one, thus reallocating this one TYPED_TEST(TypedBufferHandlerTest, GetAllocatedAndFreeSize_ResizeBufferLargerThanAvailable) { auto smallBuffer = this->handler->getBuffer(50, 1.0); this->handler->freeBuffer(smallBuffer); diff --git a/unit_tests/Communicate/LoggingBufferHandler.cpp b/unit_tests/Communicate/LoggingBufferHandler.cpp index a0db38366..8a1d9f38e 100644 --- a/unit_tests/Communicate/LoggingBufferHandler.cpp +++ b/unit_tests/Communicate/LoggingBufferHandler.cpp @@ -12,14 +12,14 @@ class TypedLoggingBufferHandlerTest : public ::testing::Test { protected: void SetUp() override { rank = 0; - this->bufferHandler = std::make_shared>(); + this->bufferHandler = std::make_shared>(); this->loggingHandler = - std::make_shared>(bufferHandler, rank); + std::make_shared>(bufferHandler, rank); } int rank; - std::shared_ptr> bufferHandler; - std::shared_ptr> loggingHandler; + std::shared_ptr> bufferHandler; + std::shared_ptr> loggingHandler; }; TYPED_TEST_SUITE(TypedLoggingBufferHandlerTest, MemorySpaces); From 17505434db0ea971dfae108a21d1d3b37fc76bc2 Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Thu, 11 Dec 2025 01:51:03 +0100 Subject: [PATCH 11/13] Put back freeAllBuffers in particle update --- src/Particle/ParticleSpatialLayout.hpp | 210 ++++++++++++------------- 1 file changed, 101 insertions(+), 109 deletions(-) diff --git a/src/Particle/ParticleSpatialLayout.hpp b/src/Particle/ParticleSpatialLayout.hpp index 00f256bc3..44ae03fa7 100644 --- a/src/Particle/ParticleSpatialLayout.hpp +++ b/src/Particle/ParticleSpatialLayout.hpp @@ -27,7 +27,6 @@ #include "Communicate/Window.h" - namespace ippl { /*! @@ -38,7 +37,7 @@ namespace ippl { */ struct increment_type { size_t count[2]; - + KOKKOS_FUNCTION void init() { count[0] = 0; count[1] = 0; @@ -72,14 +71,13 @@ namespace ippl { template void ParticleSpatialLayout::updateLayout(FieldLayout& fl, Mesh& mesh) { - //flayout_m = fl; + // flayout_m = fl; rlayout_m.changeDomain(fl, mesh); } template template void ParticleSpatialLayout::update(ParticleContainer& pc) { - /* Apply Boundary Conditions */ static IpplTimings::TimerRef ParticleBCTimer = IpplTimings::getTimer("particleBC"); IpplTimings::startTimer(ParticleBCTimer); @@ -89,7 +87,7 @@ namespace ippl { /* Update Timer for the rest of the function */ static IpplTimings::TimerRef ParticleUpdateTimer = IpplTimings::getTimer("updateParticle"); IpplTimings::startTimer(ParticleUpdateTimer); - + int nRanks = Comm->size(); if (nRanks < 2) { return; @@ -103,7 +101,7 @@ namespace ippl { */ // 1. figure out which particles need to go where -> locateParticles(...) ============= // - + static IpplTimings::TimerRef locateTimer = IpplTimings::getTimer("locateParticles"); IpplTimings::startTimer(locateTimer); @@ -112,40 +110,38 @@ namespace ippl { /* The indices correspond to the indices of the local particles, * the values correspond to the ranks to which the particles need to be sent - */ + */ locate_type particleRanks("particles' MPI ranks", localnum); /* The indices are the indices of the particles, - * the boolean values describe whether the particle has left the current rank + * the boolean values describe whether the particle has left the current rank * 0 --> particle valid (inside current rank) * 1 --> particle invalid (left rank) */ bool_type invalidParticles("validity of particles", localnum); /* The indices are the MPI ranks, - * the values are the number of particles are sent to that rank from myrank + * the values are the number of particles are sent to that rank from myrank */ locate_type rankSendCount_dview("rankSendCount Device", nRanks); - /* The indices have no particluar meaning, + /* The indices have no particluar meaning, * the values are the MPI ranks to which we need to send */ locate_type destinationRanks_dview("destinationRanks Device", nRanks); /* nInvalid is the number of invalid particles * nDestinationRanks is the number of MPI ranks we need to send to - */ - auto [nInvalid, nDestinationRanks] = - locateParticles(pc, - particleRanks, invalidParticles, - rankSendCount_dview, destinationRanks_dview); + */ + auto [nInvalid, nDestinationRanks] = locateParticles( + pc, particleRanks, invalidParticles, rankSendCount_dview, destinationRanks_dview); /* Host space copy of rankSendCount_dview */ - auto rankSendCount_hview = + auto rankSendCount_hview = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), rankSendCount_dview); - + /* Host Space copy of destinationRanks_dview */ - auto destinationRanks_hview = + auto destinationRanks_hview = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), destinationRanks_dview); IpplTimings::stopTimer(locateTimer); @@ -153,18 +149,18 @@ namespace ippl { // 2. fill send buffer and send particles =============================================== // // 2.1 Remote Memory Access window for one-sided communication - + static IpplTimings::TimerRef preprocTimer = IpplTimings::getTimer("sendPreprocess"); IpplTimings::startTimer(preprocTimer); - - std::fill(nRecvs_m.begin(), nRecvs_m.end(), 0); + + std::fill(nRecvs_m.begin(), nRecvs_m.end(), 0); window_m.fence(0); - - // Prepare RMA window for the ranks we need to send to - for(size_t ridx=0; ridx < nDestinationRanks; ridx++){ + + // Prepare RMA window for the ranks we need to send to + for (size_t ridx = 0; ridx < nDestinationRanks; ridx++) { int rank = destinationRanks_hview[ridx]; - if (rank == Comm->rank()){ + if (rank == Comm->rank()) { // we do not need to send to ourselves continue; } @@ -175,30 +171,29 @@ namespace ippl { IpplTimings::stopTimer(preprocTimer); - // 2.2 Particle Sends + // 2.2 Particle Sends static IpplTimings::TimerRef sendTimer = IpplTimings::getTimer("particleSend"); IpplTimings::startTimer(sendTimer); - + std::vector requests(0); int tag = Comm->next_tag(mpi::tag::P_SPATIAL_LAYOUT, mpi::tag::P_LAYOUT_CYCLE); - for(size_t ridx=0; ridx < nDestinationRanks; ridx++){ + for (size_t ridx = 0; ridx < nDestinationRanks; ridx++) { int rank = destinationRanks_hview[ridx]; - if(rank == Comm->rank()){ - continue; - } + if (rank == Comm->rank()) { + continue; + } hash_type hash("hash", rankSendCount_hview(rank)); fillHash(rank, particleRanks, hash); pc.sendToRank(rank, tag, requests, hash); } - - IpplTimings::stopTimer(sendTimer); + IpplTimings::stopTimer(sendTimer); // 3. Internal destruction of invalid particles ======================================= // - + static IpplTimings::TimerRef destroyTimer = IpplTimings::getTimer("particleDestroy"); IpplTimings::startTimer(destroyTimer); @@ -206,9 +201,9 @@ namespace ippl { Kokkos::fence(); IpplTimings::stopTimer(destroyTimer); - - // 4. Receive Particles ================================================================ // - + + // 4. Receive Particles ================================================================ // + static IpplTimings::TimerRef recvTimer = IpplTimings::getTimer("particleRecv"); IpplTimings::startTimer(recvTimer); @@ -219,13 +214,13 @@ namespace ippl { } IpplTimings::stopTimer(recvTimer); - IpplTimings::startTimer(sendTimer); if (requests.size() > 0) { MPI_Waitall(requests.size(), requests.data(), MPI_STATUSES_IGNORE); } IpplTimings::stopTimer(sendTimer); + Comm->freeAllBuffers(); IpplTimings::stopTimer(ParticleUpdateTimer); } @@ -238,9 +233,9 @@ namespace ippl { return ((pos[Idx] > region[Idx].min()) && ...) && ((pos[Idx] <= region[Idx].max()) && ...); }; - - /* Helper function that evaluates the total number of neighbors for the current rank in Dim dimensions. - */ + /* Helper function that evaluates the total number of neighbors for the current rank in Dim + * dimensions. + */ template detail::size_type ParticleSpatialLayout::getNeighborSize( const neighbor_list& neighbors) const { @@ -253,29 +248,29 @@ namespace ippl { return totalSize; } - /** - * @brief This function determines to which rank particles need to be sent after the iteration step. - * It starts by first scanning direct rank neighbors, and only does a global scan if there are still - * unfound particles. It then calculates how many particles need to be sent to each rank and how many - * ranks are sent to in total. + * @brief This function determines to which rank particles need to be sent after the iteration + * step. It starts by first scanning direct rank neighbors, and only does a global scan if there + * are still unfound particles. It then calculates how many particles need to be sent to each + * rank and how many ranks are sent to in total. * * @param pc Particle Container - * @param ranks A vector the length of the number of particles on the current rank, where each value refers - * to the new rank of the particle - * @param invalid A vector marking the particles that need to be sent away, and thus locally deleted - * @param nSends_dview Device view the length of number of ranks, where each value determines the number - * of particles sent to that rank from the current rank + * @param ranks A vector the length of the number of particles on the current rank, where + * each value refers to the new rank of the particle + * @param invalid A vector marking the particles that need to be sent away, and thus + * locally deleted + * @param nSends_dview Device view the length of number of ranks, where each value determines + * the number of particles sent to that rank from the current rank * @param sends_dview Device view for the number of ranks that are sent to from current rank * * @return tuple with the number of particles sent away and the number of ranks sent to */ template template - std::pair ParticleSpatialLayout::locateParticles( - const ParticleContainer& pc, locate_type& ranks, bool_type& invalid, + std::pair + ParticleSpatialLayout::locateParticles( + const ParticleContainer& pc, locate_type& ranks, bool_type& invalid, locate_type& nSends_dview, locate_type& sends_dview) const { - auto positions = pc.R.getView(); region_view_type Regions = rlayout_m.getdLocalRegions(); @@ -291,9 +286,9 @@ namespace ippl { locate_type outsideIds("Particles outside of neighborhood", size_type(pc.getLocalNum())); /// outsideCount: Tracks the number of particles that travelled outside of the neighborhood. - size_type outsideCount = 0; + size_type outsideCount = 0; /// invalidCount: Tracks the number of particles that need to be sent to other ranks. - size_type invalidCount = 0; + size_type invalidCount = 0; /// neighborSize: Size of a neighborhood in D dimentions. const size_type neighborSize = getNeighborSize(neighbors); @@ -301,11 +296,11 @@ namespace ippl { /// neighbors_view: Kokkos view with the IDs of the neighboring MPI ranks. locate_type neighbors_view("Nearest neighbors IDs", neighborSize); - /* red_val: Used to reduce both the number of invalid particles and the number of particles - * outside of the neighborhood (Kokkos::parallel_scan doesn't allow multiple reduction values, so we - * use the helper class increment_type). First element updates InvalidCount, second - * one updates outsideCount. - */ + /* red_val: Used to reduce both the number of invalid particles and the number of particles + * outside of the neighborhood (Kokkos::parallel_scan doesn't allow multiple reduction + * values, so we use the helper class increment_type). First element updates InvalidCount, + * second one updates outsideCount. + */ increment_type red_val; red_val.init(); @@ -317,7 +312,7 @@ namespace ippl { for (const auto& componentNeighbors : neighbors) { for (size_t j = 0; j < componentNeighbors.size(); ++j) { neighbors_mirror(k) = componentNeighbors[j]; - //std::cout << "Neighbor: " << neighbors_mirror(k) << std::endl; + // std::cout << "Neighbor: " << neighbors_mirror(k) << std::endl; k++; } } @@ -338,21 +333,21 @@ namespace ippl { Kokkos::RangePolicy(0, ranks.extent(0)), KOKKOS_LAMBDA(const size_type i, increment_type& val, const bool final) { /* Step 1 - * inCurr: True if the particle hasn't left the current MPI rank. - * inNeighbor: True if the particle is found in a neighboring rank. - * found: True either if inCurr = True or inNeighbor = True. - * increment: Helper variable to update red_val. - */ - bool inCurr = false; + * inCurr: True if the particle hasn't left the current MPI rank. + * inNeighbor: True if the particle is found in a neighboring rank. + * found: True either if inCurr = True or inNeighbor = True. + * increment: Helper variable to update red_val. + */ + bool inCurr = false; bool inNeighbor = false; - bool found = false; + bool found = false; bool increment[2]; - inCurr = positionInRegion(is, positions(i), Regions(myRank)); + inCurr = positionInRegion(is, positions(i), Regions(myRank)); - ranks(i) = inCurr * myRank; - invalid(i) = !inCurr; - found = inCurr || found; + ranks(i) = inCurr * myRank; + invalid(i) = !inCurr; + found = inCurr || found; /// Step 2 for (size_t j = 0; j < neighbors_view.extent(0); ++j) { @@ -360,34 +355,34 @@ namespace ippl { inNeighbor = positionInRegion(is, positions(i), Regions(rank)); - ranks(i) = !(inNeighbor) * ranks(i) + inNeighbor * rank; - found = inNeighbor || found; + ranks(i) = !(inNeighbor)*ranks(i) + inNeighbor * rank; + found = inNeighbor || found; } /// Step 3 - /* isOut: When the last thread has finished the search, checks whether the particle has been found - * either in the current rank or in a neighboring one. - * Used to avoid race conditions when updating outsideIds. + /* isOut: When the last thread has finished the search, checks whether the particle + * has been found either in the current rank or in a neighboring one. Used to avoid + * race conditions when updating outsideIds. */ - if(final && !found) { + if (final && !found) { outsideIds(val.count[1]) = i; } - //outsideIds(val.count[1]) = i * isOut; + // outsideIds(val.count[1]) = i * isOut; increment[0] = invalid(i); increment[1] = !found; val += increment; - }, red_val); Kokkos::fence(); - invalidCount = red_val.count[0]; - outsideCount = red_val.count[1]; + invalidCount = red_val.count[0]; + outsideCount = red_val.count[1]; IpplTimings::stopTimer(neighborSearch); - /// Step 4 - static IpplTimings::TimerRef nonNeighboringParticles = IpplTimings::getTimer("nonNeighboringParticles"); + /// Step 4 + static IpplTimings::TimerRef nonNeighboringParticles = + IpplTimings::getTimer("nonNeighboringParticles"); IpplTimings::startTimer(nonNeighboringParticles); if (outsideCount > 0) { Kokkos::parallel_for( @@ -396,37 +391,35 @@ namespace ippl { KOKKOS_LAMBDA(const size_t i, const size_type j) { /// pID: (local) ID of the particle that is currently being searched. size_type pId = outsideIds(i); - + /// inRegion: Checks whether particle pID is inside region j. bool inRegion = positionInRegion(is, positions(pId), Regions(j)); - if(inRegion){ + if (inRegion) { ranks(pId) = j; - } + } }); Kokkos::fence(); } IpplTimings::stopTimer(nonNeighboringParticles); - - Kokkos::parallel_for("Calculate nSends", - Kokkos::RangePolicy(0, ranks.extent(0)), - KOKKOS_LAMBDA(const size_t i){ - size_type rank = ranks(i); - Kokkos::atomic_fetch_add(&nSends_dview(rank),1); - } - ); - - // Number of Ranks we need to send to + + Kokkos::parallel_for( + "Calculate nSends", Kokkos::RangePolicy(0, ranks.extent(0)), + KOKKOS_LAMBDA(const size_t i) { + size_type rank = ranks(i); + Kokkos::atomic_fetch_add(&nSends_dview(rank), 1); + }); + + // Number of Ranks we need to send to Kokkos::View rankSends("Number of Ranks we need to send to"); - - Kokkos::parallel_for("Calculate sends", - Kokkos::RangePolicy(0, nSends_dview.extent(0)), - KOKKOS_LAMBDA(const size_t rank){ - if(nSends_dview(rank) != 0){ - size_type index = Kokkos::atomic_fetch_add(&rankSends(), 1); - sends_dview(index) = rank; - } - } - ); + + Kokkos::parallel_for( + "Calculate sends", Kokkos::RangePolicy(0, nSends_dview.extent(0)), + KOKKOS_LAMBDA(const size_t rank) { + if (nSends_dview(rank) != 0) { + size_type index = Kokkos::atomic_fetch_add(&rankSends(), 1); + sends_dview(index) = rank; + } + }); size_type temp; Kokkos::deep_copy(temp, rankSends); @@ -454,7 +447,6 @@ namespace ippl { } }); Kokkos::fence(); - } template From b1c0ff8cabaa6c281200cfa51134618655a707b4 Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Wed, 10 Dec 2025 14:48:11 +0100 Subject: [PATCH 12/13] Remove KOKKOS_CLASS_LAMBDA from serialization as it triggers archive copies Always reset read/write pos on buffer when getBuffer is called --- src/Communicate/Archive.h | 6 +- src/Communicate/Archive.hpp | 101 +++++++++++++++++------------- src/Communicate/BufferHandler.h | 2 +- src/Communicate/BufferHandler.hpp | 3 + src/Communicate/Buffers.hpp | 4 +- src/Communicate/Communicator.h | 18 ++++-- 6 files changed, 83 insertions(+), 51 deletions(-) diff --git a/src/Communicate/Archive.h b/src/Communicate/Archive.h index 93f99f2e0..aed062db5 100644 --- a/src/Communicate/Archive.h +++ b/src/Communicate/Archive.h @@ -73,7 +73,7 @@ namespace ippl { /*! * @returns a pointer to the data of the buffer */ - pointer_type getBuffer() { return buffer_m.data(); } + pointer_type getData() { return buffer_m.data(); } /*! * @returns the size of the buffer @@ -88,6 +88,10 @@ namespace ippl { Kokkos::realloc(buffer_m, size); /*buffer_m.reallocBuffer(size);*/ } + void resetReadWritePos() { + readpos_m = 0; + writepos_m = 0; + } void resetWritePos() { writepos_m = 0; } void resetReadPos() { readpos_m = 0; } diff --git a/src/Communicate/Archive.hpp b/src/Communicate/Archive.hpp index d86d7ee8d..3737e98ca 100644 --- a/src/Communicate/Archive.hpp +++ b/src/Communicate/Archive.hpp @@ -4,6 +4,8 @@ // #include +#include "Utility/Logging.h" + #include "Archive.h" namespace ippl { @@ -15,88 +17,103 @@ namespace ippl { , readpos_m(0) , buffer_m("buffer", size) {} + // ----------------------------------- + // Scalar serialize template template void Archive::serialize(const Kokkos::View& view, size_type nsends) { - using exec_space = typename Kokkos::View::execution_space; - using policy_type = Kokkos::RangePolicy; - - size_t size = sizeof(T); - Kokkos::parallel_for( - "Archive::serialize()", policy_type(0, nsends), - KOKKOS_CLASS_LAMBDA(const size_type i) { - std::memcpy(buffer_m.data() + i * size + writepos_m, view.data() + i, size); - }); + constexpr size_t size = sizeof(T); + char* dst_ptr = (char*)(buffer_m.data()) + writepos_m; + char* src_ptr = (char*)(view.data()); + assert(writepos_m + (nsends * size) <= buffer_m.size()); + // construct temp views of the src/dst buffers of the correct size (bytes) + Kokkos::View src_view(src_ptr, size * nsends); + Kokkos::View dst_view(dst_ptr, size * nsends); + Kokkos::deep_copy(dst_view, src_view); Kokkos::fence(); - writepos_m += size * nsends; + SPDLOG_TRACE("Incrementing writepos: {}, from {}, to {}", (void*)dst_view.data(), + writepos_m, writepos_m + (nsends * size)); + writepos_m += (nsends * size); } + // ----------------------------------- + // Vector serialize template template void Archive::serialize(const Kokkos::View*, ViewArgs...>& view, size_type nsends) { - using exec_space = typename Kokkos::View::execution_space; - - size_t size = sizeof(T); - // Default index type for range policies is int64, + constexpr size_t size = sizeof(T); + char* dst_ptr = (char*)(buffer_m.data()); + ippl::Vector* src_ptr = view.data(); + auto wp = writepos_m; + // The Kokkos range policies expect int64 // so we have to explicitly specify size_type (uint64) + using exec_space = typename Kokkos::View::execution_space; using mdrange_t = Kokkos::MDRangePolicy, Kokkos::IndexType, exec_space>; Kokkos::parallel_for( - "Archive::serialize()", - // The constructor for Kokkos range policies always - // expects int64 regardless of index type provided - // by template parameters, so the typecast is necessary - // to avoid compiler warnings - mdrange_t({0, 0}, {(long int)nsends, Dim}), - KOKKOS_CLASS_LAMBDA(const size_type i, const size_t d) { - std::memcpy(buffer_m.data() + (Dim * i + d) * size + writepos_m, - &(*(view.data() + i))[d], size); + "Archive::serialize()", mdrange_t({0, 0}, {(long int)nsends, Dim}), + KOKKOS_LAMBDA(const size_type i, const size_t d) { + std::memcpy(dst_ptr + (Dim * i + d) * size + wp, &(*(src_ptr + i))[d], size); }); + Kokkos::fence(); writepos_m += Dim * size * nsends; } + // ----------------------------------- + // Scalar Deserialize template template void Archive::deserialize(Kokkos::View& view, size_type nrecvs) { - using exec_space = typename Kokkos::View::execution_space; - using policy_type = Kokkos::RangePolicy; - - size_t size = sizeof(T); + // if we have to enlarge the destination view if (nrecvs > view.extent(0)) { + SPDLOG_WARN("DeSerialization realloc: {}, from {}, to {}", (void*)view.data(), + view.extent(0), nrecvs); Kokkos::realloc(view, nrecvs); } - Kokkos::parallel_for( - "Archive::deserialize()", policy_type(0, nrecvs), - KOKKOS_CLASS_LAMBDA(const size_type i) { - std::memcpy(view.data() + i, buffer_m.data() + i * size + readpos_m, size); - }); - // Wait for deserialization kernel to complete - // (as with serialization kernels) + // + constexpr size_t size = sizeof(T); + char* src_ptr = (char*)(buffer_m.data()) + readpos_m; + char* dst_ptr = (char*)(view.data()); + assert(readpos_m + (nrecvs * size) <= buffer_m.size()); + // construct temp views of the src/dst buffers of the correct size (bytes) + Kokkos::View src_view(src_ptr, size * nrecvs); + Kokkos::View dst_view(dst_ptr, size * nrecvs); + Kokkos::deep_copy(dst_view, src_view); Kokkos::fence(); - readpos_m += size * nrecvs; + SPDLOG_TRACE("Incrementing readpos: {}, from {}, to {}", (void*)buffer_m.data(), + readpos_m, readpos_m + (nrecvs * size)); + readpos_m += (nrecvs * size); } + // ----------------------------------- + // Vecto Deserialize template template void Archive::deserialize(Kokkos::View*, ViewArgs...>& view, - size_type nrecvs) { - using exec_space = typename Kokkos::View::execution_space; - - size_t size = sizeof(T); + size_type nrecvs) // + { + // if we have to enlarge the destination view if (nrecvs > view.extent(0)) { + SPDLOG_WARN("DeSerialization realloc: {}, from {}, to {}", (void*)view.data(), + view.extent(0), nrecvs); Kokkos::realloc(view, nrecvs); } + // + constexpr size_t size = sizeof(T); + char* src_ptr = (char*)(buffer_m.data()) + readpos_m; + ippl::Vector* dst_ptr = view.data(); + auto rp = readpos_m; + using exec_space = typename Kokkos::View::execution_space; using mdrange_t = Kokkos::MDRangePolicy, Kokkos::IndexType, exec_space>; Kokkos::parallel_for( "Archive::deserialize()", mdrange_t({0, 0}, {(long int)nrecvs, Dim}), - KOKKOS_CLASS_LAMBDA(const size_type i, const size_t d) { - std::memcpy(&(*(view.data() + i))[d], - buffer_m.data() + (Dim * i + d) * size + readpos_m, size); + KOKKOS_LAMBDA(const size_type i, const size_t d) { + std::memcpy(&(*(dst_ptr + i))[d], src_ptr + (Dim * i + d) * size + rp, size); }); Kokkos::fence(); readpos_m += Dim * size * nrecvs; diff --git a/src/Communicate/BufferHandler.h b/src/Communicate/BufferHandler.h index 6ad36ff61..a18558d9a 100644 --- a/src/Communicate/BufferHandler.h +++ b/src/Communicate/BufferHandler.h @@ -7,8 +7,8 @@ #include "Types/IpplTypes.h" #include "Types/ViewTypes.h" -#include "Utility/TypeUtils.h" #include "Utility/Logging.h" +#include "Utility/TypeUtils.h" #include "Communicate/Archive.h" diff --git a/src/Communicate/BufferHandler.hpp b/src/Communicate/BufferHandler.hpp index 6a585579d..f654dea7f 100644 --- a/src/Communicate/BufferHandler.hpp +++ b/src/Communicate/BufferHandler.hpp @@ -112,6 +112,7 @@ namespace ippl::comms { freeSize_m -= buffer->getBufferSize(); usedSize_m += buffer->getBufferSize(); + buffer->resetReadWritePos(); free_buffers.erase(buffer); used_buffers.insert(buffer); return buffer; @@ -128,6 +129,7 @@ namespace ippl::comms { free_buffers.erase(buffer); buffer->reallocBuffer(requiredSize); + buffer->resetReadWritePos(); used_buffers.insert(buffer); return buffer; @@ -140,6 +142,7 @@ namespace ippl::comms { usedSize_m += newBuffer->getBufferSize(); used_buffers.insert(newBuffer); + newBuffer->resetReadWritePos(); return newBuffer; } diff --git a/src/Communicate/Buffers.hpp b/src/Communicate/Buffers.hpp index 9732f9619..90b477af3 100644 --- a/src/Communicate/Buffers.hpp +++ b/src/Communicate/Buffers.hpp @@ -37,7 +37,7 @@ namespace ippl { auto b = buffer_handler.getBuffer(size * sizeof(T), std::max(overallocation, defaultOveralloc_m)); SPDLOG_TRACE("{}, getBuffer {}, buf, {}, size {}", (void*)this, - ippl::debug::print_type(), (void*)(b->getBuffer()), + ippl::debug::print_type(), (void*)(b->getData()), size * sizeof(T)); return b; } @@ -46,7 +46,7 @@ namespace ippl { void Communicator::freeBuffer(Communicator::buffer_type buffer) { using memory_space = BufferType::memory_space; auto& buffer_handler = buffer_handlers_m->get(); - SPDLOG_TRACE("freeBuffer buf, {}", (void*)(buffer->getBuffer())); + SPDLOG_TRACE("freeBuffer buf, {}", (void*)(buffer->getData())); buffer_handler.freeBuffer(buffer); } diff --git a/src/Communicate/Communicator.h b/src/Communicate/Communicator.h index 6dc1ae506..114b0b4a8 100644 --- a/src/Communicate/Communicator.h +++ b/src/Communicate/Communicator.h @@ -161,20 +161,25 @@ namespace ippl::mpi { this->abort(); } MPI_Status status; - MPI_Recv(ar.getBuffer(), msize, MPI_BYTE, src, tag, *comm_m, &status); - + MPI_Recv(ar.getData(), msize, MPI_BYTE, src, tag, *comm_m, &status); + SPDLOG_DEBUG("Recv buf {}, size {:04}, src {:02}, tag {:04}", (void*)(ar.getData()), + msize, src, tag); buffer.deserialize(ar, nrecvs); } template void isend(int dest, int tag, Buffer& buffer, Archive& ar, MPI_Request& request, - size_type nsends) { + size_type nsends) // + { if (ar.getSize() > INT_MAX) { std::cerr << "Message size exceeds range of int" << std::endl; this->abort(); } buffer.serialize(ar, nsends); - MPI_Isend(ar.getBuffer(), ar.getSize(), MPI_BYTE, dest, tag, *comm_m, &request); + MPI_Isend(ar.getData(), ar.getSize(), MPI_BYTE, dest, tag, *comm_m, &request); + SPDLOG_DEBUG("Isend buf {}, size {:04}, dst {:02}, tag {:04}, req {}", + (void*)(ar.getData()), ar.getSize(), dest, tag, + static_cast(request)); } template @@ -183,7 +188,10 @@ namespace ippl::mpi { std::cerr << "Message size exceeds range of int" << std::endl; this->abort(); } - MPI_Irecv(ar.getBuffer(), msize, MPI_BYTE, src, tag, *comm_m, &request); + + MPI_Irecv(ar.getData(), msize, MPI_BYTE, src, tag, *comm_m, &request); + SPDLOG_DEBUG("Irecv buf {}, size {:04}, src {:02}, tag {:04}, req {}", + (void*)(ar.getData()), msize, src, tag, static_cast(request)); } void printLogs(const std::string& filename); From 99cfcc07576e231c99ff02f319523a8b059ae058 Mon Sep 17 00:00:00 2001 From: John Biddiscombe Date: Mon, 8 Dec 2025 23:55:26 +0100 Subject: [PATCH 13/13] Replace communication buffer internals with aligned user managed GPU memory Kokkos::views are aligned to 64 byte boundaries, but communications can benefit from stronger alignment. Support user defined (compile time) alignment of higher order and create kokkos views inside manually allocated GPU memory Add CUDA/HIP specializations for buffer alignment --- CMakeLists.txt | 2 + src/Communicate/AlignedBuffer.h | 168 +++++++++++++++++++++++++++++++ src/Communicate/Archive.h | 8 +- src/Communicate/Archive.hpp | 2 +- src/Communicate/BufferHandler.h | 23 +++-- src/Communicate/CMakeLists.txt | 4 + src/Communicate/Communicator.cpp | 2 + 7 files changed, 194 insertions(+), 15 deletions(-) create mode 100644 src/Communicate/AlignedBuffer.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 8a7d1a73c..164b7d18d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -44,6 +44,8 @@ option(IPPL_ENABLE_TESTS "Build integration tests in test/ directory" OFF) option(IPPL_ENABLE_COVERAGE "Enable code coverage" OFF) option(IPPL_ENABLE_NSYS_PROFILER "Enable Nvidia Nsys Profiler" OFF) option(IPPL_ENABLE_SANITIZER "Enable sanitizer(s)" OFF) +option(IPPL_ENABLE_ALIGNED_COMMUNICATION_BUFFERS "Enable aligned memory buffersvfor communication" + OFF) option(IPPL_USE_ALTERNATIVE_VARIANT "Use modified variant implementation (required for CUDA 12.2 + GCC 12.3.0)" OFF) option(IPPL_USE_STANDARD_FOLDERS "Put all generated binaries in bin/lib folders" OFF) diff --git a/src/Communicate/AlignedBuffer.h b/src/Communicate/AlignedBuffer.h new file mode 100644 index 000000000..95e0cb74c --- /dev/null +++ b/src/Communicate/AlignedBuffer.h @@ -0,0 +1,168 @@ +#ifndef IPPL_ALIGNED_BUFFER_H +#define IPPL_ALIGNED_BUFFER_H + +#include "Types/ViewTypes.h" + +#include "Utility/Logging.h" +#include "Utility/TypeUtils.h" +// +#include "Communicate/Archive.h" + +namespace ippl::comms { + +#define DEFAULT_BUFFER_ALIGNMENT 1024 + // Here's a simple class that provides an aligned buffer, by default on the host + // but we can specialize the constructor/destructor for other memory spaces + template + struct AlignedBuffer { + using memory_space = MemorySpace; + void* ptrOriginal{nullptr}; + void* ptrAligned{nullptr}; + detail::size_type space{0}; + // + AlignedBuffer() {} + // + AlignedBuffer& operator=(AlignedBuffer&& other) { + ptrOriginal = other.ptrOriginal; + ptrAligned = other.ptrAligned; + space = other.space; + other.ptrOriginal = nullptr; + other.ptrAligned = nullptr; + other.space = 0; + return *this; + } + // + AlignedBuffer(std::size_t size) { + ptrOriginal = std::aligned_alloc(DEFAULT_BUFFER_ALIGNMENT, size); + ptrAligned = ptrOriginal; + space = size; + SPDLOG_TRACE("AlignedBuffer: original {}, aligned {}, size {}, space {}", + (void*)(ptrOriginal), (void*)(ptrAligned), size, space); + // sanity check should always be true when std::align used + assert(space >= size); + } + // + ~AlignedBuffer() { + if (ptrOriginal) { + SPDLOG_DEBUG("Destroying host buffer {}", ptrOriginal); + std::free(ptrOriginal); + } + } + }; + + // --------------------------------------------------------------------- +#if defined(KOKKOS_ENABLE_HIP) || defined(KOKKOS_ENABLE_CUDA) + // make number a multiple of the alignment + inline std::int64_t to_multiple(std::int64_t num) { + return ((2 * num + (DEFAULT_BUFFER_ALIGNMENT - 1)) & (-DEFAULT_BUFFER_ALIGNMENT)); + } +#endif + + // --------------------------------------------------------------------- +#ifdef KOKKOS_ENABLE_CUDA + // Specialize buffer allocation/free for cuda + template <> + inline AlignedBuffer::AlignedBuffer(std::size_t size) { + void* original; + space = to_multiple(size); + cudaMalloc(&original, space); + if (!original) { + throw std::runtime_error("Error allocating cuda memory in AlignedBuffer"); + } + ptrOriginal = original; + ptrAligned = std::align(DEFAULT_BUFFER_ALIGNMENT, size, original, space); + SPDLOG_TRACE("AlignedBuffer: original {}, aligned {}, size {}, space {}", + (void*)(ptrOriginal), (void*)(ptrAligned), size, space); + // sanity check should always be true when std::align used + assert(space >= size); + } + // + template <> + inline AlignedBuffer::~AlignedBuffer() { + if (ptrOriginal) { + SPDLOG_DEBUG("Destroying cuda buffer {}", ptrOriginal); + cudaFree(ptrOriginal); + } + } +#endif + + // --------------------------------------------------------------------- +#ifdef KOKKOS_ENABLE_HIP +#define HIP_CHECK(expression) \ + { \ + const hipError_t status = expression; \ + if (status != hipSuccess) { \ + std::cerr << "HIP error " << status << ": " << hipGetErrorString(status) << " at " \ + << __FILE__ << ":" << __LINE__ << std::endl; \ + } \ + } + + // Specialize buffer allocation/free for HIP + template <> + inline AlignedBuffer::AlignedBuffer(std::size_t size) { + void* original; + space = to_multiple(size); + HIP_CHECK(hipMalloc(&original, space)); + if (!original) { + throw std::runtime_error("Error allocating HIP memory in AlignedBuffer"); + } + ptrOriginal = original; + ptrAligned = std::align(DEFAULT_BUFFER_ALIGNMENT, size, original, space); + SPDLOG_TRACE("AlignedBuffer: original {}, aligned {}, size {}, space {}", + (void*)(ptrOriginal), (void*)(ptrAligned), size, space); + // sanity check should always be true when std::align used + assert(space >= size); + } + // + template <> + inline AlignedBuffer::~AlignedBuffer() { + if (ptrOriginal) { + SPDLOG_DEBUG("Destroying HIP buffer {}", ptrOriginal); + HIP_CHECK(hipFree(ptrOriginal)); + } + } +#endif + + // --------------------------------------------------------------------- + template + struct aligned_storage_wrapper { + // + using memory_space = MemorySpace; + using buffer_type = + ippl::detail::ViewType>::view_type; + using pointer_type = typename buffer_type::pointer_type; + using size_type = detail::size_type; + // + aligned_storage_wrapper(const std::string& /*name*/, size_type size) + : view() // we will construct the view manually + , buffer(size) // + { + SPDLOG_TRACE("Construct: view origin {}, aligned {}", (void*)(view.data()), + (void*)(buffer.ptrAligned)); + view = buffer_type((pointer_type)buffer.ptrAligned, size); + assert(view.data() == buffer.ptrAligned); + } + // + size_type size() const { return buffer.space; } + // + pointer_type data() { return view.data(); } + + // Note that this makes no effort to preserve any existing data + void reallocBuffer(size_type newsize) { + // wipe the old memory, before allocating new, (help prevent out-of-space errors) + buffer = AlignedBuffer(); + // allocate new + buffer = AlignedBuffer(newsize); + view = buffer_type((pointer_type)buffer.ptrAligned, newsize); + SPDLOG_DEBUG("Realloc : view {}, aligned {}, size {}, space {}", (void*)(view.data()), + (void*)(buffer.ptrAligned), newsize, buffer.space); + } + // + buffer_type view; + AlignedBuffer buffer; + }; + +} // namespace ippl::comms + +#endif diff --git a/src/Communicate/Archive.h b/src/Communicate/Archive.h index aed062db5..1f2623dac 100644 --- a/src/Communicate/Archive.h +++ b/src/Communicate/Archive.h @@ -82,10 +82,12 @@ namespace ippl { size_type getBufferSize() const { return buffer_m.size(); } - void resizeBuffer(size_type size) { Kokkos::resize(buffer_m, size); } - void reallocBuffer(size_type size) { - Kokkos::realloc(buffer_m, size); /*buffer_m.reallocBuffer(size);*/ +#ifdef IPPL_ALIGNED_COMMS_BUFFERS + buffer_m.reallocBuffer(size); +#else + Kokkos::realloc(buffer_m, size); +#endif } void resetReadWritePos() { diff --git a/src/Communicate/Archive.hpp b/src/Communicate/Archive.hpp index 3737e98ca..1135c8157 100644 --- a/src/Communicate/Archive.hpp +++ b/src/Communicate/Archive.hpp @@ -104,7 +104,7 @@ namespace ippl { } // constexpr size_t size = sizeof(T); - char* src_ptr = (char*)(buffer_m.data()) + readpos_m; + char* src_ptr = (char*)(buffer_m.data()); ippl::Vector* dst_ptr = view.data(); auto rp = readpos_m; using exec_space = typename Kokkos::View::execution_space; diff --git a/src/Communicate/BufferHandler.h b/src/Communicate/BufferHandler.h index a18558d9a..a42a1ac0a 100644 --- a/src/Communicate/BufferHandler.h +++ b/src/Communicate/BufferHandler.h @@ -12,25 +12,26 @@ #include "Communicate/Archive.h" +#ifdef IPPL_ALIGNED_COMMS_BUFFERS +#include "Communicate/AlignedBuffer.h" +#endif + namespace ippl::comms { +#ifdef IPPL_ALIGNED_COMMS_BUFFERS + // alignment provided by AlignedBuffer wrapper + template + using archive_buffer = detail::Archive>; +#else + // default kokkos alignment template using communicator_storage = ippl::detail::ViewType>::view_type; - // --------------------------------------------- - // archive wrapper around some arbitrary buffer - template - struct rma_archive { - using type = detail::Archive; - }; - - template - using rma_archive_type = rma_archive::type; - template - using archive_buffer = rma_archive_type>; + using archive_buffer = detail::Archive>; +#endif /** * @brief Interface for memory buffer handling. diff --git a/src/Communicate/CMakeLists.txt b/src/Communicate/CMakeLists.txt index 95e7b4819..31cb70e89 100644 --- a/src/Communicate/CMakeLists.txt +++ b/src/Communicate/CMakeLists.txt @@ -4,5 +4,9 @@ # Adds all Communicate-related sources and headers to the IPPL library. # ----------------------------------------------------------------------------- +if(IPPL_ENABLE_ALIGNED_COMMUNICATION_BUFFERS) + target_compile_definitions(ippl PUBLIC IPPL_ALIGNED_COMMS_BUFFERS) +endif() + target_sources(ippl PRIVATE Communicator.cpp CommunicatorLogging.cpp Environment.cpp Buffers.cpp Request.cpp LogEntry.cpp) diff --git a/src/Communicate/Communicator.cpp b/src/Communicate/Communicator.cpp index 55d601715..81376de62 100644 --- a/src/Communicate/Communicator.cpp +++ b/src/Communicate/Communicator.cpp @@ -48,6 +48,8 @@ namespace ippl::mpi { static std::shared_ptr comm_buff_handler_ptr{nullptr}; if (comm_buff_handler_ptr == nullptr) { comm_buff_handler_ptr = std::make_shared(); + SPDLOG_DEBUG("BufferHandler new: {}", + ippl::debug::print_type()); } return comm_buff_handler_ptr; }