diff --git a/CMakeLists.txt b/CMakeLists.txt
index 737c30386..164b7d18d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -44,6 +44,8 @@ option(IPPL_ENABLE_TESTS "Build integration tests in test/ directory" OFF)
option(IPPL_ENABLE_COVERAGE "Enable code coverage" OFF)
option(IPPL_ENABLE_NSYS_PROFILER "Enable Nvidia Nsys Profiler" OFF)
option(IPPL_ENABLE_SANITIZER "Enable sanitizer(s)" OFF)
+option(IPPL_ENABLE_ALIGNED_COMMUNICATION_BUFFERS "Enable aligned memory buffersvfor communication"
+ OFF)
option(IPPL_USE_ALTERNATIVE_VARIANT
"Use modified variant implementation (required for CUDA 12.2 + GCC 12.3.0)" OFF)
option(IPPL_USE_STANDARD_FOLDERS "Put all generated binaries in bin/lib folders" OFF)
@@ -51,6 +53,19 @@ option(IPPL_MARK_FAILING_TESTS
"Prefix names of tests that are known to fail with 'known_fail' for filtering with ctest"
OFF)
option(IPPL_ENABLE_SCRIPTS "Generate job script templates for some benchmarks/tests" OFF)
+option(IPPL_GPU_AWARE_MPI "Allow MPI to/from from device memory buffers" OFF)
+# logging options
+set(IPPL_LOG_LEVEL "off" CACHE STRING "Enable logging for messages of >=level")
+set_property(
+ CACHE IPPL_LOG_LEVEL
+ PROPERTY STRINGS
+ off
+ trace
+ debug
+ info
+ warn
+ error
+ critical)
# "Build IPPL as a shared library (ON) or static library (OFF)" OFF) if(IPPL_DYL)
# set(BUILD_SHARED_LIBS ON CACHE BOOL "" FORCE) message(WARNING "IPPL_DYL is deprecated; use
@@ -99,21 +114,6 @@ if(DEFINED USE_ALTERNATIVE_VARIANT)
set(IPPL_USE_ALTERNATIVE_VARIANT ${USE_ALTERNATIVE_VARIANT} CACHE BOOL "" FORCE)
endif()
-# ------------------------------------------------------------------------------
-# Debug: This tells the compiler to replace occurrences of ${
} with in debug info and error
-# messages.
-# ------------------------------------------------------------------------------
-add_compile_options(
- $<$:-ffile-prefix-map=${CMAKE_SOURCE_DIR}=.>
- $<$:-ffile-prefix-map=${CMAKE_SOURCE_DIR}=.>)
-
-if(DEFINED FETCHCONTENT_BASE_DIR)
- add_compile_options(
- $<$:-ffile-prefix-map=${FETCHCONTENT_BASE_DIR}=.3p>
- $<$:-ffile-prefix-map=${FETCHCONTENT_BASE_DIR}=.3p>
- )
-endif()
-
# ------------------------------------------------------------------------------
# Define sources for project
# ------------------------------------------------------------------------------
diff --git a/alpine/CMakeLists.txt b/alpine/CMakeLists.txt
index 4db043d4f..ff9856ba3 100644
--- a/alpine/CMakeLists.txt
+++ b/alpine/CMakeLists.txt
@@ -16,9 +16,13 @@ endfunction()
if(IPPL_ENABLE_TESTS)
# cmake-format: off
+ # Landau will write a CSV file to the data directory that we will validate later
+ make_directory("${PROJECT_BINARY_DIR}/alpine/data")
+ # Add the test
add_ippl_integration_test(LandauDamping
- ARGS 16 16 16 10000000 10 FFT 0.01 LeapFrog --overallocate 2.0 --info 10
- LABELS alpine integration)
+ ARGS "16" "16" "16" "10000000" "25" "FFT" "0.01" "LeapFrog" "--overallocate" "2.0" "--info" "10"
+ LABELS alpine integration
+ WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/alpine/")
# cmake-format: on
else()
add_alpine_example(LandauDamping)
@@ -26,3 +30,5 @@ endif()
add_alpine_example(PenningTrap)
add_alpine_example(BumponTailInstability)
+
+add_subdirectory(validation)
diff --git a/alpine/LandauDamping.cpp b/alpine/LandauDamping.cpp
index 718672513..dc8cadfed 100644
--- a/alpine/LandauDamping.cpp
+++ b/alpine/LandauDamping.cpp
@@ -37,17 +37,22 @@ const char* TestName = "LandauDamping";
#include "Manager/datatypes.h"
#include "Utility/IpplTimings.h"
+#include "Utility/Logging.h"
#include "LandauDampingManager.h"
#include "Manager/PicManager.h"
int main(int argc, char* argv[]) {
+#if defined(SPDLOG_ACTIVE_LEVEL) && (SPDLOG_ACTIVE_LEVEL != SPDLOG_LEVEL_OFF)
+ spdlog::set_pattern("[%^%-8l%$]%t| %v");
+ spdlog::set_level(spdlog::level::trace);
+#endif
ippl::initialize(argc, argv);
{
Inform msg(TestName);
Inform msg2all(TestName, INFORM_ALL_NODES);
- static IpplTimings::TimerRef mainTimer = IpplTimings::getTimer("total");
+ static IpplTimings::TimerRef mainTimer = IpplTimings::getTimer("total");
static IpplTimings::TimerRef initializeTimer = IpplTimings::getTimer("initialize");
IpplTimings::startTimer(mainTimer);
IpplTimings::startTimer(initializeTimer);
@@ -82,7 +87,7 @@ int main(int argc, char* argv[]) {
manager.pre_run();
IpplTimings::stopTimer(initializeTimer);
-
+
manager.setTime(0.0);
msg << "Starting iterations ..." << endl;
diff --git a/alpine/validation/CMakeLists.txt b/alpine/validation/CMakeLists.txt
new file mode 100644
index 000000000..5236a9ba0
--- /dev/null
+++ b/alpine/validation/CMakeLists.txt
@@ -0,0 +1,23 @@
+# -----------------------------------------------------------------------------
+# validation check for LandauDamping test
+# -----------------------------------------------------------------------------
+
+if(BUILD_TESTING)
+ # Build the C++ correctness validation test
+ add_executable(LandauDampingCorrectness LandauDampingCorrectness.cpp)
+
+ message("Adding test: LandauDampingCorrectness")
+
+ # command line params are : output, reference, tolerance
+ add_test(
+ NAME LandauDampingCorrectnessValidation
+ COMMAND LandauDampingCorrectness "${PROJECT_BINARY_DIR}/alpine/data/FieldLandau_2_manager.csv"
+ "${PROJECT_SOURCE_DIR}/alpine/validation/FieldLandau_valid_result.csv" "4E-1"
+ WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/alpine/")
+
+ set_tests_properties(
+ LandauDampingCorrectnessValidation
+ PROPERTIES LABELS "validation;alpine" TIMEOUT 30
+ # This test should only run if the LandauDamping integration test passed
+ DEPENDS LandauDamping)
+endif()
diff --git a/alpine/validation/FieldLandau_valid_result.csv b/alpine/validation/FieldLandau_valid_result.csv
new file mode 100644
index 000000000..9eb047189
--- /dev/null
+++ b/alpine/validation/FieldLandau_valid_result.csv
@@ -0,0 +1,27 @@
+time, Ex_field_energy, Ex_max_norm
+0.0000000000000000e+00 9.6742715140721582e+00 1.0825163128256524e-01
+5.0000000000000003e-02 9.6368948973459112e+00 1.0808994906148438e-01
+1.0000000000000001e-01 9.5415075224669970e+00 1.0740779743767520e-01
+1.5000000000000002e-01 9.3882773328014082e+00 1.0630178072727425e-01
+2.0000000000000001e-01 9.1791183371635334e+00 1.0482602738140372e-01
+2.5000000000000000e-01 8.9172134334449193e+00 1.0306560288960222e-01
+2.9999999999999999e-01 8.6061662103776140e+00 1.0113820775231776e-01
+3.4999999999999998e-01 8.2505504653792148e+00 9.8691447720593309e-02
+3.9999999999999997e-01 7.8550417931569703e+00 9.5598706257118735e-02
+4.4999999999999996e-01 7.4249700904468465e+00 9.2872937917417214e-02
+4.9999999999999994e-01 6.9653510910124385e+00 9.0033318617501135e-02
+5.4999999999999993e-01 6.4822476651919327e+00 8.7257335757243495e-02
+5.9999999999999998e-01 5.9816257151911403e+00 8.4644036660629890e-02
+6.5000000000000002e-01 5.4696405485153790e+00 8.2154906259950061e-02
+7.0000000000000007e-01 4.9535573676676501e+00 7.9019365369956629e-02
+7.5000000000000011e-01 4.4392121571874581e+00 7.5190519676643197e-02
+8.0000000000000016e-01 3.9321278953403862e+00 7.1054552970285123e-02
+8.5000000000000020e-01 3.4384135389096344e+00 6.6356647790906387e-02
+9.0000000000000024e-01 2.9638108812800437e+00 6.1694793644303261e-02
+9.5000000000000029e-01 2.5131142881540285e+00 5.7498634682739851e-02
+1.0000000000000002e+00 2.0909400590908556e+00 5.3113907328247389e-02
+1.0500000000000003e+00 1.7017166009804880e+00 4.8965793799131234e-02
+1.1000000000000003e+00 1.3486230947669464e+00 4.4410412852319857e-02
+1.1500000000000004e+00 1.0343505136217537e+00 3.9926242617971765e-02
+1.2000000000000004e+00 7.6105675926529781e-01 3.5819185614705762e-02
+1.2500000000000004e+00 5.3020078549605387e-01 3.1521337179656390e-02
diff --git a/alpine/validation/LandauDampingCorrectness.cpp b/alpine/validation/LandauDampingCorrectness.cpp
new file mode 100644
index 000000000..065c6c477
--- /dev/null
+++ b/alpine/validation/LandauDampingCorrectness.cpp
@@ -0,0 +1,333 @@
+// LandauDamping Correctness Validation Test
+//
+// This test compares the CSV output generated by the LandauDamping simulation
+// with a reference (valid) result file. All numerical values must be within
+// a tolerance of 1E-7 for the test to pass.
+//
+// Usage:
+// ./LandauDampingCorrectness [output_csv] [reference_csv] [tolerance]
+//
+// Where:
+// output_csv : Path to the generated FieldLandau_*.csv file
+// (default: data/FieldLandau_*_manager.csv - finds first match)
+// reference_csv : Path to the reference file
+// (default: ../../alpine/validation/FieldLandau_valid_result.csv)
+// tolerance : Absolute tolerance for comparison (default: 1e-7)
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace fs = std::filesystem;
+
+// Structure to hold a row of CSV data
+struct CSVRow {
+ double time;
+ double Ex_field_energy;
+ double Ex_max_norm;
+
+ CSVRow()
+ : time(0.0)
+ , Ex_field_energy(0.0)
+ , Ex_max_norm(0.0) {}
+
+ CSVRow(double t, double e, double n)
+ : time(t)
+ , Ex_field_energy(e)
+ , Ex_max_norm(n) {}
+};
+
+// Trim whitespace from string
+std::string trim(const std::string& str) {
+ size_t first = str.find_first_not_of(" \t\r\n");
+ if (first == std::string::npos)
+ return "";
+ size_t last = str.find_last_not_of(" \t\r\n");
+ return str.substr(first, (last - first + 1));
+}
+
+// Find first CSV file matching pattern in directory
+std::string findCSVFile(const std::string& pattern) {
+ // If pattern is a file that exists, return it
+ if (fs::exists(pattern) && fs::is_regular_file(pattern)) {
+ return pattern;
+ }
+
+ // Try to find a matching file in data/ directory
+ std::string dataDir = "data";
+ if (fs::exists(dataDir) && fs::is_directory(dataDir)) {
+ for (const auto& entry : fs::directory_iterator(dataDir)) {
+ if (entry.is_regular_file() && entry.path().extension() == ".csv") {
+ std::string filename = entry.path().filename().string();
+ if (filename.find("FieldLandau_") == 0
+ && filename.find("_manager.csv") != std::string::npos) {
+ return entry.path().string();
+ }
+ }
+ }
+ }
+
+ return pattern; // Return original pattern if not found
+}
+
+// Load CSV file into vector of CSVRow
+bool loadCSV(const std::string& filepath, std::vector& data, bool& hasMaxNorm,
+ std::string& errorMsg) {
+ std::ifstream file(filepath);
+ if (!file.is_open()) {
+ errorMsg = "Could not open file: " + filepath;
+ return false;
+ }
+
+ std::string line;
+ bool isFirstLine = true;
+ int lineNum = 0;
+ hasMaxNorm = false;
+
+ while (std::getline(file, line)) {
+ lineNum++;
+ line = trim(line);
+
+ // Skip empty lines
+ if (line.empty())
+ continue;
+
+ // Check header line
+ if (isFirstLine) {
+ isFirstLine = false;
+ // Check if we have Ex_max_norm column
+ hasMaxNorm = (line.find("Ex_max_norm") != std::string::npos);
+ continue;
+ }
+
+ // Parse data line
+ std::istringstream iss(line);
+ double time, energy, maxNorm = 0.0;
+
+ if (!(iss >> time >> energy)) {
+ errorMsg = "Failed to parse line " + std::to_string(lineNum) + ": " + line;
+ return false;
+ }
+
+ // Try to read third column if present
+ if (hasMaxNorm) {
+ if (!(iss >> maxNorm)) {
+ // If header says we have max_norm but can't read it, it's an error
+ errorMsg = "Expected Ex_max_norm column but failed to parse at line "
+ + std::to_string(lineNum);
+ return false;
+ }
+ }
+
+ data.emplace_back(time, energy, maxNorm);
+ }
+
+ file.close();
+
+ if (data.empty()) {
+ errorMsg = "No data rows found in file: " + filepath;
+ return false;
+ }
+
+ return true;
+}
+
+// Compare two CSV datasets
+bool compareCSVData(const std::vector& output, const std::vector& reference,
+ bool outputHasMaxNorm, bool refHasMaxNorm, double tolerance,
+ std::vector& errors) {
+ // Check row count
+ if (output.size() != reference.size()) {
+ std::ostringstream oss;
+ oss << "Row count mismatch: output has " << output.size() << " rows, reference has "
+ << reference.size() << " rows";
+ errors.push_back(oss.str());
+ return false;
+ }
+
+ // Compare each row
+ std::vector timeErrors, energyErrors, normErrors;
+ double maxTimeDiff = 0.0, maxEnergyDiff = 0.0, maxNormDiff = 0.0;
+
+ for (size_t i = 0; i < output.size(); ++i) {
+ // Compare time
+ double timeDiff = std::abs(output[i].time - reference[i].time);
+ if (timeDiff > tolerance) {
+ timeErrors.push_back(static_cast(i));
+ maxTimeDiff = std::max(maxTimeDiff, timeDiff);
+ }
+
+ // Compare Ex_field_energy
+ double energyDiff = std::abs(output[i].Ex_field_energy - reference[i].Ex_field_energy);
+ if (energyDiff > tolerance) {
+ energyErrors.push_back(static_cast(i));
+ maxEnergyDiff = std::max(maxEnergyDiff, energyDiff);
+ }
+
+ // Compare Ex_max_norm if both have it
+ if (outputHasMaxNorm && refHasMaxNorm) {
+ double normDiff = std::abs(output[i].Ex_max_norm - reference[i].Ex_max_norm);
+ if (normDiff > tolerance) {
+ normErrors.push_back(static_cast(i));
+ maxNormDiff = std::max(maxNormDiff, normDiff);
+ }
+ }
+ }
+
+ // Report errors
+ bool hasErrors = false;
+
+ if (!timeErrors.empty()) {
+ hasErrors = true;
+ std::ostringstream oss;
+ oss << "Column 'time': " << timeErrors.size() << " value(s) outside tolerance "
+ << "(tolerance=" << std::scientific << tolerance << ", max_diff=" << maxTimeDiff
+ << ") at rows [";
+ for (size_t i = 0; i < std::min(timeErrors.size(), size_t(10)); ++i) {
+ if (i > 0)
+ oss << ", ";
+ oss << timeErrors[i];
+ }
+ if (timeErrors.size() > 10)
+ oss << ", ...";
+ oss << "]";
+ errors.push_back(oss.str());
+ }
+
+ if (!energyErrors.empty()) {
+ hasErrors = true;
+ std::ostringstream oss;
+ oss << "Column 'Ex_field_energy': " << energyErrors.size() << " value(s) outside tolerance "
+ << "(tolerance=" << std::scientific << tolerance << ", max_diff=" << maxEnergyDiff
+ << ") at rows [";
+ for (size_t i = 0; i < std::min(energyErrors.size(), size_t(10)); ++i) {
+ if (i > 0)
+ oss << ", ";
+ oss << energyErrors[i];
+ }
+ if (energyErrors.size() > 10)
+ oss << ", ...";
+ oss << "]";
+ errors.push_back(oss.str());
+ }
+
+ if (!normErrors.empty()) {
+ hasErrors = true;
+ std::ostringstream oss;
+ oss << "Column 'Ex_max_norm': " << normErrors.size() << " value(s) outside tolerance "
+ << "(tolerance=" << std::scientific << tolerance << ", max_diff=" << maxNormDiff
+ << ") at rows [";
+ for (size_t i = 0; i < std::min(normErrors.size(), size_t(10)); ++i) {
+ if (i > 0)
+ oss << ", ";
+ oss << normErrors[i];
+ }
+ if (normErrors.size() > 10)
+ oss << ", ...";
+ oss << "]";
+ errors.push_back(oss.str());
+ }
+
+ return !hasErrors;
+}
+
+int main(int argc, char* argv[]) {
+ // Default values
+ std::string outputCSV = "data/FieldLandau_*_manager.csv";
+ std::string referenceCSV = "../../alpine/validation/FieldLandau_valid_result.csv";
+ double tolerance = 1e-7;
+
+ // Parse command line arguments
+ if (argc > 1) {
+ if (std::string(argv[1]) == "--help" || std::string(argv[1]) == "-h") {
+ std::cout << "LandauDamping Correctness Validation Test\n\n"
+ << "Usage: " << argv[0] << " [output_csv] [reference_csv] [tolerance]\n\n"
+ << "Arguments:\n"
+ << " output_csv : Path to generated CSV file (default: "
+ "data/FieldLandau_*_manager.csv)\n"
+ << " reference_csv : Path to reference CSV file (default: "
+ "../../alpine/validation/FieldLandau_valid_result.csv)\n"
+ << " tolerance : Absolute tolerance for comparison (default: 1e-7)\n\n"
+ << "Exit codes:\n"
+ << " 0 : PASS - Output matches reference within tolerance\n"
+ << " 1 : FAIL - Output does not match reference\n"
+ << " 2 : ERROR - File I/O or parsing error\n\n"
+ << "Examples:\n"
+ << " " << argv[0] << "\n"
+ << " " << argv[0] << " data/FieldLandau_2_manager.csv\n"
+ << " " << argv[0] << " output.csv reference.csv 1e-6\n";
+ return 0;
+ }
+ outputCSV = argv[1];
+ }
+ if (argc > 2) {
+ referenceCSV = argv[2];
+ }
+ if (argc > 3) {
+ tolerance = std::atof(argv[3]);
+ }
+
+ // Find output file if pattern given
+ outputCSV = findCSVFile(outputCSV);
+
+ std::cout << "LandauDamping Correctness Validation\n";
+ std::cout << "=====================================\n";
+ std::cout << "Output file: " << outputCSV << "\n";
+ std::cout << "Reference file: " << referenceCSV << "\n";
+ std::cout << "Tolerance: " << std::scientific << tolerance << "\n";
+ std::cout << std::endl;
+
+ // Load output CSV
+ std::vector outputData;
+ bool outputHasMaxNorm = false;
+ std::string errorMsg;
+
+ if (!loadCSV(outputCSV, outputData, outputHasMaxNorm, errorMsg)) {
+ std::cerr << "✗ ERROR: Failed to load output CSV\n";
+ std::cerr << " " << errorMsg << std::endl;
+ return 2;
+ }
+
+ std::cout << "Loaded output: " << outputData.size() << " rows";
+ if (outputHasMaxNorm)
+ std::cout << " (with Ex_max_norm column)";
+ std::cout << std::endl;
+
+ // Load reference CSV
+ std::vector referenceData;
+ bool refHasMaxNorm = false;
+
+ if (!loadCSV(referenceCSV, referenceData, refHasMaxNorm, errorMsg)) {
+ std::cerr << "✗ ERROR: Failed to load reference CSV\n";
+ std::cerr << " " << errorMsg << std::endl;
+ return 2;
+ }
+
+ std::cout << "Loaded reference: " << referenceData.size() << " rows";
+ if (refHasMaxNorm)
+ std::cout << " (with Ex_max_norm column)";
+ std::cout << std::endl;
+ std::cout << std::endl;
+
+ // Compare data
+ std::vector errors;
+ bool passed = compareCSVData(outputData, referenceData, outputHasMaxNorm, refHasMaxNorm,
+ tolerance, errors);
+
+ if (passed) {
+ std::cout << "✓ PASS: Output matches reference within tolerance" << std::endl;
+ return 0;
+ } else {
+ std::cout << "✗ FAIL: Output does not match reference" << std::endl;
+ for (const auto& error : errors) {
+ std::cout << " - " << error << std::endl;
+ }
+ return 1;
+ }
+}
diff --git a/ci/cscs/common.yml b/ci/cscs/common.yml
index 8439b1afd..aab7f3bef 100644
--- a/ci/cscs/common.yml
+++ b/ci/cscs/common.yml
@@ -6,7 +6,9 @@ include:
- echo "CI_PROJECT_URL=$CI_PROJECT_URL"
- echo "CI_COMMIT_SHORT_SHA=$CI_COMMIT_SHORT_SHA"
- |
- if [[ "$CI_COMMIT_REF_NAME" =~ pr([0-9]+)$ ]]; then
+ if [[ "$CI_COMMIT_REF_NAME" =~ gh-readonly-queue.*-pr-([0-9]+)- ]]; then
+ export CDASH_LABEL="Merge-Queue-PR-${BASH_REMATCH[1]}"
+ elif [[ "$CI_COMMIT_REF_NAME" =~ pr([0-9]+)$ ]]; then
export CDASH_LABEL="PR-${BASH_REMATCH[1]}"
else
export CDASH_LABEL="$CI_COMMIT_REF_SLUG"
diff --git a/ci/cscs/cuda/build_sm90.yml b/ci/cscs/cuda/build_sm90.yml
index b54ffdbb4..6175f92c5 100644
--- a/ci/cscs/cuda/build_sm90.yml
+++ b/ci/cscs/cuda/build_sm90.yml
@@ -27,17 +27,18 @@ variables:
- env | sort | grep CI
script:
- >-
- ctest -V -S $CI_PROJECT_DIR/ci/cscs/dashboard-configure-build.cmake
+ ctest -V -S $CI_PROJECT_DIR/ci/cscs/dashboard-configure-build.cmake
-DCTEST_SITE="$CTEST_SITE"
- -DPRESET=alps-gh200
- -DCDASH_LABEL=$CDASH_LABEL
+ -DPRESET=alps-gh200
+ -DCDASH_LABEL=$CDASH_LABEL
-DBUILD_TYPE=$BUILD_TYPE
-DBUILD_DIR=$BUILD_PATH
- -DBUILD_ARCH=$BUILD_ARCH
- -DKokkos_VERSION=git.4.7.02
- -DKokkos_ARCH_FLAG=Kokkos_ARCH_HOPPER90
- -DMPIEXEC_EXECUTABLE=/usr/bin/srun
- -DMPIEXEC_PREFLAGS="$SRUN_FLAGS"
+ -DBUILD_ARCH=$BUILD_ARCH
+ -DHeffte_VERSION=git.v2.4.1
+ -DKokkos_VERSION=git.4.7.02
+ -DKokkos_ARCH_FLAG=Kokkos_ARCH_HOPPER90
+ -DMPIEXEC_EXECUTABLE=/usr/bin/srun
+ -DMPIEXEC_PREFLAGS="$SRUN_FLAGS"
-DMPIEXEC_MAX_NUMPROCS=4
- echo "Build directory size (before cleanup):" $(du -sh $BUILD_PATH | cut -f1)
- find $BUILD_PATH -name \*.o -delete
diff --git a/ci/cscs/dashboard-configure-build.cmake b/ci/cscs/dashboard-configure-build.cmake
index b12f67b12..ed460f165 100644
--- a/ci/cscs/dashboard-configure-build.cmake
+++ b/ci/cscs/dashboard-configure-build.cmake
@@ -50,7 +50,12 @@ set(CTEST_CONFIGURE_COMMAND "${CTEST_CONFIGURE_COMMAND} -DMPIEXEC_EXECUTABLE=${M
set(CTEST_CONFIGURE_COMMAND "${CTEST_CONFIGURE_COMMAND} -DMPIEXEC_PREFLAGS=${MPIEXEC_PREFLAGS}")
set(CTEST_CONFIGURE_COMMAND
"${CTEST_CONFIGURE_COMMAND} -DMPIEXEC_MAX_NUMPROCS=${MPIEXEC_MAX_NUMPROCS}")
-set(CTEST_CONFIGURE_COMMAND "${CTEST_CONFIGURE_COMMAND} -DKokkos_VERSION=${Kokkos_VERSION}")
+if(DEFINED Heffte_VERSION)
+ set(CTEST_CONFIGURE_COMMAND "${CTEST_CONFIGURE_COMMAND} -DHeffte_VERSION=${Heffte_VERSION}")
+endif()
+if(DEFINED Kokkos_VERSION)
+ set(CTEST_CONFIGURE_COMMAND "${CTEST_CONFIGURE_COMMAND} -DKokkos_VERSION=${Kokkos_VERSION}")
+endif()
set(CTEST_CONFIGURE_COMMAND "${CTEST_CONFIGURE_COMMAND} -D${Kokkos_ARCH_FLAG}=ON")
# --- configure & build ---
diff --git a/ci/cscs/openmp/build_openmp.yml b/ci/cscs/openmp/build_openmp.yml
index ff4896956..4387273f2 100644
--- a/ci/cscs/openmp/build_openmp.yml
+++ b/ci/cscs/openmp/build_openmp.yml
@@ -28,19 +28,19 @@ variables:
- export CXX=g++
script:
- >-
- ctest -V -S $CI_PROJECT_DIR/ci/cscs/dashboard-configure-build.cmake
+ ctest -V -S $CI_PROJECT_DIR/ci/cscs/dashboard-configure-build.cmake
-DCTEST_SITE="$CTEST_SITE"
- -DPRESET=release-testing
- -DCDASH_LABEL=$CDASH_LABEL
- -DBUILD_TYPE=$BUILD_TYPE
+ -DPRESET=release-testing
+ -DCDASH_LABEL=$CDASH_LABEL
+ -DBUILD_TYPE=$BUILD_TYPE
-DBUILD_DIR=$BUILD_PATH
- -DBUILD_ARCH=$BUILD_ARCH
+ -DBUILD_ARCH=$BUILD_ARCH
-DIPPL_PLATFORMS=OPENMP
-DIPPL_OPENMP_THREADS=32
- -DKokkos_VERSION=git.4.7.02
- -DKokkos_ARCH_FLAG=Kokkos_ARCH_AMD_GFX942_APU
- -DMPIEXEC_EXECUTABLE=/usr/bin/srun
- -DMPIEXEC_PREFLAGS="$SRUN_FLAGS"
+ -DKokkos_VERSION=git.4.7.02
+ -DKokkos_ARCH_FLAG=Kokkos_ARCH_AMD_GFX942_APU
+ -DMPIEXEC_EXECUTABLE=/usr/bin/srun
+ -DMPIEXEC_PREFLAGS="$SRUN_FLAGS"
-DMPIEXEC_MAX_NUMPROCS=4
- echo "Build directory size (before cleanup):" $(du -sh $BUILD_PATH | cut -f1)
- find $BUILD_PATH -name \*.o -delete
diff --git a/ci/cscs/rocm/build_rocm-6.3.yml b/ci/cscs/rocm/build_rocm-6.3.yml
index d09a7de6f..4735f8fa8 100644
--- a/ci/cscs/rocm/build_rocm-6.3.yml
+++ b/ci/cscs/rocm/build_rocm-6.3.yml
@@ -32,17 +32,17 @@ variables:
- export CMAKE_PREFIX_PATH=$HSA_PATH:$CMAKE_PREFIX_PATH
script:
- >-
- ctest -V -S $CI_PROJECT_DIR/ci/cscs/dashboard-configure-build.cmake
+ ctest -V -S $CI_PROJECT_DIR/ci/cscs/dashboard-configure-build.cmake
-DCTEST_SITE="$CTEST_SITE"
- -DPRESET=alps-mi300
- -DCDASH_LABEL=$CDASH_LABEL
+ -DPRESET=alps-mi300
+ -DCDASH_LABEL=$CDASH_LABEL
-DBUILD_TYPE=$BUILD_TYPE
-DBUILD_DIR=$BUILD_PATH
- -DBUILD_ARCH=$BUILD_ARCH
- -DKokkos_VERSION=git.4.7.02
- -DKokkos_ARCH_FLAG=Kokkos_ARCH_AMD_GFX942_APU
- -DMPIEXEC_EXECUTABLE=/usr/bin/srun
- -DMPIEXEC_PREFLAGS="$SRUN_FLAGS"
+ -DBUILD_ARCH=$BUILD_ARCH
+ -DKokkos_VERSION=git.4.7.02
+ -DKokkos_ARCH_FLAG=Kokkos_ARCH_AMD_GFX942_APU
+ -DMPIEXEC_EXECUTABLE=/usr/bin/srun
+ -DMPIEXEC_PREFLAGS="$SRUN_FLAGS"
-DMPIEXEC_MAX_NUMPROCS=4
# -DHeffte_ENABLE_GPU_AWARE_MPI=OFF
- echo "Build directory size (before cleanup):" $(du -sh $BUILD_PATH | cut -f1)
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
index 70bb39314..704d87a8b 100644
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
@@ -44,6 +44,23 @@ if("OPENMP" IN_LIST IPPL_PLATFORMS)
colour_message(STATUS ${Green} "✅ OpenMP platform requested OpenMP found ${OPENMP_VERSION}")
endif()
+# ------------------------------------------------------------------------------
+# spdlog logging library
+# ------------------------------------------------------------------------------
+string(TOUPPER ${IPPL_LOG_LEVEL} IPPL_LOG_LEVEL_UPPERCASE)
+if(NOT "${IPPL_LOG_LEVEL_UPPERCASE}" MATCHES "OFF")
+ find_package(spdlog REQUIRED)
+ colour_message(STATUS ${Green} "✅ spdlog found ${spdlog_VERSION}")
+endif()
+
+# ------------------------------------------------------------------------------
+# fmt library (for formatting nice log messages)
+# ------------------------------------------------------------------------------
+if(NOT "${IPPL_LOG_LEVEL_UPPERCASE}" MATCHES "OFF")
+ find_package(fmt REQUIRED)
+ colour_message(STATUS ${Green} "✅ fmt found ${fmt_VERSION}")
+endif()
+
# ------------------------------------------------------------------------------
# Utility function to clear a list of vars one by one
# ------------------------------------------------------------------------------
diff --git a/scripts/CMakeLists.txt b/scripts/CMakeLists.txt
index 4731c2f76..cad844a9d 100644
--- a/scripts/CMakeLists.txt
+++ b/scripts/CMakeLists.txt
@@ -9,6 +9,20 @@ message(STATUS "IPPL_MACHINENAME for script generation is: ${IPPL_MACHINENAME}")
# ------------------------------------------------------------------------------
set(IPPL_JOB_SUBMISSION_ACCOUNT "c41" CACHE STRING "Account to use for job submission templates")
+# populate IPPL_SCRIPTS_UENV from environment variable UENV_MOUNT_LIST (if present)
+if(DEFINED ENV{UENV_MOUNT_LIST})
+ set(IPPL_SCRIPTS_UENV "$ENV{UENV_MOUNT_LIST}"
+ CACHE STRING "UENV to use on alps when running scripts mount")
+ set(IPPL_SCRIPTS_UENV_VIEW "default" CACHE STRING "View to set in uenv")
+ colour_message(STATUS ${LightBlue}
+ "IPPL_SCRIPTS_UENV set from UENV_MOUNT_LIST: ${IPPL_SCRIPTS_UENV}")
+else()
+ # Remove any cached and normal definitions so the variable is completely unset
+ unset(IPPL_SCRIPTS_UENV CACHE)
+ unset(IPPL_SCRIPTS_UENV_VIEW CACHE)
+ colour_message(STATUS ${LightBlue} "No UENV detected")
+endif()
+
# ------------------------------------------------------------------------------
# utility function to get target path/name since we can't use generator expressions to set variables
# directly
diff --git a/scripts/landau/strong-scaling-alps/jobscript-gh200.slurm b/scripts/landau/strong-scaling-alps/jobscript-gh200.slurm
index 4c40a5bc2..338c7f682 100644
--- a/scripts/landau/strong-scaling-alps/jobscript-gh200.slurm
+++ b/scripts/landau/strong-scaling-alps/jobscript-gh200.slurm
@@ -12,8 +12,8 @@
#SBATCH --cpus-per-task=72
#SBATCH --exclusive
-#SBATCH --uenv=/capstor/store/cscs/cscs/public/uenvs/opal-x-gh200-mpich-gcc-2025-09-28.squashfs
-#SBATCH --view=develop
+#SBATCH --uenv=@IPPL_SCRIPTS_UENV@
+#SBATCH --view=@IPPL_SCRIPTS_UENV_VIEW@
#SBATCH --output=landau__n_.out
#SBATCH --error=landau__n_.error
@@ -41,10 +41,9 @@ export JOB_DIR=job_dir
export EXE=@LANDAU_BINARY@
export WRAPPER=@PROJECT_BINARY_DIR@/@JOB_SCRIPTS_PATH@/@JOB_WRAPPER_SCRIPT@
-mkdir -p data
export OMP_PROC_BIND=spread
export OMP_PLACES=threads
-export OMP_NUM_THREADS=72
+export OMP_NUM_THREADS=2
# --------------------
# cray-mpich debug putput
@@ -62,4 +61,14 @@ echo "N Particles: $nparticles"
echo "Iterations: $iterations"
# --------------------
-srun $WRAPPER $EXE $cubesize $cubesize $cubesize $nparticles $iterations FFT 1.0 LeapFrog --overallocate 2.0 --info 10
+mkdir -p ipc1
+pushd ipc1
+mkdir -p data
+IPC_ON=1 srun $WRAPPER $EXE $cubesize $cubesize $cubesize $nparticles $iterations FFT 1.0 LeapFrog --overallocate 2.0 --info 10
+popd
+
+mkdir -p ipc0
+pushd ipc0
+mkdir -p data
+IPC_ON=0 srun $WRAPPER $EXE $cubesize $cubesize $cubesize $nparticles $iterations FFT 1.0 LeapFrog --overallocate 2.0 --info 10
+popd
diff --git a/scripts/landau/strong-scaling-alps/jobscript-mi300.slurm b/scripts/landau/strong-scaling-alps/jobscript-mi300.slurm
index de29cc9dc..1bd459192 100644
--- a/scripts/landau/strong-scaling-alps/jobscript-mi300.slurm
+++ b/scripts/landau/strong-scaling-alps/jobscript-mi300.slurm
@@ -13,8 +13,8 @@
#SBATCH --cpus-per-task=48
#SBATCH --exclusive
-#SBATCH --uenv=/capstor/scratch/cscs/biddisco/opal-x-mi300-mpich-gcc-2025-10-27.squashfs
-#SBATCH --view=default
+#SBATCH --uenv=@IPPL_SCRIPTS_UENV@
+#SBATCH --view=@IPPL_SCRIPTS_UENV_VIEW@
#SBATCH --output=landau__n_.out
#SBATCH --error=landau__n_.error
diff --git a/scripts/landau/strong-scaling-alps/wrapper-gh200.sh b/scripts/landau/strong-scaling-alps/wrapper-gh200.sh
index b8a02caab..6522117af 100755
--- a/scripts/landau/strong-scaling-alps/wrapper-gh200.sh
+++ b/scripts/landau/strong-scaling-alps/wrapper-gh200.sh
@@ -88,9 +88,15 @@ export CUDA_VISIBLE_DEVICES=$gpu
# ---------------
# cray-mpich : see https://cpe.ext.hpe.com/docs/24.03/mpt/mpich/intro_mpi.html#general-mpich-environment-variables
# ---------------
-export MPICH_GPU_SUPPORT_ENABLED=1
+# profiling and debugging options
#export MPICH_OFI_CXI_COUNTER_REPORT=1
-export MPICH_GPU_IPC_ENABLED=0
+
+# gpu support
+export MPICH_GPU_IPC_ENABLED=1
+export MPICH_GPU_IPC_CACHE_MAX_SIZE=256
+if [ "$IPC_ON" -eq "0" ]; then
+ export MPICH_GPU_IPC_ENABLED=0
+fi
# ---------------
# OpenMPI mappings for MCA variables
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index c4778b174..51cfb879b 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -35,6 +35,14 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/IpplVersions.h.in
add_library(ippl)
+if(NOT "${IPPL_LOG_LEVEL_UPPERCASE}" MATCHES "OFF")
+ target_compile_definitions(ippl PUBLIC IPPL_LOGGING_ENABLED=1)
+ target_compile_definitions(ippl
+ PUBLIC SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_${IPPL_LOG_LEVEL_UPPERCASE})
+ target_link_libraries(ippl PUBLIC spdlog::spdlog $<$:ws2_32>)
+ target_link_libraries(ippl PUBLIC fmt::fmt)
+endif()
+
target_compile_features(ippl PUBLIC cxx_std_20)
target_compile_options(
diff --git a/src/Communicate/AlignedBuffer.h b/src/Communicate/AlignedBuffer.h
new file mode 100644
index 000000000..95e0cb74c
--- /dev/null
+++ b/src/Communicate/AlignedBuffer.h
@@ -0,0 +1,168 @@
+#ifndef IPPL_ALIGNED_BUFFER_H
+#define IPPL_ALIGNED_BUFFER_H
+
+#include "Types/ViewTypes.h"
+
+#include "Utility/Logging.h"
+#include "Utility/TypeUtils.h"
+//
+#include "Communicate/Archive.h"
+
+namespace ippl::comms {
+
+#define DEFAULT_BUFFER_ALIGNMENT 1024
+ // Here's a simple class that provides an aligned buffer, by default on the host
+ // but we can specialize the constructor/destructor for other memory spaces
+ template
+ struct AlignedBuffer {
+ using memory_space = MemorySpace;
+ void* ptrOriginal{nullptr};
+ void* ptrAligned{nullptr};
+ detail::size_type space{0};
+ //
+ AlignedBuffer() {}
+ //
+ AlignedBuffer& operator=(AlignedBuffer&& other) {
+ ptrOriginal = other.ptrOriginal;
+ ptrAligned = other.ptrAligned;
+ space = other.space;
+ other.ptrOriginal = nullptr;
+ other.ptrAligned = nullptr;
+ other.space = 0;
+ return *this;
+ }
+ //
+ AlignedBuffer(std::size_t size) {
+ ptrOriginal = std::aligned_alloc(DEFAULT_BUFFER_ALIGNMENT, size);
+ ptrAligned = ptrOriginal;
+ space = size;
+ SPDLOG_TRACE("AlignedBuffer: original {}, aligned {}, size {}, space {}",
+ (void*)(ptrOriginal), (void*)(ptrAligned), size, space);
+ // sanity check should always be true when std::align used
+ assert(space >= size);
+ }
+ //
+ ~AlignedBuffer() {
+ if (ptrOriginal) {
+ SPDLOG_DEBUG("Destroying host buffer {}", ptrOriginal);
+ std::free(ptrOriginal);
+ }
+ }
+ };
+
+ // ---------------------------------------------------------------------
+#if defined(KOKKOS_ENABLE_HIP) || defined(KOKKOS_ENABLE_CUDA)
+ // make number a multiple of the alignment
+ inline std::int64_t to_multiple(std::int64_t num) {
+ return ((2 * num + (DEFAULT_BUFFER_ALIGNMENT - 1)) & (-DEFAULT_BUFFER_ALIGNMENT));
+ }
+#endif
+
+ // ---------------------------------------------------------------------
+#ifdef KOKKOS_ENABLE_CUDA
+ // Specialize buffer allocation/free for cuda
+ template <>
+ inline AlignedBuffer::AlignedBuffer(std::size_t size) {
+ void* original;
+ space = to_multiple(size);
+ cudaMalloc(&original, space);
+ if (!original) {
+ throw std::runtime_error("Error allocating cuda memory in AlignedBuffer");
+ }
+ ptrOriginal = original;
+ ptrAligned = std::align(DEFAULT_BUFFER_ALIGNMENT, size, original, space);
+ SPDLOG_TRACE("AlignedBuffer: original {}, aligned {}, size {}, space {}",
+ (void*)(ptrOriginal), (void*)(ptrAligned), size, space);
+ // sanity check should always be true when std::align used
+ assert(space >= size);
+ }
+ //
+ template <>
+ inline AlignedBuffer::~AlignedBuffer() {
+ if (ptrOriginal) {
+ SPDLOG_DEBUG("Destroying cuda buffer {}", ptrOriginal);
+ cudaFree(ptrOriginal);
+ }
+ }
+#endif
+
+ // ---------------------------------------------------------------------
+#ifdef KOKKOS_ENABLE_HIP
+#define HIP_CHECK(expression) \
+ { \
+ const hipError_t status = expression; \
+ if (status != hipSuccess) { \
+ std::cerr << "HIP error " << status << ": " << hipGetErrorString(status) << " at " \
+ << __FILE__ << ":" << __LINE__ << std::endl; \
+ } \
+ }
+
+ // Specialize buffer allocation/free for HIP
+ template <>
+ inline AlignedBuffer::AlignedBuffer(std::size_t size) {
+ void* original;
+ space = to_multiple(size);
+ HIP_CHECK(hipMalloc(&original, space));
+ if (!original) {
+ throw std::runtime_error("Error allocating HIP memory in AlignedBuffer");
+ }
+ ptrOriginal = original;
+ ptrAligned = std::align(DEFAULT_BUFFER_ALIGNMENT, size, original, space);
+ SPDLOG_TRACE("AlignedBuffer: original {}, aligned {}, size {}, space {}",
+ (void*)(ptrOriginal), (void*)(ptrAligned), size, space);
+ // sanity check should always be true when std::align used
+ assert(space >= size);
+ }
+ //
+ template <>
+ inline AlignedBuffer::~AlignedBuffer() {
+ if (ptrOriginal) {
+ SPDLOG_DEBUG("Destroying HIP buffer {}", ptrOriginal);
+ HIP_CHECK(hipFree(ptrOriginal));
+ }
+ }
+#endif
+
+ // ---------------------------------------------------------------------
+ template
+ struct aligned_storage_wrapper {
+ //
+ using memory_space = MemorySpace;
+ using buffer_type =
+ ippl::detail::ViewType>::view_type;
+ using pointer_type = typename buffer_type::pointer_type;
+ using size_type = detail::size_type;
+ //
+ aligned_storage_wrapper(const std::string& /*name*/, size_type size)
+ : view() // we will construct the view manually
+ , buffer(size) //
+ {
+ SPDLOG_TRACE("Construct: view origin {}, aligned {}", (void*)(view.data()),
+ (void*)(buffer.ptrAligned));
+ view = buffer_type((pointer_type)buffer.ptrAligned, size);
+ assert(view.data() == buffer.ptrAligned);
+ }
+ //
+ size_type size() const { return buffer.space; }
+ //
+ pointer_type data() { return view.data(); }
+
+ // Note that this makes no effort to preserve any existing data
+ void reallocBuffer(size_type newsize) {
+ // wipe the old memory, before allocating new, (help prevent out-of-space errors)
+ buffer = AlignedBuffer();
+ // allocate new
+ buffer = AlignedBuffer(newsize);
+ view = buffer_type((pointer_type)buffer.ptrAligned, newsize);
+ SPDLOG_DEBUG("Realloc : view {}, aligned {}, size {}, space {}", (void*)(view.data()),
+ (void*)(buffer.ptrAligned), newsize, buffer.space);
+ }
+ //
+ buffer_type view;
+ AlignedBuffer buffer;
+ };
+
+} // namespace ippl::comms
+
+#endif
diff --git a/src/Communicate/Archive.h b/src/Communicate/Archive.h
index 6ec74f946..1f2623dac 100644
--- a/src/Communicate/Archive.h
+++ b/src/Communicate/Archive.h
@@ -21,14 +21,14 @@ namespace ippl {
namespace detail {
/*!
* @file Archive.h
- * Serialize and desesrialize particle attributes.
+ * Serialize and deserialize particle attributes.
* @tparam Properties variadic template for Kokkos::View
*/
- template
+ template
class Archive {
public:
- using buffer_type = typename ViewType::view_type;
+ using buffer_type = BufferType;
using pointer_type = typename buffer_type::pointer_type;
Archive(size_type size = 0);
@@ -73,7 +73,7 @@ namespace ippl {
/*!
* @returns a pointer to the data of the buffer
*/
- pointer_type getBuffer() { return buffer_m.data(); }
+ pointer_type getData() { return buffer_m.data(); }
/*!
* @returns the size of the buffer
@@ -82,10 +82,18 @@ namespace ippl {
size_type getBufferSize() const { return buffer_m.size(); }
- void resizeBuffer(size_type size) { Kokkos::resize(buffer_m, size); }
-
- void reallocBuffer(size_type size) { Kokkos::realloc(buffer_m, size); }
+ void reallocBuffer(size_type size) {
+#ifdef IPPL_ALIGNED_COMMS_BUFFERS
+ buffer_m.reallocBuffer(size);
+#else
+ Kokkos::realloc(buffer_m, size);
+#endif
+ }
+ void resetReadWritePos() {
+ readpos_m = 0;
+ writepos_m = 0;
+ }
void resetWritePos() { writepos_m = 0; }
void resetReadPos() { readpos_m = 0; }
@@ -97,7 +105,7 @@ namespace ippl {
//! read position for deserialization
size_type readpos_m;
//! serialized data
- buffer_type buffer_m;
+ BufferType buffer_m;
};
} // namespace detail
} // namespace ippl
diff --git a/src/Communicate/Archive.hpp b/src/Communicate/Archive.hpp
index f1d097a69..1135c8157 100644
--- a/src/Communicate/Archive.hpp
+++ b/src/Communicate/Archive.hpp
@@ -4,99 +4,116 @@
//
#include
+#include "Utility/Logging.h"
+
#include "Archive.h"
namespace ippl {
namespace detail {
- template
- Archive::Archive(size_type size)
+ template
+ Archive::Archive(size_type size)
: writepos_m(0)
, readpos_m(0)
, buffer_m("buffer", size) {}
- template
+ // -----------------------------------
+ // Scalar serialize
+ template
template
- void Archive::serialize(const Kokkos::View& view,
- size_type nsends) {
- using exec_space = typename Kokkos::View::execution_space;
- using policy_type = Kokkos::RangePolicy;
-
- size_t size = sizeof(T);
- Kokkos::parallel_for(
- "Archive::serialize()", policy_type(0, nsends),
- KOKKOS_CLASS_LAMBDA(const size_type i) {
- std::memcpy(buffer_m.data() + i * size + writepos_m, view.data() + i, size);
- });
+ void Archive::serialize(const Kokkos::View& view,
+ size_type nsends) {
+ constexpr size_t size = sizeof(T);
+ char* dst_ptr = (char*)(buffer_m.data()) + writepos_m;
+ char* src_ptr = (char*)(view.data());
+ assert(writepos_m + (nsends * size) <= buffer_m.size());
+ // construct temp views of the src/dst buffers of the correct size (bytes)
+ Kokkos::View src_view(src_ptr, size * nsends);
+ Kokkos::View dst_view(dst_ptr, size * nsends);
+ Kokkos::deep_copy(dst_view, src_view);
Kokkos::fence();
- writepos_m += size * nsends;
+ SPDLOG_TRACE("Incrementing writepos: {}, from {}, to {}", (void*)dst_view.data(),
+ writepos_m, writepos_m + (nsends * size));
+ writepos_m += (nsends * size);
}
- template
+ // -----------------------------------
+ // Vector serialize
+ template
template
- void Archive::serialize(
- const Kokkos::View*, ViewArgs...>& view, size_type nsends) {
- using exec_space = typename Kokkos::View::execution_space;
-
- size_t size = sizeof(T);
- // Default index type for range policies is int64,
+ void Archive::serialize(const Kokkos::View*, ViewArgs...>& view,
+ size_type nsends) {
+ constexpr size_t size = sizeof(T);
+ char* dst_ptr = (char*)(buffer_m.data());
+ ippl::Vector* src_ptr = view.data();
+ auto wp = writepos_m;
+ // The Kokkos range policies expect int64
// so we have to explicitly specify size_type (uint64)
+ using exec_space = typename Kokkos::View::execution_space;
using mdrange_t =
Kokkos::MDRangePolicy, Kokkos::IndexType, exec_space>;
Kokkos::parallel_for(
- "Archive::serialize()",
- // The constructor for Kokkos range policies always
- // expects int64 regardless of index type provided
- // by template parameters, so the typecast is necessary
- // to avoid compiler warnings
- mdrange_t({0, 0}, {(long int)nsends, Dim}),
- KOKKOS_CLASS_LAMBDA(const size_type i, const size_t d) {
- std::memcpy(buffer_m.data() + (Dim * i + d) * size + writepos_m,
- &(*(view.data() + i))[d], size);
+ "Archive::serialize()", mdrange_t({0, 0}, {(long int)nsends, Dim}),
+ KOKKOS_LAMBDA(const size_type i, const size_t d) {
+ std::memcpy(dst_ptr + (Dim * i + d) * size + wp, &(*(src_ptr + i))[d], size);
});
+
Kokkos::fence();
writepos_m += Dim * size * nsends;
}
- template
+ // -----------------------------------
+ // Scalar Deserialize
+ template
template
- void Archive::deserialize(Kokkos::View& view,
- size_type nrecvs) {
- using exec_space = typename Kokkos::View::execution_space;
- using policy_type = Kokkos::RangePolicy;
-
- size_t size = sizeof(T);
+ void Archive::deserialize(Kokkos::View& view,
+ size_type nrecvs) {
+ // if we have to enlarge the destination view
if (nrecvs > view.extent(0)) {
+ SPDLOG_WARN("DeSerialization realloc: {}, from {}, to {}", (void*)view.data(),
+ view.extent(0), nrecvs);
Kokkos::realloc(view, nrecvs);
}
- Kokkos::parallel_for(
- "Archive::deserialize()", policy_type(0, nrecvs),
- KOKKOS_CLASS_LAMBDA(const size_type i) {
- std::memcpy(view.data() + i, buffer_m.data() + i * size + readpos_m, size);
- });
- // Wait for deserialization kernel to complete
- // (as with serialization kernels)
+ //
+ constexpr size_t size = sizeof(T);
+ char* src_ptr = (char*)(buffer_m.data()) + readpos_m;
+ char* dst_ptr = (char*)(view.data());
+ assert(readpos_m + (nrecvs * size) <= buffer_m.size());
+ // construct temp views of the src/dst buffers of the correct size (bytes)
+ Kokkos::View src_view(src_ptr, size * nrecvs);
+ Kokkos::View dst_view(dst_ptr, size * nrecvs);
+ Kokkos::deep_copy(dst_view, src_view);
Kokkos::fence();
- readpos_m += size * nrecvs;
+ SPDLOG_TRACE("Incrementing readpos: {}, from {}, to {}", (void*)buffer_m.data(),
+ readpos_m, readpos_m + (nrecvs * size));
+ readpos_m += (nrecvs * size);
}
- template
+ // -----------------------------------
+ // Vecto Deserialize
+ template
template
- void Archive::deserialize(Kokkos::View*, ViewArgs...>& view,
- size_type nrecvs) {
- using exec_space = typename Kokkos::View::execution_space;
-
- size_t size = sizeof(T);
+ void Archive::deserialize(Kokkos::View*, ViewArgs...>& view,
+ size_type nrecvs) //
+ {
+ // if we have to enlarge the destination view
if (nrecvs > view.extent(0)) {
+ SPDLOG_WARN("DeSerialization realloc: {}, from {}, to {}", (void*)view.data(),
+ view.extent(0), nrecvs);
Kokkos::realloc(view, nrecvs);
}
+ //
+ constexpr size_t size = sizeof(T);
+ char* src_ptr = (char*)(buffer_m.data());
+ ippl::Vector* dst_ptr = view.data();
+ auto rp = readpos_m;
+ using exec_space = typename Kokkos::View::execution_space;
using mdrange_t =
Kokkos::MDRangePolicy, Kokkos::IndexType, exec_space>;
Kokkos::parallel_for(
"Archive::deserialize()", mdrange_t({0, 0}, {(long int)nrecvs, Dim}),
- KOKKOS_CLASS_LAMBDA(const size_type i, const size_t d) {
- std::memcpy(&(*(view.data() + i))[d],
- buffer_m.data() + (Dim * i + d) * size + readpos_m, size);
+ KOKKOS_LAMBDA(const size_type i, const size_t d) {
+ std::memcpy(&(*(dst_ptr + i))[d], src_ptr + (Dim * i + d) * size + rp, size);
});
Kokkos::fence();
readpos_m += Dim * size * nrecvs;
diff --git a/src/Communicate/BufferHandler.h b/src/Communicate/BufferHandler.h
index 117c12f22..a42a1ac0a 100644
--- a/src/Communicate/BufferHandler.h
+++ b/src/Communicate/BufferHandler.h
@@ -4,9 +4,34 @@
#include
#include
+#include "Types/IpplTypes.h"
+#include "Types/ViewTypes.h"
+
+#include "Utility/Logging.h"
+#include "Utility/TypeUtils.h"
+
#include "Communicate/Archive.h"
-namespace ippl {
+#ifdef IPPL_ALIGNED_COMMS_BUFFERS
+#include "Communicate/AlignedBuffer.h"
+#endif
+
+namespace ippl::comms {
+
+#ifdef IPPL_ALIGNED_COMMS_BUFFERS
+ // alignment provided by AlignedBuffer wrapper
+ template
+ using archive_buffer = detail::Archive>;
+#else
+ // default kokkos alignment
+ template
+ using communicator_storage =
+ ippl::detail::ViewType>::view_type;
+
+ template
+ using archive_buffer = detail::Archive>;
+#endif
/**
* @brief Interface for memory buffer handling.
@@ -17,11 +42,11 @@ namespace ippl {
*
* @tparam MemorySpace The memory space type used for buffer allocation.
*/
- template
+ template
class BufferHandler {
public:
- using archive_type = ippl::detail::Archive;
- using buffer_type = std::shared_ptr;
+ using archive_type = Buffer;
+ using buffer_type = std::shared_ptr;
using size_type = ippl::detail::size_type;
virtual ~BufferHandler() {}
@@ -92,11 +117,12 @@ namespace ippl {
* @tparam MemorySpace The memory space type for the buffer (e.g., `Kokkos::HostSpace`).
*/
template
- class DefaultBufferHandler : public BufferHandler {
+ class DefaultBufferHandler : public BufferHandler, MemorySpace> {
public:
- using typename BufferHandler::archive_type;
- using typename BufferHandler::buffer_type;
- using typename BufferHandler::size_type;
+ using buffer_type =
+ typename BufferHandler, MemorySpace>::buffer_type;
+ using typename BufferHandler, MemorySpace>::archive_type;
+ using typename BufferHandler, MemorySpace>::size_type;
~DefaultBufferHandler() override;
@@ -106,8 +132,8 @@ namespace ippl {
* Requests a memory buffer of the specified size, with the option
* to request a buffer larger than the base size by an overallocation
* multiplier. If a sufficiently large buffer is available, it is returned. If not, the
- * largest free buffer is reallocated. If there are no free buffers available, only then a
- * new buffer is allocated.
+ * largest free buffer is reallocated. If there are no free buffers available, only then
+ * a new buffer is allocated.
*
* @param size The required buffer size.
* @param overallocation A multiplier to allocate additional buffer space.
@@ -163,7 +189,7 @@ namespace ippl {
buffer_set_type free_buffers{
&DefaultBufferHandler::bufferSizeComparator}; ///< Set of free buffers
};
-} // namespace ippl
+} // namespace ippl::comms
#include "Communicate/BufferHandler.hpp"
diff --git a/src/Communicate/BufferHandler.hpp b/src/Communicate/BufferHandler.hpp
index c6d57f0a3..f654dea7f 100644
--- a/src/Communicate/BufferHandler.hpp
+++ b/src/Communicate/BufferHandler.hpp
@@ -1,7 +1,7 @@
#ifndef IPPL_BUFFER_HANDLER_HPP
#define IPPL_BUFFER_HANDLER_HPP
-namespace ippl {
+namespace ippl::comms {
template
DefaultBufferHandler::~DefaultBufferHandler() {}
@@ -112,6 +112,7 @@ namespace ippl {
freeSize_m -= buffer->getBufferSize();
usedSize_m += buffer->getBufferSize();
+ buffer->resetReadWritePos();
free_buffers.erase(buffer);
used_buffers.insert(buffer);
return buffer;
@@ -128,6 +129,7 @@ namespace ippl {
free_buffers.erase(buffer);
buffer->reallocBuffer(requiredSize);
+ buffer->resetReadWritePos();
used_buffers.insert(buffer);
return buffer;
@@ -140,9 +142,10 @@ namespace ippl {
usedSize_m += newBuffer->getBufferSize();
used_buffers.insert(newBuffer);
+ newBuffer->resetReadWritePos();
return newBuffer;
}
-} // namespace ippl
+} // namespace ippl::comms
#endif
diff --git a/src/Communicate/Buffers.cpp b/src/Communicate/Buffers.cpp
index 04cbb1808..026633fec 100644
--- a/src/Communicate/Buffers.cpp
+++ b/src/Communicate/Buffers.cpp
@@ -32,13 +32,13 @@ namespace ippl {
}
void Communicator::deleteAllBuffers() {
- buffer_handlers_m.forAll([](BufferHandler&& bh) {
+ buffer_handlers_m->forAll([](BufferHandler&& bh) {
bh.deleteAllBuffers();
});
}
void Communicator::freeAllBuffers() {
- buffer_handlers_m.forAll([](BufferHandler&& bh) {
+ buffer_handlers_m->forAll([](BufferHandler&& bh) {
bh.freeAllBuffers();
});
}
diff --git a/src/Communicate/Buffers.hpp b/src/Communicate/Buffers.hpp
index c08dd2a69..90b477af3 100644
--- a/src/Communicate/Buffers.hpp
+++ b/src/Communicate/Buffers.hpp
@@ -20,22 +20,33 @@
// exchanging particle data between ranks.
//
+#include "Utility/Logging.h"
+#include "Utility/TypeUtils.h"
+
namespace ippl {
namespace mpi {
- template
- Communicator::buffer_type Communicator::getBuffer(size_type size,
- double overallocation) {
- auto& buffer_handler = buffer_handlers_m.get();
+ // -----------------------------------
+ template
+ Communicator::buffer_type Communicator::getBuffer(size_type size,
+ double overallocation) {
+ using memory_space = BufferType::memory_space;
- return buffer_handler.getBuffer(size * sizeof(T),
- std::max(overallocation, defaultOveralloc_m));
- }
+ auto& buffer_handler = buffer_handlers_m->get();
- template
- void Communicator::freeBuffer(Communicator::buffer_type buffer) {
- auto& buffer_handler = buffer_handlers_m.get();
+ auto b = buffer_handler.getBuffer(size * sizeof(T),
+ std::max(overallocation, defaultOveralloc_m));
+ SPDLOG_TRACE("{}, getBuffer {}, buf, {}, size {}", (void*)this,
+ ippl::debug::print_type(), (void*)(b->getData()),
+ size * sizeof(T));
+ return b;
+ }
+ template
+ void Communicator::freeBuffer(Communicator::buffer_type buffer) {
+ using memory_space = BufferType::memory_space;
+ auto& buffer_handler = buffer_handlers_m->get();
+ SPDLOG_TRACE("freeBuffer buf, {}", (void*)(buffer->getData()));
buffer_handler.freeBuffer(buffer);
}
diff --git a/src/Communicate/CMakeLists.txt b/src/Communicate/CMakeLists.txt
index 95e7b4819..31cb70e89 100644
--- a/src/Communicate/CMakeLists.txt
+++ b/src/Communicate/CMakeLists.txt
@@ -4,5 +4,9 @@
# Adds all Communicate-related sources and headers to the IPPL library.
# -----------------------------------------------------------------------------
+if(IPPL_ENABLE_ALIGNED_COMMUNICATION_BUFFERS)
+ target_compile_definitions(ippl PUBLIC IPPL_ALIGNED_COMMS_BUFFERS)
+endif()
+
target_sources(ippl PRIVATE Communicator.cpp CommunicatorLogging.cpp Environment.cpp Buffers.cpp
Request.cpp LogEntry.cpp)
diff --git a/src/Communicate/Collectives.hpp b/src/Communicate/Collectives.hpp
index d273242ab..0ed2aa69b 100644
--- a/src/Communicate/Collectives.hpp
+++ b/src/Communicate/Collectives.hpp
@@ -2,62 +2,60 @@
#include "Communicate/Operations.h"
-namespace ippl {
- namespace mpi {
- template
- void Communicator::gather(const T* input, T* output, int count, int root) {
- MPI_Datatype type = get_mpi_datatype(*input);
+namespace ippl::mpi {
+ template
+ void Communicator::gather(const T* input, T* output, int count, int root) {
+ MPI_Datatype type = get_mpi_datatype(*input);
- MPI_Gather(const_cast(input), count, type, output, count, type, root, *comm_m);
- }
+ MPI_Gather(const_cast(input), count, type, output, count, type, root, *comm_m);
+ }
- template
- void Communicator::scatter(const T* input, T* output, int count, int root) {
- MPI_Datatype type = get_mpi_datatype(*input);
+ template
+ void Communicator::scatter(const T* input, T* output, int count, int root) {
+ MPI_Datatype type = get_mpi_datatype(*input);
- MPI_Scatter(const_cast(input), count, type, output, count, type, root, *comm_m);
- }
+ MPI_Scatter(const_cast(input), count, type, output, count, type, root, *comm_m);
+ }
- template
- void Communicator::reduce(const T* input, T* output, int count, Op, int root) {
- MPI_Datatype type = get_mpi_datatype(*input);
+ template
+ void Communicator::reduce(const T* input, T* output, int count, Op, int root) {
+ MPI_Datatype type = get_mpi_datatype(*input);
- MPI_Op mpiOp = get_mpi_op();
+ MPI_Op mpiOp = get_mpi_op();
- MPI_Reduce(const_cast(input), output, count, type, mpiOp, root, *comm_m);
- }
+ MPI_Reduce(const_cast(input), output, count, type, mpiOp, root, *comm_m);
+ }
- template
- void Communicator::reduce(const T& input, T& output, int count, Op op, int root) {
- reduce(&input, &output, count, op, root);
- }
+ template
+ void Communicator::reduce(const T& input, T& output, int count, Op op, int root) {
+ reduce(&input, &output, count, op, root);
+ }
- template
- void Communicator::allreduce(const T* input, T* output, int count, Op) {
- MPI_Datatype type = get_mpi_datatype(*input);
+ template
+ void Communicator::allreduce(const T* input, T* output, int count, Op) {
+ MPI_Datatype type = get_mpi_datatype(*input);
- MPI_Op mpiOp = get_mpi_op();
+ MPI_Op mpiOp = get_mpi_op();
- MPI_Allreduce(const_cast(input), output, count, type, mpiOp, *comm_m);
- }
+ MPI_Allreduce(const_cast(input), output, count, type, mpiOp, *comm_m);
+ }
- template
- void Communicator::allreduce(const T& input, T& output, int count, Op op) {
- allreduce(&input, &output, count, op);
- }
+ template
+ void Communicator::allreduce(const T& input, T& output, int count, Op op) {
+ allreduce(&input, &output, count, op);
+ }
- template
- void Communicator::allreduce(T* inout, int count, Op) {
- MPI_Datatype type = get_mpi_datatype(*inout);
+ template
+ void Communicator::allreduce(T* inout, int count, Op) {
+ MPI_Datatype type = get_mpi_datatype(*inout);
- MPI_Op mpiOp = get_mpi_op();
+ MPI_Op mpiOp = get_mpi_op();
- MPI_Allreduce(MPI_IN_PLACE, inout, count, type, mpiOp, *comm_m);
- }
+ MPI_Allreduce(MPI_IN_PLACE, inout, count, type, mpiOp, *comm_m);
+ }
- template
- void Communicator::allreduce(T& inout, int count, Op op) {
- allreduce(&inout, count, op);
- }
- } // namespace mpi
-} // namespace ippl
+ template
+ void Communicator::allreduce(T& inout, int count, Op op) {
+ allreduce(&inout, count, op);
+ }
+} // namespace ippl::mpi
diff --git a/src/Communicate/Communicator.cpp b/src/Communicate/Communicator.cpp
index 0ade37738..81376de62 100644
--- a/src/Communicate/Communicator.cpp
+++ b/src/Communicate/Communicator.cpp
@@ -1,42 +1,56 @@
#include "Communicate/Communicator.h"
-namespace ippl {
- namespace mpi {
-
- Communicator::Communicator()
- : comm_m(new MPI_Comm(MPI_COMM_WORLD)) {
- MPI_Comm_rank(*comm_m, &rank_m);
- MPI_Comm_size(*comm_m, &size_m);
- }
-
- Communicator::Communicator(MPI_Comm comm) {
- comm_m = std::make_shared(comm);
- MPI_Comm_rank(*comm_m, &rank_m);
- MPI_Comm_size(*comm_m, &size_m);
- }
-
- Communicator& Communicator::operator=(MPI_Comm comm) {
- comm_m = std::make_shared(comm);
- MPI_Comm_rank(*comm_m, &rank_m);
- MPI_Comm_size(*comm_m, &size_m);
- return *this;
- }
-
- Communicator Communicator::Communicator::split(int color, int key) const {
- MPI_Comm newcomm;
- MPI_Comm_split(*comm_m, color, key, &newcomm);
- return Communicator(newcomm);
- }
-
- void Communicator::probe(int source, int tag, Status& status) {
- MPI_Probe(source, tag, *comm_m, status);
- }
-
- bool Communicator::iprobe(int source, int tag, Status& status) {
- int flag = 0;
- MPI_Iprobe(source, tag, *comm_m, &flag, status);
- return (flag != 0);
+namespace ippl::mpi {
+
+ Communicator::Communicator()
+ : buffer_handlers_m(get_buffer_handler_instance())
+ , comm_m(new MPI_Comm(MPI_COMM_WORLD)) {
+ MPI_Comm_rank(*comm_m, &rank_m);
+ MPI_Comm_size(*comm_m, &size_m);
+ }
+
+ Communicator::Communicator(MPI_Comm comm) {
+ buffer_handlers_m = get_buffer_handler_instance();
+ comm_m = std::make_shared(comm);
+ MPI_Comm_rank(*comm_m, &rank_m);
+ MPI_Comm_size(*comm_m, &size_m);
+ }
+
+ Communicator& Communicator::operator=(MPI_Comm comm) {
+ buffer_handlers_m = get_buffer_handler_instance();
+ comm_m = std::make_shared(comm);
+ MPI_Comm_rank(*comm_m, &rank_m);
+ MPI_Comm_size(*comm_m, &size_m);
+ return *this;
+ }
+
+ Communicator Communicator::Communicator::split(int color, int key) const {
+ MPI_Comm newcomm;
+ MPI_Comm_split(*comm_m, color, key, &newcomm);
+ return Communicator(newcomm);
+ }
+
+ void Communicator::probe(int source, int tag, Status& status) {
+ MPI_Probe(source, tag, *comm_m, status);
+ }
+
+ bool Communicator::iprobe(int source, int tag, Status& status) {
+ int flag = 0;
+ MPI_Iprobe(source, tag, *comm_m, &flag, status);
+ return (flag != 0);
+ }
+
+ // ---------------------------------------
+ // singleton access to buffer manager
+ // ---------------------------------------
+ std::shared_ptr Communicator::get_buffer_handler_instance() {
+ static std::shared_ptr comm_buff_handler_ptr{nullptr};
+ if (comm_buff_handler_ptr == nullptr) {
+ comm_buff_handler_ptr = std::make_shared();
+ SPDLOG_DEBUG("BufferHandler new: {}",
+ ippl::debug::print_type());
}
- } // namespace mpi
-} // namespace ippl
+ return comm_buff_handler_ptr;
+ }
+} // namespace ippl::mpi
diff --git a/src/Communicate/Communicator.h b/src/Communicate/Communicator.h
index 63886368a..114b0b4a8 100644
--- a/src/Communicate/Communicator.h
+++ b/src/Communicate/Communicator.h
@@ -5,215 +5,218 @@
#ifndef IPPL_MPI_COMMUNICATOR_H
#define IPPL_MPI_COMMUNICATOR_H
-#include
-#include
-
-#include "Communicate/BufferHandler.h"
-#include "Communicate/LoggingBufferHandler.h"
-#include "Communicate/Request.h"
-#include "Communicate/Status.h"
-
-////////////////////////////////////////////////
-// For message size check; see below
#include
#include
+#include
+#include
#include "Utility/TypeUtils.h"
#include "Communicate/Archive.h"
+#include "Communicate/BufferHandler.h"
+#include "Communicate/LogEntry.h"
+#include "Communicate/Request.h"
+#include "Communicate/Status.h"
#include "Communicate/TagMaker.h"
#include "Communicate/Tags.h"
-////////////////////////////////////////////////////
-namespace ippl {
- namespace mpi {
-
- class Communicator : public TagMaker {
- public:
- Communicator();
+////////////////////////////////////////////////////
- Communicator(MPI_Comm comm);
+namespace ippl::mpi {
- Communicator& operator=(MPI_Comm comm);
+ class Communicator : public TagMaker {
+ public:
+ Communicator();
- ~Communicator() = default;
+ Communicator(MPI_Comm comm);
- Communicator split(int color, int key) const;
+ Communicator& operator=(MPI_Comm comm);
- operator const MPI_Comm&() const noexcept { return *comm_m; }
+ ~Communicator() = default;
- int size() const noexcept { return size_m; }
+ Communicator split(int color, int key) const;
- int rank() const noexcept { return rank_m; }
+ operator const MPI_Comm&() const noexcept { return *comm_m; }
- void barrier() { MPI_Barrier(*comm_m); }
+ int size() const noexcept { return size_m; }
- void abort(int errorcode = -1) { MPI_Abort(*comm_m, errorcode); }
+ int rank() const noexcept { return rank_m; }
- /*
- * Blocking point-to-point communication
- *
- */
+ void barrier() { MPI_Barrier(*comm_m); }
- template
- void send(const T& buffer, int count, int dest, int tag);
+ void abort(int errorcode = -1) { MPI_Abort(*comm_m, errorcode); }
- template
- void send(const T* buffer, int count, int dest, int tag);
+ /*
+ * Blocking point-to-point communication
+ *
+ */
- template
- void recv(T& output, int count, int source, int tag, Status& status);
+ template
+ void send(const T& buffer, int count, int dest, int tag);
- template
- void recv(T* output, int count, int source, int tag, Status& status);
+ template
+ void send(const T* buffer, int count, int dest, int tag);
- void probe(int source, int tag, Status& status);
+ template
+ void recv(T& output, int count, int source, int tag, Status& status);
- /*
- * Non-blocking point-to-point communication
- *
- */
+ template
+ void recv(T* output, int count, int source, int tag, Status& status);
- template
- void isend(const T& buffer, int count, int dest, int tag, Request& request);
+ void probe(int source, int tag, Status& status);
- template
- void isend(const T* buffer, int count, int dest, int tag, Request& request);
+ /*
+ * Non-blocking point-to-point communication
+ *
+ */
- template
- void irecv(T& buffer, int count, int source, int tag, Request& request);
+ template
+ void isend(const T& buffer, int count, int dest, int tag, Request& request);
- template
- void irecv(T* buffer, int count, int source, int tag, Request& request);
+ template
+ void isend(const T* buffer, int count, int dest, int tag, Request& request);
- bool iprobe(int source, int tag, Status& status);
+ template
+ void irecv(T& buffer, int count, int source, int tag, Request& request);
- /*
- * Collective communication
- */
+ template
+ void irecv(T* buffer, int count, int source, int tag, Request& request);
- /* Gather the data in the given source container from all other nodes to a
- * specific node (default: 0).
- */
- template
- void gather(const T* input, T* output, int count, int root = 0);
+ bool iprobe(int source, int tag, Status& status);
- /* Scatter the data from all other nodes to a
- * specific node (default: 0).
- */
- template
- void scatter(const T* input, T* output, int count, int root = 0);
+ /*
+ * Collective communication
+ */
- /* Reduce data coming from all nodes to a specific node
- * (default: 0). Apply certain operation
- *
- */
- template
- void reduce(const T* input, T* output, int count, Op op, int root = 0);
+ /* Gather the data in the given source container from all other nodes to a
+ * specific node (default: 0).
+ */
+ template
+ void gather(const T* input, T* output, int count, int root = 0);
- template
- void reduce(const T& input, T& output, int count, Op op, int root = 0);
+ /* Scatter the data from all other nodes to a
+ * specific node (default: 0).
+ */
+ template
+ void scatter(const T* input, T* output, int count, int root = 0);
- template
- void allreduce(const T* input, T* output, int count, Op op);
+ /* Reduce data coming from all nodes to a specific node
+ * (default: 0). Apply certain operation
+ *
+ */
+ template
+ void reduce(const T* input, T* output, int count, Op op, int root = 0);
- template
- void allreduce(const T& input, T& output, int count, Op op);
+ template
+ void reduce(const T& input, T& output, int count, Op op, int root = 0);
- template
- void allreduce(T* inout, int count, Op op);
+ template
+ void allreduce(const T* input, T* output, int count, Op op);
- template
- void allreduce(T& inout, int count, Op op);
+ template
+ void allreduce(const T& input, T& output, int count, Op op);
- /////////////////////////////////////////////////////////////////////////////////////
- template
- using archive_type = detail::Archive;
+ template
+ void allreduce(T* inout, int count, Op op);
- template
- using buffer_type = std::shared_ptr>;
+ template
+ void allreduce(T& inout, int count, Op op);
- private:
- template
- using buffer_container_type = LoggingBufferHandler;
+ private:
+ template
+ using buffer_container_type = comms::DefaultBufferHandler;
- using buffer_handler_type =
- typename detail::ContainerForAllSpaces::type;
+ using buffer_handler_type =
+ typename detail::ContainerForAllSpaces::type;
- public:
- using size_type = detail::size_type;
- double getDefaultOverallocation() const { return defaultOveralloc_m; }
- void setDefaultOverallocation(double factor);
+ public:
+ template
+ using buffer_type = buffer_container_type::buffer_type;
- template
- buffer_type getBuffer(size_type size, double overallocation = 1.0);
+ public:
+ using size_type = detail::size_type;
+ double getDefaultOverallocation() const { return defaultOveralloc_m; }
+ void setDefaultOverallocation(double factor);
- void deleteAllBuffers();
- void freeAllBuffers();
+ template
+ buffer_type getBuffer(size_type size, double overallocation = 1.0);
- template
- void freeBuffer(buffer_type buffer);
+ void deleteAllBuffers();
+ void freeAllBuffers();
- const MPI_Comm& getCommunicator() const noexcept { return *comm_m; }
+ template
+ void freeBuffer(buffer_type buffer);
- template
- void recv(int src, int tag, Buffer& buffer, Archive& ar, size_type msize,
- size_type nrecvs) {
- // Temporary fix. MPI communication seems to have problems when the
- // count argument exceeds the range of int, so large messages should
- // be split into smaller messages
- if (msize > INT_MAX) {
- std::cerr << "Message size exceeds range of int" << std::endl;
- this->abort();
- }
- MPI_Status status;
- MPI_Recv(ar.getBuffer(), msize, MPI_BYTE, src, tag, *comm_m, &status);
+ const MPI_Comm& getCommunicator() const noexcept { return *comm_m; }
- buffer.deserialize(ar, nrecvs);
+ template
+ void recv(int src, int tag, Buffer& buffer, Archive& ar, size_type msize,
+ size_type nrecvs) {
+ // Temporary fix. MPI communication seems to have problems when the
+ // count argument exceeds the range of int, so large messages should
+ // be split into smaller messages
+ if (msize > INT_MAX) {
+ std::cerr << "Message size exceeds range of int" << std::endl;
+ this->abort();
}
-
- template
- void isend(int dest, int tag, Buffer& buffer, Archive& ar, MPI_Request& request,
- size_type nsends) {
- if (ar.getSize() > INT_MAX) {
- std::cerr << "Message size exceeds range of int" << std::endl;
- this->abort();
- }
- buffer.serialize(ar, nsends);
- MPI_Isend(ar.getBuffer(), ar.getSize(), MPI_BYTE, dest, tag, *comm_m, &request);
+ MPI_Status status;
+ MPI_Recv(ar.getData(), msize, MPI_BYTE, src, tag, *comm_m, &status);
+ SPDLOG_DEBUG("Recv buf {}, size {:04}, src {:02}, tag {:04}", (void*)(ar.getData()),
+ msize, src, tag);
+ buffer.deserialize(ar, nrecvs);
+ }
+
+ template
+ void isend(int dest, int tag, Buffer& buffer, Archive& ar, MPI_Request& request,
+ size_type nsends) //
+ {
+ if (ar.getSize() > INT_MAX) {
+ std::cerr << "Message size exceeds range of int" << std::endl;
+ this->abort();
}
-
- template
- void irecv(int src, int tag, Archive& ar, MPI_Request& request, size_type msize) {
- if (msize > INT_MAX) {
- std::cerr << "Message size exceeds range of int" << std::endl;
- this->abort();
- }
- MPI_Irecv(ar.getBuffer(), msize, MPI_BYTE, src, tag, *comm_m, &request);
+ buffer.serialize(ar, nsends);
+ MPI_Isend(ar.getData(), ar.getSize(), MPI_BYTE, dest, tag, *comm_m, &request);
+ SPDLOG_DEBUG("Isend buf {}, size {:04}, dst {:02}, tag {:04}, req {}",
+ (void*)(ar.getData()), ar.getSize(), dest, tag,
+ static_cast(request));
+ }
+
+ template
+ void irecv(int src, int tag, Archive& ar, MPI_Request& request, size_type msize) {
+ if (msize > INT_MAX) {
+ std::cerr << "Message size exceeds range of int" << std::endl;
+ this->abort();
}
- void printLogs(const std::string& filename);
+ MPI_Irecv(ar.getData(), msize, MPI_BYTE, src, tag, *comm_m, &request);
+ SPDLOG_DEBUG("Irecv buf {}, size {:04}, src {:02}, tag {:04}, req {}",
+ (void*)(ar.getData()), msize, src, tag, static_cast(request));
+ }
+
+ void printLogs(const std::string& filename);
+
+ private:
+ std::vector gatherLocalLogs();
+ void sendLogsToRank0(const std::vector& localLogs);
+ std::vector gatherLogsFromAllRanks(const std::vector& localLogs);
+ void writeLogsToFile(const std::vector& allLogs, const std::string& filename);
- private:
- std::vector gatherLocalLogs();
- void sendLogsToRank0(const std::vector& localLogs);
- std::vector