diff --git a/CMakeLists.txt b/CMakeLists.txt index 737c30386..164b7d18d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -44,6 +44,8 @@ option(IPPL_ENABLE_TESTS "Build integration tests in test/ directory" OFF) option(IPPL_ENABLE_COVERAGE "Enable code coverage" OFF) option(IPPL_ENABLE_NSYS_PROFILER "Enable Nvidia Nsys Profiler" OFF) option(IPPL_ENABLE_SANITIZER "Enable sanitizer(s)" OFF) +option(IPPL_ENABLE_ALIGNED_COMMUNICATION_BUFFERS "Enable aligned memory buffersvfor communication" + OFF) option(IPPL_USE_ALTERNATIVE_VARIANT "Use modified variant implementation (required for CUDA 12.2 + GCC 12.3.0)" OFF) option(IPPL_USE_STANDARD_FOLDERS "Put all generated binaries in bin/lib folders" OFF) @@ -51,6 +53,19 @@ option(IPPL_MARK_FAILING_TESTS "Prefix names of tests that are known to fail with 'known_fail' for filtering with ctest" OFF) option(IPPL_ENABLE_SCRIPTS "Generate job script templates for some benchmarks/tests" OFF) +option(IPPL_GPU_AWARE_MPI "Allow MPI to/from from device memory buffers" OFF) +# logging options +set(IPPL_LOG_LEVEL "off" CACHE STRING "Enable logging for messages of >=level") +set_property( + CACHE IPPL_LOG_LEVEL + PROPERTY STRINGS + off + trace + debug + info + warn + error + critical) # "Build IPPL as a shared library (ON) or static library (OFF)" OFF) if(IPPL_DYL) # set(BUILD_SHARED_LIBS ON CACHE BOOL "" FORCE) message(WARNING "IPPL_DYL is deprecated; use @@ -99,21 +114,6 @@ if(DEFINED USE_ALTERNATIVE_VARIANT) set(IPPL_USE_ALTERNATIVE_VARIANT ${USE_ALTERNATIVE_VARIANT} CACHE BOOL "" FORCE) endif() -# ------------------------------------------------------------------------------ -# Debug: This tells the compiler to replace occurrences of ${} with in debug info and error -# messages. -# ------------------------------------------------------------------------------ -add_compile_options( - $<$:-ffile-prefix-map=${CMAKE_SOURCE_DIR}=.> - $<$:-ffile-prefix-map=${CMAKE_SOURCE_DIR}=.>) - -if(DEFINED FETCHCONTENT_BASE_DIR) - add_compile_options( - $<$:-ffile-prefix-map=${FETCHCONTENT_BASE_DIR}=.3p> - $<$:-ffile-prefix-map=${FETCHCONTENT_BASE_DIR}=.3p> - ) -endif() - # ------------------------------------------------------------------------------ # Define sources for project # ------------------------------------------------------------------------------ diff --git a/alpine/CMakeLists.txt b/alpine/CMakeLists.txt index 4db043d4f..ff9856ba3 100644 --- a/alpine/CMakeLists.txt +++ b/alpine/CMakeLists.txt @@ -16,9 +16,13 @@ endfunction() if(IPPL_ENABLE_TESTS) # cmake-format: off + # Landau will write a CSV file to the data directory that we will validate later + make_directory("${PROJECT_BINARY_DIR}/alpine/data") + # Add the test add_ippl_integration_test(LandauDamping - ARGS 16 16 16 10000000 10 FFT 0.01 LeapFrog --overallocate 2.0 --info 10 - LABELS alpine integration) + ARGS "16" "16" "16" "10000000" "25" "FFT" "0.01" "LeapFrog" "--overallocate" "2.0" "--info" "10" + LABELS alpine integration + WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/alpine/") # cmake-format: on else() add_alpine_example(LandauDamping) @@ -26,3 +30,5 @@ endif() add_alpine_example(PenningTrap) add_alpine_example(BumponTailInstability) + +add_subdirectory(validation) diff --git a/alpine/LandauDamping.cpp b/alpine/LandauDamping.cpp index 718672513..dc8cadfed 100644 --- a/alpine/LandauDamping.cpp +++ b/alpine/LandauDamping.cpp @@ -37,17 +37,22 @@ const char* TestName = "LandauDamping"; #include "Manager/datatypes.h" #include "Utility/IpplTimings.h" +#include "Utility/Logging.h" #include "LandauDampingManager.h" #include "Manager/PicManager.h" int main(int argc, char* argv[]) { +#if defined(SPDLOG_ACTIVE_LEVEL) && (SPDLOG_ACTIVE_LEVEL != SPDLOG_LEVEL_OFF) + spdlog::set_pattern("[%^%-8l%$]%t| %v"); + spdlog::set_level(spdlog::level::trace); +#endif ippl::initialize(argc, argv); { Inform msg(TestName); Inform msg2all(TestName, INFORM_ALL_NODES); - static IpplTimings::TimerRef mainTimer = IpplTimings::getTimer("total"); + static IpplTimings::TimerRef mainTimer = IpplTimings::getTimer("total"); static IpplTimings::TimerRef initializeTimer = IpplTimings::getTimer("initialize"); IpplTimings::startTimer(mainTimer); IpplTimings::startTimer(initializeTimer); @@ -82,7 +87,7 @@ int main(int argc, char* argv[]) { manager.pre_run(); IpplTimings::stopTimer(initializeTimer); - + manager.setTime(0.0); msg << "Starting iterations ..." << endl; diff --git a/alpine/validation/CMakeLists.txt b/alpine/validation/CMakeLists.txt new file mode 100644 index 000000000..5236a9ba0 --- /dev/null +++ b/alpine/validation/CMakeLists.txt @@ -0,0 +1,23 @@ +# ----------------------------------------------------------------------------- +# validation check for LandauDamping test +# ----------------------------------------------------------------------------- + +if(BUILD_TESTING) + # Build the C++ correctness validation test + add_executable(LandauDampingCorrectness LandauDampingCorrectness.cpp) + + message("Adding test: LandauDampingCorrectness") + + # command line params are : output, reference, tolerance + add_test( + NAME LandauDampingCorrectnessValidation + COMMAND LandauDampingCorrectness "${PROJECT_BINARY_DIR}/alpine/data/FieldLandau_2_manager.csv" + "${PROJECT_SOURCE_DIR}/alpine/validation/FieldLandau_valid_result.csv" "4E-1" + WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/alpine/") + + set_tests_properties( + LandauDampingCorrectnessValidation + PROPERTIES LABELS "validation;alpine" TIMEOUT 30 + # This test should only run if the LandauDamping integration test passed + DEPENDS LandauDamping) +endif() diff --git a/alpine/validation/FieldLandau_valid_result.csv b/alpine/validation/FieldLandau_valid_result.csv new file mode 100644 index 000000000..9eb047189 --- /dev/null +++ b/alpine/validation/FieldLandau_valid_result.csv @@ -0,0 +1,27 @@ +time, Ex_field_energy, Ex_max_norm +0.0000000000000000e+00 9.6742715140721582e+00 1.0825163128256524e-01 +5.0000000000000003e-02 9.6368948973459112e+00 1.0808994906148438e-01 +1.0000000000000001e-01 9.5415075224669970e+00 1.0740779743767520e-01 +1.5000000000000002e-01 9.3882773328014082e+00 1.0630178072727425e-01 +2.0000000000000001e-01 9.1791183371635334e+00 1.0482602738140372e-01 +2.5000000000000000e-01 8.9172134334449193e+00 1.0306560288960222e-01 +2.9999999999999999e-01 8.6061662103776140e+00 1.0113820775231776e-01 +3.4999999999999998e-01 8.2505504653792148e+00 9.8691447720593309e-02 +3.9999999999999997e-01 7.8550417931569703e+00 9.5598706257118735e-02 +4.4999999999999996e-01 7.4249700904468465e+00 9.2872937917417214e-02 +4.9999999999999994e-01 6.9653510910124385e+00 9.0033318617501135e-02 +5.4999999999999993e-01 6.4822476651919327e+00 8.7257335757243495e-02 +5.9999999999999998e-01 5.9816257151911403e+00 8.4644036660629890e-02 +6.5000000000000002e-01 5.4696405485153790e+00 8.2154906259950061e-02 +7.0000000000000007e-01 4.9535573676676501e+00 7.9019365369956629e-02 +7.5000000000000011e-01 4.4392121571874581e+00 7.5190519676643197e-02 +8.0000000000000016e-01 3.9321278953403862e+00 7.1054552970285123e-02 +8.5000000000000020e-01 3.4384135389096344e+00 6.6356647790906387e-02 +9.0000000000000024e-01 2.9638108812800437e+00 6.1694793644303261e-02 +9.5000000000000029e-01 2.5131142881540285e+00 5.7498634682739851e-02 +1.0000000000000002e+00 2.0909400590908556e+00 5.3113907328247389e-02 +1.0500000000000003e+00 1.7017166009804880e+00 4.8965793799131234e-02 +1.1000000000000003e+00 1.3486230947669464e+00 4.4410412852319857e-02 +1.1500000000000004e+00 1.0343505136217537e+00 3.9926242617971765e-02 +1.2000000000000004e+00 7.6105675926529781e-01 3.5819185614705762e-02 +1.2500000000000004e+00 5.3020078549605387e-01 3.1521337179656390e-02 diff --git a/alpine/validation/LandauDampingCorrectness.cpp b/alpine/validation/LandauDampingCorrectness.cpp new file mode 100644 index 000000000..065c6c477 --- /dev/null +++ b/alpine/validation/LandauDampingCorrectness.cpp @@ -0,0 +1,333 @@ +// LandauDamping Correctness Validation Test +// +// This test compares the CSV output generated by the LandauDamping simulation +// with a reference (valid) result file. All numerical values must be within +// a tolerance of 1E-7 for the test to pass. +// +// Usage: +// ./LandauDampingCorrectness [output_csv] [reference_csv] [tolerance] +// +// Where: +// output_csv : Path to the generated FieldLandau_*.csv file +// (default: data/FieldLandau_*_manager.csv - finds first match) +// reference_csv : Path to the reference file +// (default: ../../alpine/validation/FieldLandau_valid_result.csv) +// tolerance : Absolute tolerance for comparison (default: 1e-7) + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace fs = std::filesystem; + +// Structure to hold a row of CSV data +struct CSVRow { + double time; + double Ex_field_energy; + double Ex_max_norm; + + CSVRow() + : time(0.0) + , Ex_field_energy(0.0) + , Ex_max_norm(0.0) {} + + CSVRow(double t, double e, double n) + : time(t) + , Ex_field_energy(e) + , Ex_max_norm(n) {} +}; + +// Trim whitespace from string +std::string trim(const std::string& str) { + size_t first = str.find_first_not_of(" \t\r\n"); + if (first == std::string::npos) + return ""; + size_t last = str.find_last_not_of(" \t\r\n"); + return str.substr(first, (last - first + 1)); +} + +// Find first CSV file matching pattern in directory +std::string findCSVFile(const std::string& pattern) { + // If pattern is a file that exists, return it + if (fs::exists(pattern) && fs::is_regular_file(pattern)) { + return pattern; + } + + // Try to find a matching file in data/ directory + std::string dataDir = "data"; + if (fs::exists(dataDir) && fs::is_directory(dataDir)) { + for (const auto& entry : fs::directory_iterator(dataDir)) { + if (entry.is_regular_file() && entry.path().extension() == ".csv") { + std::string filename = entry.path().filename().string(); + if (filename.find("FieldLandau_") == 0 + && filename.find("_manager.csv") != std::string::npos) { + return entry.path().string(); + } + } + } + } + + return pattern; // Return original pattern if not found +} + +// Load CSV file into vector of CSVRow +bool loadCSV(const std::string& filepath, std::vector& data, bool& hasMaxNorm, + std::string& errorMsg) { + std::ifstream file(filepath); + if (!file.is_open()) { + errorMsg = "Could not open file: " + filepath; + return false; + } + + std::string line; + bool isFirstLine = true; + int lineNum = 0; + hasMaxNorm = false; + + while (std::getline(file, line)) { + lineNum++; + line = trim(line); + + // Skip empty lines + if (line.empty()) + continue; + + // Check header line + if (isFirstLine) { + isFirstLine = false; + // Check if we have Ex_max_norm column + hasMaxNorm = (line.find("Ex_max_norm") != std::string::npos); + continue; + } + + // Parse data line + std::istringstream iss(line); + double time, energy, maxNorm = 0.0; + + if (!(iss >> time >> energy)) { + errorMsg = "Failed to parse line " + std::to_string(lineNum) + ": " + line; + return false; + } + + // Try to read third column if present + if (hasMaxNorm) { + if (!(iss >> maxNorm)) { + // If header says we have max_norm but can't read it, it's an error + errorMsg = "Expected Ex_max_norm column but failed to parse at line " + + std::to_string(lineNum); + return false; + } + } + + data.emplace_back(time, energy, maxNorm); + } + + file.close(); + + if (data.empty()) { + errorMsg = "No data rows found in file: " + filepath; + return false; + } + + return true; +} + +// Compare two CSV datasets +bool compareCSVData(const std::vector& output, const std::vector& reference, + bool outputHasMaxNorm, bool refHasMaxNorm, double tolerance, + std::vector& errors) { + // Check row count + if (output.size() != reference.size()) { + std::ostringstream oss; + oss << "Row count mismatch: output has " << output.size() << " rows, reference has " + << reference.size() << " rows"; + errors.push_back(oss.str()); + return false; + } + + // Compare each row + std::vector timeErrors, energyErrors, normErrors; + double maxTimeDiff = 0.0, maxEnergyDiff = 0.0, maxNormDiff = 0.0; + + for (size_t i = 0; i < output.size(); ++i) { + // Compare time + double timeDiff = std::abs(output[i].time - reference[i].time); + if (timeDiff > tolerance) { + timeErrors.push_back(static_cast(i)); + maxTimeDiff = std::max(maxTimeDiff, timeDiff); + } + + // Compare Ex_field_energy + double energyDiff = std::abs(output[i].Ex_field_energy - reference[i].Ex_field_energy); + if (energyDiff > tolerance) { + energyErrors.push_back(static_cast(i)); + maxEnergyDiff = std::max(maxEnergyDiff, energyDiff); + } + + // Compare Ex_max_norm if both have it + if (outputHasMaxNorm && refHasMaxNorm) { + double normDiff = std::abs(output[i].Ex_max_norm - reference[i].Ex_max_norm); + if (normDiff > tolerance) { + normErrors.push_back(static_cast(i)); + maxNormDiff = std::max(maxNormDiff, normDiff); + } + } + } + + // Report errors + bool hasErrors = false; + + if (!timeErrors.empty()) { + hasErrors = true; + std::ostringstream oss; + oss << "Column 'time': " << timeErrors.size() << " value(s) outside tolerance " + << "(tolerance=" << std::scientific << tolerance << ", max_diff=" << maxTimeDiff + << ") at rows ["; + for (size_t i = 0; i < std::min(timeErrors.size(), size_t(10)); ++i) { + if (i > 0) + oss << ", "; + oss << timeErrors[i]; + } + if (timeErrors.size() > 10) + oss << ", ..."; + oss << "]"; + errors.push_back(oss.str()); + } + + if (!energyErrors.empty()) { + hasErrors = true; + std::ostringstream oss; + oss << "Column 'Ex_field_energy': " << energyErrors.size() << " value(s) outside tolerance " + << "(tolerance=" << std::scientific << tolerance << ", max_diff=" << maxEnergyDiff + << ") at rows ["; + for (size_t i = 0; i < std::min(energyErrors.size(), size_t(10)); ++i) { + if (i > 0) + oss << ", "; + oss << energyErrors[i]; + } + if (energyErrors.size() > 10) + oss << ", ..."; + oss << "]"; + errors.push_back(oss.str()); + } + + if (!normErrors.empty()) { + hasErrors = true; + std::ostringstream oss; + oss << "Column 'Ex_max_norm': " << normErrors.size() << " value(s) outside tolerance " + << "(tolerance=" << std::scientific << tolerance << ", max_diff=" << maxNormDiff + << ") at rows ["; + for (size_t i = 0; i < std::min(normErrors.size(), size_t(10)); ++i) { + if (i > 0) + oss << ", "; + oss << normErrors[i]; + } + if (normErrors.size() > 10) + oss << ", ..."; + oss << "]"; + errors.push_back(oss.str()); + } + + return !hasErrors; +} + +int main(int argc, char* argv[]) { + // Default values + std::string outputCSV = "data/FieldLandau_*_manager.csv"; + std::string referenceCSV = "../../alpine/validation/FieldLandau_valid_result.csv"; + double tolerance = 1e-7; + + // Parse command line arguments + if (argc > 1) { + if (std::string(argv[1]) == "--help" || std::string(argv[1]) == "-h") { + std::cout << "LandauDamping Correctness Validation Test\n\n" + << "Usage: " << argv[0] << " [output_csv] [reference_csv] [tolerance]\n\n" + << "Arguments:\n" + << " output_csv : Path to generated CSV file (default: " + "data/FieldLandau_*_manager.csv)\n" + << " reference_csv : Path to reference CSV file (default: " + "../../alpine/validation/FieldLandau_valid_result.csv)\n" + << " tolerance : Absolute tolerance for comparison (default: 1e-7)\n\n" + << "Exit codes:\n" + << " 0 : PASS - Output matches reference within tolerance\n" + << " 1 : FAIL - Output does not match reference\n" + << " 2 : ERROR - File I/O or parsing error\n\n" + << "Examples:\n" + << " " << argv[0] << "\n" + << " " << argv[0] << " data/FieldLandau_2_manager.csv\n" + << " " << argv[0] << " output.csv reference.csv 1e-6\n"; + return 0; + } + outputCSV = argv[1]; + } + if (argc > 2) { + referenceCSV = argv[2]; + } + if (argc > 3) { + tolerance = std::atof(argv[3]); + } + + // Find output file if pattern given + outputCSV = findCSVFile(outputCSV); + + std::cout << "LandauDamping Correctness Validation\n"; + std::cout << "=====================================\n"; + std::cout << "Output file: " << outputCSV << "\n"; + std::cout << "Reference file: " << referenceCSV << "\n"; + std::cout << "Tolerance: " << std::scientific << tolerance << "\n"; + std::cout << std::endl; + + // Load output CSV + std::vector outputData; + bool outputHasMaxNorm = false; + std::string errorMsg; + + if (!loadCSV(outputCSV, outputData, outputHasMaxNorm, errorMsg)) { + std::cerr << "✗ ERROR: Failed to load output CSV\n"; + std::cerr << " " << errorMsg << std::endl; + return 2; + } + + std::cout << "Loaded output: " << outputData.size() << " rows"; + if (outputHasMaxNorm) + std::cout << " (with Ex_max_norm column)"; + std::cout << std::endl; + + // Load reference CSV + std::vector referenceData; + bool refHasMaxNorm = false; + + if (!loadCSV(referenceCSV, referenceData, refHasMaxNorm, errorMsg)) { + std::cerr << "✗ ERROR: Failed to load reference CSV\n"; + std::cerr << " " << errorMsg << std::endl; + return 2; + } + + std::cout << "Loaded reference: " << referenceData.size() << " rows"; + if (refHasMaxNorm) + std::cout << " (with Ex_max_norm column)"; + std::cout << std::endl; + std::cout << std::endl; + + // Compare data + std::vector errors; + bool passed = compareCSVData(outputData, referenceData, outputHasMaxNorm, refHasMaxNorm, + tolerance, errors); + + if (passed) { + std::cout << "✓ PASS: Output matches reference within tolerance" << std::endl; + return 0; + } else { + std::cout << "✗ FAIL: Output does not match reference" << std::endl; + for (const auto& error : errors) { + std::cout << " - " << error << std::endl; + } + return 1; + } +} diff --git a/ci/cscs/common.yml b/ci/cscs/common.yml index 8439b1afd..aab7f3bef 100644 --- a/ci/cscs/common.yml +++ b/ci/cscs/common.yml @@ -6,7 +6,9 @@ include: - echo "CI_PROJECT_URL=$CI_PROJECT_URL" - echo "CI_COMMIT_SHORT_SHA=$CI_COMMIT_SHORT_SHA" - | - if [[ "$CI_COMMIT_REF_NAME" =~ pr([0-9]+)$ ]]; then + if [[ "$CI_COMMIT_REF_NAME" =~ gh-readonly-queue.*-pr-([0-9]+)- ]]; then + export CDASH_LABEL="Merge-Queue-PR-${BASH_REMATCH[1]}" + elif [[ "$CI_COMMIT_REF_NAME" =~ pr([0-9]+)$ ]]; then export CDASH_LABEL="PR-${BASH_REMATCH[1]}" else export CDASH_LABEL="$CI_COMMIT_REF_SLUG" diff --git a/ci/cscs/cuda/build_sm90.yml b/ci/cscs/cuda/build_sm90.yml index b54ffdbb4..6175f92c5 100644 --- a/ci/cscs/cuda/build_sm90.yml +++ b/ci/cscs/cuda/build_sm90.yml @@ -27,17 +27,18 @@ variables: - env | sort | grep CI script: - >- - ctest -V -S $CI_PROJECT_DIR/ci/cscs/dashboard-configure-build.cmake + ctest -V -S $CI_PROJECT_DIR/ci/cscs/dashboard-configure-build.cmake -DCTEST_SITE="$CTEST_SITE" - -DPRESET=alps-gh200 - -DCDASH_LABEL=$CDASH_LABEL + -DPRESET=alps-gh200 + -DCDASH_LABEL=$CDASH_LABEL -DBUILD_TYPE=$BUILD_TYPE -DBUILD_DIR=$BUILD_PATH - -DBUILD_ARCH=$BUILD_ARCH - -DKokkos_VERSION=git.4.7.02 - -DKokkos_ARCH_FLAG=Kokkos_ARCH_HOPPER90 - -DMPIEXEC_EXECUTABLE=/usr/bin/srun - -DMPIEXEC_PREFLAGS="$SRUN_FLAGS" + -DBUILD_ARCH=$BUILD_ARCH + -DHeffte_VERSION=git.v2.4.1 + -DKokkos_VERSION=git.4.7.02 + -DKokkos_ARCH_FLAG=Kokkos_ARCH_HOPPER90 + -DMPIEXEC_EXECUTABLE=/usr/bin/srun + -DMPIEXEC_PREFLAGS="$SRUN_FLAGS" -DMPIEXEC_MAX_NUMPROCS=4 - echo "Build directory size (before cleanup):" $(du -sh $BUILD_PATH | cut -f1) - find $BUILD_PATH -name \*.o -delete diff --git a/ci/cscs/dashboard-configure-build.cmake b/ci/cscs/dashboard-configure-build.cmake index b12f67b12..ed460f165 100644 --- a/ci/cscs/dashboard-configure-build.cmake +++ b/ci/cscs/dashboard-configure-build.cmake @@ -50,7 +50,12 @@ set(CTEST_CONFIGURE_COMMAND "${CTEST_CONFIGURE_COMMAND} -DMPIEXEC_EXECUTABLE=${M set(CTEST_CONFIGURE_COMMAND "${CTEST_CONFIGURE_COMMAND} -DMPIEXEC_PREFLAGS=${MPIEXEC_PREFLAGS}") set(CTEST_CONFIGURE_COMMAND "${CTEST_CONFIGURE_COMMAND} -DMPIEXEC_MAX_NUMPROCS=${MPIEXEC_MAX_NUMPROCS}") -set(CTEST_CONFIGURE_COMMAND "${CTEST_CONFIGURE_COMMAND} -DKokkos_VERSION=${Kokkos_VERSION}") +if(DEFINED Heffte_VERSION) + set(CTEST_CONFIGURE_COMMAND "${CTEST_CONFIGURE_COMMAND} -DHeffte_VERSION=${Heffte_VERSION}") +endif() +if(DEFINED Kokkos_VERSION) + set(CTEST_CONFIGURE_COMMAND "${CTEST_CONFIGURE_COMMAND} -DKokkos_VERSION=${Kokkos_VERSION}") +endif() set(CTEST_CONFIGURE_COMMAND "${CTEST_CONFIGURE_COMMAND} -D${Kokkos_ARCH_FLAG}=ON") # --- configure & build --- diff --git a/ci/cscs/openmp/build_openmp.yml b/ci/cscs/openmp/build_openmp.yml index ff4896956..4387273f2 100644 --- a/ci/cscs/openmp/build_openmp.yml +++ b/ci/cscs/openmp/build_openmp.yml @@ -28,19 +28,19 @@ variables: - export CXX=g++ script: - >- - ctest -V -S $CI_PROJECT_DIR/ci/cscs/dashboard-configure-build.cmake + ctest -V -S $CI_PROJECT_DIR/ci/cscs/dashboard-configure-build.cmake -DCTEST_SITE="$CTEST_SITE" - -DPRESET=release-testing - -DCDASH_LABEL=$CDASH_LABEL - -DBUILD_TYPE=$BUILD_TYPE + -DPRESET=release-testing + -DCDASH_LABEL=$CDASH_LABEL + -DBUILD_TYPE=$BUILD_TYPE -DBUILD_DIR=$BUILD_PATH - -DBUILD_ARCH=$BUILD_ARCH + -DBUILD_ARCH=$BUILD_ARCH -DIPPL_PLATFORMS=OPENMP -DIPPL_OPENMP_THREADS=32 - -DKokkos_VERSION=git.4.7.02 - -DKokkos_ARCH_FLAG=Kokkos_ARCH_AMD_GFX942_APU - -DMPIEXEC_EXECUTABLE=/usr/bin/srun - -DMPIEXEC_PREFLAGS="$SRUN_FLAGS" + -DKokkos_VERSION=git.4.7.02 + -DKokkos_ARCH_FLAG=Kokkos_ARCH_AMD_GFX942_APU + -DMPIEXEC_EXECUTABLE=/usr/bin/srun + -DMPIEXEC_PREFLAGS="$SRUN_FLAGS" -DMPIEXEC_MAX_NUMPROCS=4 - echo "Build directory size (before cleanup):" $(du -sh $BUILD_PATH | cut -f1) - find $BUILD_PATH -name \*.o -delete diff --git a/ci/cscs/rocm/build_rocm-6.3.yml b/ci/cscs/rocm/build_rocm-6.3.yml index d09a7de6f..4735f8fa8 100644 --- a/ci/cscs/rocm/build_rocm-6.3.yml +++ b/ci/cscs/rocm/build_rocm-6.3.yml @@ -32,17 +32,17 @@ variables: - export CMAKE_PREFIX_PATH=$HSA_PATH:$CMAKE_PREFIX_PATH script: - >- - ctest -V -S $CI_PROJECT_DIR/ci/cscs/dashboard-configure-build.cmake + ctest -V -S $CI_PROJECT_DIR/ci/cscs/dashboard-configure-build.cmake -DCTEST_SITE="$CTEST_SITE" - -DPRESET=alps-mi300 - -DCDASH_LABEL=$CDASH_LABEL + -DPRESET=alps-mi300 + -DCDASH_LABEL=$CDASH_LABEL -DBUILD_TYPE=$BUILD_TYPE -DBUILD_DIR=$BUILD_PATH - -DBUILD_ARCH=$BUILD_ARCH - -DKokkos_VERSION=git.4.7.02 - -DKokkos_ARCH_FLAG=Kokkos_ARCH_AMD_GFX942_APU - -DMPIEXEC_EXECUTABLE=/usr/bin/srun - -DMPIEXEC_PREFLAGS="$SRUN_FLAGS" + -DBUILD_ARCH=$BUILD_ARCH + -DKokkos_VERSION=git.4.7.02 + -DKokkos_ARCH_FLAG=Kokkos_ARCH_AMD_GFX942_APU + -DMPIEXEC_EXECUTABLE=/usr/bin/srun + -DMPIEXEC_PREFLAGS="$SRUN_FLAGS" -DMPIEXEC_MAX_NUMPROCS=4 # -DHeffte_ENABLE_GPU_AWARE_MPI=OFF - echo "Build directory size (before cleanup):" $(du -sh $BUILD_PATH | cut -f1) diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index 70bb39314..704d87a8b 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -44,6 +44,23 @@ if("OPENMP" IN_LIST IPPL_PLATFORMS) colour_message(STATUS ${Green} "✅ OpenMP platform requested OpenMP found ${OPENMP_VERSION}") endif() +# ------------------------------------------------------------------------------ +# spdlog logging library +# ------------------------------------------------------------------------------ +string(TOUPPER ${IPPL_LOG_LEVEL} IPPL_LOG_LEVEL_UPPERCASE) +if(NOT "${IPPL_LOG_LEVEL_UPPERCASE}" MATCHES "OFF") + find_package(spdlog REQUIRED) + colour_message(STATUS ${Green} "✅ spdlog found ${spdlog_VERSION}") +endif() + +# ------------------------------------------------------------------------------ +# fmt library (for formatting nice log messages) +# ------------------------------------------------------------------------------ +if(NOT "${IPPL_LOG_LEVEL_UPPERCASE}" MATCHES "OFF") + find_package(fmt REQUIRED) + colour_message(STATUS ${Green} "✅ fmt found ${fmt_VERSION}") +endif() + # ------------------------------------------------------------------------------ # Utility function to clear a list of vars one by one # ------------------------------------------------------------------------------ diff --git a/scripts/CMakeLists.txt b/scripts/CMakeLists.txt index 4731c2f76..cad844a9d 100644 --- a/scripts/CMakeLists.txt +++ b/scripts/CMakeLists.txt @@ -9,6 +9,20 @@ message(STATUS "IPPL_MACHINENAME for script generation is: ${IPPL_MACHINENAME}") # ------------------------------------------------------------------------------ set(IPPL_JOB_SUBMISSION_ACCOUNT "c41" CACHE STRING "Account to use for job submission templates") +# populate IPPL_SCRIPTS_UENV from environment variable UENV_MOUNT_LIST (if present) +if(DEFINED ENV{UENV_MOUNT_LIST}) + set(IPPL_SCRIPTS_UENV "$ENV{UENV_MOUNT_LIST}" + CACHE STRING "UENV to use on alps when running scripts mount") + set(IPPL_SCRIPTS_UENV_VIEW "default" CACHE STRING "View to set in uenv") + colour_message(STATUS ${LightBlue} + "IPPL_SCRIPTS_UENV set from UENV_MOUNT_LIST: ${IPPL_SCRIPTS_UENV}") +else() + # Remove any cached and normal definitions so the variable is completely unset + unset(IPPL_SCRIPTS_UENV CACHE) + unset(IPPL_SCRIPTS_UENV_VIEW CACHE) + colour_message(STATUS ${LightBlue} "No UENV detected") +endif() + # ------------------------------------------------------------------------------ # utility function to get target path/name since we can't use generator expressions to set variables # directly diff --git a/scripts/landau/strong-scaling-alps/jobscript-gh200.slurm b/scripts/landau/strong-scaling-alps/jobscript-gh200.slurm index 4c40a5bc2..338c7f682 100644 --- a/scripts/landau/strong-scaling-alps/jobscript-gh200.slurm +++ b/scripts/landau/strong-scaling-alps/jobscript-gh200.slurm @@ -12,8 +12,8 @@ #SBATCH --cpus-per-task=72 #SBATCH --exclusive -#SBATCH --uenv=/capstor/store/cscs/cscs/public/uenvs/opal-x-gh200-mpich-gcc-2025-09-28.squashfs -#SBATCH --view=develop +#SBATCH --uenv=@IPPL_SCRIPTS_UENV@ +#SBATCH --view=@IPPL_SCRIPTS_UENV_VIEW@ #SBATCH --output=landau__n_.out #SBATCH --error=landau__n_.error @@ -41,10 +41,9 @@ export JOB_DIR=job_dir export EXE=@LANDAU_BINARY@ export WRAPPER=@PROJECT_BINARY_DIR@/@JOB_SCRIPTS_PATH@/@JOB_WRAPPER_SCRIPT@ -mkdir -p data export OMP_PROC_BIND=spread export OMP_PLACES=threads -export OMP_NUM_THREADS=72 +export OMP_NUM_THREADS=2 # -------------------- # cray-mpich debug putput @@ -62,4 +61,14 @@ echo "N Particles: $nparticles" echo "Iterations: $iterations" # -------------------- -srun $WRAPPER $EXE $cubesize $cubesize $cubesize $nparticles $iterations FFT 1.0 LeapFrog --overallocate 2.0 --info 10 +mkdir -p ipc1 +pushd ipc1 +mkdir -p data +IPC_ON=1 srun $WRAPPER $EXE $cubesize $cubesize $cubesize $nparticles $iterations FFT 1.0 LeapFrog --overallocate 2.0 --info 10 +popd + +mkdir -p ipc0 +pushd ipc0 +mkdir -p data +IPC_ON=0 srun $WRAPPER $EXE $cubesize $cubesize $cubesize $nparticles $iterations FFT 1.0 LeapFrog --overallocate 2.0 --info 10 +popd diff --git a/scripts/landau/strong-scaling-alps/jobscript-mi300.slurm b/scripts/landau/strong-scaling-alps/jobscript-mi300.slurm index de29cc9dc..1bd459192 100644 --- a/scripts/landau/strong-scaling-alps/jobscript-mi300.slurm +++ b/scripts/landau/strong-scaling-alps/jobscript-mi300.slurm @@ -13,8 +13,8 @@ #SBATCH --cpus-per-task=48 #SBATCH --exclusive -#SBATCH --uenv=/capstor/scratch/cscs/biddisco/opal-x-mi300-mpich-gcc-2025-10-27.squashfs -#SBATCH --view=default +#SBATCH --uenv=@IPPL_SCRIPTS_UENV@ +#SBATCH --view=@IPPL_SCRIPTS_UENV_VIEW@ #SBATCH --output=landau__n_.out #SBATCH --error=landau__n_.error diff --git a/scripts/landau/strong-scaling-alps/wrapper-gh200.sh b/scripts/landau/strong-scaling-alps/wrapper-gh200.sh index b8a02caab..6522117af 100755 --- a/scripts/landau/strong-scaling-alps/wrapper-gh200.sh +++ b/scripts/landau/strong-scaling-alps/wrapper-gh200.sh @@ -88,9 +88,15 @@ export CUDA_VISIBLE_DEVICES=$gpu # --------------- # cray-mpich : see https://cpe.ext.hpe.com/docs/24.03/mpt/mpich/intro_mpi.html#general-mpich-environment-variables # --------------- -export MPICH_GPU_SUPPORT_ENABLED=1 +# profiling and debugging options #export MPICH_OFI_CXI_COUNTER_REPORT=1 -export MPICH_GPU_IPC_ENABLED=0 + +# gpu support +export MPICH_GPU_IPC_ENABLED=1 +export MPICH_GPU_IPC_CACHE_MAX_SIZE=256 +if [ "$IPC_ON" -eq "0" ]; then + export MPICH_GPU_IPC_ENABLED=0 +fi # --------------- # OpenMPI mappings for MCA variables diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c4778b174..51cfb879b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -35,6 +35,14 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/IpplVersions.h.in add_library(ippl) +if(NOT "${IPPL_LOG_LEVEL_UPPERCASE}" MATCHES "OFF") + target_compile_definitions(ippl PUBLIC IPPL_LOGGING_ENABLED=1) + target_compile_definitions(ippl + PUBLIC SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_${IPPL_LOG_LEVEL_UPPERCASE}) + target_link_libraries(ippl PUBLIC spdlog::spdlog $<$:ws2_32>) + target_link_libraries(ippl PUBLIC fmt::fmt) +endif() + target_compile_features(ippl PUBLIC cxx_std_20) target_compile_options( diff --git a/src/Communicate/AlignedBuffer.h b/src/Communicate/AlignedBuffer.h new file mode 100644 index 000000000..95e0cb74c --- /dev/null +++ b/src/Communicate/AlignedBuffer.h @@ -0,0 +1,168 @@ +#ifndef IPPL_ALIGNED_BUFFER_H +#define IPPL_ALIGNED_BUFFER_H + +#include "Types/ViewTypes.h" + +#include "Utility/Logging.h" +#include "Utility/TypeUtils.h" +// +#include "Communicate/Archive.h" + +namespace ippl::comms { + +#define DEFAULT_BUFFER_ALIGNMENT 1024 + // Here's a simple class that provides an aligned buffer, by default on the host + // but we can specialize the constructor/destructor for other memory spaces + template + struct AlignedBuffer { + using memory_space = MemorySpace; + void* ptrOriginal{nullptr}; + void* ptrAligned{nullptr}; + detail::size_type space{0}; + // + AlignedBuffer() {} + // + AlignedBuffer& operator=(AlignedBuffer&& other) { + ptrOriginal = other.ptrOriginal; + ptrAligned = other.ptrAligned; + space = other.space; + other.ptrOriginal = nullptr; + other.ptrAligned = nullptr; + other.space = 0; + return *this; + } + // + AlignedBuffer(std::size_t size) { + ptrOriginal = std::aligned_alloc(DEFAULT_BUFFER_ALIGNMENT, size); + ptrAligned = ptrOriginal; + space = size; + SPDLOG_TRACE("AlignedBuffer: original {}, aligned {}, size {}, space {}", + (void*)(ptrOriginal), (void*)(ptrAligned), size, space); + // sanity check should always be true when std::align used + assert(space >= size); + } + // + ~AlignedBuffer() { + if (ptrOriginal) { + SPDLOG_DEBUG("Destroying host buffer {}", ptrOriginal); + std::free(ptrOriginal); + } + } + }; + + // --------------------------------------------------------------------- +#if defined(KOKKOS_ENABLE_HIP) || defined(KOKKOS_ENABLE_CUDA) + // make number a multiple of the alignment + inline std::int64_t to_multiple(std::int64_t num) { + return ((2 * num + (DEFAULT_BUFFER_ALIGNMENT - 1)) & (-DEFAULT_BUFFER_ALIGNMENT)); + } +#endif + + // --------------------------------------------------------------------- +#ifdef KOKKOS_ENABLE_CUDA + // Specialize buffer allocation/free for cuda + template <> + inline AlignedBuffer::AlignedBuffer(std::size_t size) { + void* original; + space = to_multiple(size); + cudaMalloc(&original, space); + if (!original) { + throw std::runtime_error("Error allocating cuda memory in AlignedBuffer"); + } + ptrOriginal = original; + ptrAligned = std::align(DEFAULT_BUFFER_ALIGNMENT, size, original, space); + SPDLOG_TRACE("AlignedBuffer: original {}, aligned {}, size {}, space {}", + (void*)(ptrOriginal), (void*)(ptrAligned), size, space); + // sanity check should always be true when std::align used + assert(space >= size); + } + // + template <> + inline AlignedBuffer::~AlignedBuffer() { + if (ptrOriginal) { + SPDLOG_DEBUG("Destroying cuda buffer {}", ptrOriginal); + cudaFree(ptrOriginal); + } + } +#endif + + // --------------------------------------------------------------------- +#ifdef KOKKOS_ENABLE_HIP +#define HIP_CHECK(expression) \ + { \ + const hipError_t status = expression; \ + if (status != hipSuccess) { \ + std::cerr << "HIP error " << status << ": " << hipGetErrorString(status) << " at " \ + << __FILE__ << ":" << __LINE__ << std::endl; \ + } \ + } + + // Specialize buffer allocation/free for HIP + template <> + inline AlignedBuffer::AlignedBuffer(std::size_t size) { + void* original; + space = to_multiple(size); + HIP_CHECK(hipMalloc(&original, space)); + if (!original) { + throw std::runtime_error("Error allocating HIP memory in AlignedBuffer"); + } + ptrOriginal = original; + ptrAligned = std::align(DEFAULT_BUFFER_ALIGNMENT, size, original, space); + SPDLOG_TRACE("AlignedBuffer: original {}, aligned {}, size {}, space {}", + (void*)(ptrOriginal), (void*)(ptrAligned), size, space); + // sanity check should always be true when std::align used + assert(space >= size); + } + // + template <> + inline AlignedBuffer::~AlignedBuffer() { + if (ptrOriginal) { + SPDLOG_DEBUG("Destroying HIP buffer {}", ptrOriginal); + HIP_CHECK(hipFree(ptrOriginal)); + } + } +#endif + + // --------------------------------------------------------------------- + template + struct aligned_storage_wrapper { + // + using memory_space = MemorySpace; + using buffer_type = + ippl::detail::ViewType>::view_type; + using pointer_type = typename buffer_type::pointer_type; + using size_type = detail::size_type; + // + aligned_storage_wrapper(const std::string& /*name*/, size_type size) + : view() // we will construct the view manually + , buffer(size) // + { + SPDLOG_TRACE("Construct: view origin {}, aligned {}", (void*)(view.data()), + (void*)(buffer.ptrAligned)); + view = buffer_type((pointer_type)buffer.ptrAligned, size); + assert(view.data() == buffer.ptrAligned); + } + // + size_type size() const { return buffer.space; } + // + pointer_type data() { return view.data(); } + + // Note that this makes no effort to preserve any existing data + void reallocBuffer(size_type newsize) { + // wipe the old memory, before allocating new, (help prevent out-of-space errors) + buffer = AlignedBuffer(); + // allocate new + buffer = AlignedBuffer(newsize); + view = buffer_type((pointer_type)buffer.ptrAligned, newsize); + SPDLOG_DEBUG("Realloc : view {}, aligned {}, size {}, space {}", (void*)(view.data()), + (void*)(buffer.ptrAligned), newsize, buffer.space); + } + // + buffer_type view; + AlignedBuffer buffer; + }; + +} // namespace ippl::comms + +#endif diff --git a/src/Communicate/Archive.h b/src/Communicate/Archive.h index 6ec74f946..1f2623dac 100644 --- a/src/Communicate/Archive.h +++ b/src/Communicate/Archive.h @@ -21,14 +21,14 @@ namespace ippl { namespace detail { /*! * @file Archive.h - * Serialize and desesrialize particle attributes. + * Serialize and deserialize particle attributes. * @tparam Properties variadic template for Kokkos::View */ - template + template class Archive { public: - using buffer_type = typename ViewType::view_type; + using buffer_type = BufferType; using pointer_type = typename buffer_type::pointer_type; Archive(size_type size = 0); @@ -73,7 +73,7 @@ namespace ippl { /*! * @returns a pointer to the data of the buffer */ - pointer_type getBuffer() { return buffer_m.data(); } + pointer_type getData() { return buffer_m.data(); } /*! * @returns the size of the buffer @@ -82,10 +82,18 @@ namespace ippl { size_type getBufferSize() const { return buffer_m.size(); } - void resizeBuffer(size_type size) { Kokkos::resize(buffer_m, size); } - - void reallocBuffer(size_type size) { Kokkos::realloc(buffer_m, size); } + void reallocBuffer(size_type size) { +#ifdef IPPL_ALIGNED_COMMS_BUFFERS + buffer_m.reallocBuffer(size); +#else + Kokkos::realloc(buffer_m, size); +#endif + } + void resetReadWritePos() { + readpos_m = 0; + writepos_m = 0; + } void resetWritePos() { writepos_m = 0; } void resetReadPos() { readpos_m = 0; } @@ -97,7 +105,7 @@ namespace ippl { //! read position for deserialization size_type readpos_m; //! serialized data - buffer_type buffer_m; + BufferType buffer_m; }; } // namespace detail } // namespace ippl diff --git a/src/Communicate/Archive.hpp b/src/Communicate/Archive.hpp index f1d097a69..1135c8157 100644 --- a/src/Communicate/Archive.hpp +++ b/src/Communicate/Archive.hpp @@ -4,99 +4,116 @@ // #include +#include "Utility/Logging.h" + #include "Archive.h" namespace ippl { namespace detail { - template - Archive::Archive(size_type size) + template + Archive::Archive(size_type size) : writepos_m(0) , readpos_m(0) , buffer_m("buffer", size) {} - template + // ----------------------------------- + // Scalar serialize + template template - void Archive::serialize(const Kokkos::View& view, - size_type nsends) { - using exec_space = typename Kokkos::View::execution_space; - using policy_type = Kokkos::RangePolicy; - - size_t size = sizeof(T); - Kokkos::parallel_for( - "Archive::serialize()", policy_type(0, nsends), - KOKKOS_CLASS_LAMBDA(const size_type i) { - std::memcpy(buffer_m.data() + i * size + writepos_m, view.data() + i, size); - }); + void Archive::serialize(const Kokkos::View& view, + size_type nsends) { + constexpr size_t size = sizeof(T); + char* dst_ptr = (char*)(buffer_m.data()) + writepos_m; + char* src_ptr = (char*)(view.data()); + assert(writepos_m + (nsends * size) <= buffer_m.size()); + // construct temp views of the src/dst buffers of the correct size (bytes) + Kokkos::View src_view(src_ptr, size * nsends); + Kokkos::View dst_view(dst_ptr, size * nsends); + Kokkos::deep_copy(dst_view, src_view); Kokkos::fence(); - writepos_m += size * nsends; + SPDLOG_TRACE("Incrementing writepos: {}, from {}, to {}", (void*)dst_view.data(), + writepos_m, writepos_m + (nsends * size)); + writepos_m += (nsends * size); } - template + // ----------------------------------- + // Vector serialize + template template - void Archive::serialize( - const Kokkos::View*, ViewArgs...>& view, size_type nsends) { - using exec_space = typename Kokkos::View::execution_space; - - size_t size = sizeof(T); - // Default index type for range policies is int64, + void Archive::serialize(const Kokkos::View*, ViewArgs...>& view, + size_type nsends) { + constexpr size_t size = sizeof(T); + char* dst_ptr = (char*)(buffer_m.data()); + ippl::Vector* src_ptr = view.data(); + auto wp = writepos_m; + // The Kokkos range policies expect int64 // so we have to explicitly specify size_type (uint64) + using exec_space = typename Kokkos::View::execution_space; using mdrange_t = Kokkos::MDRangePolicy, Kokkos::IndexType, exec_space>; Kokkos::parallel_for( - "Archive::serialize()", - // The constructor for Kokkos range policies always - // expects int64 regardless of index type provided - // by template parameters, so the typecast is necessary - // to avoid compiler warnings - mdrange_t({0, 0}, {(long int)nsends, Dim}), - KOKKOS_CLASS_LAMBDA(const size_type i, const size_t d) { - std::memcpy(buffer_m.data() + (Dim * i + d) * size + writepos_m, - &(*(view.data() + i))[d], size); + "Archive::serialize()", mdrange_t({0, 0}, {(long int)nsends, Dim}), + KOKKOS_LAMBDA(const size_type i, const size_t d) { + std::memcpy(dst_ptr + (Dim * i + d) * size + wp, &(*(src_ptr + i))[d], size); }); + Kokkos::fence(); writepos_m += Dim * size * nsends; } - template + // ----------------------------------- + // Scalar Deserialize + template template - void Archive::deserialize(Kokkos::View& view, - size_type nrecvs) { - using exec_space = typename Kokkos::View::execution_space; - using policy_type = Kokkos::RangePolicy; - - size_t size = sizeof(T); + void Archive::deserialize(Kokkos::View& view, + size_type nrecvs) { + // if we have to enlarge the destination view if (nrecvs > view.extent(0)) { + SPDLOG_WARN("DeSerialization realloc: {}, from {}, to {}", (void*)view.data(), + view.extent(0), nrecvs); Kokkos::realloc(view, nrecvs); } - Kokkos::parallel_for( - "Archive::deserialize()", policy_type(0, nrecvs), - KOKKOS_CLASS_LAMBDA(const size_type i) { - std::memcpy(view.data() + i, buffer_m.data() + i * size + readpos_m, size); - }); - // Wait for deserialization kernel to complete - // (as with serialization kernels) + // + constexpr size_t size = sizeof(T); + char* src_ptr = (char*)(buffer_m.data()) + readpos_m; + char* dst_ptr = (char*)(view.data()); + assert(readpos_m + (nrecvs * size) <= buffer_m.size()); + // construct temp views of the src/dst buffers of the correct size (bytes) + Kokkos::View src_view(src_ptr, size * nrecvs); + Kokkos::View dst_view(dst_ptr, size * nrecvs); + Kokkos::deep_copy(dst_view, src_view); Kokkos::fence(); - readpos_m += size * nrecvs; + SPDLOG_TRACE("Incrementing readpos: {}, from {}, to {}", (void*)buffer_m.data(), + readpos_m, readpos_m + (nrecvs * size)); + readpos_m += (nrecvs * size); } - template + // ----------------------------------- + // Vecto Deserialize + template template - void Archive::deserialize(Kokkos::View*, ViewArgs...>& view, - size_type nrecvs) { - using exec_space = typename Kokkos::View::execution_space; - - size_t size = sizeof(T); + void Archive::deserialize(Kokkos::View*, ViewArgs...>& view, + size_type nrecvs) // + { + // if we have to enlarge the destination view if (nrecvs > view.extent(0)) { + SPDLOG_WARN("DeSerialization realloc: {}, from {}, to {}", (void*)view.data(), + view.extent(0), nrecvs); Kokkos::realloc(view, nrecvs); } + // + constexpr size_t size = sizeof(T); + char* src_ptr = (char*)(buffer_m.data()); + ippl::Vector* dst_ptr = view.data(); + auto rp = readpos_m; + using exec_space = typename Kokkos::View::execution_space; using mdrange_t = Kokkos::MDRangePolicy, Kokkos::IndexType, exec_space>; Kokkos::parallel_for( "Archive::deserialize()", mdrange_t({0, 0}, {(long int)nrecvs, Dim}), - KOKKOS_CLASS_LAMBDA(const size_type i, const size_t d) { - std::memcpy(&(*(view.data() + i))[d], - buffer_m.data() + (Dim * i + d) * size + readpos_m, size); + KOKKOS_LAMBDA(const size_type i, const size_t d) { + std::memcpy(&(*(dst_ptr + i))[d], src_ptr + (Dim * i + d) * size + rp, size); }); Kokkos::fence(); readpos_m += Dim * size * nrecvs; diff --git a/src/Communicate/BufferHandler.h b/src/Communicate/BufferHandler.h index 117c12f22..a42a1ac0a 100644 --- a/src/Communicate/BufferHandler.h +++ b/src/Communicate/BufferHandler.h @@ -4,9 +4,34 @@ #include #include +#include "Types/IpplTypes.h" +#include "Types/ViewTypes.h" + +#include "Utility/Logging.h" +#include "Utility/TypeUtils.h" + #include "Communicate/Archive.h" -namespace ippl { +#ifdef IPPL_ALIGNED_COMMS_BUFFERS +#include "Communicate/AlignedBuffer.h" +#endif + +namespace ippl::comms { + +#ifdef IPPL_ALIGNED_COMMS_BUFFERS + // alignment provided by AlignedBuffer wrapper + template + using archive_buffer = detail::Archive>; +#else + // default kokkos alignment + template + using communicator_storage = + ippl::detail::ViewType>::view_type; + + template + using archive_buffer = detail::Archive>; +#endif /** * @brief Interface for memory buffer handling. @@ -17,11 +42,11 @@ namespace ippl { * * @tparam MemorySpace The memory space type used for buffer allocation. */ - template + template class BufferHandler { public: - using archive_type = ippl::detail::Archive; - using buffer_type = std::shared_ptr; + using archive_type = Buffer; + using buffer_type = std::shared_ptr; using size_type = ippl::detail::size_type; virtual ~BufferHandler() {} @@ -92,11 +117,12 @@ namespace ippl { * @tparam MemorySpace The memory space type for the buffer (e.g., `Kokkos::HostSpace`). */ template - class DefaultBufferHandler : public BufferHandler { + class DefaultBufferHandler : public BufferHandler, MemorySpace> { public: - using typename BufferHandler::archive_type; - using typename BufferHandler::buffer_type; - using typename BufferHandler::size_type; + using buffer_type = + typename BufferHandler, MemorySpace>::buffer_type; + using typename BufferHandler, MemorySpace>::archive_type; + using typename BufferHandler, MemorySpace>::size_type; ~DefaultBufferHandler() override; @@ -106,8 +132,8 @@ namespace ippl { * Requests a memory buffer of the specified size, with the option * to request a buffer larger than the base size by an overallocation * multiplier. If a sufficiently large buffer is available, it is returned. If not, the - * largest free buffer is reallocated. If there are no free buffers available, only then a - * new buffer is allocated. + * largest free buffer is reallocated. If there are no free buffers available, only then + * a new buffer is allocated. * * @param size The required buffer size. * @param overallocation A multiplier to allocate additional buffer space. @@ -163,7 +189,7 @@ namespace ippl { buffer_set_type free_buffers{ &DefaultBufferHandler::bufferSizeComparator}; ///< Set of free buffers }; -} // namespace ippl +} // namespace ippl::comms #include "Communicate/BufferHandler.hpp" diff --git a/src/Communicate/BufferHandler.hpp b/src/Communicate/BufferHandler.hpp index c6d57f0a3..f654dea7f 100644 --- a/src/Communicate/BufferHandler.hpp +++ b/src/Communicate/BufferHandler.hpp @@ -1,7 +1,7 @@ #ifndef IPPL_BUFFER_HANDLER_HPP #define IPPL_BUFFER_HANDLER_HPP -namespace ippl { +namespace ippl::comms { template DefaultBufferHandler::~DefaultBufferHandler() {} @@ -112,6 +112,7 @@ namespace ippl { freeSize_m -= buffer->getBufferSize(); usedSize_m += buffer->getBufferSize(); + buffer->resetReadWritePos(); free_buffers.erase(buffer); used_buffers.insert(buffer); return buffer; @@ -128,6 +129,7 @@ namespace ippl { free_buffers.erase(buffer); buffer->reallocBuffer(requiredSize); + buffer->resetReadWritePos(); used_buffers.insert(buffer); return buffer; @@ -140,9 +142,10 @@ namespace ippl { usedSize_m += newBuffer->getBufferSize(); used_buffers.insert(newBuffer); + newBuffer->resetReadWritePos(); return newBuffer; } -} // namespace ippl +} // namespace ippl::comms #endif diff --git a/src/Communicate/Buffers.cpp b/src/Communicate/Buffers.cpp index 04cbb1808..026633fec 100644 --- a/src/Communicate/Buffers.cpp +++ b/src/Communicate/Buffers.cpp @@ -32,13 +32,13 @@ namespace ippl { } void Communicator::deleteAllBuffers() { - buffer_handlers_m.forAll([](BufferHandler&& bh) { + buffer_handlers_m->forAll([](BufferHandler&& bh) { bh.deleteAllBuffers(); }); } void Communicator::freeAllBuffers() { - buffer_handlers_m.forAll([](BufferHandler&& bh) { + buffer_handlers_m->forAll([](BufferHandler&& bh) { bh.freeAllBuffers(); }); } diff --git a/src/Communicate/Buffers.hpp b/src/Communicate/Buffers.hpp index c08dd2a69..90b477af3 100644 --- a/src/Communicate/Buffers.hpp +++ b/src/Communicate/Buffers.hpp @@ -20,22 +20,33 @@ // exchanging particle data between ranks. // +#include "Utility/Logging.h" +#include "Utility/TypeUtils.h" + namespace ippl { namespace mpi { - template - Communicator::buffer_type Communicator::getBuffer(size_type size, - double overallocation) { - auto& buffer_handler = buffer_handlers_m.get(); + // ----------------------------------- + template + Communicator::buffer_type Communicator::getBuffer(size_type size, + double overallocation) { + using memory_space = BufferType::memory_space; - return buffer_handler.getBuffer(size * sizeof(T), - std::max(overallocation, defaultOveralloc_m)); - } + auto& buffer_handler = buffer_handlers_m->get(); - template - void Communicator::freeBuffer(Communicator::buffer_type buffer) { - auto& buffer_handler = buffer_handlers_m.get(); + auto b = buffer_handler.getBuffer(size * sizeof(T), + std::max(overallocation, defaultOveralloc_m)); + SPDLOG_TRACE("{}, getBuffer {}, buf, {}, size {}", (void*)this, + ippl::debug::print_type(), (void*)(b->getData()), + size * sizeof(T)); + return b; + } + template + void Communicator::freeBuffer(Communicator::buffer_type buffer) { + using memory_space = BufferType::memory_space; + auto& buffer_handler = buffer_handlers_m->get(); + SPDLOG_TRACE("freeBuffer buf, {}", (void*)(buffer->getData())); buffer_handler.freeBuffer(buffer); } diff --git a/src/Communicate/CMakeLists.txt b/src/Communicate/CMakeLists.txt index 95e7b4819..31cb70e89 100644 --- a/src/Communicate/CMakeLists.txt +++ b/src/Communicate/CMakeLists.txt @@ -4,5 +4,9 @@ # Adds all Communicate-related sources and headers to the IPPL library. # ----------------------------------------------------------------------------- +if(IPPL_ENABLE_ALIGNED_COMMUNICATION_BUFFERS) + target_compile_definitions(ippl PUBLIC IPPL_ALIGNED_COMMS_BUFFERS) +endif() + target_sources(ippl PRIVATE Communicator.cpp CommunicatorLogging.cpp Environment.cpp Buffers.cpp Request.cpp LogEntry.cpp) diff --git a/src/Communicate/Collectives.hpp b/src/Communicate/Collectives.hpp index d273242ab..0ed2aa69b 100644 --- a/src/Communicate/Collectives.hpp +++ b/src/Communicate/Collectives.hpp @@ -2,62 +2,60 @@ #include "Communicate/Operations.h" -namespace ippl { - namespace mpi { - template - void Communicator::gather(const T* input, T* output, int count, int root) { - MPI_Datatype type = get_mpi_datatype(*input); +namespace ippl::mpi { + template + void Communicator::gather(const T* input, T* output, int count, int root) { + MPI_Datatype type = get_mpi_datatype(*input); - MPI_Gather(const_cast(input), count, type, output, count, type, root, *comm_m); - } + MPI_Gather(const_cast(input), count, type, output, count, type, root, *comm_m); + } - template - void Communicator::scatter(const T* input, T* output, int count, int root) { - MPI_Datatype type = get_mpi_datatype(*input); + template + void Communicator::scatter(const T* input, T* output, int count, int root) { + MPI_Datatype type = get_mpi_datatype(*input); - MPI_Scatter(const_cast(input), count, type, output, count, type, root, *comm_m); - } + MPI_Scatter(const_cast(input), count, type, output, count, type, root, *comm_m); + } - template - void Communicator::reduce(const T* input, T* output, int count, Op, int root) { - MPI_Datatype type = get_mpi_datatype(*input); + template + void Communicator::reduce(const T* input, T* output, int count, Op, int root) { + MPI_Datatype type = get_mpi_datatype(*input); - MPI_Op mpiOp = get_mpi_op(); + MPI_Op mpiOp = get_mpi_op(); - MPI_Reduce(const_cast(input), output, count, type, mpiOp, root, *comm_m); - } + MPI_Reduce(const_cast(input), output, count, type, mpiOp, root, *comm_m); + } - template - void Communicator::reduce(const T& input, T& output, int count, Op op, int root) { - reduce(&input, &output, count, op, root); - } + template + void Communicator::reduce(const T& input, T& output, int count, Op op, int root) { + reduce(&input, &output, count, op, root); + } - template - void Communicator::allreduce(const T* input, T* output, int count, Op) { - MPI_Datatype type = get_mpi_datatype(*input); + template + void Communicator::allreduce(const T* input, T* output, int count, Op) { + MPI_Datatype type = get_mpi_datatype(*input); - MPI_Op mpiOp = get_mpi_op(); + MPI_Op mpiOp = get_mpi_op(); - MPI_Allreduce(const_cast(input), output, count, type, mpiOp, *comm_m); - } + MPI_Allreduce(const_cast(input), output, count, type, mpiOp, *comm_m); + } - template - void Communicator::allreduce(const T& input, T& output, int count, Op op) { - allreduce(&input, &output, count, op); - } + template + void Communicator::allreduce(const T& input, T& output, int count, Op op) { + allreduce(&input, &output, count, op); + } - template - void Communicator::allreduce(T* inout, int count, Op) { - MPI_Datatype type = get_mpi_datatype(*inout); + template + void Communicator::allreduce(T* inout, int count, Op) { + MPI_Datatype type = get_mpi_datatype(*inout); - MPI_Op mpiOp = get_mpi_op(); + MPI_Op mpiOp = get_mpi_op(); - MPI_Allreduce(MPI_IN_PLACE, inout, count, type, mpiOp, *comm_m); - } + MPI_Allreduce(MPI_IN_PLACE, inout, count, type, mpiOp, *comm_m); + } - template - void Communicator::allreduce(T& inout, int count, Op op) { - allreduce(&inout, count, op); - } - } // namespace mpi -} // namespace ippl + template + void Communicator::allreduce(T& inout, int count, Op op) { + allreduce(&inout, count, op); + } +} // namespace ippl::mpi diff --git a/src/Communicate/Communicator.cpp b/src/Communicate/Communicator.cpp index 0ade37738..81376de62 100644 --- a/src/Communicate/Communicator.cpp +++ b/src/Communicate/Communicator.cpp @@ -1,42 +1,56 @@ #include "Communicate/Communicator.h" -namespace ippl { - namespace mpi { - - Communicator::Communicator() - : comm_m(new MPI_Comm(MPI_COMM_WORLD)) { - MPI_Comm_rank(*comm_m, &rank_m); - MPI_Comm_size(*comm_m, &size_m); - } - - Communicator::Communicator(MPI_Comm comm) { - comm_m = std::make_shared(comm); - MPI_Comm_rank(*comm_m, &rank_m); - MPI_Comm_size(*comm_m, &size_m); - } - - Communicator& Communicator::operator=(MPI_Comm comm) { - comm_m = std::make_shared(comm); - MPI_Comm_rank(*comm_m, &rank_m); - MPI_Comm_size(*comm_m, &size_m); - return *this; - } - - Communicator Communicator::Communicator::split(int color, int key) const { - MPI_Comm newcomm; - MPI_Comm_split(*comm_m, color, key, &newcomm); - return Communicator(newcomm); - } - - void Communicator::probe(int source, int tag, Status& status) { - MPI_Probe(source, tag, *comm_m, status); - } - - bool Communicator::iprobe(int source, int tag, Status& status) { - int flag = 0; - MPI_Iprobe(source, tag, *comm_m, &flag, status); - return (flag != 0); +namespace ippl::mpi { + + Communicator::Communicator() + : buffer_handlers_m(get_buffer_handler_instance()) + , comm_m(new MPI_Comm(MPI_COMM_WORLD)) { + MPI_Comm_rank(*comm_m, &rank_m); + MPI_Comm_size(*comm_m, &size_m); + } + + Communicator::Communicator(MPI_Comm comm) { + buffer_handlers_m = get_buffer_handler_instance(); + comm_m = std::make_shared(comm); + MPI_Comm_rank(*comm_m, &rank_m); + MPI_Comm_size(*comm_m, &size_m); + } + + Communicator& Communicator::operator=(MPI_Comm comm) { + buffer_handlers_m = get_buffer_handler_instance(); + comm_m = std::make_shared(comm); + MPI_Comm_rank(*comm_m, &rank_m); + MPI_Comm_size(*comm_m, &size_m); + return *this; + } + + Communicator Communicator::Communicator::split(int color, int key) const { + MPI_Comm newcomm; + MPI_Comm_split(*comm_m, color, key, &newcomm); + return Communicator(newcomm); + } + + void Communicator::probe(int source, int tag, Status& status) { + MPI_Probe(source, tag, *comm_m, status); + } + + bool Communicator::iprobe(int source, int tag, Status& status) { + int flag = 0; + MPI_Iprobe(source, tag, *comm_m, &flag, status); + return (flag != 0); + } + + // --------------------------------------- + // singleton access to buffer manager + // --------------------------------------- + std::shared_ptr Communicator::get_buffer_handler_instance() { + static std::shared_ptr comm_buff_handler_ptr{nullptr}; + if (comm_buff_handler_ptr == nullptr) { + comm_buff_handler_ptr = std::make_shared(); + SPDLOG_DEBUG("BufferHandler new: {}", + ippl::debug::print_type()); } - } // namespace mpi -} // namespace ippl + return comm_buff_handler_ptr; + } +} // namespace ippl::mpi diff --git a/src/Communicate/Communicator.h b/src/Communicate/Communicator.h index 63886368a..114b0b4a8 100644 --- a/src/Communicate/Communicator.h +++ b/src/Communicate/Communicator.h @@ -5,215 +5,218 @@ #ifndef IPPL_MPI_COMMUNICATOR_H #define IPPL_MPI_COMMUNICATOR_H -#include -#include - -#include "Communicate/BufferHandler.h" -#include "Communicate/LoggingBufferHandler.h" -#include "Communicate/Request.h" -#include "Communicate/Status.h" - -//////////////////////////////////////////////// -// For message size check; see below #include #include +#include +#include #include "Utility/TypeUtils.h" #include "Communicate/Archive.h" +#include "Communicate/BufferHandler.h" +#include "Communicate/LogEntry.h" +#include "Communicate/Request.h" +#include "Communicate/Status.h" #include "Communicate/TagMaker.h" #include "Communicate/Tags.h" -//////////////////////////////////////////////////// -namespace ippl { - namespace mpi { - - class Communicator : public TagMaker { - public: - Communicator(); +//////////////////////////////////////////////////// - Communicator(MPI_Comm comm); +namespace ippl::mpi { - Communicator& operator=(MPI_Comm comm); + class Communicator : public TagMaker { + public: + Communicator(); - ~Communicator() = default; + Communicator(MPI_Comm comm); - Communicator split(int color, int key) const; + Communicator& operator=(MPI_Comm comm); - operator const MPI_Comm&() const noexcept { return *comm_m; } + ~Communicator() = default; - int size() const noexcept { return size_m; } + Communicator split(int color, int key) const; - int rank() const noexcept { return rank_m; } + operator const MPI_Comm&() const noexcept { return *comm_m; } - void barrier() { MPI_Barrier(*comm_m); } + int size() const noexcept { return size_m; } - void abort(int errorcode = -1) { MPI_Abort(*comm_m, errorcode); } + int rank() const noexcept { return rank_m; } - /* - * Blocking point-to-point communication - * - */ + void barrier() { MPI_Barrier(*comm_m); } - template - void send(const T& buffer, int count, int dest, int tag); + void abort(int errorcode = -1) { MPI_Abort(*comm_m, errorcode); } - template - void send(const T* buffer, int count, int dest, int tag); + /* + * Blocking point-to-point communication + * + */ - template - void recv(T& output, int count, int source, int tag, Status& status); + template + void send(const T& buffer, int count, int dest, int tag); - template - void recv(T* output, int count, int source, int tag, Status& status); + template + void send(const T* buffer, int count, int dest, int tag); - void probe(int source, int tag, Status& status); + template + void recv(T& output, int count, int source, int tag, Status& status); - /* - * Non-blocking point-to-point communication - * - */ + template + void recv(T* output, int count, int source, int tag, Status& status); - template - void isend(const T& buffer, int count, int dest, int tag, Request& request); + void probe(int source, int tag, Status& status); - template - void isend(const T* buffer, int count, int dest, int tag, Request& request); + /* + * Non-blocking point-to-point communication + * + */ - template - void irecv(T& buffer, int count, int source, int tag, Request& request); + template + void isend(const T& buffer, int count, int dest, int tag, Request& request); - template - void irecv(T* buffer, int count, int source, int tag, Request& request); + template + void isend(const T* buffer, int count, int dest, int tag, Request& request); - bool iprobe(int source, int tag, Status& status); + template + void irecv(T& buffer, int count, int source, int tag, Request& request); - /* - * Collective communication - */ + template + void irecv(T* buffer, int count, int source, int tag, Request& request); - /* Gather the data in the given source container from all other nodes to a - * specific node (default: 0). - */ - template - void gather(const T* input, T* output, int count, int root = 0); + bool iprobe(int source, int tag, Status& status); - /* Scatter the data from all other nodes to a - * specific node (default: 0). - */ - template - void scatter(const T* input, T* output, int count, int root = 0); + /* + * Collective communication + */ - /* Reduce data coming from all nodes to a specific node - * (default: 0). Apply certain operation - * - */ - template - void reduce(const T* input, T* output, int count, Op op, int root = 0); + /* Gather the data in the given source container from all other nodes to a + * specific node (default: 0). + */ + template + void gather(const T* input, T* output, int count, int root = 0); - template - void reduce(const T& input, T& output, int count, Op op, int root = 0); + /* Scatter the data from all other nodes to a + * specific node (default: 0). + */ + template + void scatter(const T* input, T* output, int count, int root = 0); - template - void allreduce(const T* input, T* output, int count, Op op); + /* Reduce data coming from all nodes to a specific node + * (default: 0). Apply certain operation + * + */ + template + void reduce(const T* input, T* output, int count, Op op, int root = 0); - template - void allreduce(const T& input, T& output, int count, Op op); + template + void reduce(const T& input, T& output, int count, Op op, int root = 0); - template - void allreduce(T* inout, int count, Op op); + template + void allreduce(const T* input, T* output, int count, Op op); - template - void allreduce(T& inout, int count, Op op); + template + void allreduce(const T& input, T& output, int count, Op op); - ///////////////////////////////////////////////////////////////////////////////////// - template - using archive_type = detail::Archive; + template + void allreduce(T* inout, int count, Op op); - template - using buffer_type = std::shared_ptr>; + template + void allreduce(T& inout, int count, Op op); - private: - template - using buffer_container_type = LoggingBufferHandler; + private: + template + using buffer_container_type = comms::DefaultBufferHandler; - using buffer_handler_type = - typename detail::ContainerForAllSpaces::type; + using buffer_handler_type = + typename detail::ContainerForAllSpaces::type; - public: - using size_type = detail::size_type; - double getDefaultOverallocation() const { return defaultOveralloc_m; } - void setDefaultOverallocation(double factor); + public: + template + using buffer_type = buffer_container_type::buffer_type; - template - buffer_type getBuffer(size_type size, double overallocation = 1.0); + public: + using size_type = detail::size_type; + double getDefaultOverallocation() const { return defaultOveralloc_m; } + void setDefaultOverallocation(double factor); - void deleteAllBuffers(); - void freeAllBuffers(); + template + buffer_type getBuffer(size_type size, double overallocation = 1.0); - template - void freeBuffer(buffer_type buffer); + void deleteAllBuffers(); + void freeAllBuffers(); - const MPI_Comm& getCommunicator() const noexcept { return *comm_m; } + template + void freeBuffer(buffer_type buffer); - template - void recv(int src, int tag, Buffer& buffer, Archive& ar, size_type msize, - size_type nrecvs) { - // Temporary fix. MPI communication seems to have problems when the - // count argument exceeds the range of int, so large messages should - // be split into smaller messages - if (msize > INT_MAX) { - std::cerr << "Message size exceeds range of int" << std::endl; - this->abort(); - } - MPI_Status status; - MPI_Recv(ar.getBuffer(), msize, MPI_BYTE, src, tag, *comm_m, &status); + const MPI_Comm& getCommunicator() const noexcept { return *comm_m; } - buffer.deserialize(ar, nrecvs); + template + void recv(int src, int tag, Buffer& buffer, Archive& ar, size_type msize, + size_type nrecvs) { + // Temporary fix. MPI communication seems to have problems when the + // count argument exceeds the range of int, so large messages should + // be split into smaller messages + if (msize > INT_MAX) { + std::cerr << "Message size exceeds range of int" << std::endl; + this->abort(); } - - template - void isend(int dest, int tag, Buffer& buffer, Archive& ar, MPI_Request& request, - size_type nsends) { - if (ar.getSize() > INT_MAX) { - std::cerr << "Message size exceeds range of int" << std::endl; - this->abort(); - } - buffer.serialize(ar, nsends); - MPI_Isend(ar.getBuffer(), ar.getSize(), MPI_BYTE, dest, tag, *comm_m, &request); + MPI_Status status; + MPI_Recv(ar.getData(), msize, MPI_BYTE, src, tag, *comm_m, &status); + SPDLOG_DEBUG("Recv buf {}, size {:04}, src {:02}, tag {:04}", (void*)(ar.getData()), + msize, src, tag); + buffer.deserialize(ar, nrecvs); + } + + template + void isend(int dest, int tag, Buffer& buffer, Archive& ar, MPI_Request& request, + size_type nsends) // + { + if (ar.getSize() > INT_MAX) { + std::cerr << "Message size exceeds range of int" << std::endl; + this->abort(); } - - template - void irecv(int src, int tag, Archive& ar, MPI_Request& request, size_type msize) { - if (msize > INT_MAX) { - std::cerr << "Message size exceeds range of int" << std::endl; - this->abort(); - } - MPI_Irecv(ar.getBuffer(), msize, MPI_BYTE, src, tag, *comm_m, &request); + buffer.serialize(ar, nsends); + MPI_Isend(ar.getData(), ar.getSize(), MPI_BYTE, dest, tag, *comm_m, &request); + SPDLOG_DEBUG("Isend buf {}, size {:04}, dst {:02}, tag {:04}, req {}", + (void*)(ar.getData()), ar.getSize(), dest, tag, + static_cast(request)); + } + + template + void irecv(int src, int tag, Archive& ar, MPI_Request& request, size_type msize) { + if (msize > INT_MAX) { + std::cerr << "Message size exceeds range of int" << std::endl; + this->abort(); } - void printLogs(const std::string& filename); + MPI_Irecv(ar.getData(), msize, MPI_BYTE, src, tag, *comm_m, &request); + SPDLOG_DEBUG("Irecv buf {}, size {:04}, src {:02}, tag {:04}, req {}", + (void*)(ar.getData()), msize, src, tag, static_cast(request)); + } + + void printLogs(const std::string& filename); + + private: + std::vector gatherLocalLogs(); + void sendLogsToRank0(const std::vector& localLogs); + std::vector gatherLogsFromAllRanks(const std::vector& localLogs); + void writeLogsToFile(const std::vector& allLogs, const std::string& filename); - private: - std::vector gatherLocalLogs(); - void sendLogsToRank0(const std::vector& localLogs); - std::vector gatherLogsFromAllRanks(const std::vector& localLogs); - void writeLogsToFile(const std::vector& allLogs, const std::string& filename); + std::shared_ptr buffer_handlers_m; + double defaultOveralloc_m = 1.0; - buffer_handler_type buffer_handlers_m; + ///////////////////////////////////////////////////////////////////////////////////// - double defaultOveralloc_m = 1.0; + protected: + std::shared_ptr comm_m; + int size_m; + int rank_m; - ///////////////////////////////////////////////////////////////////////////////////// + public: + std::shared_ptr get_buffer_handler_instance(); + }; - protected: - std::shared_ptr comm_m; - int size_m; - int rank_m; - }; - } // namespace mpi -} // namespace ippl +} // namespace ippl::mpi #include "Communicate/Collectives.hpp" #include "Communicate/PointToPoint.hpp" diff --git a/src/Communicate/CommunicatorLogging.cpp b/src/Communicate/CommunicatorLogging.cpp index 92e821ebd..3c3000695 100644 --- a/src/Communicate/CommunicatorLogging.cpp +++ b/src/Communicate/CommunicatorLogging.cpp @@ -6,119 +6,122 @@ #include "Utility/Inform.h" #include "Communicate/Communicator.h" -#include "Communicate/LogEntry.h" - -namespace ippl { - namespace mpi { - void Communicator::printLogs(const std::string& filename) { - std::vector localLogs = gatherLocalLogs(); - - std::vector allLogs; - if (rank() == 0) { - allLogs = gatherLogsFromAllRanks(localLogs); - } else { - sendLogsToRank0(localLogs); - } +#include "Communicate/LoggingBufferHandler.h" - if (rank() == 0) { - writeLogsToFile(allLogs, filename); - } +namespace ippl::mpi { + void Communicator::printLogs(const std::string& filename) { + std::vector localLogs = gatherLocalLogs(); + + std::vector allLogs; + if (rank() == 0) { + allLogs = gatherLogsFromAllRanks(localLogs); + } else { + sendLogsToRank0(localLogs); + } + + if (rank() == 0) { + writeLogsToFile(allLogs, filename); } + } - std::vector Communicator::gatherLocalLogs() { - std::vector localLogs; + template + struct is_a_logger : std::false_type {}; - buffer_handlers_m.forAll([&](auto& loggingHandler) { + template + struct is_a_logger > : std::true_type {}; + + std::vector Communicator::gatherLocalLogs() { + std::vector localLogs; + if constexpr (is_a_logger::value) { + buffer_handlers_m->forAll([&](auto& loggingHandler) { const auto& logs = loggingHandler.getLogs(); localLogs.insert(localLogs.end(), logs.begin(), logs.end()); }); - - return localLogs; } + return localLogs; + } - void Communicator::sendLogsToRank0(const std::vector& localLogs) { - std::vector buffer = serializeLogs(localLogs); + void Communicator::sendLogsToRank0(const std::vector& localLogs) { + std::vector buffer = serializeLogs(localLogs); - int logSize = buffer.size(); - - this->send(logSize, 1, 0, 0); - this->send(buffer.data(), logSize, 0, 0); - } + int logSize = buffer.size(); - std::vector Communicator::gatherLogsFromAllRanks( - const std::vector& localLogs) { - std::vector allLogs = localLogs; + this->send(logSize, 1, 0, 0); + this->send(buffer.data(), logSize, 0, 0); + } - for (int rank = 1; rank < size_m; ++rank) { - int logSize; - Status status; + std::vector Communicator::gatherLogsFromAllRanks( + const std::vector& localLogs) { + std::vector allLogs = localLogs; - this->recv(logSize, 1, rank, 0, status); + for (int rank = 1; rank < size_m; ++rank) { + int logSize; + Status status; - std::vector buffer(logSize); - this->recv(buffer.data(), logSize, rank, 0, status); + this->recv(logSize, 1, rank, 0, status); - std::vector deserializedLogs = deserializeLogs(buffer); - allLogs.insert(allLogs.end(), deserializedLogs.begin(), deserializedLogs.end()); - } + std::vector buffer(logSize); + this->recv(buffer.data(), logSize, rank, 0, status); - return allLogs; + std::vector deserializedLogs = deserializeLogs(buffer); + allLogs.insert(allLogs.end(), deserializedLogs.begin(), deserializedLogs.end()); } - std::vector serializeLogs(const std::vector& logs) { - std::vector buffer; + return allLogs; + } - for (const auto& logEntry : logs) { - std::vector serializedEntry = logEntry.serialize(); - buffer.insert(buffer.end(), serializedEntry.begin(), serializedEntry.end()); - } + std::vector serializeLogs(const std::vector& logs) { + std::vector buffer; - return buffer; + for (const auto& logEntry : logs) { + std::vector serializedEntry = logEntry.serialize(); + buffer.insert(buffer.end(), serializedEntry.begin(), serializedEntry.end()); } - std::vector deserializeLogs(const std::vector& buffer) { - std::vector logs; - size_t offset = 0; + return buffer; + } - while (offset < buffer.size()) { - LogEntry logEntry = LogEntry::deserialize(buffer, offset); + std::vector deserializeLogs(const std::vector& buffer) { + std::vector logs; + size_t offset = 0; - logs.push_back(logEntry); + while (offset < buffer.size()) { + LogEntry logEntry = LogEntry::deserialize(buffer, offset); - offset += logEntry.serialize().size(); - } - return logs; + logs.push_back(logEntry); + + offset += logEntry.serialize().size(); } + return logs; + } + + void Communicator::writeLogsToFile(const std::vector& allLogs, + const std::string& filename) { + Inform logFile(0, filename.c_str(), Inform::OVERWRITE, 0); + logFile.setOutputLevel(1); - void Communicator::writeLogsToFile(const std::vector& allLogs, - const std::string& filename) { - Inform logFile(0, filename.c_str(), Inform::OVERWRITE, 0); - logFile.setOutputLevel(1); - - logFile << "Timestamp,Method,Rank,MemorySpace,usedSize,FreeSize,Parameters" << endl; - - for (const auto& log : allLogs) { - auto timestamp = std::chrono::duration_cast( - log.timestamp.time_since_epoch()) - .count(); - - logFile << timestamp << "," << log.methodName << "," << log.rank << "," - << log.memorySpace << "," << log.usedSize << "," << log.freeSize; - - logFile << ",\""; - bool first = true; - for (const auto& [key, value] : log.parameters) { - if (!first) { - logFile << "; "; - } - logFile << key << ": " << value; - first = false; + logFile << "Timestamp,Method,Rank,MemorySpace,usedSize,FreeSize,Parameters" << endl; + + for (const auto& log : allLogs) { + auto timestamp = std::chrono::duration_cast( + log.timestamp.time_since_epoch()) + .count(); + + logFile << timestamp << "," << log.methodName << "," << log.rank << "," + << log.memorySpace << "," << log.usedSize << "," << log.freeSize; + + logFile << ",\""; + bool first = true; + for (const auto& [key, value] : log.parameters) { + if (!first) { + logFile << "; "; } - logFile << "\"" << endl; + logFile << key << ": " << value; + first = false; } - - logFile.flush(); + logFile << "\"" << endl; } - } // namespace mpi -} // namespace ippl + logFile.flush(); + } +} // namespace ippl::mpi diff --git a/src/Communicate/CommunicatorLogging.hpp b/src/Communicate/CommunicatorLogging.hpp index 5746c3df9..12caaa4d9 100644 --- a/src/Communicate/CommunicatorLogging.hpp +++ b/src/Communicate/CommunicatorLogging.hpp @@ -5,11 +5,9 @@ #include "Communicate/LogEntry.h" -namespace ippl { - namespace mpi { - std::vector serializeLogs(const std::vector& logs); - std::vector deserializeLogs(const std::vector& buffer); - } // namespace mpi -} // namespace ippl +namespace ippl::mpi { + std::vector serializeLogs(const std::vector& logs); + std::vector deserializeLogs(const std::vector& buffer); +} // namespace ippl::mpi #endif diff --git a/src/Communicate/LoggingBufferHandler.h b/src/Communicate/LoggingBufferHandler.h index f6a0bd5d6..fc3ad2eba 100644 --- a/src/Communicate/LoggingBufferHandler.h +++ b/src/Communicate/LoggingBufferHandler.h @@ -10,7 +10,7 @@ #include "Communicate/BufferHandler.h" #include "Communicate/LogEntry.h" -namespace ippl { +namespace ippl::comms { /** * @class LoggingBufferHandler @@ -28,10 +28,12 @@ namespace ippl { * Instead, it adds logging for monitoring purposes. */ template - class LoggingBufferHandler : public BufferHandler { + class LoggingBufferHandler : public BufferHandler, MemorySpace> { public: - using buffer_type = typename BufferHandler::buffer_type; - using size_type = typename BufferHandler::size_type; + using buffer_type = + typename BufferHandler, MemorySpace>::buffer_type; + using size_type = + typename BufferHandler, MemorySpace>::size_type; /** * @brief Constructs a LoggingBufferHandler with an existing buffer handler. @@ -39,7 +41,9 @@ namespace ippl { * operations. * @param rank The MPI rank for logging purposes, used to identify the source of logs. */ - LoggingBufferHandler(std::shared_ptr> handler, int rank); + LoggingBufferHandler( + std::shared_ptr, MemorySpace>> handler, + int rank); /** * @brief Default constructor, creates an internal `BufferHandler` for managing buffers. @@ -104,7 +108,7 @@ namespace ippl { const std::vector& getLogs() const; private: - std::shared_ptr> + std::shared_ptr, MemorySpace>> handler_m; ///< Internal handler for buffer management. std::vector logEntries_m; ///< Log entries for buffer operations. int rank_m; ///< MPI rank for identifying log sources. @@ -122,7 +126,7 @@ namespace ippl { void logMethod(const std::string& methodName, const std::map& parameters); }; -} // namespace ippl +} // namespace ippl::comms #include "Communicate/LoggingBufferHandler.hpp" diff --git a/src/Communicate/LoggingBufferHandler.hpp b/src/Communicate/LoggingBufferHandler.hpp index 33f4269cd..9517e1de9 100644 --- a/src/Communicate/LoggingBufferHandler.hpp +++ b/src/Communicate/LoggingBufferHandler.hpp @@ -4,11 +4,11 @@ #include #include -namespace ippl { +namespace ippl::comms { template LoggingBufferHandler::LoggingBufferHandler( - std::shared_ptr> handler, int rank) + std::shared_ptr, MemorySpace>> handler, int rank) : handler_m(std::move(handler)) , rank_m(rank) {} @@ -70,6 +70,6 @@ namespace ippl { std::chrono::high_resolution_clock::now()}); } -} // namespace ippl +} // namespace ippl::comms #endif diff --git a/src/Field/HaloCells.h b/src/Field/HaloCells.h index c4d87b9bd..c056177dd 100644 --- a/src/Field/HaloCells.h +++ b/src/Field/HaloCells.h @@ -22,7 +22,7 @@ namespace ippl { template struct FieldBufferData { using view_type = typename detail::ViewType::view_type; - using archive_type = Archive; + using archive_type = comms::archive_buffer; void serialize(archive_type& ar, size_type nsends) { ar.serialize(buffer, nsends); } @@ -139,7 +139,8 @@ namespace ippl { * unpack function call */ template - void exchangeBoundaries(view_type& view, Layout_t* layout, SendOrder order, int nghost = 1); + void exchangeBoundaries(view_type& view, Layout_t* layout, SendOrder order, + int nghost = 1); /*! * Extract the subview of the original data. This does not copy. diff --git a/src/Ippl.cpp b/src/Ippl.cpp index 3892e4ae9..fc9563535 100644 --- a/src/Ippl.cpp +++ b/src/Ippl.cpp @@ -71,6 +71,14 @@ namespace ippl { } auto factor = detail::getNumericalOption(argv[nargs]); Comm->setDefaultOverallocation(factor); + } else if (detail::checkOption(argv[nargs], "--debug", "-g")) { + ++nargs; + if (Comm->rank() == 0) { + std::cout << "Please attach debugger and hit return" << std::endl; + char c; + std::cin >> c; + } + Comm->barrier(); } else if (nargs > 0 && std::strstr(argv[nargs], "--kokkos") == nullptr) { notparsed.push_back(argv[nargs]); } diff --git a/src/Particle/ParticleAttrib.h b/src/Particle/ParticleAttrib.h index ca55ca81f..9ec60ceff 100644 --- a/src/Particle/ParticleAttrib.h +++ b/src/Particle/ParticleAttrib.h @@ -45,6 +45,7 @@ namespace ippl { using memory_space = typename view_type::memory_space; using execution_space = typename view_type::execution_space; + using archive_type = comms::archive_buffer; using size_type = detail::size_type; @@ -66,24 +67,22 @@ namespace ippl { void unpack(size_type) override; - void serialize(detail::Archive& ar, size_type nsends) override { - ar.serialize(buf_m, nsends); - } + void serialize(archive_type& ar, size_type nsends) override { ar.serialize(buf_m, nsends); } - void deserialize(detail::Archive& ar, size_type nrecvs) override { + void deserialize(archive_type& ar, size_type nrecvs) override { ar.deserialize(buf_m, nrecvs); } virtual ~ParticleAttrib() = default; - + size_type size() const override { return dview_m.extent(0); } - + size_type packedSize(const size_type count) const override { return count * sizeof(value_type); } - + void resize(size_type n) { Kokkos::resize(dview_m, n); } - + void realloc(size_type n) { Kokkos::realloc(dview_m, n); } void print() { diff --git a/src/Particle/ParticleAttribBase.h b/src/Particle/ParticleAttribBase.h index b0e973e79..6172751f7 100644 --- a/src/Particle/ParticleAttribBase.h +++ b/src/Particle/ParticleAttribBase.h @@ -35,9 +35,10 @@ namespace ippl { }; public: - using hash_type = ippl::detail::hash_type; using memory_space = MemorySpace; using execution_space = typename memory_space::execution_space; + using hash_type = detail::hash_type; + using archive_type = comms::archive_buffer; template using with_properties = typename WithMemSpace::type; @@ -66,9 +67,9 @@ namespace ippl { virtual void unpack(size_type) = 0; - virtual void serialize(Archive& ar, size_type nsends) = 0; + virtual void serialize(archive_type& ar, size_type nsends) = 0; - virtual void deserialize(Archive& ar, size_type nrecvs) = 0; + virtual void deserialize(archive_type& ar, size_type nrecvs) = 0; virtual size_type size() const = 0; @@ -78,7 +79,7 @@ namespace ippl { size_type getParticleCount() const { return *localNum_mp; } virtual void applyPermutation(const hash_type&) = 0; - virtual void internalCopy(const hash_type&) = 0; + virtual void internalCopy(const hash_type&) = 0; protected: const size_type* localNum_mp; diff --git a/src/Particle/ParticleSpatialLayout.hpp b/src/Particle/ParticleSpatialLayout.hpp index 00f256bc3..44ae03fa7 100644 --- a/src/Particle/ParticleSpatialLayout.hpp +++ b/src/Particle/ParticleSpatialLayout.hpp @@ -27,7 +27,6 @@ #include "Communicate/Window.h" - namespace ippl { /*! @@ -38,7 +37,7 @@ namespace ippl { */ struct increment_type { size_t count[2]; - + KOKKOS_FUNCTION void init() { count[0] = 0; count[1] = 0; @@ -72,14 +71,13 @@ namespace ippl { template void ParticleSpatialLayout::updateLayout(FieldLayout& fl, Mesh& mesh) { - //flayout_m = fl; + // flayout_m = fl; rlayout_m.changeDomain(fl, mesh); } template template void ParticleSpatialLayout::update(ParticleContainer& pc) { - /* Apply Boundary Conditions */ static IpplTimings::TimerRef ParticleBCTimer = IpplTimings::getTimer("particleBC"); IpplTimings::startTimer(ParticleBCTimer); @@ -89,7 +87,7 @@ namespace ippl { /* Update Timer for the rest of the function */ static IpplTimings::TimerRef ParticleUpdateTimer = IpplTimings::getTimer("updateParticle"); IpplTimings::startTimer(ParticleUpdateTimer); - + int nRanks = Comm->size(); if (nRanks < 2) { return; @@ -103,7 +101,7 @@ namespace ippl { */ // 1. figure out which particles need to go where -> locateParticles(...) ============= // - + static IpplTimings::TimerRef locateTimer = IpplTimings::getTimer("locateParticles"); IpplTimings::startTimer(locateTimer); @@ -112,40 +110,38 @@ namespace ippl { /* The indices correspond to the indices of the local particles, * the values correspond to the ranks to which the particles need to be sent - */ + */ locate_type particleRanks("particles' MPI ranks", localnum); /* The indices are the indices of the particles, - * the boolean values describe whether the particle has left the current rank + * the boolean values describe whether the particle has left the current rank * 0 --> particle valid (inside current rank) * 1 --> particle invalid (left rank) */ bool_type invalidParticles("validity of particles", localnum); /* The indices are the MPI ranks, - * the values are the number of particles are sent to that rank from myrank + * the values are the number of particles are sent to that rank from myrank */ locate_type rankSendCount_dview("rankSendCount Device", nRanks); - /* The indices have no particluar meaning, + /* The indices have no particluar meaning, * the values are the MPI ranks to which we need to send */ locate_type destinationRanks_dview("destinationRanks Device", nRanks); /* nInvalid is the number of invalid particles * nDestinationRanks is the number of MPI ranks we need to send to - */ - auto [nInvalid, nDestinationRanks] = - locateParticles(pc, - particleRanks, invalidParticles, - rankSendCount_dview, destinationRanks_dview); + */ + auto [nInvalid, nDestinationRanks] = locateParticles( + pc, particleRanks, invalidParticles, rankSendCount_dview, destinationRanks_dview); /* Host space copy of rankSendCount_dview */ - auto rankSendCount_hview = + auto rankSendCount_hview = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), rankSendCount_dview); - + /* Host Space copy of destinationRanks_dview */ - auto destinationRanks_hview = + auto destinationRanks_hview = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), destinationRanks_dview); IpplTimings::stopTimer(locateTimer); @@ -153,18 +149,18 @@ namespace ippl { // 2. fill send buffer and send particles =============================================== // // 2.1 Remote Memory Access window for one-sided communication - + static IpplTimings::TimerRef preprocTimer = IpplTimings::getTimer("sendPreprocess"); IpplTimings::startTimer(preprocTimer); - - std::fill(nRecvs_m.begin(), nRecvs_m.end(), 0); + + std::fill(nRecvs_m.begin(), nRecvs_m.end(), 0); window_m.fence(0); - - // Prepare RMA window for the ranks we need to send to - for(size_t ridx=0; ridx < nDestinationRanks; ridx++){ + + // Prepare RMA window for the ranks we need to send to + for (size_t ridx = 0; ridx < nDestinationRanks; ridx++) { int rank = destinationRanks_hview[ridx]; - if (rank == Comm->rank()){ + if (rank == Comm->rank()) { // we do not need to send to ourselves continue; } @@ -175,30 +171,29 @@ namespace ippl { IpplTimings::stopTimer(preprocTimer); - // 2.2 Particle Sends + // 2.2 Particle Sends static IpplTimings::TimerRef sendTimer = IpplTimings::getTimer("particleSend"); IpplTimings::startTimer(sendTimer); - + std::vector requests(0); int tag = Comm->next_tag(mpi::tag::P_SPATIAL_LAYOUT, mpi::tag::P_LAYOUT_CYCLE); - for(size_t ridx=0; ridx < nDestinationRanks; ridx++){ + for (size_t ridx = 0; ridx < nDestinationRanks; ridx++) { int rank = destinationRanks_hview[ridx]; - if(rank == Comm->rank()){ - continue; - } + if (rank == Comm->rank()) { + continue; + } hash_type hash("hash", rankSendCount_hview(rank)); fillHash(rank, particleRanks, hash); pc.sendToRank(rank, tag, requests, hash); } - - IpplTimings::stopTimer(sendTimer); + IpplTimings::stopTimer(sendTimer); // 3. Internal destruction of invalid particles ======================================= // - + static IpplTimings::TimerRef destroyTimer = IpplTimings::getTimer("particleDestroy"); IpplTimings::startTimer(destroyTimer); @@ -206,9 +201,9 @@ namespace ippl { Kokkos::fence(); IpplTimings::stopTimer(destroyTimer); - - // 4. Receive Particles ================================================================ // - + + // 4. Receive Particles ================================================================ // + static IpplTimings::TimerRef recvTimer = IpplTimings::getTimer("particleRecv"); IpplTimings::startTimer(recvTimer); @@ -219,13 +214,13 @@ namespace ippl { } IpplTimings::stopTimer(recvTimer); - IpplTimings::startTimer(sendTimer); if (requests.size() > 0) { MPI_Waitall(requests.size(), requests.data(), MPI_STATUSES_IGNORE); } IpplTimings::stopTimer(sendTimer); + Comm->freeAllBuffers(); IpplTimings::stopTimer(ParticleUpdateTimer); } @@ -238,9 +233,9 @@ namespace ippl { return ((pos[Idx] > region[Idx].min()) && ...) && ((pos[Idx] <= region[Idx].max()) && ...); }; - - /* Helper function that evaluates the total number of neighbors for the current rank in Dim dimensions. - */ + /* Helper function that evaluates the total number of neighbors for the current rank in Dim + * dimensions. + */ template detail::size_type ParticleSpatialLayout::getNeighborSize( const neighbor_list& neighbors) const { @@ -253,29 +248,29 @@ namespace ippl { return totalSize; } - /** - * @brief This function determines to which rank particles need to be sent after the iteration step. - * It starts by first scanning direct rank neighbors, and only does a global scan if there are still - * unfound particles. It then calculates how many particles need to be sent to each rank and how many - * ranks are sent to in total. + * @brief This function determines to which rank particles need to be sent after the iteration + * step. It starts by first scanning direct rank neighbors, and only does a global scan if there + * are still unfound particles. It then calculates how many particles need to be sent to each + * rank and how many ranks are sent to in total. * * @param pc Particle Container - * @param ranks A vector the length of the number of particles on the current rank, where each value refers - * to the new rank of the particle - * @param invalid A vector marking the particles that need to be sent away, and thus locally deleted - * @param nSends_dview Device view the length of number of ranks, where each value determines the number - * of particles sent to that rank from the current rank + * @param ranks A vector the length of the number of particles on the current rank, where + * each value refers to the new rank of the particle + * @param invalid A vector marking the particles that need to be sent away, and thus + * locally deleted + * @param nSends_dview Device view the length of number of ranks, where each value determines + * the number of particles sent to that rank from the current rank * @param sends_dview Device view for the number of ranks that are sent to from current rank * * @return tuple with the number of particles sent away and the number of ranks sent to */ template template - std::pair ParticleSpatialLayout::locateParticles( - const ParticleContainer& pc, locate_type& ranks, bool_type& invalid, + std::pair + ParticleSpatialLayout::locateParticles( + const ParticleContainer& pc, locate_type& ranks, bool_type& invalid, locate_type& nSends_dview, locate_type& sends_dview) const { - auto positions = pc.R.getView(); region_view_type Regions = rlayout_m.getdLocalRegions(); @@ -291,9 +286,9 @@ namespace ippl { locate_type outsideIds("Particles outside of neighborhood", size_type(pc.getLocalNum())); /// outsideCount: Tracks the number of particles that travelled outside of the neighborhood. - size_type outsideCount = 0; + size_type outsideCount = 0; /// invalidCount: Tracks the number of particles that need to be sent to other ranks. - size_type invalidCount = 0; + size_type invalidCount = 0; /// neighborSize: Size of a neighborhood in D dimentions. const size_type neighborSize = getNeighborSize(neighbors); @@ -301,11 +296,11 @@ namespace ippl { /// neighbors_view: Kokkos view with the IDs of the neighboring MPI ranks. locate_type neighbors_view("Nearest neighbors IDs", neighborSize); - /* red_val: Used to reduce both the number of invalid particles and the number of particles - * outside of the neighborhood (Kokkos::parallel_scan doesn't allow multiple reduction values, so we - * use the helper class increment_type). First element updates InvalidCount, second - * one updates outsideCount. - */ + /* red_val: Used to reduce both the number of invalid particles and the number of particles + * outside of the neighborhood (Kokkos::parallel_scan doesn't allow multiple reduction + * values, so we use the helper class increment_type). First element updates InvalidCount, + * second one updates outsideCount. + */ increment_type red_val; red_val.init(); @@ -317,7 +312,7 @@ namespace ippl { for (const auto& componentNeighbors : neighbors) { for (size_t j = 0; j < componentNeighbors.size(); ++j) { neighbors_mirror(k) = componentNeighbors[j]; - //std::cout << "Neighbor: " << neighbors_mirror(k) << std::endl; + // std::cout << "Neighbor: " << neighbors_mirror(k) << std::endl; k++; } } @@ -338,21 +333,21 @@ namespace ippl { Kokkos::RangePolicy(0, ranks.extent(0)), KOKKOS_LAMBDA(const size_type i, increment_type& val, const bool final) { /* Step 1 - * inCurr: True if the particle hasn't left the current MPI rank. - * inNeighbor: True if the particle is found in a neighboring rank. - * found: True either if inCurr = True or inNeighbor = True. - * increment: Helper variable to update red_val. - */ - bool inCurr = false; + * inCurr: True if the particle hasn't left the current MPI rank. + * inNeighbor: True if the particle is found in a neighboring rank. + * found: True either if inCurr = True or inNeighbor = True. + * increment: Helper variable to update red_val. + */ + bool inCurr = false; bool inNeighbor = false; - bool found = false; + bool found = false; bool increment[2]; - inCurr = positionInRegion(is, positions(i), Regions(myRank)); + inCurr = positionInRegion(is, positions(i), Regions(myRank)); - ranks(i) = inCurr * myRank; - invalid(i) = !inCurr; - found = inCurr || found; + ranks(i) = inCurr * myRank; + invalid(i) = !inCurr; + found = inCurr || found; /// Step 2 for (size_t j = 0; j < neighbors_view.extent(0); ++j) { @@ -360,34 +355,34 @@ namespace ippl { inNeighbor = positionInRegion(is, positions(i), Regions(rank)); - ranks(i) = !(inNeighbor) * ranks(i) + inNeighbor * rank; - found = inNeighbor || found; + ranks(i) = !(inNeighbor)*ranks(i) + inNeighbor * rank; + found = inNeighbor || found; } /// Step 3 - /* isOut: When the last thread has finished the search, checks whether the particle has been found - * either in the current rank or in a neighboring one. - * Used to avoid race conditions when updating outsideIds. + /* isOut: When the last thread has finished the search, checks whether the particle + * has been found either in the current rank or in a neighboring one. Used to avoid + * race conditions when updating outsideIds. */ - if(final && !found) { + if (final && !found) { outsideIds(val.count[1]) = i; } - //outsideIds(val.count[1]) = i * isOut; + // outsideIds(val.count[1]) = i * isOut; increment[0] = invalid(i); increment[1] = !found; val += increment; - }, red_val); Kokkos::fence(); - invalidCount = red_val.count[0]; - outsideCount = red_val.count[1]; + invalidCount = red_val.count[0]; + outsideCount = red_val.count[1]; IpplTimings::stopTimer(neighborSearch); - /// Step 4 - static IpplTimings::TimerRef nonNeighboringParticles = IpplTimings::getTimer("nonNeighboringParticles"); + /// Step 4 + static IpplTimings::TimerRef nonNeighboringParticles = + IpplTimings::getTimer("nonNeighboringParticles"); IpplTimings::startTimer(nonNeighboringParticles); if (outsideCount > 0) { Kokkos::parallel_for( @@ -396,37 +391,35 @@ namespace ippl { KOKKOS_LAMBDA(const size_t i, const size_type j) { /// pID: (local) ID of the particle that is currently being searched. size_type pId = outsideIds(i); - + /// inRegion: Checks whether particle pID is inside region j. bool inRegion = positionInRegion(is, positions(pId), Regions(j)); - if(inRegion){ + if (inRegion) { ranks(pId) = j; - } + } }); Kokkos::fence(); } IpplTimings::stopTimer(nonNeighboringParticles); - - Kokkos::parallel_for("Calculate nSends", - Kokkos::RangePolicy(0, ranks.extent(0)), - KOKKOS_LAMBDA(const size_t i){ - size_type rank = ranks(i); - Kokkos::atomic_fetch_add(&nSends_dview(rank),1); - } - ); - - // Number of Ranks we need to send to + + Kokkos::parallel_for( + "Calculate nSends", Kokkos::RangePolicy(0, ranks.extent(0)), + KOKKOS_LAMBDA(const size_t i) { + size_type rank = ranks(i); + Kokkos::atomic_fetch_add(&nSends_dview(rank), 1); + }); + + // Number of Ranks we need to send to Kokkos::View rankSends("Number of Ranks we need to send to"); - - Kokkos::parallel_for("Calculate sends", - Kokkos::RangePolicy(0, nSends_dview.extent(0)), - KOKKOS_LAMBDA(const size_t rank){ - if(nSends_dview(rank) != 0){ - size_type index = Kokkos::atomic_fetch_add(&rankSends(), 1); - sends_dview(index) = rank; - } - } - ); + + Kokkos::parallel_for( + "Calculate sends", Kokkos::RangePolicy(0, nSends_dview.extent(0)), + KOKKOS_LAMBDA(const size_t rank) { + if (nSends_dview(rank) != 0) { + size_type index = Kokkos::atomic_fetch_add(&rankSends(), 1); + sends_dview(index) = rank; + } + }); size_type temp; Kokkos::deep_copy(temp, rankSends); @@ -454,7 +447,6 @@ namespace ippl { } }); Kokkos::fence(); - } template diff --git a/src/Utility/IpplTimings.cpp b/src/Utility/IpplTimings.cpp index 2648fe50f..8d75823fc 100644 --- a/src/Utility/IpplTimings.cpp +++ b/src/Utility/IpplTimings.cpp @@ -35,20 +35,22 @@ #ifdef IPPL_ENABLE_NSYS_PROFILER #include "nvtx3/nvToolsExt.h" -const uint32_t colors[] = { 0xff00ff00, 0xff0000ff, 0xffffff00, 0xffff00ff, 0xff00ffff, 0xffff0000, 0xffffffff }; -const int num_colors = sizeof(colors)/sizeof(uint32_t); -#define PUSH_RANGE(name,cid) { \ - int color_id = cid; \ - color_id = color_id%num_colors;\ - nvtxEventAttributes_t eventAttrib = {0}; \ - eventAttrib.version = NVTX_VERSION; \ - eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; \ - eventAttrib.colorType = NVTX_COLOR_ARGB; \ - eventAttrib.color = colors[color_id]; \ - eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII; \ - eventAttrib.message.ascii = name; \ - nvtxRangePushEx(&eventAttrib); \ -} +const uint32_t colors[] = {0xff00ff00, 0xff0000ff, 0xffffff00, 0xffff00ff, + 0xff00ffff, 0xffff0000, 0xffffffff}; +const int num_colors = sizeof(colors) / sizeof(uint32_t); +#define PUSH_RANGE(name, cid) \ + { \ + int color_id = cid; \ + color_id = color_id % num_colors; \ + nvtxEventAttributes_t eventAttrib = {0}; \ + eventAttrib.version = NVTX_VERSION; \ + eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; \ + eventAttrib.colorType = NVTX_COLOR_ARGB; \ + eventAttrib.color = colors[color_id]; \ + eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII; \ + eventAttrib.message.ascii = name; \ + nvtxRangePushEx(&eventAttrib); \ + } #endif Timing* IpplTimings::instance = new Timing(); @@ -88,9 +90,9 @@ Timing::TimerRef Timing::getTimer(const char* nm) { void Timing::startTimer(TimerRef t) { if (t >= TimerList.size()) return; - #ifdef IPPL_ENABLE_NSYS_PROFILER +#ifdef IPPL_ENABLE_NSYS_PROFILER PUSH_RANGE(TimerList[t]->name.c_str(), (int)t); - #endif +#endif TimerList[t]->start(); } @@ -98,9 +100,9 @@ void Timing::startTimer(TimerRef t) { void Timing::stopTimer(TimerRef t) { if (t >= TimerList.size()) return; - #ifdef IPPL_ENABLE_NSYS_PROFILER +#ifdef IPPL_ENABLE_NSYS_PROFILER nvtxRangePop(); - #endif +#endif TimerList[t]->stop(); } @@ -111,6 +113,12 @@ void Timing::clearTimer(TimerRef t) { TimerList[t]->clear(); } +const std::string& Timing::timerName(TimerRef t) { + if (t >= TimerList.size()) + return EmptyName; + return TimerList[t]->name; +} + // print out the timing results void Timing::print() { if (TimerList.size() < 1) diff --git a/src/Utility/IpplTimings.h b/src/Utility/IpplTimings.h index aea1332cd..e2ea93c5b 100644 --- a/src/Utility/IpplTimings.h +++ b/src/Utility/IpplTimings.h @@ -31,6 +31,7 @@ #include #include +#include "Utility/Logging.h" #include "Utility/PAssert.h" #include "Utility/Timer.h" #include "Utility/my_auto_ptr.h" @@ -117,6 +118,9 @@ struct Timing { // clear a timer, by turning it off and throwing away its time void clearTimer(TimerRef); + // access the timer's name + const std::string& timerName(TimerRef); + // return a TimerInfo struct by asking for the name TimerInfo* infoTimer(const char* nm) { return TimerMap[std::string(nm)]; } @@ -136,6 +140,8 @@ struct Timing { // a map of timers, keyed by string TimerMap_t TimerMap; + + std::string EmptyName; }; class IpplTimings { @@ -150,10 +156,16 @@ class IpplTimings { static TimerRef getTimer(const char* nm) { return instance->getTimer(nm); } // start a timer - static void startTimer(TimerRef t) { instance->startTimer(t); } + static void startTimer(TimerRef t) { + SPDLOG_TRACE("Starting timer [{}]", instance->timerName(t)); + instance->startTimer(t); + } // stop a timer, and accumulate it's values - static void stopTimer(TimerRef t) { instance->stopTimer(t); } + static void stopTimer(TimerRef t) { + SPDLOG_TRACE("Stopping timer [{}]", instance->timerName(t)); + instance->stopTimer(t); + } // clear a timer, by turning it off and throwing away its time static void clearTimer(TimerRef t) { instance->clearTimer(t); } diff --git a/src/Utility/Logging.h b/src/Utility/Logging.h new file mode 100644 index 000000000..99bc2d08b --- /dev/null +++ b/src/Utility/Logging.h @@ -0,0 +1,39 @@ +#pragma once + +#if defined(IPPL_LOGGING_ENABLED) + +#include +#include +#include +#include +#include +#include "Utility/PrintType.h" + +template +struct scoped_var { + // capture tuple elements by reference - no temp vars in constructor please + std::tuple const message_; + // + explicit scoped_var(Args const&... args) + : message_(args...) // + { + SPDLOG_CRITICAL("SCOPE >> enter << {}", message_); + } + + ~scoped_var() { SPDLOG_CRITICAL("SCOPE << leave >> {}", message_); } +}; +#define SPDLOG_SCOPE(...) scoped_var scope(__VA_ARGS__); + +#else + +// In increasing level +#define SPDLOG_TRACE(...) +#define SPDLOG_DEBUG(...) +#define SPDLOG_INFO(...) +#define SPDLOG_WARN(...) +#define SPDLOG_ERROR(...) +#define SPDLOG_CRITICAL(...) +// +#define SPDLOG_SCOPE(...) + +#endif diff --git a/src/Utility/PrintType.h b/src/Utility/PrintType.h new file mode 100644 index 000000000..c250c3d8a --- /dev/null +++ b/src/Utility/PrintType.h @@ -0,0 +1,70 @@ +#pragma once + +#include +#include +#include +#include +#include + +// gcc and clang both provide this heaader +#if __has_include() +#include +using cxxabi_supported__ = std::true_type; +#else +using cxxabi_supported__ = std::false_type; +// create some dummmy function to make the compiler happy in the true_type instantiation +namespace abi { + template + char* __cxa_demangle(Ts... ts) { + return nullptr; + } +} // namespace abi +#endif + +// -------------------------------------------------------------------- +namespace ippl::debug::detail { + // default : use built-in typeid to get the best info we can + template + struct demangle_helper { + char const* type_id() const { return typeid(T).name(); } + }; + + // if available : demangle an arbitrary c++ type using gnu utility + template + struct demangle_helper { + demangle_helper() + : demangled_{abi::__cxa_demangle(typeid(T).name(), nullptr, nullptr, nullptr), + std::free} {} + + char const* type_id() const { return demangled_ ? demangled_.get() : typeid(T).name(); } + + private: + std::unique_ptr demangled_; + }; + + template + using cxx_type_id = demangle_helper; +} // namespace ippl::debug::detail + +// -------------------------------------------------------------------- +// print type information +// usage : std::cout << debug::print_type("separator") +// separator is appended if the number of types > 1 +// -------------------------------------------------------------------- +namespace ippl::debug { + template // print a single type + inline std::string print_type(char const* = "") { + return std::string(detail::cxx_type_id().type_id()); + } + + template <> // fallback for an empty type + inline std::string print_type<>(char const*) { + return "<>"; + } + + template // print a list of types + inline std::enable_if_t print_type(char const* delim = "") { + std::string temp(print_type()); + return temp + delim + print_type(delim); + } +} // namespace ippl::debug diff --git a/unit_tests/Communicate/BufferHandler.cpp b/unit_tests/Communicate/BufferHandler.cpp index eef387da9..15b1b6bf2 100644 --- a/unit_tests/Communicate/BufferHandler.cpp +++ b/unit_tests/Communicate/BufferHandler.cpp @@ -14,10 +14,10 @@ class TypedBufferHandlerTest : public ::testing::Test { protected: using memory_space = MemorySpace; - class TestableBufferHandler : public ippl::DefaultBufferHandler { + class TestableBufferHandler : public ippl::comms::DefaultBufferHandler { public: - using ippl::DefaultBufferHandler::deleteAllBuffers; - using ippl::DefaultBufferHandler::freeAllBuffers; + using ippl::comms::DefaultBufferHandler::deleteAllBuffers; + using ippl::comms::DefaultBufferHandler::freeAllBuffers; size_t usedBuffersSize() const { return this->used_buffers.size(); } @@ -109,9 +109,9 @@ TYPED_TEST(TypedBufferHandlerTest, GetBuffer_ExactSizeMatch) { // Test: Freeing a buffer that does not exist in the used pool has no effect TYPED_TEST(TypedBufferHandlerTest, FreeNonExistentBuffer) { - auto buffer = this->handler->getBuffer(100, 1.0); - auto newBuffer = - std::make_shared>(200); + using archive_type = ippl::comms::archive_buffer; + auto buffer = this->handler->getBuffer(100, 1.0); + auto newBuffer = std::make_shared(200); this->handler->freeBuffer(newBuffer); EXPECT_EQ(this->handler->usedBuffersSize(), 1); @@ -182,7 +182,8 @@ TYPED_TEST(TypedBufferHandlerTest, GetAllocatedAndFreeSize_AfterDeleteAllBuffers EXPECT_EQ(this->handler->getFreeSize(), 0); } -// Test: Buffer size is correctly accounted for if a free buffer is available but we request a larger one, thus reallocating this one +// Test: Buffer size is correctly accounted for if a free buffer is available but we request a +// larger one, thus reallocating this one TYPED_TEST(TypedBufferHandlerTest, GetAllocatedAndFreeSize_ResizeBufferLargerThanAvailable) { auto smallBuffer = this->handler->getBuffer(50, 1.0); this->handler->freeBuffer(smallBuffer); diff --git a/unit_tests/Communicate/LoggingBufferHandler.cpp b/unit_tests/Communicate/LoggingBufferHandler.cpp index a0db38366..8a1d9f38e 100644 --- a/unit_tests/Communicate/LoggingBufferHandler.cpp +++ b/unit_tests/Communicate/LoggingBufferHandler.cpp @@ -12,14 +12,14 @@ class TypedLoggingBufferHandlerTest : public ::testing::Test { protected: void SetUp() override { rank = 0; - this->bufferHandler = std::make_shared>(); + this->bufferHandler = std::make_shared>(); this->loggingHandler = - std::make_shared>(bufferHandler, rank); + std::make_shared>(bufferHandler, rank); } int rank; - std::shared_ptr> bufferHandler; - std::shared_ptr> loggingHandler; + std::shared_ptr> bufferHandler; + std::shared_ptr> loggingHandler; }; TYPED_TEST_SUITE(TypedLoggingBufferHandlerTest, MemorySpaces);