diff --git a/CMakeLists.txt b/CMakeLists.txt index 21951fd7ea1..8b3c611e306 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -237,12 +237,14 @@ if (LBANN_WITH_DISTCONV) find_package(DiHydrogen 0.3.0 CONFIG REQUIRED COMPONENTS Meta Patterns DistConv) set(LBANN_HAS_DISTCONV TRUE) set(LBANN_H2_LIBS + H2::H2Core H2::H2Meta H2::H2Patterns H2::H2DistConv) else () find_package(DiHydrogen CONFIG REQUIRED COMPONENTS Meta Patterns) set(LBANN_H2_LIBS + H2::H2Core H2::H2Meta H2::H2Patterns) endif () @@ -660,6 +662,7 @@ target_link_libraries(lbann PUBLIC ${CLARA_LIBRARIES} ${LBANN_PYTHON_LIBS} protobuf::libprotobuf + spdlog::spdlog ${CEREAL_LIBRARIES} ZSTR::ZSTR) diff --git a/include/lbann/lbann.hpp b/include/lbann/lbann.hpp index 09201768733..e1a65b4b84c 100644 --- a/include/lbann/lbann.hpp +++ b/include/lbann/lbann.hpp @@ -231,5 +231,6 @@ #include "lbann/utils/stack_profiler.hpp" #include "lbann/utils/stack_trace.hpp" #include "lbann/utils/summary.hpp" +#include "lbann/utils/logging.hpp" #endif // LBANN_LBANN_HPP_INCLUDED diff --git a/include/lbann/utils/CMakeLists.txt b/include/lbann/utils/CMakeLists.txt index 9d6dd9fa10b..309462983d5 100644 --- a/include/lbann/utils/CMakeLists.txt +++ b/include/lbann/utils/CMakeLists.txt @@ -52,6 +52,7 @@ set_full_path(THIS_DIR_HEADERS im2col.hpp jag_utils.hpp lbann_library.hpp + logging.hpp make_abstract.hpp memory.hpp mild_exception.hpp diff --git a/include/lbann/utils/exception.hpp b/include/lbann/utils/exception.hpp index 7671ec04931..537292ec934 100644 --- a/include/lbann/utils/exception.hpp +++ b/include/lbann/utils/exception.hpp @@ -28,6 +28,7 @@ #define LBANN_UTILS_EXCEPTION_HPP_INCLUDED #include "lbann/comm.hpp" +#include "lbann/utils/logging.hpp" #include #include @@ -50,22 +51,7 @@ } while (0) // Macro to print a warning to standard error stream. -#define LBANN_WARNING(...) \ - do { \ - const int rank_LBANN_WARNING = lbann::get_rank_in_world(); \ - std::cerr << lbann::build_string( \ - "LBANN warning", \ - (rank_LBANN_WARNING >= 0 \ - ? " on rank " + std::to_string(rank_LBANN_WARNING) \ - : std::string()), \ - " (", \ - __FILE__, \ - ":", \ - __LINE__, \ - "): ", \ - __VA_ARGS__) \ - << std::endl; \ - } while (0) +#define LBANN_WARNING(...) LBANN_WARN(lbann::logging::LBANN_Logger_ID::LOG_RT, __VA_ARGS__) // Macro to print a message to standard cout stream. #define LBANN_MSG(...) \ diff --git a/include/lbann/utils/logging.hpp b/include/lbann/utils/logging.hpp new file mode 100644 index 00000000000..9910cd1397b --- /dev/null +++ b/include/lbann/utils/logging.hpp @@ -0,0 +1,115 @@ +//////////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2014-2023, Lawrence Livermore National Security, LLC. +// Produced at the Lawrence Livermore National Laboratory. +// Written by the LBANN Research Team (B. Van Essen, et al.) listed in +// the CONTRIBUTORS file. +// +// LLNL-CODE-697807. +// All rights reserved. +// +// This file is part of LBANN: Livermore Big Artificial Neural Network +// Toolkit. For details, see http://software.llnl.gov/LBANN or +// https://github.com/LLNL/LBANN. +// +// Licensed under the Apache License, Version 2.0 (the "Licensee"); you +// may not use this file except in compliance with the License. You may +// obtain a copy of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +// implied. See the License for the specific language governing +// permissions and limitations under the license. +//////////////////////////////////////////////////////////////////////////////// + +#ifndef LBANN_LOGGING_HPP_INCLUDED +#define LBANN_LOGGING_HPP_INCLUDED + +#include

+ +#include +#include +#include + +namespace lbann { +namespace logging { + +// Better than using raw strings +enum LBANN_Logger_ID +{ + LOG_RT, + LOG_IO, + LOG_TRAIN, +}; + +// +void setup_loggers(); + +// Raw string may be useful for debugging +char const* logger_id_str(LBANN_Logger_ID id); + +// Access the actual logger object +h2::Logger& get(LBANN_Logger_ID id); + +}// namespace logging +}// namespace lbann + +// #defines can go here. Make sure they can go anywhere: +#define LBANN_LOG(logger_id, level, ...) \ + do { \ + auto& lbann_log_logger = ::lbann::logging::get(logger_id); \ + if (lbann_log_logger.should_log(level)) { \ + lbann_log_logger.get().log(::spdlog::source_loc{__FILE__, __LINE__, SPDLOG_FUNCTION}, ::h2::to_spdlog_level(level), __VA_ARGS__); \ + } \ + } while (0) + +#define LBANN_TRACE(logger_id, ...) LBANN_LOG(logger_id, ::h2::Logger::LogLevelType::TRACE, __VA_ARGS__) +#define LBANN_DEBUG(logger_id, ...) LBANN_LOG(logger_id, ::h2::Logger::LogLevelType::DEBUG, __VA_ARGS__) +#define LBANN_INFO(logger_id, ...) LBANN_LOG(logger_id, ::h2::Logger::LogLevelType::INFO, __VA_ARGS__) +#define LBANN_WARN(logger_id, ...) LBANN_LOG(logger_id, ::h2::Logger::LogLevelType::WARN, __VA_ARGS__) +#define LBANN_ERR(logger_id, ...) LBANN_LOG(logger_id, ::h2::Logger::LogLevelType::ERROR, __VA_ARGS__) +#define LBANN_CRIT(logger_id, ...) LBANN_LOG(logger_id, ::h2::Logger::LogLevelType::CRITICAL, __VA_ARGS__) + +// Run time +#define LBANN_RT_TRACE(...) LBANN_TRACE(::lbann::logging::LBANN_Logger_ID::LOG_RT, __VA_ARGS__) + +#define LBANN_RT_DEBUG(...) LBANN_DEBUG(::lbann::logging::LBANN_Logger_ID::LOG_RT, __VA_ARGS__) + +#define LBANN_RT_INFO(...) LBANN_INFO(::lbann::logging::LBANN_Logger_ID::LOG_RT, __VA_ARGS__) + +#define LBANN_RT_WARN(...) LBANN_WARN(::lbann::logging::LBANN_Logger_ID::LOG_RT, __VA_ARGS__) + +#define LBANN_RT_ERR(...) LBANN_ERR(::lbann::logging::LBANN_Logger_ID::LOG_RT, __VA_ARGS__) + +#define LBANN_RT_CRIT(...) LBANN_CRIT(::lbann::logging::LBANN_Logger_ID::LOG_RT, __VA_ARGS__) + +// IO +#define LBANN_IO_TRACE(...) LBANN_TRACE(::lbann::logging::LBANN_Logger_ID::LOG_IO, __VA_ARGS__) + +#define LBANN_IO_DEBUG(...) LBANN_DEBUG(::lbann::logging::LBANN_Logger_ID::LOG_IO, __VA_ARGS__) + +#define LBANN_IO_INFO(...) LBANN_INFO(::lbann::logging::LBANN_Logger_ID::LOG_IO, __VA_ARGS__) + +#define LBANN_IO_WARN(...) LBANN_WARN(::lbann::logging::LBANN_Logger_ID::LOG_IO, __VA_ARGS__) + +#define LBANN_IO_ERR(...) LBANN_ERR(::lbann::logging::LBANN_Logger_ID::LOG_IO, __VA_ARGS__) + +#define LBANN_IO_CRIT(...) LBANN_CRIT(::lbann::logging::LBANN_Logger_ID::LOG_IO, __VA_ARGS__) + +// Training +#define LBANN_TRAIN_TRACE(...) LBANN_TRACE(::lbann::logging::LBANN_Logger_ID::LOG_TRAIN, __VA_ARGS__) + +#define LBANN_TRAIN_DEBUG(...) LBANN_DEBUG(::lbann::logging::LBANN_Logger_ID::LOG_TRAIN, __VA_ARGS__) + +#define LBANN_TRAIN_INFO(...) LBANN_INFO(::lbann::logging::LBANN_Logger_ID::LOG_TRAIN, __VA_ARGS__) + +#define LBANN_TRAIN_WARN(...) LBANN_WARN(::lbann::logging::LBANN_Logger_ID::LOG_TRAIN, __VA_ARGS__) + +#define LBANN_TRAIN_ERR(...) LBANN_ERR(::lbann::logging::LBANN_Logger_ID::LOG_TRAIN, __VA_ARGS__) + +#define LBANN_TRAIN_CRIT(...) LBANN_CRIT(::lbann::logging::LBANN_Logger_ID::LOG_TRAIN, __VA_ARGS__) + + +#endif // LBANN_LOGGING_HPP_INCLUDED diff --git a/src/base.cpp b/src/base.cpp index 5e1b851c9b6..3f1c2b79add 100644 --- a/src/base.cpp +++ b/src/base.cpp @@ -45,6 +45,7 @@ #include "lbann/utils/omp_diagnostics.hpp" #include "lbann/utils/options.hpp" #include "lbann/utils/stack_trace.hpp" +#include "lbann/utils/logging.hpp" #ifdef LBANN_HAS_DNN_LIB #include "lbann/utils/dnn_lib/helpers.hpp" @@ -85,7 +86,6 @@ lbann_comm& get_current_comm() noexcept { return *world_comm_; } auto lbann::initialize_lbann(El::mpi::Comm&& c) -> std::unique_ptr { - // Parse command-line arguments and environment variables auto& arg_parser = global_argument_parser(); (void)arg_parser; @@ -191,6 +191,9 @@ void lbann::finalize_lbann(lbann_comm* comm) auto lbann::initialize(int& argc, char**& argv) -> world_comm_ptr { + //FIXME(KLG): Can this go here? + logging::setup_loggers(); + // Parse command-line arguments and environment variables auto& arg_parser = global_argument_parser(); (void)arg_parser; diff --git a/src/utils/CMakeLists.txt b/src/utils/CMakeLists.txt index 8ae72501bcd..05557d5a33c 100644 --- a/src/utils/CMakeLists.txt +++ b/src/utils/CMakeLists.txt @@ -37,6 +37,7 @@ set_full_path(THIS_DIR_SOURCES im2col.cpp jag_common.cpp lbann_library.cpp + logging.cpp miopen.cpp number_theory.cpp omp_diagnostics.cpp diff --git a/src/utils/logging.cpp b/src/utils/logging.cpp new file mode 100644 index 00000000000..4d80e592da5 --- /dev/null +++ b/src/utils/logging.cpp @@ -0,0 +1,79 @@ +//////////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2014-2023, Lawrence Livermore National Security, LLC. +// Produced at the Lawrence Livermore National Laboratory. +// Written by the LBANN Research Team (B. Van Essen, et al.) listed in +// the CONTRIBUTORS file. +// +// LLNL-CODE-697807. +// All rights reserved. +// +// This file is part of LBANN: Livermore Big Artificial Neural Network +// Toolkit. For details, see http://software.llnl.gov/LBANN or +// https://github.com/LLNL/LBANN. +// +// Licensed under the Apache License, Version 2.0 (the "Licensee"); you +// may not use this file except in compliance with the License. You may +// obtain a copy of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +// implied. See the License for the specific language governing +// permissions and limitations under the license. +//////////////////////////////////////////////////////////////////////////////// + +#include +#include "lbann/utils/exception.hpp" +#include

+ +#include +#include +#include + +namespace lbann { +namespace logging { + +static h2::Logger io_logger("IO"); +static h2::Logger rt_logger("RT"); +static h2::Logger train_logger("TRAIN"); +static std::vector logger_vec; + +void setup_loggers() +{ + logger_vec.insert(logger_vec.end(), { + &io_logger, &rt_logger, &train_logger }); + h2::setup_levels(logger_vec, "LBANN_LOG_LEVEL"); +} + +char const* logger_id_str(LBANN_Logger_ID id) +{ + switch (id) { + case LBANN_Logger_ID::LOG_RT: + return "LOG_RT"; + case LBANN_Logger_ID::LOG_IO: + return "LOG_IO"; + case LBANN_Logger_ID::LOG_TRAIN: + return "LOG_TRAIN"; + default: + throw lbann_exception("Unknown LBANN_Logger_ID"); + } +} + +h2::Logger& get(LBANN_Logger_ID id) +{ + switch (id) { + case LBANN_Logger_ID::LOG_RT: + return rt_logger; + case LBANN_Logger_ID::LOG_IO: + return io_logger; + case LBANN_Logger_ID::LOG_TRAIN: + return train_logger; + default: + throw lbann_exception("Unknown LBANN_Logger_ID"); + } +} + +}// namespace logging +}// namespace lbann