From f1d1a6e5bece475ccc5007fdb351785e25717454 Mon Sep 17 00:00:00 2001 From: Andrew D Smith Date: Mon, 22 Dec 2025 14:01:39 -0800 Subject: [PATCH] src/analysis/nanopore.cpp: adding a more verbose error message when the modifications are not as expected --- src/analysis/nanopore.cpp | 37 ++++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/src/analysis/nanopore.cpp b/src/analysis/nanopore.cpp index ad693c53..0027e74a 100644 --- a/src/analysis/nanopore.cpp +++ b/src/analysis/nanopore.cpp @@ -313,6 +313,11 @@ struct mod_prob_buffer { bam_parse_basemod(aln.b, m.get()); // ADS: or bam_parse_basemod2(aln.b, m, HTS_MOD_REPORT_UNCHECKED) + int n_types{}; + const auto types = bam_mods_recorded(m.get(), &n_types); + if (n_types < 2 || (types[h_idx] != 'h' && types[m_idx] != 'm')) + return false; + int pos{}; int n{}; while ((n = bam_next_basemod(aln.b, m.get(), d, max_mods, &pos)) > 0) { @@ -502,7 +507,7 @@ struct mod_prob_stats { mod_prob_stats() : m{hts_base_mod_state_alloc(), &hts_base_mod_state_free} {}; - [[nodiscard]] auto + auto operator()(const bamxx::bam_rec &aln) { static constexpr auto h_idx = 0; static constexpr auto m_idx = 1; @@ -514,6 +519,11 @@ struct mod_prob_stats { bam_parse_basemod(aln.b, m.get()); // ADS: or bam_parse_basemod2(aln.b, m, HTS_MOD_REPORT_UNCHECKED) + int n_types{}; + const auto types = bam_mods_recorded(m.get(), &n_types); + if (n_types < 2 || (types[h_idx] != 'h' && types[m_idx] != 'm')) + return; + int pos{}; int n{}; while ((n = bam_next_basemod(aln.b, m.get(), d, max_mods, &pos)) > 0) { @@ -907,7 +917,8 @@ struct read_processor { [[nodiscard]] static auto valid_modification_types(const std::string &infile, - const std::uint32_t n_reads_to_check) -> bool { + const std::uint32_t n_reads_to_check) + -> std::pair { using mstate = hts_base_mod_state; std::unique_ptr m(hts_base_mod_state_alloc(), &hts_base_mod_state_free); @@ -918,6 +929,8 @@ valid_modification_types(const std::string &infile, if (!hdr) throw std::runtime_error("failed to read header"); + std::string message; + std::uint32_t read_count{}; bool valid_types{true}; bamxx::bam_rec aln; @@ -929,9 +942,17 @@ valid_modification_types(const std::string &infile, // ADS: or bam_parse_basemod2(aln.b, m, HTS_MOD_REPORT_UNCHECKED) int n_types{}; const auto types = bam_mods_recorded(m.get(), &n_types); - valid_types = n_types >= 2 && types[0] == 'h' && types[1] == 'm'; + valid_types = (n_types == 1 && types[0] == 'C') || + (n_types >= 2 && types[0] == 'h' && types[1] == 'm'); + if (!valid_types) { + message = "n_types: " + std::to_string(n_types) + "\n"; + for (auto i = 0; i < n_types; ++i) { + message += "type[" + std::to_string(i) + + "]=" + std::to_string(static_cast(types[i])) + "\n"; + } + } } - return valid_types; + return std::make_pair(valid_types, message); } [[nodiscard]] static auto @@ -1053,9 +1074,11 @@ main_nanocount(int argc, char *argv[]) { // NOLINT(*-avoid-c-arrays) std::ostringstream cmd; std::copy(argv, argv + argc, std::ostream_iterator(cmd, " ")); - if (!valid_modification_types(mapped_reads_file, n_reads_to_check)) { - std::cerr << "modification types are not valid\n" - << "expected h=0 and m=1\n"; + const auto [is_valid, message] = + valid_modification_types(mapped_reads_file, n_reads_to_check); + if (!is_valid) { + std::cerr << "modification types are not valid. Violation:\n" + << message << '\n'; return EXIT_FAILURE; }