From f9f99968ac4f120de58699908f46aae6ac9a3594 Mon Sep 17 00:00:00 2001 From: dmonti Date: Mon, 26 Jan 2026 09:50:11 -0800 Subject: [PATCH 01/20] added recombination info to output --- src/algorithms/chain_items.cpp | 116 +++++++++++++++++++++------ src/algorithms/chain_items.hpp | 20 ++++- src/minimizer_mapper_from_chains.cpp | 30 ++++--- 3 files changed, 131 insertions(+), 35 deletions(-) diff --git a/src/algorithms/chain_items.cpp b/src/algorithms/chain_items.cpp index c31839910e..8306d56abd 100644 --- a/src/algorithms/chain_items.cpp +++ b/src/algorithms/chain_items.cpp @@ -48,26 +48,32 @@ void TracedScore::max_in(const vector& options, size_t option_numbe // This is the new winner. this->score = option.score; this->source = option_number; + this->paths = option.paths; + this->rec_num = option.rec_num; } } TracedScore TracedScore::score_from(const vector& options, size_t option_number) { TracedScore got = options[option_number]; got.source = option_number; + got.paths = options[option_number].paths; + got.rec_num = options[option_number].rec_num; return got; } TracedScore TracedScore::add_points(int adjustment) const { - return {this->score + adjustment, this->source, this->paths}; + return {this->score + adjustment, this->source, this->paths, this->rec_num}; } TracedScore TracedScore::set_shared_paths(const std::pair& new_paths) const { size_t updated_paths; + size_t rec_num = this->rec_num; if(new_paths.first == new_paths.second) { // if the paths are the same, there is no recombination inside the anchor. check if there is a recombination between anchors now if ((this->paths & new_paths.first) == 0) { // there is a recombination between anchors, so we "reset" the current paths updated_paths = new_paths.first; + rec_num++; } else { // there is no recombination between anchors, so we update the current paths updated_paths = this->paths & new_paths.first; @@ -79,7 +85,8 @@ TracedScore TracedScore::set_shared_paths(const std::pair& new_pa return { this->score, this->source, - updated_paths + updated_paths, + rec_num }; } @@ -783,7 +790,10 @@ vector, int>> chain_items_traceback(const vector(chain_scores[here].paths) << std::endl; + //std::cerr << "\t\tanchor #" << here << ": " << to_chain[here] << std::endl; // Mark here as used. Happens once per item, and so limits runtime. item_is_used[here] = true; size_t next = chain_scores[here].source; @@ -804,6 +814,7 @@ vector, int>> chain_items_traceback(const vector, int>> chain_items_traceback(const vector>> find_best_chains(const VectorView& to_chain, +ChainsResult find_best_chains(const VectorView& to_chain, const SnarlDistanceIndex& distance_index, const HandleGraph& graph, int gap_open, @@ -840,9 +851,14 @@ vector>> find_best_chains(const VectorView& to_ double points_per_possible_match, size_t max_indel_bases, bool show_work) { - + + ChainsResult result; if (to_chain.empty()) { - return {{0, vector()}}; + ChainWithRec empty_entry; + empty_entry.scored_chain = {0, vector()}; + empty_entry.rec_positions = {}; + result.chains.emplace_back(std::move(empty_entry)); + return result; } // We actually need to do DP @@ -861,24 +877,71 @@ vector>> find_best_chains(const VectorView& to_ max_indel_bases, recomb_penalty, show_work); + //std::cerr << "[REC INFO] Recombination number for chain: " << best_past_ending_score_ever.rec_num << "\tscore: " << best_past_ending_score_ever.score << "\tpaths: " << best_past_ending_score_ever.paths << std::endl; // Then do the tracebacks vector, int>> tracebacks = chain_items_traceback( chain_scores, to_chain, best_past_ending_score_ever, item_bonus, item_scale, max_chains); if (tracebacks.empty()) { // Somehow we got nothing - return {{0, vector()}}; + ChainWithRec empty_entry; + empty_entry.scored_chain = {0, vector()}; + empty_entry.rec_positions = {}; + result.chains.emplace_back(std::move(empty_entry)); + return result; } // Convert from traceback and penalty to score and traceback. // Everything is already sorted. - vector>> to_return; - to_return.reserve(tracebacks.size()); + result.chains.reserve(tracebacks.size()); for (auto& traceback : tracebacks) { // Move over the list of items and convert penalty to score - to_return.emplace_back(best_past_ending_score_ever.score - traceback.second, std::move(traceback.first)); + int score = best_past_ending_score_ever.score - traceback.second; + std::vector chain_indexes = std::move(traceback.first); + + // Compute the anchor indices in this chain that introduce an + // inter-anchor recombination event. We simulate the path-bit + // propagation along the chain using the same logic as + // TracedScore::set_shared_paths (but without modifying the state). + std::vector rec_positions; + if (!chain_indexes.empty()) { + // Start with the endpoint paths of the first anchor. + size_t first_idx = chain_indexes.front(); + path_flags_t current_paths = to_chain[first_idx].anchor_end_paths(); + + // Walk the chain from the second anchor onward and apply the + // same recombination-detection rules used in set_shared_paths. + for (size_t k = 1; k < chain_indexes.size(); ++k) { + size_t anchor_idx = chain_indexes[k]; + auto new_paths = to_chain[anchor_idx].anchor_paths(); + // If the anchor's start and end paths are equal, it's not an + // internally recombinant anchor; check inter-anchor overlap. + if (new_paths.first == new_paths.second) { + if ((current_paths & new_paths.first) == 0) { + // No overlap -> inter-anchor recombination occurred here. + rec_positions.push_back(anchor_idx); + // Reset current paths to the anchor's start paths. + current_paths = new_paths.first; + } else { + // Intersect supported paths and continue. + current_paths &= new_paths.first; + } + } else { + // Recombinant anchor: do not count as inter-anchor + // recombination per original logic; reset paths to the + // anchor's end paths. + // TODO: since no fragmenting, probably unnecessary to track + current_paths = new_paths.second; + } + } + } + + ChainWithRec entry; + entry.scored_chain = {score, std::move(chain_indexes)}; + entry.rec_positions = std::move(rec_positions); + result.chains.emplace_back(std::move(entry)); } - return to_return; + return result; } pair> find_best_chain(const VectorView& to_chain, @@ -894,21 +957,24 @@ pair> find_best_chain(const VectorView& to_chain, double points_per_possible_match, size_t max_indel_bases) { - return find_best_chains( - to_chain, - distance_index, - graph, - gap_open, - gap_extension, - recomb_penalty, - 1, - for_each_transition, - item_bonus, - item_scale, - gap_scale, - points_per_possible_match, - max_indel_bases - ).front(); + { + ChainsResult cr = find_best_chains( + to_chain, + distance_index, + graph, + gap_open, + gap_extension, + recomb_penalty, + 1, + for_each_transition, + item_bonus, + item_scale, + gap_scale, + points_per_possible_match, + max_indel_bases + ); + return cr.chains.front().scored_chain; + } } int score_best_chain(const VectorView& to_chain, const SnarlDistanceIndex& distance_index, diff --git a/src/algorithms/chain_items.hpp b/src/algorithms/chain_items.hpp index be46d3d1a2..9ac7b792e0 100644 --- a/src/algorithms/chain_items.hpp +++ b/src/algorithms/chain_items.hpp @@ -350,6 +350,7 @@ class TracedScore { size_t source; /// Supported paths path_flags_t paths; + size_t rec_num=0; }; } @@ -396,6 +397,23 @@ struct transition_info { : from_anchor(from), to_anchor(to), graph_distance(graph_dist), read_distance(read_dist) {} }; +/// A single chain result: scored chain plus the recombination count observed +/// on its endpoint. +struct ChainWithRec { + std::pair> scored_chain; + // Positions (anchor indices) in the chain that introduce a recombination + // event between anchors. These correspond to anchors where we had to + // reset supported paths because the previous path set did not overlap + // with the next anchor's start paths. + std::vector rec_positions; +}; + +/// Result of finding best chains: a list of chains each paired with the +/// recombination count observed at that chain's endpoint. +struct ChainsResult { + std::vector chains; +}; + /** * Iteratee function type which can be called with each transition between * anchors. @@ -538,7 +556,7 @@ vector, int>> chain_items_traceback(const vector>> find_best_chains(const VectorView& to_chain, +ChainsResult find_best_chains(const VectorView& to_chain, const SnarlDistanceIndex& distance_index, const HandleGraph& graph, int gap_open, diff --git a/src/minimizer_mapper_from_chains.cpp b/src/minimizer_mapper_from_chains.cpp index b8509e8959..e85bf64fac 100644 --- a/src/minimizer_mapper_from_chains.cpp +++ b/src/minimizer_mapper_from_chains.cpp @@ -1433,7 +1433,7 @@ void MinimizerMapper::do_chaining_on_trees(Alignment& aln, const ZipCodeForest& VectorView anchor_view {anchors_to_chain, anchor_indexes}; // This will hold our chaining results - std::vector>> results; + algorithms::ChainsResult chain_results; if (show_work) { #pragma omp critical (cerr) @@ -1461,7 +1461,7 @@ void MinimizerMapper::do_chaining_on_trees(Alignment& aln, const ZipCodeForest& graph_lookback_limit, read_lookback_limit ); - results = algorithms::find_best_chains( + chain_results = algorithms::find_best_chains( anchor_view, *distance_index, gbwt_graph, @@ -1479,17 +1479,19 @@ void MinimizerMapper::do_chaining_on_trees(Alignment& aln, const ZipCodeForest& ); if (show_work) { #pragma omp critical (cerr) - cerr << log_name() << "Found " << results.size() << " chains in zip code tree " << item_num + cerr << log_name() << "Found " << chain_results.chains.size() << " chains in zip code tree " << item_num << " running " << anchors_to_chain[anchor_indexes.front()] << " to " << anchors_to_chain[anchor_indexes.back()] << std::endl; } - for (size_t result = 0; result < results.size(); result++) { + for (size_t result = 0; result < chain_results.chains.size(); result++) { // For each result - auto& scored_chain = results[result]; - if (show_work) { + auto& entry = chain_results.chains[result]; + auto& scored_chain = entry.scored_chain; + auto& chain_rec_positions = entry.rec_positions; + if (true) { #ifdef debug - if(true) + if(show_work) #else if (result < MANY_LIMIT) #endif @@ -1498,9 +1500,19 @@ void MinimizerMapper::do_chaining_on_trees(Alignment& aln, const ZipCodeForest& #pragma omp critical (cerr) { cerr << log_name() << "\tChain with score " << scored_chain.first - << " and length " << scored_chain.second.size() + << " (rec num =" << chain_rec_positions.size() << ") and length " << scored_chain.second.size() << " running " << anchor_view[scored_chain.second.front()] << " to " << anchor_view[scored_chain.second.back()] << std::endl; + if (!chain_rec_positions.empty()) { + { + cerr << log_name() << "\t\tRecombination introduced at anchors: "; + for (size_t pi = 0; pi < chain_rec_positions.size(); ++pi) { + if (pi) cerr << ", "; + cerr << chain_rec_positions[pi]; + } + cerr << std::endl; + } + } #ifdef debug for (auto& anchor_number : scored_chain.second) { @@ -1512,7 +1524,7 @@ void MinimizerMapper::do_chaining_on_trees(Alignment& aln, const ZipCodeForest& } } else if (result == MANY_LIMIT) { #pragma omp critical (cerr) - std::cerr << log_name() << "\t<" << (results.size() - result) << " more chains>" << std::endl; + std::cerr << log_name() << "\t<" << (chain_results.chains.size() - result) << " more chains>" << std::endl; } } From a4ff94230e6d223854d2df63bd3ab475a6274f34 Mon Sep 17 00:00:00 2001 From: dmonti Date: Mon, 2 Feb 2026 07:45:16 -0800 Subject: [PATCH 02/20] scatter.py can now show recombinant anchors --- scripts/scatter.py | 32 ++++++++++++-- src/minimizer_mapper.hpp | 3 +- src/minimizer_mapper_from_chains.cpp | 62 +++++++++++++++++++++++----- 3 files changed, 82 insertions(+), 15 deletions(-) diff --git a/scripts/scatter.py b/scripts/scatter.py index 39764c9500..8d99e00774 100755 --- a/scripts/scatter.py +++ b/scripts/scatter.py @@ -99,6 +99,8 @@ def parse_args(args): help="width of connecting line") parser.add_argument("--tsv", action="store_true", help="use only tabs as separators in input file") + parser.add_argument("--show_rec", action="store_true", + help="highlight points whose last TSV column is REC by overplotting them") parser.add_argument("--no_sort", dest="sort", action="store_false", help="do not sort categories from input file") parser.add_argument("--categories", nargs="+", default=None, @@ -419,8 +421,9 @@ def main(args): # Make the figure with the appropriate size. figure = pyplot.figure(figsize=(options.width, options.height)) - # This holds lists of x, y, weight lists for each data series. - series = collections.defaultdict(lambda: [list(), list(), list()]) + # This holds lists of x, y, weight lists and rec flags for each data series. + # series[name] -> [x_list, y_list, weight_list, rec_flag_list] + series = collections.defaultdict(lambda: [list(), list(), list(), list()]) # This holds the order in which series were first encountered initial_series_order = collections.OrderedDict() @@ -434,10 +437,19 @@ def main(args): for line in options.data: # Unpack the line, splitting on tabs only if requested - parts = line.split("\t" if options.tsv else None) + parts = line.rstrip("\n").split("\t" if options.tsv else None) + # Strip whitespace from each field + parts = [p.strip() for p in parts] # We can weight samples weight = 1 + rec_flag = False + + # If the last column is the REC sentinel, mark and remove it so the + # remaining parsing can proceed normally. + if len(parts) >= 1 and parts[-1] == "REC": + rec_flag = True + parts = parts[:-1] if options.dotplot: # We parse a two-column name/sample format @@ -464,6 +476,7 @@ def main(args): if len(parts) >= 4: # There's also a weight weight = float(parts[3]) + # If there was a REC column originally, rec_flag already captured it else: # Use the default series @@ -477,6 +490,7 @@ def main(args): series[series_name][0].append(x_value) series[series_name][1].append(y_value) series[series_name][2].append(weight) + series[series_name][3].append(rec_flag) # Note this series in the ordering if we haven't already initial_series_order[series_name] = None @@ -688,6 +702,18 @@ def main(args): else: plotted_items.append(plot_result) + # If requested, highlight points marked REC in the input's last column + if options.show_rec: + rec_flags = series[series_name][3] + if any(rec_flags): + rec_x = [series[series_name][0][i] for i, f in enumerate(rec_flags) if f] + rec_y = [series[series_name][1][i] for i, f in enumerate(rec_flags) if f] + # Choose a sensible marker size if none set + rec_ms = options.marker_size if options.marker_size is not None else 40 + # Overplot REC points with red crosses + rec_plot = series_plot.scatter(rec_x, rec_y, marker='x', color='red', s=rec_ms, zorder=10) + plotted_items.append(rec_plot) + if options.fit: # We need to do a curve fit diff --git a/src/minimizer_mapper.hpp b/src/minimizer_mapper.hpp index ef50dc6f0f..3b3a4963d1 100644 --- a/src/minimizer_mapper.hpp +++ b/src/minimizer_mapper.hpp @@ -867,7 +867,7 @@ class MinimizerMapper : public AlignerClient { */ void do_chaining_on_trees(Alignment& aln, const ZipCodeForest& zip_code_forest, const std::vector& seeds, const VectorView& minimizers, const vector& seed_anchors, - std::vector>& chains, std::vector& chain_source_tree, + std::vector>& chains, std::vector>& chain_rec_flags, std::vector& chain_source_tree, std::vector& chain_score_estimates, std::vector>& minimizer_kept_chain_count, std::vector& multiplicity_by_chain, std::vector& alignments, SmallBitset& minimizer_explored, vector& multiplicity_by_alignment, @@ -1462,6 +1462,7 @@ class MinimizerMapper : public AlignerClient { const std::vector& seeds, const VectorView& minimizers, const std::vector>& chains, + const std::vector>& chain_rec_flags, const std::vector& chain_source_tree, const PathPositionHandleGraph* path_graph); diff --git a/src/minimizer_mapper_from_chains.cpp b/src/minimizer_mapper_from_chains.cpp index e85bf64fac..d0df8bcc41 100644 --- a/src/minimizer_mapper_from_chains.cpp +++ b/src/minimizer_mapper_from_chains.cpp @@ -31,6 +31,7 @@ #include #include #include +#include // Turn on debugging prints //#define debug @@ -190,6 +191,7 @@ void MinimizerMapper::dump_debug_chains(const ZipCodeForest& zip_code_forest, const std::vector& seeds, const VectorView& minimizers, const std::vector>& chains, + const std::vector>& chain_rec_flags, const std::vector& chain_source_tree, const PathPositionHandleGraph* path_graph) { if (!path_graph) { @@ -257,10 +259,19 @@ void MinimizerMapper::dump_debug_chains(const ZipCodeForest& zip_code_forest, for (size_t run_number = 0; run_number < kv.second.size(); run_number++) { // For each run of seeds in it const std::vector& included_seeds = kv.second[run_number]; - for (auto& seed_num : included_seeds) { - // For each seed in the run + for (size_t idx = 0; idx < included_seeds.size(); ++idx) { + // For each seed in the run (index-based so we can consult chain flags) + size_t seed_num = included_seeds[idx]; auto& seed = seeds.at(seed_num); + // Determine whether this seed (in chain mode) is from an anchor that is recombinant + bool is_recomb = false; + if (!marker.empty() && marker == "chain") { + if (chain_num < chain_rec_flags.size() && idx < chain_rec_flags[chain_num].size()) { + is_recomb = chain_rec_flags[chain_num][idx]; + } + } + // Get its effective path positions auto& offsets = seed_positions.at(seed_num); @@ -280,6 +291,16 @@ void MinimizerMapper::dump_debug_chains(const ZipCodeForest& zip_code_forest, exp.field(position.first); // Offset in read *of the pin point* (not of the forward-strand start of the minimizer) exp.field(minimizers[seed.source].pin_offset()); + // Recombination flag column (only meaningful for chain rows) + if (!marker.empty()) { + if (marker == "chain") { + exp.field(is_recomb ? "REC" : ""); + } else { + exp.field(""); + } + } else { + exp.field(""); + } } } if (offsets.empty()) { @@ -296,6 +317,12 @@ void MinimizerMapper::dump_debug_chains(const ZipCodeForest& zip_code_forest, exp.field(0); // Offset in read *of the pin point* (not of the forward-strand start of the minimizer) exp.field(minimizers[seed.source].pin_offset()); + // Recombination flag column + if (marker == "chain") { + exp.field(is_recomb ? "REC" : ""); + } else { + exp.field(""); + } } } @@ -707,6 +734,8 @@ vector MinimizerMapper::map_from_chains(Alignment& aln) { // For each chain, we need: // The chain itself, pointing into seeds std::vector> chains; + // For each chain, mark per-seed whether it came from a recombinant anchor + std::vector> chain_rec_flags; // The zip code tree it came from std::vector chain_source_tree; // An estimated alignment score @@ -717,7 +746,7 @@ vector MinimizerMapper::map_from_chains(Alignment& aln) { std::vector multiplicity_by_chain; do_chaining_on_trees(aln, zip_code_forest, seeds, minimizers, seed_anchors, - chains, chain_source_tree, chain_score_estimates, + chains, chain_rec_flags, chain_source_tree, chain_score_estimates, minimizer_kept_chain_count, multiplicity_by_chain, alignments, minimizer_explored, multiplicity_by_alignment, rng, funnel); @@ -732,7 +761,7 @@ vector MinimizerMapper::map_from_chains(Alignment& aln) { // Dump all chains if requested (do this before alignments, while chains still exist) if (show_work && !chains.empty() && this->path_graph != nullptr) { - dump_debug_chains(zip_code_forest, seeds, minimizers, chains, chain_source_tree, this->path_graph); + dump_debug_chains(zip_code_forest, seeds, minimizers, chains, chain_rec_flags, chain_source_tree, this->path_graph); } if (alignments.size() == 0) { @@ -1027,13 +1056,13 @@ vector MinimizerMapper::map_from_chains(Alignment& aln) { } void MinimizerMapper::do_chaining_on_trees(Alignment& aln, const ZipCodeForest& zip_code_forest, - const std::vector& seeds, const VectorView& minimizers, - const vector& seed_anchors, - std::vector>& chains, std::vector& chain_source_tree, - std::vector& chain_score_estimates, std::vector>& minimizer_kept_chain_count, - std::vector& multiplicity_by_chain, - std::vector& alignments, SmallBitset& minimizer_explored, vector& multiplicity_by_alignment, - LazyRNG& rng, Funnel& funnel) const { + const std::vector& seeds, const VectorView& minimizers, + const vector& seed_anchors, + std::vector>& chains, std::vector>& chain_rec_flags, std::vector& chain_source_tree, + std::vector& chain_score_estimates, std::vector>& minimizer_kept_chain_count, + std::vector& multiplicity_by_chain, + std::vector& alignments, SmallBitset& minimizer_explored, vector& multiplicity_by_alignment, + LazyRNG& rng, Funnel& funnel) const { // Keep track of which chain each alignment comes from for the funnel std::vector alignment_source_chain; @@ -1533,13 +1562,24 @@ void MinimizerMapper::do_chaining_on_trees(Alignment& aln, const ZipCodeForest& // Translate chains into seed numbers and not local anchor numbers. chains.emplace_back(); + // Also make a parallel vector that marks whether each seed in the chain + // comes from an anchor that introduced recombination. + chain_rec_flags.emplace_back(); chains.back().reserve(scored_chain.second.size() * 2); + chain_rec_flags.back().reserve(scored_chain.second.size() * 2); + + // Build a set of the anchor indices inside scored_chain that are recombinant + std::unordered_set rec_anchor_set(chain_rec_positions.begin(), chain_rec_positions.end()); + for (auto& selected_number : scored_chain.second) { // For each anchor in the chain, get its number in the whole group of anchors. size_t anchor_number = anchor_indexes.at(selected_number); + bool anchor_is_recomb = rec_anchor_set.count(selected_number) > 0; for (auto& seed_number : anchor_seed_sequences.at(anchor_number)) { // And get all the seeds it actually uses in sequence and put them in the chain. chains.back().push_back(seed_number); + // Mark whether this seed came from a recombinant anchor + chain_rec_flags.back().push_back(anchor_is_recomb); } for (auto& seed_number : anchor_represented_seeds.at(anchor_number)) { // And get all the seeds it represents exploring and mark their minimizers explored. From 6e03520d69c65e02106d01b9a9f861f0350b2d23 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Wed, 4 Feb 2026 12:23:25 -0800 Subject: [PATCH 03/20] Handle type errors in alt text generator --- scripts/scatter.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/scripts/scatter.py b/scripts/scatter.py index 8d99e00774..8f142f35a5 100755 --- a/scripts/scatter.py +++ b/scripts/scatter.py @@ -15,6 +15,7 @@ import argparse, sys, os, itertools, math, collections, random, re import matplotlib, matplotlib.ticker, matplotlib.cm, numpy import copy +import traceback # Scipy allows curve fitting, but we might not have it try: @@ -911,6 +912,10 @@ def main(args): except ImportError: sys.stderr.write("Install the matplotalt package to generate figure alt text\n") alt_text = None + except Exception as e: + print("Could not generate alt text due to internal error") + traceback.print_exc() + alt_text = None if options.save is not None: # Save the figure to a file From f1f840f42533222efff5a46d8a654e66e60e83bb Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Thu, 5 Feb 2026 08:34:46 -0800 Subject: [PATCH 04/20] Add more dumping for DP info --- src/algorithms/chain_items.cpp | 24 ++++++++++++++---------- src/minimizer_mapper.hpp | 1 + src/minimizer_mapper_from_chains.cpp | 24 +++++++++++++++++++++--- 3 files changed, 36 insertions(+), 13 deletions(-) diff --git a/src/algorithms/chain_items.cpp b/src/algorithms/chain_items.cpp index 8306d56abd..5182c91ae0 100644 --- a/src/algorithms/chain_items.cpp +++ b/src/algorithms/chain_items.cpp @@ -11,7 +11,7 @@ #include #include -//#define debug_chaining +#define debug_chaining //#define debug_transition namespace vg { @@ -535,20 +535,13 @@ TracedScore chain_items_dp(vector& chain_scores, size_t max_indel_bases, int recomb_penalty, bool show_work) { - -#ifdef debug_chaining + DiagramExplainer diagram(show_work); -#else - DiagramExplainer diagram(false); -#endif + TSVExplainer dump(show_work, "chaindump"); if (diagram) { diagram.add_globals({{"rankdir", "LR"}}); } -#ifdef debug_chaining - show_work = true; -#endif - if (show_work) { cerr << "Chaining group of " << to_chain.size() << " items" << endl; } @@ -686,6 +679,17 @@ TracedScore chain_items_dp(vector& chain_scores, }); } } + if (dump) { + // Dump a TSV of source anchor description, dest anchor description, and score achieved. + dump.line(); + std::stringstream source_stream; + source_stream << source; + dump.field(source_stream.str()); + std::stringstream here_stream; + here_stream << here; + dump.field(here_stream.str()); + dump.field(from_source_score.score); + } } else { if (show_work) { cerr << "\t\tTransition is impossible." << endl; diff --git a/src/minimizer_mapper.hpp b/src/minimizer_mapper.hpp index c3f1d2c341..41a376069e 100644 --- a/src/minimizer_mapper.hpp +++ b/src/minimizer_mapper.hpp @@ -1463,6 +1463,7 @@ class MinimizerMapper : public AlignerClient { static void dump_debug_chains(const ZipCodeForest& zip_code_forest, const std::vector& seeds, const VectorView& minimizers, + const vector& seed_anchors, const std::vector>& chains, const std::vector>& chain_rec_flags, const std::vector& chain_source_tree, diff --git a/src/minimizer_mapper_from_chains.cpp b/src/minimizer_mapper_from_chains.cpp index d0df8bcc41..13835d4535 100644 --- a/src/minimizer_mapper_from_chains.cpp +++ b/src/minimizer_mapper_from_chains.cpp @@ -190,6 +190,7 @@ static bool chain_ranges_are_equivalent(const MinimizerMapper::Seed& start_seed1 void MinimizerMapper::dump_debug_chains(const ZipCodeForest& zip_code_forest, const std::vector& seeds, const VectorView& minimizers, + const vector& seed_anchors, const std::vector>& chains, const std::vector>& chain_rec_flags, const std::vector& chain_source_tree, @@ -236,6 +237,9 @@ void MinimizerMapper::dump_debug_chains(const ZipCodeForest& zip_code_forest, // The read context should already be set by the caller TSVExplainer exp(true, chain_file_name); + // We make another TSV that's more parseable, with all the seeds. + TSVExplainer seedpos(true, "chain" + std::to_string(chain_num) + "-seeds"); + // Determine the positions of all the involved seeds. std::unordered_map seed_positions; for (auto& kv : seed_sets) { @@ -247,7 +251,21 @@ void MinimizerMapper::dump_debug_chains(const ZipCodeForest& zip_code_forest, auto found = seed_positions.find(seed_num); if (found == seed_positions.end()) { // If we don't know the seed's positions yet, get them - seed_positions.emplace_hint(found, seed_num, algorithms::nearest_offsets_in_paths(path_graph, seed.pos, 100)); + found = seed_positions.emplace_hint(found, seed_num, algorithms::nearest_offsets_in_paths(path_graph, seed.pos, 100)); + for (auto& handle_and_positions : found->second) { + std::string path_name = path_graph->get_path_name(handle_and_positions.first); + for (auto& position : handle_and_positions.second) { + // Dump all the seed positions so we can select seeds we want to know about + seedpos.line(); + seedpos.field(seed_anchors.at(seed_num).read_start()); + seedpos.field(path_name); + seedpos.field(position.first); + seedpos.field(seed_num); + std::stringstream ss; + ss << seed_anchors.at(seed_num); + seedpos.field(ss.str()); + } + } } } } @@ -761,7 +779,7 @@ vector MinimizerMapper::map_from_chains(Alignment& aln) { // Dump all chains if requested (do this before alignments, while chains still exist) if (show_work && !chains.empty() && this->path_graph != nullptr) { - dump_debug_chains(zip_code_forest, seeds, minimizers, chains, chain_rec_flags, chain_source_tree, this->path_graph); + dump_debug_chains(zip_code_forest, seeds, minimizers, seed_anchors, chains, chain_rec_flags, chain_source_tree, this->path_graph); } if (alignments.size() == 0) { @@ -1504,7 +1522,7 @@ void MinimizerMapper::do_chaining_on_trees(Alignment& aln, const ZipCodeForest& this->gap_scale, this->points_per_possible_match, indel_limit, - false + show_work ); if (show_work) { #pragma omp critical (cerr) From 09e9d44463a6ba79e4d8077df4a410c9783aedcf Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Thu, 5 Feb 2026 12:14:54 -0500 Subject: [PATCH 05/20] Add synthesized code to make interactive visualizations from chain dump data --- scripts/build-chain-viz.py | 464 +++++++++++++++++++++++++++++++++++++ 1 file changed, 464 insertions(+) create mode 100755 scripts/build-chain-viz.py diff --git a/scripts/build-chain-viz.py b/scripts/build-chain-viz.py new file mode 100755 index 0000000000..728edcd1b9 --- /dev/null +++ b/scripts/build-chain-viz.py @@ -0,0 +1,464 @@ +#!/usr/bin/env python3 +""" +Build an interactive SVG visualization of a chaining problem using D3.js. + +Usage: build-chain-viz.py +""" + +import sys +import os +import glob +import re +import json +import struct +from collections import defaultdict + + +def uint64_to_int64(val): + """Reinterpret a 64-bit unsigned integer as a signed integer.""" + return struct.unpack('q', struct.pack('Q', val))[0] + + +def parse_seeds_file(filepath): + """ + Parse a chain seeds file. + Returns list of dicts with keys: read_pos, ref_name, ref_pos, seed_num, seed_id + """ + seeds = [] + with open(filepath, 'r') as f: + for line in f: + line = line.strip() + if not line: + continue + parts = line.split('\t') + if len(parts) < 5: + continue + seeds.append({ + 'read_pos': int(parts[0]), + 'ref_name': parts[1], + 'ref_pos': int(parts[2]), + 'seed_num': int(parts[3]), + 'seed_id': parts[4] + }) + return seeds + + +def parse_chaindump_file(filepath): + """ + Parse a chaindump file. + Returns list of dicts with keys: source_id, dest_id, score (signed) + """ + transitions = [] + with open(filepath, 'r') as f: + for line in f: + line = line.strip() + if not line: + continue + parts = line.split('\t') + if len(parts) < 3: + continue + score_uint64 = int(parts[2]) + score_int64 = uint64_to_int64(score_uint64) + transitions.append({ + 'source_id': parts[0], + 'dest_id': parts[1], + 'score': score_int64 + }) + return transitions + + +def find_seeds_file(data_dir, chain_num): + """Find the seeds file for the given chain number.""" + pattern = os.path.join(data_dir, f'chain{chain_num}-seeds*.tsv') + matches = glob.glob(pattern) + if not matches: + return None + return matches[0] + + +def find_all_chaindump_files(data_dir): + """Find all chaindump files in the directory.""" + pattern = os.path.join(data_dir, 'chaindump*.tsv') + return glob.glob(pattern) + + +def generate_svg(seeds, transitions, output_path): + """Generate an SVG file with embedded D3.js visualization.""" + + # Build seed lookup by ID + seed_by_id = {s['seed_id']: s for s in seeds} + seed_ids = set(seed_by_id.keys()) + + # Filter transitions to only include those where both endpoints are in our seeds + # and score is positive + relevant_transitions = [] + for t in transitions: + if t['source_id'] in seed_ids and t['dest_id'] in seed_ids and t['score'] > 0: + relevant_transitions.append(t) + + # Compute max score per destination + max_score_by_dest = defaultdict(lambda: float('-inf')) + for t in relevant_transitions: + if t['score'] > max_score_by_dest[t['dest_id']]: + max_score_by_dest[t['dest_id']] = t['score'] + + # Add score info to seeds + seeds_data = [] + for s in seeds: + max_score = max_score_by_dest.get(s['seed_id'], 0) + seeds_data.append({ + 'read_pos': s['read_pos'], + 'ref_pos': s['ref_pos'], + 'seed_id': s['seed_id'], + 'max_score': max_score if max_score > float('-inf') else 0 + }) + + # Prepare transitions data with fraction of max + transitions_data = [] + for t in relevant_transitions: + max_score = max_score_by_dest[t['dest_id']] + fraction = t['score'] / max_score if max_score > 0 else 0 + is_max = (t['score'] == max_score) + transitions_data.append({ + 'source_id': t['source_id'], + 'dest_id': t['dest_id'], + 'score': t['score'], + 'fraction': fraction, + 'is_max': is_max + }) + + seeds_json = json.dumps(seeds_data) + transitions_json = json.dumps(transitions_data) + + svg_content = f''' + + + + + +''' + + with open(output_path, 'w') as f: + f.write(svg_content) + + +def main(): + if len(sys.argv) != 4: + print(f"Usage: {sys.argv[0]} ", file=sys.stderr) + sys.exit(1) + + data_dir = sys.argv[1] + chain_num = int(sys.argv[2]) + output_path = sys.argv[3] + + if not os.path.isdir(data_dir): + print(f"Error: {data_dir} is not a directory", file=sys.stderr) + sys.exit(1) + + # Find and parse seeds file + seeds_file = find_seeds_file(data_dir, chain_num) + if not seeds_file: + print(f"Error: No seeds file found for chain {chain_num}", file=sys.stderr) + sys.exit(1) + + seeds = parse_seeds_file(seeds_file) + if not seeds: + print(f"Warning: No seeds found in {seeds_file}", file=sys.stderr) + + print(f"Loaded {len(seeds)} seeds from {seeds_file}") + + # Find and parse all chaindump files + chaindump_files = find_all_chaindump_files(data_dir) + all_transitions = [] + for cf in chaindump_files: + transitions = parse_chaindump_file(cf) + all_transitions.extend(transitions) + print(f"Loaded {len(transitions)} transitions from {cf}") + + print(f"Total transitions: {len(all_transitions)}") + + # Generate SVG + generate_svg(seeds, all_transitions, output_path) + print(f"Generated {output_path}") + + +if __name__ == '__main__': + main() From 2aaf8d686ad772af254973618f4c769c06aedf55 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Thu, 5 Feb 2026 12:31:25 -0500 Subject: [PATCH 06/20] Automatically revise visualization generator to appear to work --- scripts/build-chain-viz.py | 319 ++++++++++++++++++++++++------------- 1 file changed, 210 insertions(+), 109 deletions(-) diff --git a/scripts/build-chain-viz.py b/scripts/build-chain-viz.py index 728edcd1b9..7131ef2c74 100755 --- a/scripts/build-chain-viz.py +++ b/scripts/build-chain-viz.py @@ -8,7 +8,6 @@ import sys import os import glob -import re import json import struct from collections import defaultdict @@ -47,6 +46,10 @@ def parse_chaindump_file(filepath): """ Parse a chaindump file. Returns list of dicts with keys: source_id, dest_id, score (signed) + + Note: The file format has columns ordered as (from_id, to_id, score) but + the DP runs from high read position to low, so we swap them here so that + source has larger read coordinate than dest. """ transitions = [] with open(filepath, 'r') as f: @@ -59,9 +62,10 @@ def parse_chaindump_file(filepath): continue score_uint64 = int(parts[2]) score_int64 = uint64_to_int64(score_uint64) + # Swap source and dest so source has larger read coordinate transitions.append({ - 'source_id': parts[0], - 'dest_id': parts[1], + 'source_id': parts[1], # was dest in file + 'dest_id': parts[0], # was source in file 'score': score_int64 }) return transitions @@ -89,6 +93,9 @@ def generate_svg(seeds, transitions, output_path): seed_by_id = {s['seed_id']: s for s in seeds} seed_ids = set(seed_by_id.keys()) + # Assign numeric indices to seeds for use in CSS class names + seed_id_to_index = {s['seed_id']: i for i, s in enumerate(seeds)} + # Filter transitions to only include those where both endpoints are in our seeds # and score is positive relevant_transitions = [] @@ -104,9 +111,10 @@ def generate_svg(seeds, transitions, output_path): # Add score info to seeds seeds_data = [] - for s in seeds: + for i, s in enumerate(seeds): max_score = max_score_by_dest.get(s['seed_id'], 0) seeds_data.append({ + 'index': i, 'read_pos': s['read_pos'], 'ref_pos': s['ref_pos'], 'seed_id': s['seed_id'], @@ -119,9 +127,11 @@ def generate_svg(seeds, transitions, output_path): max_score = max_score_by_dest[t['dest_id']] fraction = t['score'] / max_score if max_score > 0 else 0 is_max = (t['score'] == max_score) + dest_index = seed_id_to_index.get(t['dest_id'], -1) transitions_data.append({ 'source_id': t['source_id'], 'dest_id': t['dest_id'], + 'dest_index': dest_index, 'score': t['score'], 'fraction': fraction, 'is_max': is_max @@ -155,6 +165,10 @@ def generate_svg(seeds, transitions, output_path): stroke-width: 3px; opacity: 1; }} + .transition.secondary {{ + stroke-width: 1px; + opacity: 0.4; + }} .axis text {{ font-family: sans-serif; font-size: 12px; @@ -162,18 +176,6 @@ def generate_svg(seeds, transitions, output_path): .axis path, .axis line {{ stroke: #333; }} - .tooltip {{ - position: absolute; - background: rgba(0, 0, 0, 0.8); - color: white; - padding: 8px; - border-radius: 4px; - font-family: monospace; - font-size: 11px; - pointer-events: none; - max-width: 400px; - word-wrap: break-word; - }} .axis-label {{ font-family: sans-serif; font-size: 14px; @@ -188,21 +190,29 @@ def generate_svg(seeds, transitions, output_path): const transitions = {transitions_json}; const margin = {{top: 40, right: 40, bottom: 60, left: 80}}; - const width = 1200 - margin.left - margin.right; - const height = 800 - margin.top - margin.bottom; + const svgWidth = 1200; + const svgHeight = 800; + const width = svgWidth - margin.left - margin.right; + const height = svgHeight - margin.top - margin.bottom; const svg = d3.select('svg'); - // Create tooltip div - const tooltip = d3.select('body').append('div') - .attr('class', 'tooltip') - .style('opacity', 0) - .style('position', 'absolute'); - // Build lookup const seedById = new Map(); seeds.forEach(s => seedById.set(s.seed_id, s)); + // Group transitions by destination index for efficient lookup + const transitionsByDestIndex = new Map(); + transitions.forEach(t => {{ + if (!transitionsByDestIndex.has(t.dest_index)) {{ + transitionsByDestIndex.set(t.dest_index, []); + }} + transitionsByDestIndex.get(t.dest_index).push(t); + }}); + + // Separate top transitions (shown by default) from others + const topTransitions = transitions.filter(t => t.is_max); + // Compute data extents (handle empty data) let xExtent = d3.extent(seeds, d => d.ref_pos); let yExtent = d3.extent(seeds, d => d.read_pos); @@ -211,17 +221,37 @@ def generate_svg(seeds, transitions, output_path): if (xExtent[0] === undefined) xExtent = [0, 1000]; if (yExtent[0] === undefined) yExtent = [0, 1000]; - // Add some padding - const xPadding = (xExtent[1] - xExtent[0]) * 0.05 || 100; - const yPadding = (yExtent[1] - yExtent[0]) * 0.05 || 100; + // Compute ranges + const xRange = xExtent[1] - xExtent[0] || 1000; + const yRange = yExtent[1] - yExtent[0] || 1000; + + // For 1:1 aspect ratio, we need pixels per data unit to be equal for both axes + // Compute the scale factor that fits both dimensions + const xPixelsPerUnit = width / xRange; + const yPixelsPerUnit = height / yRange; + const pixelsPerUnit = Math.min(xPixelsPerUnit, yPixelsPerUnit); - // Scales + // Compute centered domains with 1:1 aspect ratio + const xCenter = (xExtent[0] + xExtent[1]) / 2; + const yCenter = (yExtent[0] + yExtent[1]) / 2; + const xHalfRange = (width / pixelsPerUnit) / 2; + const yHalfRange = (height / pixelsPerUnit) / 2; + + // Add 5% padding + const xPadding = xHalfRange * 0.05; + const yPadding = yHalfRange * 0.05; + + // Initial scale (store for zoom reset) + const initialXDomain = [xCenter - xHalfRange - xPadding, xCenter + xHalfRange + xPadding]; + const initialYDomain = [yCenter - yHalfRange - yPadding, yCenter + yHalfRange + yPadding]; + + // Scales - note: both use the same pixels-per-unit ratio const xScale = d3.scaleLinear() - .domain([xExtent[0] - xPadding, xExtent[1] + xPadding]) + .domain(initialXDomain) .range([0, width]); const yScale = d3.scaleLinear() - .domain([yExtent[0] - yPadding, yExtent[1] + yPadding]) + .domain(initialYDomain) .range([height, 0]); // Inverted so higher read pos is at top // Color scale for transitions (fraction of max score) @@ -272,47 +302,102 @@ def generate_svg(seeds, transitions, output_path): const plotArea = g.append('g') .attr('clip-path', `url(#${{clipId}})`); + // Add a background rect for capturing zoom events + plotArea.append('rect') + .attr('width', width) + .attr('height', height) + .attr('fill', 'white'); + const zoomG = plotArea.append('g'); - // Draw transitions first (so seeds are on top) - const transitionLines = zoomG.selectAll('.transition') - .data(transitions) + // Layer for transitions (below seeds) + const transitionLayer = zoomG.append('g').attr('class', 'transition-layer'); + // Layer for dynamically added secondary transitions + const secondaryTransitionLayer = zoomG.append('g').attr('class', 'secondary-transition-layer'); + // Layer for seeds (on top) + const seedLayer = zoomG.append('g').attr('class', 'seed-layer'); + + // Current scale references (updated by zoom) + let currentXScale = xScale; + let currentYScale = yScale; + + // Helper to compute line coordinates + function getLineCoords(d, xS, yS) {{ + const src = seedById.get(d.source_id); + const dest = seedById.get(d.dest_id); + return {{ + x1: src ? xS(src.ref_pos) : 0, + y1: src ? yS(src.read_pos) : 0, + x2: dest ? xS(dest.ref_pos) : 0, + y2: dest ? yS(dest.read_pos) : 0 + }}; + }} + + // Draw top transitions only (with SVG title tooltips) + const transitionLines = transitionLayer.selectAll('.transition') + .data(topTransitions) .enter() .append('line') .attr('class', 'transition') - .attr('x1', d => {{ - const src = seedById.get(d.source_id); - return src ? xScale(src.ref_pos) : 0; - }}) - .attr('y1', d => {{ - const src = seedById.get(d.source_id); - return src ? yScale(src.read_pos) : 0; - }}) - .attr('x2', d => {{ - const dest = seedById.get(d.dest_id); - return dest ? xScale(dest.ref_pos) : 0; - }}) - .attr('y2', d => {{ - const dest = seedById.get(d.dest_id); - return dest ? yScale(dest.read_pos) : 0; + .each(function(d) {{ + const coords = getLineCoords(d, xScale, yScale); + d3.select(this) + .attr('x1', coords.x1) + .attr('y1', coords.y1) + .attr('x2', coords.x2) + .attr('y2', coords.y2); }}) .attr('stroke', d => d.is_max ? 'red' : colorScale(d.fraction)) .on('mouseover', function(event, d) {{ d3.select(this).classed('highlighted', true); - tooltip.transition().duration(200).style('opacity', 0.9); - tooltip.html(`Score: ${{d.score}}
Fraction of max: ${{(d.fraction * 100).toFixed(1)}}%`) - .style('left', (event.pageX + 10) + 'px') - .style('top', (event.pageY - 28) + 'px'); }}) .on('mouseout', function() {{ d3.select(this).classed('highlighted', false); - tooltip.transition().duration(500).style('opacity', 0); }}); - // Draw seeds - let selectedSeed = null; + // Add SVG title tooltips to transitions + transitionLines.append('title') + .text(d => `Score: ${{d.score}}\\nFraction of max: ${{(d.fraction * 100).toFixed(1)}}%`); - const seedCircles = zoomG.selectAll('.seed') + // Track state + let selectedSeed = null; + let hoveredSeed = null; + + // Function to show all transitions to a seed (by index) + function showSecondaryTransitions(seedIndex) {{ + const allTrans = transitionsByDestIndex.get(seedIndex) || []; + const secondaryTrans = allTrans.filter(t => !t.is_max); + + // Use numeric index for reliable class selection + const className = 'secondary-dest-' + seedIndex; + + const lines = secondaryTransitionLayer.selectAll('.' + className) + .data(secondaryTrans, d => d.source_id + '->' + d.dest_id); + + lines.enter() + .append('line') + .attr('class', 'transition secondary ' + className) + .each(function(d) {{ + const coords = getLineCoords(d, currentXScale, currentYScale); + d3.select(this) + .attr('x1', coords.x1) + .attr('y1', coords.y1) + .attr('x2', coords.x2) + .attr('y2', coords.y2); + }}) + .attr('stroke', d => colorScale(d.fraction)) + .append('title') + .text(d => `Score: ${{d.score}}\\nFraction of max: ${{(d.fraction * 100).toFixed(1)}}%`); + }} + + // Function to hide secondary transitions to a seed (by index) + function hideSecondaryTransitions(seedIndex) {{ + const className = 'secondary-dest-' + seedIndex; + secondaryTransitionLayer.selectAll('.' + className).remove(); + }} + + // Draw seeds with SVG title tooltips + const seedCircles = seedLayer.selectAll('.seed') .data(seeds) .enter() .append('circle') @@ -321,95 +406,111 @@ def generate_svg(seeds, transitions, output_path): .attr('cy', d => yScale(d.read_pos)) .attr('r', 5) .on('mouseover', function(event, d) {{ + hoveredSeed = d; if (selectedSeed !== d) {{ d3.select(this).classed('hovered', true); }} - // Highlight incoming transitions - transitionLines.filter(t => t.dest_id === d.seed_id) + // Highlight incoming top transitions + transitionLines.filter(t => t.dest_index === d.index) .classed('highlighted', true); - tooltip.transition().duration(200).style('opacity', 0.9); - tooltip.html(`Seed ID: ${{d.seed_id}}
Read: ${{d.read_pos}}
Ref: ${{d.ref_pos}}
Max score: ${{d.max_score}}`) - .style('left', (event.pageX + 10) + 'px') - .style('top', (event.pageY - 28) + 'px'); + // Show all secondary transitions + showSecondaryTransitions(d.index); }}) .on('mouseout', function(event, d) {{ + hoveredSeed = null; if (selectedSeed !== d) {{ d3.select(this).classed('hovered', false); - }} - if (selectedSeed === null || selectedSeed.seed_id !== d.seed_id) {{ - transitionLines.filter(t => t.dest_id === d.seed_id) + transitionLines.filter(t => t.dest_index === d.index) + .classed('highlighted', false); + // Hide secondary transitions + hideSecondaryTransitions(d.index); + }} else {{ + // Seed is selected, keep transitions visible but unhighlight top ones + transitionLines.filter(t => t.dest_index === d.index) .classed('highlighted', false); }} - tooltip.transition().duration(500).style('opacity', 0); }}) .on('click', function(event, d) {{ - // Deselect previous - if (selectedSeed) {{ - seedCircles.filter(s => s.seed_id === selectedSeed.seed_id) - .classed('selected', false) - .classed('hovered', false); - transitionLines.filter(t => t.dest_id === selectedSeed.seed_id) - .classed('highlighted', false); - }} + const prevSelected = selectedSeed; + // If clicking same seed, deselect it if (selectedSeed === d) {{ - // Clicking same seed deselects selectedSeed = null; + d3.select(this).classed('selected', false); + // If not still hovering, hide transitions + if (hoveredSeed !== d) {{ + transitionLines.filter(t => t.dest_index === d.index) + .classed('highlighted', false); + hideSecondaryTransitions(d.index); + }} }} else {{ + // Deselect previous if any + if (prevSelected) {{ + seedCircles.filter(s => s.index === prevSelected.index) + .classed('selected', false) + .classed('hovered', false); + // Hide previous seed's transitions if not hovering it + if (hoveredSeed === null || hoveredSeed.index !== prevSelected.index) {{ + transitionLines.filter(t => t.dest_index === prevSelected.index) + .classed('highlighted', false); + hideSecondaryTransitions(prevSelected.index); + }} + }} + // Select new seed selectedSeed = d; d3.select(this).classed('selected', true); - transitionLines.filter(t => t.dest_id === d.seed_id) + transitionLines.filter(t => t.dest_index === d.index) .classed('highlighted', true); + showSecondaryTransitions(d.index); }} }}); - // Zoom behavior + // Add SVG title tooltips to seeds + seedCircles.append('title') + .text(d => `Seed ID: ${{d.seed_id}}\\nRead: ${{d.read_pos}}\\nRef: ${{d.ref_pos}}\\nMax score: ${{d.max_score}}`); + + // Zoom behavior with 1:1 aspect ratio constraint const zoom = d3.zoom() .scaleExtent([0.1, 50]) .on('zoom', function(event) {{ const transform = event.transform; - // Update scales - const newXScale = transform.rescaleX(xScale); - const newYScale = transform.rescaleY(yScale); + // Update scales (both use same transform to maintain 1:1 ratio) + currentXScale = transform.rescaleX(xScale); + currentYScale = transform.rescaleY(yScale); // Update axes - xAxisG.call(xAxis.scale(newXScale)); - yAxisG.call(yAxis.scale(newYScale)); + xAxisG.call(xAxis.scale(currentXScale)); + yAxisG.call(yAxis.scale(currentYScale)); // Update seed positions seedCircles - .attr('cx', d => newXScale(d.ref_pos)) - .attr('cy', d => newYScale(d.read_pos)); - - // Update transition lines - transitionLines - .attr('x1', d => {{ - const src = seedById.get(d.source_id); - return src ? newXScale(src.ref_pos) : 0; - }}) - .attr('y1', d => {{ - const src = seedById.get(d.source_id); - return src ? newYScale(src.read_pos) : 0; - }}) - .attr('x2', d => {{ - const dest = seedById.get(d.dest_id); - return dest ? newXScale(dest.ref_pos) : 0; - }}) - .attr('y2', d => {{ - const dest = seedById.get(d.dest_id); - return dest ? newYScale(dest.read_pos) : 0; - }}); + .attr('cx', d => currentXScale(d.ref_pos)) + .attr('cy', d => currentYScale(d.read_pos)); + + // Update top transition lines + transitionLines.each(function(d) {{ + const coords = getLineCoords(d, currentXScale, currentYScale); + d3.select(this) + .attr('x1', coords.x1) + .attr('y1', coords.y1) + .attr('x2', coords.x2) + .attr('y2', coords.y2); + }}); + + // Update secondary transition lines + secondaryTransitionLayer.selectAll('.transition').each(function(d) {{ + const coords = getLineCoords(d, currentXScale, currentYScale); + d3.select(this) + .attr('x1', coords.x1) + .attr('y1', coords.y1) + .attr('x2', coords.x2) + .attr('y2', coords.y2); + }}); }}); svg.call(zoom); - - // Add a background rect for capturing zoom events - plotArea.insert('rect', ':first-child') - .attr('width', width) - .attr('height', height) - .attr('fill', 'white'); }}); ]]> From ae05555a4ac029bf6eb9a7c90386425e92b20b8d Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Thu, 5 Feb 2026 13:44:08 -0500 Subject: [PATCH 07/20] Flip source and destination back to agree with the logs and increase zoomability --- scripts/build-chain-viz.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/scripts/build-chain-viz.py b/scripts/build-chain-viz.py index 7131ef2c74..7fae5ffa47 100755 --- a/scripts/build-chain-viz.py +++ b/scripts/build-chain-viz.py @@ -46,10 +46,6 @@ def parse_chaindump_file(filepath): """ Parse a chaindump file. Returns list of dicts with keys: source_id, dest_id, score (signed) - - Note: The file format has columns ordered as (from_id, to_id, score) but - the DP runs from high read position to low, so we swap them here so that - source has larger read coordinate than dest. """ transitions = [] with open(filepath, 'r') as f: @@ -62,10 +58,9 @@ def parse_chaindump_file(filepath): continue score_uint64 = int(parts[2]) score_int64 = uint64_to_int64(score_uint64) - # Swap source and dest so source has larger read coordinate transitions.append({ - 'source_id': parts[1], # was dest in file - 'dest_id': parts[0], # was source in file + 'source_id': parts[0], + 'dest_id': parts[1], 'score': score_int64 }) return transitions @@ -472,7 +467,7 @@ def generate_svg(seeds, transitions, output_path): // Zoom behavior with 1:1 aspect ratio constraint const zoom = d3.zoom() - .scaleExtent([0.1, 50]) + .scaleExtent([0.1, 1000]) .on('zoom', function(event) {{ const transform = event.transform; From cd3dc4f7298771b44a2b2010a07f4a64cb904353 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Thu, 5 Feb 2026 13:56:17 -0500 Subject: [PATCH 08/20] Add auto-generated code for traceback --- scripts/build-chain-viz.py | 157 ++++++++++++++++++++++++------------- 1 file changed, 103 insertions(+), 54 deletions(-) diff --git a/scripts/build-chain-viz.py b/scripts/build-chain-viz.py index 7fae5ffa47..83d7bd6ea5 100755 --- a/scripts/build-chain-viz.py +++ b/scripts/build-chain-viz.py @@ -123,8 +123,10 @@ def generate_svg(seeds, transitions, output_path): fraction = t['score'] / max_score if max_score > 0 else 0 is_max = (t['score'] == max_score) dest_index = seed_id_to_index.get(t['dest_id'], -1) + source_index = seed_id_to_index.get(t['source_id'], -1) transitions_data.append({ 'source_id': t['source_id'], + 'source_index': source_index, 'dest_id': t['dest_id'], 'dest_index': dest_index, 'score': t['score'], @@ -151,6 +153,11 @@ def generate_svg(seeds, transitions, output_path): fill: orange; r: 8; }} + .seed.on-traceback {{ + fill: #ff6666; + stroke: darkred; + stroke-width: 2px; + }} .transition {{ stroke-width: 1.5px; fill: none; @@ -164,6 +171,11 @@ def generate_svg(seeds, transitions, output_path): stroke-width: 1px; opacity: 0.4; }} + .transition.traceback {{ + stroke: red; + stroke-width: 3px; + opacity: 1; + }} .axis text {{ font-family: sans-serif; font-size: 12px; @@ -205,8 +217,13 @@ def generate_svg(seeds, transitions, output_path): transitionsByDestIndex.get(t.dest_index).push(t); }}); - // Separate top transitions (shown by default) from others - const topTransitions = transitions.filter(t => t.is_max); + // Find best (max score) transition to each seed + const bestTransitionToSeed = new Map(); + transitions.forEach(t => {{ + if (t.is_max) {{ + bestTransitionToSeed.set(t.dest_index, t); + }} + }}); // Compute data extents (handle empty data) let xExtent = d3.extent(seeds, d => d.ref_pos); @@ -305,9 +322,9 @@ def generate_svg(seeds, transitions, output_path): const zoomG = plotArea.append('g'); - // Layer for transitions (below seeds) - const transitionLayer = zoomG.append('g').attr('class', 'transition-layer'); - // Layer for dynamically added secondary transitions + // Layer for traceback transitions (red path) + const tracebackLayer = zoomG.append('g').attr('class', 'traceback-layer'); + // Layer for dynamically added secondary transitions (on hover) const secondaryTransitionLayer = zoomG.append('g').attr('class', 'secondary-transition-layer'); // Layer for seeds (on top) const seedLayer = zoomG.append('g').attr('class', 'seed-layer'); @@ -328,46 +345,83 @@ def generate_svg(seeds, transitions, output_path): }}; }} - // Draw top transitions only (with SVG title tooltips) - const transitionLines = transitionLayer.selectAll('.transition') - .data(topTransitions) - .enter() - .append('line') - .attr('class', 'transition') - .each(function(d) {{ - const coords = getLineCoords(d, xScale, yScale); - d3.select(this) - .attr('x1', coords.x1) - .attr('y1', coords.y1) - .attr('x2', coords.x2) - .attr('y2', coords.y2); - }}) - .attr('stroke', d => d.is_max ? 'red' : colorScale(d.fraction)) - .on('mouseover', function(event, d) {{ - d3.select(this).classed('highlighted', true); - }}) - .on('mouseout', function() {{ - d3.select(this).classed('highlighted', false); - }}); - - // Add SVG title tooltips to transitions - transitionLines.append('title') - .text(d => `Score: ${{d.score}}\\nFraction of max: ${{(d.fraction * 100).toFixed(1)}}%`); - // Track state let selectedSeed = null; let hoveredSeed = null; + let tracebackSeedIndices = new Set(); + + // Compute traceback path from a seed + function computeTraceback(seedIndex) {{ + const path = []; + const visited = new Set(); + let currentIndex = seedIndex; + + while (currentIndex !== undefined && !visited.has(currentIndex)) {{ + visited.add(currentIndex); + const bestTrans = bestTransitionToSeed.get(currentIndex); + if (bestTrans) {{ + path.push(bestTrans); + currentIndex = bestTrans.source_index; + }} else {{ + break; + }} + }} + return path; + }} + + // Show traceback path for selected seed + function showTraceback(seedIndex) {{ + const path = computeTraceback(seedIndex); + tracebackSeedIndices.clear(); + + // Collect all seed indices on the traceback + if (path.length > 0) {{ + path.forEach(t => {{ + tracebackSeedIndices.add(t.source_index); + tracebackSeedIndices.add(t.dest_index); + }}); + }} - // Function to show all transitions to a seed (by index) + // Update seed styling for traceback + seedCircles.classed('on-traceback', d => tracebackSeedIndices.has(d.index) && d.index !== seedIndex); + + // Draw traceback transitions + const lines = tracebackLayer.selectAll('.traceback') + .data(path, d => d.source_id + '->' + d.dest_id); + + lines.enter() + .append('line') + .attr('class', 'transition traceback') + .each(function(d) {{ + const coords = getLineCoords(d, currentXScale, currentYScale); + d3.select(this) + .attr('x1', coords.x1) + .attr('y1', coords.y1) + .attr('x2', coords.x2) + .attr('y2', coords.y2); + }}) + .append('title') + .text(d => `Score: ${{d.score}}\\nFraction of max: ${{(d.fraction * 100).toFixed(1)}}%`); + + lines.exit().remove(); + }} + + // Hide traceback path + function hideTraceback() {{ + tracebackLayer.selectAll('.traceback').remove(); + tracebackSeedIndices.clear(); + seedCircles.classed('on-traceback', false); + }} + + // Function to show all transitions to a seed (by index) on hover function showSecondaryTransitions(seedIndex) {{ const allTrans = transitionsByDestIndex.get(seedIndex) || []; - const secondaryTrans = allTrans.filter(t => !t.is_max); // Use numeric index for reliable class selection const className = 'secondary-dest-' + seedIndex; const lines = secondaryTransitionLayer.selectAll('.' + className) - .data(secondaryTrans, d => d.source_id + '->' + d.dest_id); + .data(allTrans, d => d.source_id + '->' + d.dest_id); lines.enter() .append('line') @@ -380,7 +434,7 @@ def generate_svg(seeds, transitions, output_path): .attr('x2', coords.x2) .attr('y2', coords.y2); }}) - .attr('stroke', d => colorScale(d.fraction)) + .attr('stroke', d => d.is_max ? 'red' : colorScale(d.fraction)) .append('title') .text(d => `Score: ${{d.score}}\\nFraction of max: ${{(d.fraction * 100).toFixed(1)}}%`); }} @@ -405,24 +459,15 @@ def generate_svg(seeds, transitions, output_path): if (selectedSeed !== d) {{ d3.select(this).classed('hovered', true); }} - // Highlight incoming top transitions - transitionLines.filter(t => t.dest_index === d.index) - .classed('highlighted', true); - // Show all secondary transitions + // Show all transitions to this seed showSecondaryTransitions(d.index); }}) .on('mouseout', function(event, d) {{ hoveredSeed = null; if (selectedSeed !== d) {{ d3.select(this).classed('hovered', false); - transitionLines.filter(t => t.dest_index === d.index) - .classed('highlighted', false); // Hide secondary transitions hideSecondaryTransitions(d.index); - }} else {{ - // Seed is selected, keep transitions visible but unhighlight top ones - transitionLines.filter(t => t.dest_index === d.index) - .classed('highlighted', false); }} }}) .on('click', function(event, d) {{ @@ -432,10 +477,9 @@ def generate_svg(seeds, transitions, output_path): if (selectedSeed === d) {{ selectedSeed = null; d3.select(this).classed('selected', false); + hideTraceback(); // If not still hovering, hide transitions if (hoveredSeed !== d) {{ - transitionLines.filter(t => t.dest_index === d.index) - .classed('highlighted', false); hideSecondaryTransitions(d.index); }} }} else {{ @@ -446,8 +490,6 @@ def generate_svg(seeds, transitions, output_path): .classed('hovered', false); // Hide previous seed's transitions if not hovering it if (hoveredSeed === null || hoveredSeed.index !== prevSelected.index) {{ - transitionLines.filter(t => t.dest_index === prevSelected.index) - .classed('highlighted', false); hideSecondaryTransitions(prevSelected.index); }} }} @@ -455,8 +497,7 @@ def generate_svg(seeds, transitions, output_path): // Select new seed selectedSeed = d; d3.select(this).classed('selected', true); - transitionLines.filter(t => t.dest_index === d.index) - .classed('highlighted', true); + showTraceback(d.index); showSecondaryTransitions(d.index); }} }}); @@ -467,7 +508,7 @@ def generate_svg(seeds, transitions, output_path): // Zoom behavior with 1:1 aspect ratio constraint const zoom = d3.zoom() - .scaleExtent([0.1, 1000]) + .scaleExtent([0.1, 10000]) .on('zoom', function(event) {{ const transform = event.transform; @@ -484,8 +525,8 @@ def generate_svg(seeds, transitions, output_path): .attr('cx', d => currentXScale(d.ref_pos)) .attr('cy', d => currentYScale(d.read_pos)); - // Update top transition lines - transitionLines.each(function(d) {{ + // Update traceback transition lines + tracebackLayer.selectAll('.transition').each(function(d) {{ const coords = getLineCoords(d, currentXScale, currentYScale); d3.select(this) .attr('x1', coords.x1) @@ -506,6 +547,14 @@ def generate_svg(seeds, transitions, output_path): }}); svg.call(zoom); + + // Auto-select the seed with highest max_score + const bestSeed = seeds.reduce((best, s) => (s.max_score > best.max_score ? s : best), seeds[0]); + if (bestSeed && bestSeed.max_score > 0) {{ + selectedSeed = bestSeed; + seedCircles.filter(s => s.index === bestSeed.index).classed('selected', true); + showTraceback(bestSeed.index); + }} }}); ]]> From 9900bb97cfc1a8a619a7d4c848749e8f10a304f2 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Thu, 5 Feb 2026 14:19:37 -0500 Subject: [PATCH 09/20] Add slightly adjusted synthetic code to let us see the transition to the selected seed, and the top chain always --- scripts/build-chain-viz.py | 157 ++++++++++++++++++++++++++++++++++++- 1 file changed, 154 insertions(+), 3 deletions(-) diff --git a/scripts/build-chain-viz.py b/scripts/build-chain-viz.py index 83d7bd6ea5..83cfb2b90f 100755 --- a/scripts/build-chain-viz.py +++ b/scripts/build-chain-viz.py @@ -146,7 +146,12 @@ def generate_svg(seeds, transitions, output_path): stroke-width: 1px; cursor: pointer; }} - .seed:hover, .seed.hovered {{ + .seed.on-best-chain {{ + fill: #90EE90; + stroke: darkgreen; + stroke-width: 2px; + }} + .seed:hover, .seed.hovered, .seed.on-best-chain:hover, .seed.on-best-chain.hovered {{ r: 8; }} .seed.selected {{ @@ -167,6 +172,11 @@ def generate_svg(seeds, transitions, output_path): stroke-width: 3px; opacity: 1; }} + .transition.to-selected {{ + stroke: black !important; + stroke-width: 4px !important; + opacity: 1 !important; + }} .transition.secondary {{ stroke-width: 1px; opacity: 0.4; @@ -217,6 +227,15 @@ def generate_svg(seeds, transitions, output_path): transitionsByDestIndex.get(t.dest_index).push(t); }}); + // Group transitions by source index for efficient lookup + const transitionsBySourceIndex = new Map(); + transitions.forEach(t => {{ + if (!transitionsBySourceIndex.has(t.source_index)) {{ + transitionsBySourceIndex.set(t.source_index, []); + }} + transitionsBySourceIndex.get(t.source_index).push(t); + }}); + // Find best (max score) transition to each seed const bestTransitionToSeed = new Map(); transitions.forEach(t => {{ @@ -225,6 +244,12 @@ def generate_svg(seeds, transitions, output_path): }} }}); + // Get transition from source to dest (if exists) + function getTransitionFromTo(sourceIndex, destIndex) {{ + const fromSource = transitionsBySourceIndex.get(sourceIndex) || []; + return fromSource.find(t => t.dest_index === destIndex); + }} + // Compute data extents (handle empty data) let xExtent = d3.extent(seeds, d => d.ref_pos); let yExtent = d3.extent(seeds, d => d.read_pos); @@ -445,6 +470,48 @@ def generate_svg(seeds, transitions, output_path): secondaryTransitionLayer.selectAll('.' + className).remove(); }} + // Function to update seed tooltip based on current selection + function updateSeedTooltip(circle, d) {{ + let tooltipText = `Seed ID: ${{d.seed_id}}\\nRead: ${{d.read_pos}}\\nRef: ${{d.ref_pos}}\\nMax score: ${{d.max_score}}`; + if (selectedSeed && selectedSeed !== d) {{ + const trans = getTransitionFromTo(d.index, selectedSeed.index); + if (trans) {{ + tooltipText += `\\nScore to selected: ${{trans.score}}`; + }} + // Compute insertion/deletion offset + const refDiff = Math.abs(d.ref_pos - selectedSeed.ref_pos); + const readDiff = Math.abs(d.read_pos - selectedSeed.read_pos); + const offset = Math.abs(refDiff - readDiff); + let offsetStr; + if (readDiff > refDiff) {{ + offsetStr = `${{offset}}bp INS`; + }} else if (refDiff > readDiff) {{ + offsetStr = `${{offset}}bp DEL`; + }} else {{ + offsetStr = `0bp`; + }} + tooltipText += `\\nOffset: ${{offsetStr}}`; + }} + circle.select('title').text(tooltipText); + }} + + // Function to highlight transition from hovered to selected + function highlightTransitionToSelected(hoveredIndex) {{ + if (!selectedSeed || hoveredIndex === selectedSeed.index) return; + // Find and bold the transition line from hovered to selected in secondary layer + secondaryTransitionLayer.selectAll('.secondary-dest-' + selectedSeed.index) + .filter(t => t.source_index === hoveredIndex) + .classed('to-selected', true); + }} + + // Function to unhighlight transition from hovered to selected + function unhighlightTransitionToSelected(hoveredIndex) {{ + if (!selectedSeed) return; + secondaryTransitionLayer.selectAll('.secondary-dest-' + selectedSeed.index) + .filter(t => t.source_index === hoveredIndex) + .classed('to-selected', false); + }} + // Draw seeds with SVG title tooltips const seedCircles = seedLayer.selectAll('.seed') .data(seeds) @@ -459,8 +526,12 @@ def generate_svg(seeds, transitions, output_path): if (selectedSeed !== d) {{ d3.select(this).classed('hovered', true); }} + // Update tooltip to show transition score to selected if applicable + updateSeedTooltip(d3.select(this), d); // Show all transitions to this seed showSecondaryTransitions(d.index); + // Highlight transition from this seed to selected seed + highlightTransitionToSelected(d.index); }}) .on('mouseout', function(event, d) {{ hoveredSeed = null; @@ -469,6 +540,8 @@ def generate_svg(seeds, transitions, output_path): // Hide secondary transitions hideSecondaryTransitions(d.index); }} + // Unhighlight transition to selected + unhighlightTransitionToSelected(d.index); }}) .on('click', function(event, d) {{ const prevSelected = selectedSeed; @@ -502,7 +575,7 @@ def generate_svg(seeds, transitions, output_path): }} }}); - // Add SVG title tooltips to seeds + // Add SVG title tooltips to seeds (initial) seedCircles.append('title') .text(d => `Seed ID: ${{d.seed_id}}\\nRead: ${{d.read_pos}}\\nRef: ${{d.ref_pos}}\\nMax score: ${{d.max_score}}`); @@ -548,12 +621,90 @@ def generate_svg(seeds, transitions, output_path): svg.call(zoom); - // Auto-select the seed with highest max_score + // Function to zoom to fit a set of seed indices + function zoomToFitSeeds(seedIndices) {{ + if (seedIndices.size === 0) return; + + const seedsToFit = seeds.filter(s => seedIndices.has(s.index)); + if (seedsToFit.length === 0) return; + + const xMin = d3.min(seedsToFit, s => s.ref_pos); + const xMax = d3.max(seedsToFit, s => s.ref_pos); + const yMin = d3.min(seedsToFit, s => s.read_pos); + const yMax = d3.max(seedsToFit, s => s.read_pos); + + // Add padding (50% on each side for a more zoomed-out view) + const xPad = (xMax - xMin) * 0.5 || 100; + const yPad = (yMax - yMin) * 0.5 || 100; + + const targetXMin = xMin - xPad; + const targetXMax = xMax + xPad; + const targetYMin = yMin - yPad; + const targetYMax = yMax + yPad; + + const targetXRange = targetXMax - targetXMin; + const targetYRange = targetYMax - targetYMin; + + // Compute scale to fit while maintaining 1:1 aspect ratio + const scaleX = width / targetXRange; + const scaleY = height / targetYRange; + const scale = Math.min(scaleX, scaleY); + + // Compute the center of the target region + const targetXCenter = (targetXMin + targetXMax) / 2; + const targetYCenter = (targetYMin + targetYMax) / 2; + + // Compute translation to center the target region + // The base scales map initialXDomain/initialYDomain to [0,width]/[height,0] + // We need to find the transform that maps our target to fill the view + + // In the base scale, what pixel coords would the target center be at? + const baseCenterX = xScale(targetXCenter); + const baseCenterY = yScale(targetYCenter); + + // We want the target center to be at the view center (width/2, height/2) + // After applying transform: newX = transform.k * baseX + transform.x + // We want: transform.k * baseCenterX + transform.x = width/2 + // And: transform.k * baseCenterY + transform.y = height/2 + + // First compute the scale factor relative to base scale + const baseScaleXRange = initialXDomain[1] - initialXDomain[0]; + const k = baseScaleXRange / targetXRange * (width / width); // simplified: scale relative to base + + // Actually, let's compute it more directly + // The base xScale maps initialXDomain to [0, width] + // We want to map targetX range to [0, width] with scale k + // k = (initialXDomain range) / (targetX range) when both map to same pixel range + const k2 = (initialXDomain[1] - initialXDomain[0]) / targetXRange; + + // Translation: we want targetXCenter to map to width/2 + // xScale(targetXCenter) gives us the pixel position in base coords + // After transform: k * xScale(targetXCenter) + tx = width/2 + const tx = width / 2 - k2 * xScale(targetXCenter); + const ty = height / 2 - k2 * yScale(targetYCenter); + + const transform = d3.zoomIdentity.translate(tx, ty).scale(k2); + svg.call(zoom.transform, transform); + }} + + // Auto-select the seed with highest max_score and zoom to traceback const bestSeed = seeds.reduce((best, s) => (s.max_score > best.max_score ? s : best), seeds[0]); + const bestChainSeedIndices = new Set(); if (bestSeed && bestSeed.max_score > 0) {{ selectedSeed = bestSeed; seedCircles.filter(s => s.index === bestSeed.index).classed('selected', true); showTraceback(bestSeed.index); + + // Store the best chain seed indices permanently + tracebackSeedIndices.forEach(idx => bestChainSeedIndices.add(idx)); + + // Mark seeds on the best chain with persistent styling + seedCircles.classed('on-best-chain', d => bestChainSeedIndices.has(d.index)); + + // Zoom to fit the traceback + if (tracebackSeedIndices.size > 0) {{ + zoomToFitSeeds(tracebackSeedIndices); + }} }} }}); ]]> From c730224f9c142b156677150b0df1e05376295daa Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Thu, 5 Feb 2026 14:24:06 -0500 Subject: [PATCH 10/20] Add synthetic code to report type-able seed numbers --- scripts/build-chain-viz.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/build-chain-viz.py b/scripts/build-chain-viz.py index 83cfb2b90f..9f4a0b201c 100755 --- a/scripts/build-chain-viz.py +++ b/scripts/build-chain-viz.py @@ -110,6 +110,7 @@ def generate_svg(seeds, transitions, output_path): max_score = max_score_by_dest.get(s['seed_id'], 0) seeds_data.append({ 'index': i, + 'seed_num': s['seed_num'], 'read_pos': s['read_pos'], 'ref_pos': s['ref_pos'], 'seed_id': s['seed_id'], @@ -472,7 +473,7 @@ def generate_svg(seeds, transitions, output_path): // Function to update seed tooltip based on current selection function updateSeedTooltip(circle, d) {{ - let tooltipText = `Seed ID: ${{d.seed_id}}\\nRead: ${{d.read_pos}}\\nRef: ${{d.ref_pos}}\\nMax score: ${{d.max_score}}`; + let tooltipText = `Seed #${{d.seed_num}}\\nSeed ID: ${{d.seed_id}}\\nRead: ${{d.read_pos}}\\nRef: ${{d.ref_pos}}\\nMax score: ${{d.max_score}}`; if (selectedSeed && selectedSeed !== d) {{ const trans = getTransitionFromTo(d.index, selectedSeed.index); if (trans) {{ @@ -577,7 +578,7 @@ def generate_svg(seeds, transitions, output_path): // Add SVG title tooltips to seeds (initial) seedCircles.append('title') - .text(d => `Seed ID: ${{d.seed_id}}\\nRead: ${{d.read_pos}}\\nRef: ${{d.ref_pos}}\\nMax score: ${{d.max_score}}`); + .text(d => `Seed #${{d.seed_num}}\\nSeed ID: ${{d.seed_id}}\\nRead: ${{d.read_pos}}\\nRef: ${{d.ref_pos}}\\nMax score: ${{d.max_score}}`); // Zoom behavior with 1:1 aspect ratio constraint const zoom = d3.zoom() From d86fcefc23662c3d28c2fd9d51b6c5d836776d9e Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Thu, 5 Feb 2026 14:46:55 -0500 Subject: [PATCH 11/20] Add synthesized code to show negative-score transitions when they exist --- scripts/build-chain-viz.py | 54 ++++++++++++++++++++++++++++---------- 1 file changed, 40 insertions(+), 14 deletions(-) diff --git a/scripts/build-chain-viz.py b/scripts/build-chain-viz.py index 9f4a0b201c..45fc3429ed 100755 --- a/scripts/build-chain-viz.py +++ b/scripts/build-chain-viz.py @@ -92,16 +92,16 @@ def generate_svg(seeds, transitions, output_path): seed_id_to_index = {s['seed_id']: i for i, s in enumerate(seeds)} # Filter transitions to only include those where both endpoints are in our seeds - # and score is positive + # (keep negative scores for hovered-to-selected highlighting) relevant_transitions = [] for t in transitions: - if t['source_id'] in seed_ids and t['dest_id'] in seed_ids and t['score'] > 0: + if t['source_id'] in seed_ids and t['dest_id'] in seed_ids: relevant_transitions.append(t) - # Compute max score per destination + # Compute max score per destination (only from positive scores) max_score_by_dest = defaultdict(lambda: float('-inf')) for t in relevant_transitions: - if t['score'] > max_score_by_dest[t['dest_id']]: + if t['score'] > 0 and t['score'] > max_score_by_dest[t['dest_id']]: max_score_by_dest[t['dest_id']] = t['score'] # Add score info to seeds @@ -352,6 +352,8 @@ def generate_svg(seeds, transitions, output_path): const tracebackLayer = zoomG.append('g').attr('class', 'traceback-layer'); // Layer for dynamically added secondary transitions (on hover) const secondaryTransitionLayer = zoomG.append('g').attr('class', 'secondary-transition-layer'); + // Layer for hovered-to-selected transition highlight + const hoveredToSelectedLayer = zoomG.append('g').attr('class', 'hovered-to-selected-layer'); // Layer for seeds (on top) const seedLayer = zoomG.append('g').attr('class', 'seed-layer'); @@ -442,12 +444,14 @@ def generate_svg(seeds, transitions, output_path): // Function to show all transitions to a seed (by index) on hover function showSecondaryTransitions(seedIndex) {{ const allTrans = transitionsByDestIndex.get(seedIndex) || []; + // Only show positive score transitions in the secondary display + const positiveTrans = allTrans.filter(t => t.score > 0); // Use numeric index for reliable class selection const className = 'secondary-dest-' + seedIndex; const lines = secondaryTransitionLayer.selectAll('.' + className) - .data(allTrans, d => d.source_id + '->' + d.dest_id); + .data(positiveTrans, d => d.source_id + '->' + d.dest_id); lines.enter() .append('line') @@ -496,21 +500,33 @@ def generate_svg(seeds, transitions, output_path): circle.select('title').text(tooltipText); }} - // Function to highlight transition from hovered to selected + // Function to highlight transition from hovered to selected (draws the line directly) function highlightTransitionToSelected(hoveredIndex) {{ if (!selectedSeed || hoveredIndex === selectedSeed.index) return; - // Find and bold the transition line from hovered to selected in secondary layer - secondaryTransitionLayer.selectAll('.secondary-dest-' + selectedSeed.index) - .filter(t => t.source_index === hoveredIndex) - .classed('to-selected', true); + // Find the transition from hovered to selected (including negative scores) + const trans = getTransitionFromTo(hoveredIndex, selectedSeed.index); + if (!trans) return; + + // Draw the transition line in the hovered-to-selected layer + const line = hoveredToSelectedLayer.append('line') + .datum(trans) + .attr('class', 'transition to-selected') + .each(function(d) {{ + const coords = getLineCoords(d, currentXScale, currentYScale); + d3.select(this) + .attr('x1', coords.x1) + .attr('y1', coords.y1) + .attr('x2', coords.x2) + .attr('y2', coords.y2); + }}); + line.append('title') + .text(d => `Score: ${{d.score}}\\nFraction of max: ${{(d.fraction * 100).toFixed(1)}}%`); }} // Function to unhighlight transition from hovered to selected function unhighlightTransitionToSelected(hoveredIndex) {{ - if (!selectedSeed) return; - secondaryTransitionLayer.selectAll('.secondary-dest-' + selectedSeed.index) - .filter(t => t.source_index === hoveredIndex) - .classed('to-selected', false); + // Simply remove all lines from the hovered-to-selected layer + hoveredToSelectedLayer.selectAll('*').remove(); }} // Draw seeds with SVG title tooltips @@ -618,6 +634,16 @@ def generate_svg(seeds, transitions, output_path): .attr('x2', coords.x2) .attr('y2', coords.y2); }}); + + // Update hovered-to-selected transition line + hoveredToSelectedLayer.selectAll('.transition').each(function(d) {{ + const coords = getLineCoords(d, currentXScale, currentYScale); + d3.select(this) + .attr('x1', coords.x1) + .attr('y1', coords.y1) + .attr('x2', coords.x2) + .attr('y2', coords.y2); + }}); }}); svg.call(zoom); From f14d5f5140170721c0c044ed853f4085ccd6a0c2 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Thu, 5 Feb 2026 14:53:05 -0500 Subject: [PATCH 12/20] Add autogenerated code to gzip the giant JSONs in the SVGs so they are manageably sized --- scripts/build-chain-viz.py | 36 +++++++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/scripts/build-chain-viz.py b/scripts/build-chain-viz.py index 45fc3429ed..5b38c21806 100755 --- a/scripts/build-chain-viz.py +++ b/scripts/build-chain-viz.py @@ -10,6 +10,8 @@ import glob import json import struct +import gzip +import base64 from collections import defaultdict @@ -135,8 +137,12 @@ def generate_svg(seeds, transitions, output_path): 'is_max': is_max }) - seeds_json = json.dumps(seeds_data) - transitions_json = json.dumps(transitions_data) + # Compress JSON data using gzip and base64 encode for embedding + seeds_json_bytes = json.dumps(seeds_data).encode('utf-8') + transitions_json_bytes = json.dumps(transitions_data).encode('utf-8') + + seeds_compressed = base64.b64encode(gzip.compress(seeds_json_bytes)).decode('ascii') + transitions_compressed = base64.b64encode(gzip.compress(transitions_json_bytes)).decode('ascii') svg_content = f''' @@ -203,9 +209,29 @@ def generate_svg(seeds, transitions, output_path): - -''' + ''' + + svg_content = '\n'.join([ + '', + '', + css, + ' ', + data_script, + js, + '', + '' + ]) with open(output_path, 'w') as f: f.write(svg_content) From c65edbf588656b86ba179dc88e209750348bb7e9 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Fri, 6 Feb 2026 14:04:24 -0500 Subject: [PATCH 16/20] Reduce margin on initial load --- scripts/build-chain-viz.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/build-chain-viz.py b/scripts/build-chain-viz.py index d82b00b2a6..dc0eac5b23 100755 --- a/scripts/build-chain-viz.py +++ b/scripts/build-chain-viz.py @@ -668,7 +668,7 @@ def generate_svg(seeds, transitions, output_path): showTraceback(bestSeed.index); const bestChainSeedIndices = new Set(tracebackSeedIndices); seedCircles.classed('on-best-chain', d => bestChainSeedIndices.has(d.index)); - zoomToFit(seeds.filter(s => tracebackSeedIndices.has(s.index)), 0.5); + zoomToFit(seeds.filter(s => tracebackSeedIndices.has(s.index)), 0.05); } else { zoomToFit(seeds, 0.05); } From 6efdce46a5780d8d39304e2188c407b6c74752ec Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Fri, 6 Feb 2026 14:39:58 -0500 Subject: [PATCH 17/20] Automatically class-ify the visualization code to try and improve coherence --- scripts/build-chain-viz.py | 808 +++++++++++++++++++++---------------- 1 file changed, 454 insertions(+), 354 deletions(-) diff --git a/scripts/build-chain-viz.py b/scripts/build-chain-viz.py index dc0eac5b23..a8fd1687f0 100755 --- a/scripts/build-chain-viz.py +++ b/scripts/build-chain-viz.py @@ -210,245 +210,433 @@ def generate_svg(seeds, transitions, output_path): js = ''' ''' From dc2ebd1276f8d079ee3e396d3858d7a98d47107a Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Fri, 6 Feb 2026 14:46:58 -0500 Subject: [PATCH 18/20] Privatize all the methods that can be --- scripts/build-chain-viz.py | 82 +++++++++++++++++++------------------- 1 file changed, 41 insertions(+), 41 deletions(-) diff --git a/scripts/build-chain-viz.py b/scripts/build-chain-viz.py index a8fd1687f0..0ce4b98f2d 100755 --- a/scripts/build-chain-viz.py +++ b/scripts/build-chain-viz.py @@ -341,7 +341,7 @@ class ChainViz { /// secondary transition display and avoid redundant hide/show. this.hoveredSeed = null; /// Set of seed indices along the current traceback path. Used both - /// for styling and as the seed set for zoomToFit on startup. + /// for styling and as the seed set for #zoomToFit on startup. this.tracebackSeedIndices = new Set(); this.#setupScales(); @@ -382,7 +382,7 @@ class ChainViz { const xMid = (xExtent[0] + xExtent[1]) / 2; const yMid = (yExtent[0] + yExtent[1]) / 2; - /// The domains of the base scales; needed by zoomToFit to compute + /// The domains of the base scales; needed by #zoomToFit to compute /// the zoom transform's scale factor from the ratio of domain widths. this.initialXDomain = [xMid - width * dataPerPixel / 2, xMid + width * dataPerPixel / 2]; this.initialYDomain = [yMid - height * dataPerPixel / 2, yMid + height * dataPerPixel / 2]; @@ -393,7 +393,7 @@ class ChainViz { this.baseYScale = d3.scaleLinear().domain(this.initialYDomain).range([height, 0]); /// Zoom-adjusted scales; updated on every zoom event and read by - /// positionLines and seed repositioning. + /// #positionLines and seed repositioning. this.currentXScale = this.baseXScale; this.currentYScale = this.baseYScale; @@ -495,26 +495,26 @@ class ChainViz { if (viz.selectedSeed !== d) { d3.select(this).classed('hovered', true); } - viz.updateSeedTooltip(d3.select(this), d); - viz.showSecondaryTransitions(d.index); - viz.highlightTransitionToSelected(d.index); + viz.#updateSeedTooltip(d3.select(this), d); + viz.#showSecondaryTransitions(d.index); + viz.#highlightTransitionToSelected(d.index); }) .on('mouseout', function(event, d) { viz.hoveredSeed = null; if (viz.selectedSeed !== d) { d3.select(this).classed('hovered', false); - viz.hideSecondaryTransitions(d.index); + viz.#hideSecondaryTransitions(d.index); } - viz.unhighlightTransitionToSelected(); + viz.#unhighlightTransitionToSelected(); }) .on('click', function(event, d) { const prevSelected = viz.selectedSeed; if (viz.selectedSeed === d) { viz.selectedSeed = null; d3.select(this).classed('selected', false); - viz.hideTraceback(); + viz.#hideTraceback(); if (viz.hoveredSeed !== d) { - viz.hideSecondaryTransitions(d.index); + viz.#hideSecondaryTransitions(d.index); } } else { if (prevSelected) { @@ -522,17 +522,17 @@ class ChainViz { .classed('selected', false) .classed('hovered', false); if (viz.hoveredSeed === null || viz.hoveredSeed.index !== prevSelected.index) { - viz.hideSecondaryTransitions(prevSelected.index); + viz.#hideSecondaryTransitions(prevSelected.index); } } viz.selectedSeed = d; d3.select(this).classed('selected', true); - viz.showTraceback(d.index); - viz.showSecondaryTransitions(d.index); + viz.#showTraceback(d.index); + viz.#showSecondaryTransitions(d.index); } }); - this.seedCircles.append('title').text(d => viz.seedTooltipText(d)); + this.seedCircles.append('title').text(d => viz.#seedTooltipText(d)); } /** @@ -541,7 +541,7 @@ class ChainViz { #setupZoom() { const viz = this; - /// The D3 zoom behavior object; stored so zoomToFit can call + /// The D3 zoom behavior object; stored so #zoomToFit can call /// svg.call(zoom.transform, ...). this.zoom = d3.zoom() .scaleExtent([0.1, 10000]) @@ -553,7 +553,7 @@ class ChainViz { viz.seedCircles .attr('cx', d => viz.currentXScale(d.ref_pos)) .attr('cy', d => viz.currentYScale(d.read_pos)); - viz.zoomG.selectAll('.transition').call(viz.positionLinesFn()); + viz.zoomG.selectAll('.transition').call(viz.#positionLinesFn()); }); this.svg.call(this.zoom); @@ -568,12 +568,12 @@ class ChainViz { if (bestSeed && bestSeed.max_score > 0) { this.selectedSeed = bestSeed; this.seedCircles.filter(s => s.index === bestSeed.index).classed('selected', true); - this.showTraceback(bestSeed.index); + this.#showTraceback(bestSeed.index); const bestChainSeedIndices = new Set(this.tracebackSeedIndices); this.seedCircles.classed('on-best-chain', d => bestChainSeedIndices.has(d.index)); - this.zoomToFit(seeds.filter(s => this.tracebackSeedIndices.has(s.index)), 0.05); + this.#zoomToFit(seeds.filter(s => this.tracebackSeedIndices.has(s.index)), 0.05); } else { - this.zoomToFit(seeds, 0.05); + this.#zoomToFit(seeds, 0.05); } } @@ -584,14 +584,14 @@ class ChainViz { /** * Base tooltip text for a seed. */ - seedTooltipText(d) { + #seedTooltipText(d) { return `Seed #${d.seed_num} (${d.strand})\\nSeed ID: ${d.seed_id}\\nRead: ${d.read_pos}\\nRef: ${d.ref_pos}\\nMax score: ${d.max_score}`; } /** * Tooltip text for a transition line. */ - transitionTooltipText(d) { + #transitionTooltipText(d) { return `Score: ${d.score}\\nFraction of max: ${(d.fraction * 100).toFixed(1)}%`; } @@ -599,7 +599,7 @@ class ChainViz { * Set x1/y1/x2/y2 on a D3 selection of elements using * current scales. */ - positionLines(selection) { + #positionLines(selection) { const viz = this; selection.each(function(d) { const src = viz.data.seedById(d.source_id); @@ -618,9 +618,9 @@ class ChainViz { * Return a closure suitable for D3 selection.call() that positions * elements using the current zoom-adjusted scales. */ - positionLinesFn() { + #positionLinesFn() { const viz = this; - return function(selection) { viz.positionLines(selection); }; + return function(selection) { viz.#positionLines(selection); }; } // ----------------------------------------------------------- @@ -630,7 +630,7 @@ class ChainViz { /** * Follow best-scoring transitions backward from seedIndex. */ - computeTraceback(seedIndex) { + #computeTraceback(seedIndex) { const path = []; const visited = new Set(); let currentIndex = seedIndex; @@ -650,8 +650,8 @@ class ChainViz { /** * Display the traceback path from seedIndex as red lines. */ - showTraceback(seedIndex) { - const path = this.computeTraceback(seedIndex); + #showTraceback(seedIndex) { + const path = this.#computeTraceback(seedIndex); this.tracebackSeedIndices.clear(); path.forEach(t => { this.tracebackSeedIndices.add(t.source_index); @@ -667,14 +667,14 @@ class ChainViz { lines.enter() .append('line') .attr('class', 'transition traceback') - .call(this.positionLinesFn()) + .call(this.#positionLinesFn()) .append('title') - .text(d => this.transitionTooltipText(d)); + .text(d => this.#transitionTooltipText(d)); lines.exit().remove(); } - hideTraceback() { + #hideTraceback() { this.tracebackLayer.selectAll('.traceback').remove(); this.tracebackSeedIndices.clear(); this.seedCircles.classed('on-traceback', false); @@ -683,7 +683,7 @@ class ChainViz { /** * Show positive-score transitions arriving at seedIndex as colored lines. */ - showSecondaryTransitions(seedIndex) { + #showSecondaryTransitions(seedIndex) { const allTrans = this.data.transitionsTo(seedIndex); const positiveTrans = allTrans.filter(t => t.score > 0); const className = 'secondary-dest-' + seedIndex; @@ -693,20 +693,20 @@ class ChainViz { .enter() .append('line') .attr('class', 'transition secondary ' + className) - .call(this.positionLinesFn()) + .call(this.#positionLinesFn()) .attr('stroke', d => d.is_max ? 'red' : this.colorScale(d.fraction)) .append('title') - .text(d => this.transitionTooltipText(d)); + .text(d => this.#transitionTooltipText(d)); } - hideSecondaryTransitions(seedIndex) { + #hideSecondaryTransitions(seedIndex) { this.secondaryTransitionLayer.selectAll('.secondary-dest-' + seedIndex).remove(); } /** * Draw a thick black line from hoveredIndex to the selected seed. */ - highlightTransitionToSelected(hoveredIndex) { + #highlightTransitionToSelected(hoveredIndex) { if (!this.selectedSeed || hoveredIndex === this.selectedSeed.index) return; const trans = this.data.transitionFromTo(hoveredIndex, this.selectedSeed.index); if (!trans) return; @@ -714,20 +714,20 @@ class ChainViz { this.hoveredToSelectedLayer.append('line') .datum(trans) .attr('class', 'transition to-selected') - .call(this.positionLinesFn()) + .call(this.#positionLinesFn()) .append('title') - .text(d => this.transitionTooltipText(d)); + .text(d => this.#transitionTooltipText(d)); } - unhighlightTransitionToSelected() { + #unhighlightTransitionToSelected() { this.hoveredToSelectedLayer.selectAll('*').remove(); } /** * Rebuild a seed's tooltip, adding transition info when a seed is selected. */ - updateSeedTooltip(circle, d) { - let text = this.seedTooltipText(d); + #updateSeedTooltip(circle, d) { + let text = this.#seedTooltipText(d); if (this.selectedSeed && this.selectedSeed !== d) { const trans = this.data.transitionFromTo(d.index, this.selectedSeed.index); if (trans) { @@ -750,7 +750,7 @@ class ChainViz { /** * Zoom to fit the given seeds with padding as a fraction of data range. */ - zoomToFit(seedsToFit, padding) { + #zoomToFit(seedsToFit, padding) { if (seedsToFit.length === 0) return; const [xMin, xMax] = d3.extent(seedsToFit, s => s.ref_pos); const [yMin, yMax] = d3.extent(seedsToFit, s => s.read_pos); From bbf07badefa02fe47ada2fdf0a5555fe94602115 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Tue, 10 Feb 2026 12:29:31 -0500 Subject: [PATCH 19/20] Turn off chaining debugging --- src/algorithms/chain_items.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/algorithms/chain_items.cpp b/src/algorithms/chain_items.cpp index 5182c91ae0..63e168e38f 100644 --- a/src/algorithms/chain_items.cpp +++ b/src/algorithms/chain_items.cpp @@ -11,7 +11,7 @@ #include #include -#define debug_chaining +//#define debug_chaining //#define debug_transition namespace vg { From 778fc22e9be26f9a0600ee502d90d78082b830ac Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Tue, 10 Feb 2026 12:41:50 -0500 Subject: [PATCH 20/20] Adjust test to allow for new explanation files --- test/t/50_vg_giraffe.t | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/t/50_vg_giraffe.t b/test/t/50_vg_giraffe.t index 47747b0887..156976000d 100644 --- a/test/t/50_vg_giraffe.t +++ b/test/t/50_vg_giraffe.t @@ -126,7 +126,7 @@ rm -f read.fq read.gam vg giraffe -Z x.giraffe.gbz -f reads/small.middle.ref.indel.multi.fq --show-work --track-position -b chaining-sr > /dev/null 2>&1 # Check that at least some TSV files and directories were created -is "$(find explanation_read1 -name '*.tsv' 2>/dev/null | wc -l | tr -d ' ')" "1" "Chain explanation files are created per chain" +is "$(find explanation_read1 -name 'chain*-dotplot*.tsv' 2>/dev/null | wc -l | tr -d ' ')" "1" "Chain explanation files are created per chain" is "$(ls -d explanation_* 2>/dev/null | wc -l | tr -d ' ')" "3" "Explanation directories are created per read" rm -rf explanation_*