From a4c2d61a35ab7f26d791aee04a844a0478478497 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Tue, 25 Mar 2025 09:09:56 +0200 Subject: [PATCH 1/9] Update kbo to v0.5.0. --- Cargo.toml | 2 +- src/main.rs | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index e98a7ed..ba4d450 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,7 +18,7 @@ path = "src/main.rs" ## core clap = { version = "4", features = ["derive"]} chrono = "0.4.40" -kbo = "0.4.1" +kbo = "0.5.0" log = "0.4.20" needletail = { version = "0.6.0", default-features = false, features = ["flate2"] } noodles-vcf = "0.49" diff --git a/src/main.rs b/src/main.rs index 1da3041..c0cacbe 100644 --- a/src/main.rs +++ b/src/main.rs @@ -98,7 +98,7 @@ fn main() { }) => { init_log(if *verbose { 2 } else { 1 }); - let mut sbwt_build_options = kbo::index::BuildOpts::default(); + let mut sbwt_build_options = kbo::BuildOpts::default(); sbwt_build_options.k = *kmer_size; sbwt_build_options.num_threads = *num_threads; sbwt_build_options.prefix_precalc = *prefix_precalc; @@ -137,7 +137,7 @@ fn main() { verbose, }) => { init_log(if *verbose { 2 } else { 1 }); - let mut sbwt_build_options = kbo::index::BuildOpts::default(); + let mut sbwt_build_options = kbo::BuildOpts::default(); sbwt_build_options.k = *kmer_size; sbwt_build_options.num_threads = *num_threads; sbwt_build_options.prefix_precalc = *prefix_precalc; @@ -256,7 +256,7 @@ fn main() { }) => { init_log(if *verbose { 2 } else { 1 }); - let mut sbwt_build_options = kbo::index::BuildOpts::default(); + let mut sbwt_build_options = kbo::BuildOpts::default(); // These are required for the subcommand to work correctly sbwt_build_options.add_revcomp = true; sbwt_build_options.build_select = true; @@ -315,7 +315,7 @@ fn main() { verbose, }) => { init_log(if *verbose { 2 } else { 1 }); - let mut sbwt_build_options = kbo::index::BuildOpts::default(); + let mut sbwt_build_options = kbo::BuildOpts::default(); // These are required for the subcommand to work correctly sbwt_build_options.add_revcomp = true; sbwt_build_options.build_select = true; From c5906136720bc2d48a18c686a1866de671198714 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Tue, 25 Mar 2025 09:19:07 +0200 Subject: [PATCH 2/9] Update default parameters for call. --- src/cli.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cli.rs b/src/cli.rs index 63f0d68..45a049b 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -74,7 +74,7 @@ pub enum Commands { ref_file: PathBuf, // Upper bound for random match probability - #[arg(long = "max-error-prob", default_value_t = 0.0000001, help_heading = "Algorithm")] + #[arg(long = "max-error-prob", default_value_t = 0.00000001, help_heading = "Algorithm")] max_error_prob: f64, // Resources From 530ade154948aff8d5183c3186a5bb3fe9a4e8cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Tue, 25 Mar 2025 10:01:37 +0200 Subject: [PATCH 3/9] Implement --input-list in build. --- Cargo.toml | 1 + src/cli.rs | 5 ++- src/main.rs | 94 ++++++++++++++++++++++++++++++++++++----------------- 3 files changed, 70 insertions(+), 30 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index ba4d450..f2f5c99 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,6 +18,7 @@ path = "src/main.rs" ## core clap = { version = "4", features = ["derive"]} chrono = "0.4.40" +csv = "1.3.0" kbo = "0.5.0" log = "0.4.20" needletail = { version = "0.6.0", default-features = false, features = ["flate2"] } diff --git a/src/cli.rs b/src/cli.rs index 45a049b..771870e 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -31,8 +31,11 @@ pub enum Commands { #[arg(group = "input", required = true)] seq_files: Vec, + #[arg(short = 'l', long = "input-list", group = "input", required = true, help_heading = "Input")] + input_list: Option, + // Outputs - #[arg(short = 'o', long = "output-prefix", required = false, help_heading = "Output")] + #[arg(short = 'o', long = "output-prefix", required = false, required = true, help_heading = "Output")] output_prefix: Option, // Build parameters diff --git a/src/main.rs b/src/main.rs index c0cacbe..fce88a3 100644 --- a/src/main.rs +++ b/src/main.rs @@ -12,6 +12,7 @@ // at your option. // use std::io::Write; +use std::path::PathBuf; use clap::Parser; use log::info; @@ -52,6 +53,33 @@ fn read_fastx_file( seq_data } +fn read_input_list( + input_list_file: &String, + delimiter: u8 +) -> Vec<(String, PathBuf)> { + let fs = match std::fs::File::open(input_list_file) { + Ok(fs) => fs, + Err(e) => panic!(" Error in reading --input-list: {}", e), + }; + + let mut reader = csv::ReaderBuilder::new() + .delimiter(delimiter) + .has_headers(false) + .from_reader(fs); + + reader.records().map(|line| { + if let Ok(record) = line { + if record.len() > 1 { + (record[0].to_string(), PathBuf::from(record[1].to_string())) + } else { + (record[0].to_string(), PathBuf::from(record[0].to_string())) + } + } else { + panic!(" Error in reading --input-list: {}", input_list_file); + } + }).collect::>() +} + /// Initializes the logger with verbosity given in `log_max_level`. fn init_log(log_max_level: usize) { stderrlog::new() @@ -86,39 +114,47 @@ fn main() { // Subcommands: match &cli.command { Some(cli::Commands::Build { - seq_files, - output_prefix, + seq_files, + input_list, + output_prefix, kmer_size, - prefix_precalc, - dedup_batches, - num_threads, - mem_gb, - temp_dir, - verbose, + prefix_precalc, + dedup_batches, + num_threads, + mem_gb, + temp_dir, + verbose, }) => { - init_log(if *verbose { 2 } else { 1 }); + init_log(if *verbose { 2 } else { 1 }); let mut sbwt_build_options = kbo::BuildOpts::default(); - sbwt_build_options.k = *kmer_size; - sbwt_build_options.num_threads = *num_threads; - sbwt_build_options.prefix_precalc = *prefix_precalc; - sbwt_build_options.dedup_batches = *dedup_batches; - sbwt_build_options.mem_gb = *mem_gb; - sbwt_build_options.temp_dir = temp_dir.clone(); - - info!("Building SBWT index from {} files...", seq_files.len()); - let mut seq_data: Vec> = Vec::new(); - seq_files.iter().for_each(|file| { - seq_data.append(&mut read_fastx_file(file).into_iter().map(|(_, seq)| seq).collect::>>()); - }); - - let (sbwt, lcs) = kbo::build(&seq_data, sbwt_build_options); - - info!("Serializing SBWT index to {}.sbwt ...", output_prefix.as_ref().unwrap()); - info!("Serializing LCS array to {}.lcs ...", output_prefix.as_ref().unwrap()); - kbo::index::serialize_sbwt(output_prefix.as_ref().unwrap(), &sbwt, &lcs); - - }, + sbwt_build_options.k = *kmer_size; + sbwt_build_options.num_threads = *num_threads; + sbwt_build_options.prefix_precalc = *prefix_precalc; + sbwt_build_options.dedup_batches = *dedup_batches; + sbwt_build_options.mem_gb = *mem_gb; + sbwt_build_options.temp_dir = temp_dir.clone(); + + let mut in_files = seq_files.clone(); + if let Some(list) = input_list { + let contents = read_input_list(list, b'\t'); + let contents_iter = contents.iter().map(|(_, path)| path.to_str().unwrap().to_string()); + in_files.extend(contents_iter); + } + + info!("Building SBWT index from {} files...", in_files.len()); + let mut seq_data: Vec> = Vec::new(); + in_files.iter().for_each(|file| { + seq_data.append(&mut read_fastx_file(file).into_iter().map(|(_, seq)| seq).collect::>>()); + }); + + let (sbwt, lcs) = kbo::build(&seq_data, sbwt_build_options); + + info!("Serializing SBWT index to {}.sbwt ...", output_prefix.as_ref().unwrap()); + info!("Serializing LCS array to {}.lcs ...", output_prefix.as_ref().unwrap()); + kbo::index::serialize_sbwt(output_prefix.as_ref().unwrap(), &sbwt, &lcs); + + }, Some(cli::Commands::Find { query_files, From 9bd5eadfb5da503759e9a0e25feb6eae5fcee63f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Tue, 25 Mar 2025 10:03:45 +0200 Subject: [PATCH 4/9] Always require -o in build. --- src/cli.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cli.rs b/src/cli.rs index 771870e..c46c65c 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -35,7 +35,7 @@ pub enum Commands { input_list: Option, // Outputs - #[arg(short = 'o', long = "output-prefix", required = false, required = true, help_heading = "Output")] + #[arg(short = 'o', long = "output-prefix", required = true, help_heading = "Output")] output_prefix: Option, // Build parameters From 630d17fda986be3ab37aeab2d4e16bc897d4261f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Tue, 25 Mar 2025 10:23:24 +0200 Subject: [PATCH 5/9] Implement -o/--output in call. --- src/cli.rs | 10 ++++++--- src/main.rs | 63 ++++++++++++++++++++++++++++++++++++++--------------- 2 files changed, 52 insertions(+), 21 deletions(-) diff --git a/src/cli.rs b/src/cli.rs index c46c65c..f02d0c0 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -68,14 +68,18 @@ pub enum Commands { // Call variants in query relative to a reference Call{ - // Input fasta or fastq query file(s) + // Inputs + // // Input fasta or fastq query file(s) #[arg(group = "input", required = true)] query_file: PathBuf, - - // Reference fasta or fastq file + // // Reference fasta or fastq file #[arg(long = "reference", short = 'r', required = true, help_heading = "Input")] ref_file: PathBuf, + // Outputs + #[arg(short = 'o', long = "output", required = false, help_heading = "Output")] + output_file: Option, + // Upper bound for random match probability #[arg(long = "max-error-prob", default_value_t = 0.00000001, help_heading = "Algorithm")] max_error_prob: f64, diff --git a/src/main.rs b/src/main.rs index fce88a3..8e65ab9 100644 --- a/src/main.rs +++ b/src/main.rs @@ -283,6 +283,7 @@ fn main() { Some(cli::Commands::Call{ query_file, ref_file, + output_file, max_error_prob, num_threads, kmer_size, @@ -314,28 +315,54 @@ fn main() { let ref_data: Vec<(String, Vec)> = read_fastx_file(ref_file.to_str().unwrap()); - let stdout = std::io::stdout(); - let query_data: Vec> = read_fastx_file(query_file.to_str().unwrap()).iter().map(|(_, seq)| seq.clone()).collect(); let (sbwt_query, lcs_query) = kbo::index::build_sbwt_from_vecs(&query_data, &Some(sbwt_build_options.clone())); // Will map separately against each contig in the reference - let vcf_header = vcf_writer::write_vcf_header(&mut stdout.lock(), ref_file.to_str().unwrap(), - &ref_data.iter().map(|(contig_name, contig_seq)| { - (contig_name.clone(), contig_seq.len()) - }).collect::>()) - .expect("Write header to .vcf file"); - - ref_data.iter().for_each(|(ref_contig_header, ref_seq)| { - let calls = kbo::call(&sbwt_query, &lcs_query, ref_seq, call_opts.clone()); - vcf_writer::write_vcf_contents( - &mut stdout.lock(), - &vcf_header, - &calls, - ref_seq, - ref_contig_header, - ).expect("Wrote .vcf record"); - }); + if output_file.is_some() { + // this is dumb, surely there is a way to pass stdout or the file? + let mut ofs = match std::fs::File::create(output_file.as_ref().unwrap()) { + Ok(file) => file, + Err(e) => panic!(" Error in opening --output: {}", e), + }; + + let vcf_header = vcf_writer::write_vcf_header(&mut ofs, + ref_file.to_str().unwrap(), + &ref_data.iter().map(|(contig_name, contig_seq)| { + (contig_name.clone(), contig_seq.len()) + }).collect::>()) + .expect("Write header to .vcf file"); + + ref_data.iter().for_each(|(ref_contig_header, ref_seq)| { + let calls = kbo::call(&sbwt_query, &lcs_query, ref_seq, call_opts.clone()); + vcf_writer::write_vcf_contents( + &mut ofs, + &vcf_header, + &calls, + ref_seq, + ref_contig_header, + ).expect("Wrote .vcf record"); + }); + } else { + let stdout = std::io::stdout(); + let vcf_header = vcf_writer::write_vcf_header(&mut stdout.lock(), + ref_file.to_str().unwrap(), + &ref_data.iter().map(|(contig_name, contig_seq)| { + (contig_name.clone(), contig_seq.len()) + }).collect::>()) + .expect("Write header to .vcf file"); + + ref_data.iter().for_each(|(ref_contig_header, ref_seq)| { + let calls = kbo::call(&sbwt_query, &lcs_query, ref_seq, call_opts.clone()); + vcf_writer::write_vcf_contents( + &mut stdout.lock(), + &vcf_header, + &calls, + ref_seq, + ref_contig_header, + ).expect("Wrote .vcf record"); + }); + } } Some(cli::Commands::Map { From 84eccbc55c7f1824a0629484eec2c65006c82c4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Tue, 25 Mar 2025 10:32:36 +0200 Subject: [PATCH 6/9] Implement -l/--input-list in map. --- src/cli.rs | 3 +++ src/main.rs | 13 ++++++++++--- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/cli.rs b/src/cli.rs index f02d0c0..fa4d1e2 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -167,6 +167,9 @@ pub enum Commands { query_files: Vec, // Input options + // // Input list + #[arg(short = 'l', long = "input-list", group = "input", required = true, help_heading = "Input")] + input_list: Option, // // Reference file #[arg(short = 'r', long = "reference", required = true, help_heading = "Input")] ref_file: String, diff --git a/src/main.rs b/src/main.rs index 8e65ab9..f0bc823 100644 --- a/src/main.rs +++ b/src/main.rs @@ -368,6 +368,7 @@ fn main() { Some(cli::Commands::Map { query_files, ref_file, + input_list, max_error_prob, num_threads, kmer_size, @@ -400,13 +401,19 @@ fn main() { .build() .unwrap(); + let mut in_files: Vec<(String, PathBuf)> = query_files.iter().map(|file| (file.clone(), PathBuf::from(file))).collect(); + if let Some(list) = input_list { + let mut contents = read_input_list(list, b'\t'); + in_files.append(&mut contents); + } + let ref_data: Vec<(String, Vec)> = read_fastx_file(ref_file); let stdout = std::io::stdout(); let mut first_write = true; - query_files.iter().for_each(|query_file| { - let contigs: Vec> = read_fastx_file(query_file).iter().map(|(_, seq)| seq.clone()).collect(); + in_files.iter().for_each(|(file, path)| { + let contigs: Vec> = read_fastx_file(path.to_str().unwrap()).iter().map(|(_, seq)| seq.clone()).collect(); let (sbwt, lcs) = kbo::index::build_sbwt_from_vecs(&contigs, &Some(sbwt_build_options.clone())); let mut res: Vec = Vec::new(); @@ -419,7 +426,7 @@ fn main() { first_write = false; } let _ = writeln!(&mut stdout.lock(), - ">{}\n{}", query_file, std::str::from_utf8(&res).expect("UTF-8")); + ">{}\n{}", file, std::str::from_utf8(&res).expect("UTF-8")); }); }, None => {} From 6fff3e830e81eef8689de3eb3011925b7dab0c86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Tue, 25 Mar 2025 10:41:03 +0200 Subject: [PATCH 7/9] Implement -o/--output in map. --- src/cli.rs | 5 +++++ src/main.rs | 35 +++++++++++++++++++++++++++++------ 2 files changed, 34 insertions(+), 6 deletions(-) diff --git a/src/cli.rs b/src/cli.rs index fa4d1e2..7ceaf67 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -174,6 +174,11 @@ pub enum Commands { #[arg(short = 'r', long = "reference", required = true, help_heading = "Input")] ref_file: String, + // Output options + // // Output file + #[arg(short = 'o', long = "output", required = false, help_heading = "Output")] + output_file: Option, + // Parameters // // Upper bound for random match probability #[arg(long = "max-error-prob", default_value_t = 0.0000001, help_heading = "Algorithm")] diff --git a/src/main.rs b/src/main.rs index f0bc823..b08d6c6 100644 --- a/src/main.rs +++ b/src/main.rs @@ -369,6 +369,7 @@ fn main() { query_files, ref_file, input_list, + output_file, max_error_prob, num_threads, kmer_size, @@ -409,7 +410,15 @@ fn main() { let ref_data: Vec<(String, Vec)> = read_fastx_file(ref_file); - let stdout = std::io::stdout(); + let ofs = if output_file.is_some() { + let ofs = match std::fs::File::create(output_file.as_ref().unwrap()) { + Ok(file) => file, + Err(e) => panic!(" Error in opening --output: {}", e), + }; + Some(ofs) + } else { + None + }; let mut first_write = true; in_files.iter().for_each(|(file, path)| { @@ -420,13 +429,27 @@ fn main() { ref_data.iter().for_each(|(_, ref_seq)| { res.append(&mut kbo::map(ref_seq, &sbwt, &lcs, map_opts.clone())); }); - if first_write { + + if ofs.is_some() { + + if first_write { + let _ = writeln!(&mut ofs.as_ref().unwrap(), + ">{}\n{}", ref_file, std::str::from_utf8(&ref_data.iter().flat_map(|x| x.1.clone()).collect::>()).expect("UTF-8")); + first_write = false; + } + let _ = writeln!(&mut ofs.as_ref().unwrap(), + ">{}\n{}", file, std::str::from_utf8(&res).expect("UTF-8")); + } else { + let stdout = std::io::stdout(); + + if first_write { + let _ = writeln!(&mut stdout.lock(), + ">{}\n{}", ref_file, std::str::from_utf8(&ref_data.iter().flat_map(|x| x.1.clone()).collect::>()).expect("UTF-8")); + first_write = false; + } let _ = writeln!(&mut stdout.lock(), - ">{}\n{}", ref_file, std::str::from_utf8(&ref_data.iter().flat_map(|x| x.1.clone()).collect::>()).expect("UTF-8")); - first_write = false; + ">{}\n{}", file, std::str::from_utf8(&res).expect("UTF-8")); } - let _ = writeln!(&mut stdout.lock(), - ">{}\n{}", file, std::str::from_utf8(&res).expect("UTF-8")); }); }, None => {} From 6c5f5d0bc90dedfba87ed04bcf2b159b619a31f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Tue, 25 Mar 2025 10:45:35 +0200 Subject: [PATCH 8/9] Implement -l/--input-list in find. --- src/cli.rs | 6 +++++- src/main.rs | 12 ++++++++++-- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/src/cli.rs b/src/cli.rs index 7ceaf67..ef4b11c 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -106,9 +106,13 @@ pub enum Commands { // Find indexed k-mers in a query Find { - // Input fasta or fastq query file(s) + // Inputs + // // Input fasta or fastq query file(s) #[arg(group = "input", required = true)] query_files: Vec, + // // Input list + #[arg(short = 'l', long = "input-list", group = "input", required = true, help_heading = "Input")] + input_list: Option, // Reference // // Sequence file diff --git a/src/main.rs b/src/main.rs index b08d6c6..0b4f7fc 100644 --- a/src/main.rs +++ b/src/main.rs @@ -158,6 +158,7 @@ fn main() { Some(cli::Commands::Find { query_files, + input_list, ref_file, index_prefix, detailed, @@ -223,12 +224,19 @@ fn main() { .build() .unwrap(); + + let mut in_files: Vec<(String, PathBuf)> = query_files.iter().map(|file| (file.clone(), PathBuf::from(file))).collect(); + if let Some(list) = input_list { + let mut contents = read_input_list(list, b'\t'); + in_files.append(&mut contents); + } + info!("Querying SBWT index..."); println!("query\tref\tq.start\tq.end\tstrand\tlength\tmismatches\tgap_bases\tgap_opens\tidentity\tcoverage\tquery.contig\tref.contig"); let stdout = std::io::stdout(); - query_files.iter().for_each(|file| { + in_files.iter().for_each(|(file, path)| { let mut run_lengths: Vec<(kbo::format::RLE, char, String, String, usize, usize)> = indexes.par_iter().map(|((sbwt, lcs), ref_contig, ref_bases)| { - let mut reader = needletail::parse_fastx_file(file).expect("valid path/file"); + let mut reader = needletail::parse_fastx_file(path).ok().unwrap(); let mut res: Vec<(kbo::format::RLE, char, String, String, usize, usize)> = Vec::new(); while let Some(seqrec) = read_from_fastx_parser(&mut *reader) { let query_contig = std::str::from_utf8(seqrec.id()).expect("UTF-8"); From 4bbed5f784c6656e295c71069ebaea2d7c423fbc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Tue, 25 Mar 2025 10:52:34 +0200 Subject: [PATCH 9/9] Implement -o/--output in find. --- src/cli.rs | 5 +++++ src/main.rs | 65 ++++++++++++++++++++++++++++++++++++++++------------- 2 files changed, 54 insertions(+), 16 deletions(-) diff --git a/src/cli.rs b/src/cli.rs index ef4b11c..01d3f5a 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -114,6 +114,11 @@ pub enum Commands { #[arg(short = 'l', long = "input-list", group = "input", required = true, help_heading = "Input")] input_list: Option, + // Output options + // // Output file + #[arg(short = 'o', long = "output", required = false, help_heading = "Output")] + output_file: Option, + // Reference // // Sequence file #[arg(short = 'r', long = "reference", group = "reference", help_heading = "Input")] diff --git a/src/main.rs b/src/main.rs index 0b4f7fc..330ad13 100644 --- a/src/main.rs +++ b/src/main.rs @@ -160,6 +160,7 @@ fn main() { query_files, input_list, ref_file, + output_file, index_prefix, detailed, min_len, @@ -231,9 +232,23 @@ fn main() { in_files.append(&mut contents); } + let mut ofs = if output_file.is_some() { + let ofs = match std::fs::File::create(output_file.as_ref().unwrap()) { + Ok(file) => file, + Err(e) => panic!(" Error in opening --output: {}", e), + }; + Some(ofs) + } else { + None + }; + info!("Querying SBWT index..."); - println!("query\tref\tq.start\tq.end\tstrand\tlength\tmismatches\tgap_bases\tgap_opens\tidentity\tcoverage\tquery.contig\tref.contig"); - let stdout = std::io::stdout(); + if let Some(ofs) = &mut ofs { + let _ = ofs.write(b"query\tref\tq.start\tq.end\tstrand\tlength\tmismatches\tgap_bases\tgap_opens\tidentity\tcoverage\tquery.contig\tref.contig"); + } else { + let stdout = std::io::stdout(); + let _ = stdout.lock().write(b"query\tref\tq.start\tq.end\tstrand\tlength\tmismatches\tgap_bases\tgap_opens\tidentity\tcoverage\tquery.contig\tref.contig"); + } in_files.iter().for_each(|(file, path)| { let mut run_lengths: Vec<(kbo::format::RLE, char, String, String, usize, usize)> = indexes.par_iter().map(|((sbwt, lcs), ref_contig, ref_bases)| { let mut reader = needletail::parse_fastx_file(path).ok().unwrap(); @@ -271,20 +286,38 @@ fn main() { let aln_end = if *strand == '+' { aln.end } else { query_bases - aln.start }; let coverage = (aln.matches as f64 + aln.mismatches as f64)/(*ref_bases as f64) * 100_f64; let identity = (aln.matches as f64)/(aln_len as f64) * 100_f64; - let _ = writeln!(&mut stdout.lock(), - "{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{:.2}\t{:.2}\t{}\t{}", - file, ref_file.clone().unwrap(), - aln_start, - aln_end, - strand, - aln.end - aln.start, - aln.mismatches, - aln.gap_bases, - aln.gap_opens, - identity, - coverage, - query_contig, - ref_contig); + if ofs.is_some() { + let _ = writeln!(&mut ofs.as_ref().unwrap(), + "{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{:.2}\t{:.2}\t{}\t{}", + file, ref_file.clone().unwrap(), + aln_start, + aln_end, + strand, + aln.end - aln.start, + aln.mismatches, + aln.gap_bases, + aln.gap_opens, + identity, + coverage, + query_contig, + ref_contig); + } else { + let stdout = std::io::stdout(); + let _ = writeln!(&mut stdout.lock(), + "{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{:.2}\t{:.2}\t{}\t{}", + file, ref_file.clone().unwrap(), + aln_start, + aln_end, + strand, + aln.end - aln.start, + aln.mismatches, + aln.gap_bases, + aln.gap_opens, + identity, + coverage, + query_contig, + ref_contig); + } }); }); },