From fa0162d0ac07a8ef862c7c30839931fc9bf455cf Mon Sep 17 00:00:00 2001 From: vagkaratzas Date: Tue, 13 Jan 2026 10:46:36 +0000 Subject: [PATCH 01/31] fastq_shortreads_preprocess_qc init --- .../fastq_shortreads_preprocess_qc/main.nf | 156 ++++++++++++++++++ .../fastq_shortreads_preprocess_qc/meta.yml | 51 ++++++ .../tests/main.nf.test | 46 ++++++ 3 files changed, 253 insertions(+) create mode 100644 subworkflows/nf-core/fastq_shortreads_preprocess_qc/main.nf create mode 100644 subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml create mode 100644 subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test diff --git a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/main.nf b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/main.nf new file mode 100644 index 000000000000..ec09981c36bf --- /dev/null +++ b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/main.nf @@ -0,0 +1,156 @@ +// statistics +include { FASTQ_GENERATE_STATISTICS as PRE_STATS } from '../fastq_generate_statistics/main' +include { FASTQ_GENERATE_STATISTICS as POST_STATS } from '../fastq_generate_statistics/main' +// preprocessing +include { FASTQ_PREPROCESS_SEQKIT } from '../fastq_preprocess_seqkit/main' +// barcoding +include { UMITOOLS_EXTRACT } from '../../../modules/nf-core/umitools/extract/main' +// adapter removal and merging +// include { FASTQ_REMOVEADAPTERS_MERGE } from '../fastq_removeadapters_merge/main' +// complexity filtering +include { PRINSEQPLUSPLUS } from '../../../modules/nf-core/prinseqplusplus/main' +// deduplication +include { BBMAP_CLUMPIFY } from '../../../modules/nf-core/bbmap/clumpify/main' +// host decontamination +include { FASTQ_DECONTAMINATE_DEACON_HOSTILE } from '../fastq_decontaminate_deacon_hostile/main' +// final concatenation +include { CAT_FASTQ } from '../../../modules/nf-core/cat/fastq/main' + +workflow FASTQ_SHORTREADS_PREPROCESS_QC { + + take: + ch_reads // channel: [ val(meta), [ fastq ] ] + // statistics + skip_fastqc // boolean + 
skip_seqfu_check // boolean + skip_seqfu_stats // boolean + skip_seqkit_stats // boolean + skip_seqtk_comp // boolean + // preprocessing + skip_seqkit_sana_pair // boolean + skip_seqkit_seq // boolean + skip_seqkit_replace // boolean + skip_seqkit_rmdup // boolean + // barcoding + skip_umitools_extract // boolean + umi_discard_read // integer: 0, 1 or 2 + // adapter removal and merging + // skip_adapterremoval // boolean + // complexity filtering + skip_prinseqplusplus // boolean + // deduplication + skip_bbmap_clumpify // boolean + // host decontamination + skip_decontamination // boolean + ch_fasta // channel: [ val(meta), [ fasta ] ] (optional) + ch_reference // channel: [ val(reference_name), path(reference_dir) ] (optional) + index_name // val (optional) + decontaminator // string (enum): 'hostile' or 'deacon' + // final concatenation + skip_cat_fastq // boolean + + main: + + ch_versions = channel.empty() + ch_multiqc_files = channel.empty() + + // pre-statistics + PRE_STATS ( + ch_reads, + skip_fastqc, + skip_seqfu_check, + skip_seqfu_stats, + skip_seqkit_stats, + skip_seqtk_comp + ) + ch_versions = ch_versions.mix(PRE_STATS.out.versions) + + // preprocessing + FASTQ_PREPROCESS_SEQKIT ( + ch_reads, + skip_seqkit_sana_pair, + skip_seqkit_seq, + skip_seqkit_replace, + skip_seqkit_rmdup + ) + ch_versions = ch_versions.mix(FASTQ_PREPROCESS_SEQKIT.out.versions) + + // barcoding + umi_reads = FASTQ_PREPROCESS_SEQKIT.out.reads + umi_log = channel.empty() + if (!skip_umitools_extract) { + UMITOOLS_EXTRACT( ch_reads ) + umi_reads = UMITOOLS_EXTRACT.out.reads + umi_log = UMITOOLS_EXTRACT.out.log + ch_versions = ch_versions.mix(UMITOOLS_EXTRACT.out.versions.first()) + + // Discard R1 / R2 if required + if (umi_discard_read in [1, 2]) { + UMITOOLS_EXTRACT.out.reads + .map { meta, reads -> + meta.single_end ? 
[meta, reads] : [meta + ['single_end': true], reads[umi_discard_read % 2]] + } + .set { umi_reads } + } + + ch_reads = umi_reads + } + + // adapter removal and merging + // TODO + // if (!skip_adapterremoval) { + + // } + + // complexity filtering + // TODO + // if (!skip_complexity_filtering) { + // PRINSEQPLUSPLUS( ... ) + // ch_versions = ch_versions.mix(PRINSEQPLUSPLUS.out.versions.first()) + // } + + // deduplication + // TODO + // if (!skip_deduplication) { + // BBMAP_CLUMPIFY( ... ) + // ch_versions = ch_versions.mix(BBMAP_CLUMPIFY.out.versions.first()) + // } + + // host decontamination + if (!skip_decontamination) { + FASTQ_DECONTAMINATE_DEACON_HOSTILE ( + ch_reads, + ch_fasta, + ch_reference, + index_name, + decontaminator + ) + ch_versions = ch_versions.mix(FASTQ_DECONTAMINATE_DEACON_HOSTILE.out.versions) + + ch_reads = FASTQ_DECONTAMINATE_DEACON_HOSTILE.out.fastq_filtered + } + + + // final concatenation + // TODO + // if (!skip_final_concatenation) { + // CAT_FASTQ( ... ) + // ch_versions = ch_versions.mix(CAT_FASTQ.out.versions.first()) + // } + + // post-statistics + POST_STATS ( + ch_reads, + skip_fastqc, + skip_seqfu_check, + skip_seqfu_stats, + skip_seqkit_stats, + skip_seqtk_comp + ) + ch_versions = ch_versions.mix(POST_STATS.out.versions) + + emit: + reads = ch_reads // channel: [ val(meta), [ fastq ] ] + versions = ch_versions // channel: [ versions.yml ] + multiqc_files = ch_multiqc_files +} diff --git a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml new file mode 100644 index 000000000000..2f1adca07f65 --- /dev/null +++ b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml @@ -0,0 +1,51 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "fastq_shortreads_preprocess_qc" +## TODO nf-core: Add a description of the subworkflow and list keywords +description: Sort SAM/BAM/CRAM file 
+keywords: + - sort + - bam + - sam + - cram +## TODO nf-core: Add a list of the modules and/or subworkflows used in the subworkflow +components: + - samtools/sort + - samtools/index +## TODO nf-core: List all of the channels used as input with a description and their structure +input: + - ch_bam: + type: file + description: | + The input channel containing the BAM/CRAM/SAM files + Structure: [ val(meta), path(bam) ] + pattern: "*.{bam/cram/sam}" +## TODO nf-core: List all of the channels used as output with a descriptions and their structure +output: + - bam: + type: file + description: | + Channel containing BAM files + Structure: [ val(meta), path(bam) ] + pattern: "*.bam" + - bai: + type: file + description: | + Channel containing indexed BAM (BAI) files + Structure: [ val(meta), path(bai) ] + pattern: "*.bai" + - csi: + type: file + description: | + Channel containing CSI files + Structure: [ val(meta), path(csi) ] + pattern: "*.csi" + - versions: + type: file + description: | + File containing software versions + Structure: [ path(versions.yml) ] + pattern: "versions.yml" +authors: + - "@vagkaratzas" +maintainers: + - "@vagkaratzas" diff --git a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test new file mode 100644 index 000000000000..6ed640f1c855 --- /dev/null +++ b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test @@ -0,0 +1,46 @@ +// TODO nf-core: Once you have added the required tests, please run the following command to build this file: +// nf-core subworkflows test fastq_shortreads_preprocess_qc +nextflow_workflow { + + name "Test Subworkflow FASTQ_SHORTREADS_PREPROCESS_QC" + script "../main.nf" + workflow "FASTQ_SHORTREADS_PREPROCESS_QC" + + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/fastq_shortreads_preprocess_qc" + // TODO nf-core: Add tags for all modules used within this subworkflow. 
Example: + tag "samtools" + tag "samtools/sort" + tag "samtools/index" + + + // TODO nf-core: Change the test name preferably indicating the test-data and file-format used + test("sarscov2 - bam - single_end") { + + when { + workflow { + """ + // TODO nf-core: define inputs of the workflow here. Example: + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + ] + input[1] = [ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + ] + """ + } + } + then { + assert workflow.success + assertAll( + { assert snapshot( + workflow.out + //TODO nf-core: Add all required assertions to verify the test output. + ).match() } + ) + } + } +} From c363af7fb29949a41b937d741ec8c6fa39ca6004 Mon Sep 17 00:00:00 2001 From: vagkaratzas Date: Tue, 13 Jan 2026 11:07:57 +0000 Subject: [PATCH 02/31] skip all nf-test init --- .../fastq_shortreads_preprocess_qc/main.nf | 21 ++++- .../tests/main.nf.test | 81 ++++++++++++++----- 2 files changed, 81 insertions(+), 21 deletions(-) diff --git a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/main.nf b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/main.nf index ec09981c36bf..46af0497252f 100644 --- a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/main.nf +++ b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/main.nf @@ -75,8 +75,10 @@ workflow FASTQ_SHORTREADS_PREPROCESS_QC { ) ch_versions = ch_versions.mix(FASTQ_PREPROCESS_SEQKIT.out.versions) + ch_reads = FASTQ_PREPROCESS_SEQKIT.out.reads + // barcoding - umi_reads = FASTQ_PREPROCESS_SEQKIT.out.reads + umi_reads = ch_reads umi_log = channel.empty() if (!skip_umitools_extract) { UMITOOLS_EXTRACT( ch_reads ) @@ -151,6 +153,23 @@ workflow FASTQ_SHORTREADS_PREPROCESS_QC { emit: reads = ch_reads // channel: [ val(meta), [ fastq ] ] + + // statistics + 
pre_stats_fastqc_html = PRE_STATS.out.fastqc_html + pre_stats_fastqc_zip = PRE_STATS.out.fastqc_zip + pre_stats_seqfu_check = PRE_STATS.out.seqfu_check + pre_stats_seqfu_stats = PRE_STATS.out.seqfu_stats + pre_stats_seqfu_multiqc = PRE_STATS.out.seqfu_multiqc + pre_stats_seqkit_stats = PRE_STATS.out.seqkit_stats + pre_stats_seqtk_stats = PRE_STATS.out.seqtk_stats + post_stats_fastqc_html = POST_STATS.out.fastqc_html + post_stats_fastqc_zip = POST_STATS.out.fastqc_zip + post_stats_seqfu_check = POST_STATS.out.seqfu_check + post_stats_seqfu_stats = POST_STATS.out.seqfu_stats + post_stats_seqfu_multiqc = POST_STATS.out.seqfu_multiqc + post_stats_seqkit_stats = POST_STATS.out.seqkit_stats + post_stats_seqtk_stats = POST_STATS.out.seqtk_stats + versions = ch_versions // channel: [ versions.yml ] multiqc_files = ch_multiqc_files } diff --git a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test index 6ed640f1c855..c644f793c765 100644 --- a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test +++ b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test @@ -1,5 +1,3 @@ -// TODO nf-core: Once you have added the required tests, please run the following command to build this file: -// nf-core subworkflows test fastq_shortreads_preprocess_qc nextflow_workflow { name "Test Subworkflow FASTQ_SHORTREADS_PREPROCESS_QC" @@ -9,27 +7,70 @@ nextflow_workflow { tag "subworkflows" tag "subworkflows_nfcore" tag "subworkflows/fastq_shortreads_preprocess_qc" - // TODO nf-core: Add tags for all modules used within this subworkflow. 
Example: - tag "samtools" - tag "samtools/sort" - tag "samtools/index" + tag "subworkflows/fastq_generate_statistics" + tag "fastqc" + tag "seqfu" + tag "seqfu/check" + tag "seqfu/stats" + tag "seqkit" + tag "seqkit/stats" + tag "seqtk" + tag "seqtk/comp" + tag "subworkflows/fastq_preprocess_seqkit" + tag "subworkflows/fastq_sanitise_seqkit" + tag "seqkit" + tag "seqkit/sana" + tag "seqkit/pair" + tag "seqkit/seq" + tag "seqkit/replace" + tag "seqkit/rmdup" + tag "umitools" + tag "umitools/extract" + tag "prinseqplusplus" + tag "bbmap" + tag "bbmap/clumpify" + tag "subworkflows/fastq_decontaminate_deacon_hostile" + tag "subworkflows/fastq_index_filter_deacon" + tag "subworkflows/fastq_fetch_clean_hostile" + tag "hostile" + tag "hostile/fetch" + tag "hostile/clean" + tag "bowtie2/build" + tag "deacon" + tag "deacon/filter" + tag "deacon/index" + tag "cat" + tag "cat/fastq" - - // TODO nf-core: Change the test name preferably indicating the test-data and file-format used - test("sarscov2 - bam - single_end") { + test("sarscov2 - fastq - skip all - single_end") { when { workflow { """ - // TODO nf-core: define inputs of the workflow here. 
Example: - input[0] = [ - [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), - ] - input[1] = [ - [ id:'genome' ], - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), - ] + input[0] = channel.of([ + [ id:'test', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ]) + input[1] = true // skip_fastqc + input[2] = true // skip_seqfu_check + input[3] = true // skip_seqfu_stats + input[4] = true // skip_seqkit_stats + input[5] = true // skip_seqtk_comp + input[6] = true // skip_seqkit_sana_pair + input[7] = true // skip_seqkit_seq + input[8] = true // skip_seqkit_replace + input[9] = true // skip_seqkit_rmdup + input[10] = true // skip_umitools_extract + input[11] = 0 // umi_discard_read + + input[12] = true // skip_prinseqplusplus + input[13] = true // skip_bbmap_clumpify + input[14] = true // skip_decontamination + input[15] = [] // ch_fasta + input[16] = [] // ch_reference + input[17] = [] // index_name + input[18] = "" // decontaminator + input[19] = true // skip_cat_fastq """ } } @@ -37,8 +78,8 @@ nextflow_workflow { assert workflow.success assertAll( { assert snapshot( - workflow.out - //TODO nf-core: Add all required assertions to verify the test output. 
+ workflow.out.reads[0][1], + workflow.out.versions.collect { path(it).yaml } ).match() } ) } From b3a01df824ae0de1a12c9d9423175bc66a38d7ff Mon Sep 17 00:00:00 2001 From: Evangelos Karatzas <32259775+vagkaratzas@users.noreply.github.com> Date: Tue, 13 Jan 2026 11:47:19 +0000 Subject: [PATCH 03/31] fastq_qc_stats subworkflow update (#9643) * fastq_qc_stats subworkflow update - renamed and testing for skip all inputs case * bracket push --- .../nf-core/fastq_generate_statistics/main.nf | 57 ------- subworkflows/nf-core/fastq_qc_stats/main.nf | 68 ++++++++ .../meta.yml | 2 +- .../tests/main.nf.test | 43 ++++- .../tests/main.nf.test.snap | 147 ++++++++++++------ 5 files changed, 207 insertions(+), 110 deletions(-) delete mode 100644 subworkflows/nf-core/fastq_generate_statistics/main.nf create mode 100644 subworkflows/nf-core/fastq_qc_stats/main.nf rename subworkflows/nf-core/{fastq_generate_statistics => fastq_qc_stats}/meta.yml (98%) rename subworkflows/nf-core/{fastq_generate_statistics => fastq_qc_stats}/tests/main.nf.test (82%) rename subworkflows/nf-core/{fastq_generate_statistics => fastq_qc_stats}/tests/main.nf.test.snap (78%) diff --git a/subworkflows/nf-core/fastq_generate_statistics/main.nf b/subworkflows/nf-core/fastq_generate_statistics/main.nf deleted file mode 100644 index f0956d196976..000000000000 --- a/subworkflows/nf-core/fastq_generate_statistics/main.nf +++ /dev/null @@ -1,57 +0,0 @@ -// -// Short read sequencing data QC using different tools -// -include { FASTQC } from '../../../modules/nf-core/fastqc/main' -include { SEQFU_CHECK } from '../../../modules/nf-core/seqfu/check/main' -include { SEQFU_STATS } from '../../../modules/nf-core/seqfu/stats/main' -include { SEQKIT_STATS } from '../../../modules/nf-core/seqkit/stats/main' -include { SEQTK_COMP } from '../../../modules/nf-core/seqtk/comp/main' - -workflow FASTQ_GENERATE_STATISTICS { - - take: - ch_reads // channel: [ val(meta), [ fastq ] ] - skip_fastqc // boolean - skip_seqfu_check // 
boolean - skip_seqfu_stats // boolean - skip_seqkit_stats // boolean - skip_seqtk_comp // boolean - - main: - - ch_versions = Channel.empty() - - if (!skip_fastqc) { - FASTQC( ch_reads ) - } - - if (!skip_seqfu_check){ - SEQFU_CHECK( ch_reads ) - ch_versions = ch_versions.mix(SEQFU_CHECK.out.versions.first()) - } - - if (!skip_seqfu_stats) { - SEQFU_STATS ( ch_reads ) - ch_versions = ch_versions.mix(SEQFU_STATS.out.versions.first()) - } - - if (!skip_seqkit_stats) { - SEQKIT_STATS ( ch_reads ) - ch_versions = ch_versions.mix(SEQKIT_STATS.out.versions.first()) - } - - if (!skip_seqtk_comp) { - SEQTK_COMP ( ch_reads ) - ch_versions = ch_versions.mix(SEQTK_COMP.out.versions.first()) - } - - emit: - fastqc_html = FASTQC.out.html - fastqc_zip = FASTQC.out.zip - seqfu_check = SEQFU_CHECK.out.check - seqfu_stats = SEQFU_STATS.out.stats - seqfu_multiqc = SEQFU_STATS.out.multiqc - seqkit_stats = SEQKIT_STATS.out.stats - seqtk_stats = SEQTK_COMP.out.seqtk_stats - versions = ch_versions -} diff --git a/subworkflows/nf-core/fastq_qc_stats/main.nf b/subworkflows/nf-core/fastq_qc_stats/main.nf new file mode 100644 index 000000000000..d49d7cd94989 --- /dev/null +++ b/subworkflows/nf-core/fastq_qc_stats/main.nf @@ -0,0 +1,68 @@ +include { FASTQC } from '../../../modules/nf-core/fastqc/main' +include { SEQFU_CHECK } from '../../../modules/nf-core/seqfu/check/main' +include { SEQFU_STATS } from '../../../modules/nf-core/seqfu/stats/main' +include { SEQKIT_STATS } from '../../../modules/nf-core/seqkit/stats/main' +include { SEQTK_COMP } from '../../../modules/nf-core/seqtk/comp/main' + +workflow FASTQ_QC_STATS { + + take: + ch_reads // channel: [ val(meta), [ fastq ] ] + skip_fastqc // boolean + skip_seqfu_check // boolean + skip_seqfu_stats // boolean + skip_seqkit_stats // boolean + skip_seqtk_comp // boolean + + main: + + ch_versions = channel.empty() + ch_fastqc_html = channel.empty() + ch_fastqc_zip = channel.empty() + ch_seqfu_check = channel.empty() + ch_seqfu_stats = 
channel.empty() + ch_seqfu_multiqc = channel.empty() + ch_seqkit_stats = channel.empty() + ch_seqtk_stats = channel.empty() + + if (!skip_fastqc) { + FASTQC( ch_reads ) + ch_fastqc_html = FASTQC.out.html + ch_fastqc_zip = FASTQC.out.zip + } + + if (!skip_seqfu_check) { + SEQFU_CHECK( ch_reads ) + ch_seqfu_check = SEQFU_CHECK.out.check + ch_versions = ch_versions.mix(SEQFU_CHECK.out.versions.first()) + } + + if (!skip_seqfu_stats) { + SEQFU_STATS ( ch_reads ) + ch_seqfu_stats = SEQFU_STATS.out.stats + ch_seqfu_multiqc = SEQFU_STATS.out.multiqc + ch_versions = ch_versions.mix(SEQFU_STATS.out.versions.first()) + } + + if (!skip_seqkit_stats) { + SEQKIT_STATS ( ch_reads ) + ch_seqkit_stats = SEQKIT_STATS.out.stats + ch_versions = ch_versions.mix(SEQKIT_STATS.out.versions.first()) + } + + if (!skip_seqtk_comp) { + SEQTK_COMP ( ch_reads ) + ch_seqtk_stats = SEQTK_COMP.out.seqtk_stats + ch_versions = ch_versions.mix(SEQTK_COMP.out.versions.first()) + } + + emit: + fastqc_html = ch_fastqc_html + fastqc_zip = ch_fastqc_zip + seqfu_check = ch_seqfu_check + seqfu_stats = ch_seqfu_stats + seqfu_multiqc = ch_seqfu_multiqc + seqkit_stats = ch_seqkit_stats + seqtk_stats = ch_seqtk_stats + versions = ch_versions +} diff --git a/subworkflows/nf-core/fastq_generate_statistics/meta.yml b/subworkflows/nf-core/fastq_qc_stats/meta.yml similarity index 98% rename from subworkflows/nf-core/fastq_generate_statistics/meta.yml rename to subworkflows/nf-core/fastq_qc_stats/meta.yml index 04941275535b..0e8bc300c269 100644 --- a/subworkflows/nf-core/fastq_generate_statistics/meta.yml +++ b/subworkflows/nf-core/fastq_qc_stats/meta.yml @@ -1,5 +1,5 @@ # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json -name: "fastq_generate_statistics" +name: "fastq_qc_stats" description: Generate statistics for short read sequencing data using multiple tools keywords: - fastq diff --git 
a/subworkflows/nf-core/fastq_generate_statistics/tests/main.nf.test b/subworkflows/nf-core/fastq_qc_stats/tests/main.nf.test similarity index 82% rename from subworkflows/nf-core/fastq_generate_statistics/tests/main.nf.test rename to subworkflows/nf-core/fastq_qc_stats/tests/main.nf.test index 6261c142d4bb..f19d4979bf35 100644 --- a/subworkflows/nf-core/fastq_generate_statistics/tests/main.nf.test +++ b/subworkflows/nf-core/fastq_qc_stats/tests/main.nf.test @@ -1,11 +1,11 @@ nextflow_workflow { - name "Test Subworkflow FASTQ_GENERATE_STATISTICS" + name "Test Subworkflow FASTQ_QC_STATS" script "../main.nf" - workflow "FASTQ_GENERATE_STATISTICS" + workflow "FASTQ_QC_STATS" tag "subworkflows" tag "subworkflows_nfcore" - tag "subworkflows/fastq_generate_statistics" + tag "subworkflows/fastq_qc_stats" tag "fastqc" tag "seqfu" tag "seqfu/check" @@ -19,8 +19,8 @@ nextflow_workflow { when { workflow { """ - input[0] = Channel.of([ - [ id:'test_single', single_end:true ], // meta map + input[0] = channel.of([ + [ id:'test_single', single_end:true ], file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ]) input[1] = false @@ -47,11 +47,36 @@ nextflow_workflow { ) } } + + test("sarscov2 - fastq - single_end - skip all") { + when { + workflow { + """ + input[0] = channel.of([ + [ id:'test_single', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ]) + input[1] = true + input[2] = true + input[3] = true + input[4] = true + input[5] = true + """ + } + } + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } + test("sarscov2 - fastq - paired_end") { when { workflow { """ - input[0] = Channel.of([ + input[0] = channel.of([ [ id:'test_paired', single_end:false ], // meta map [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', 
checkIfExists: true), @@ -84,6 +109,7 @@ nextflow_workflow { ) } } + test("sarscov2 - fastq - single_end - stub") { options "-stub" @@ -91,7 +117,7 @@ nextflow_workflow { when { workflow { """ - input[0] = Channel.of([ + input[0] = channel.of([ [ id:'test_single', single_end:true ], // meta map file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ]) @@ -113,6 +139,7 @@ nextflow_workflow { ) } } + test("sarscov2 - fastq - paired_end - stub") { options "-stub" @@ -120,7 +147,7 @@ nextflow_workflow { when { workflow { """ - input[0] = Channel.of([ + input[0] = channel.of([ [ id:'test_paired', single_end:false ], // meta map [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), diff --git a/subworkflows/nf-core/fastq_generate_statistics/tests/main.nf.test.snap b/subworkflows/nf-core/fastq_qc_stats/tests/main.nf.test.snap similarity index 78% rename from subworkflows/nf-core/fastq_generate_statistics/tests/main.nf.test.snap rename to subworkflows/nf-core/fastq_qc_stats/tests/main.nf.test.snap index 77a457a58942..0a48bd9b2f91 100644 --- a/subworkflows/nf-core/fastq_generate_statistics/tests/main.nf.test.snap +++ b/subworkflows/nf-core/fastq_qc_stats/tests/main.nf.test.snap @@ -66,10 +66,10 @@ ] ], "7": [ - "versions.yml:md5,5cd377fecf8c1a6bb9aa657313e882d4", - "versions.yml:md5,7518d6130f9f32acd21e5ebaaeb02c82", - "versions.yml:md5,bd055fdc48b00af8df08c1f30575bfd6", - "versions.yml:md5,d1c67dc2d4ff10a37e0f6e53e243548e" + "versions.yml:md5,1d8a6e0163231bdfeea5122e5c474f3c", + "versions.yml:md5,2d80d24be109645eb464302f580d8bad", + "versions.yml:md5,bfe0d52296a5d2adacbb4107f9389dd2", + "versions.yml:md5,d556860e4ed88ea9cb4ba539f4081f83" ], "fastqc_html": [ [ @@ -135,40 +135,99 @@ ] ], "versions": [ - "versions.yml:md5,5cd377fecf8c1a6bb9aa657313e882d4", - "versions.yml:md5,7518d6130f9f32acd21e5ebaaeb02c82", - 
"versions.yml:md5,bd055fdc48b00af8df08c1f30575bfd6", - "versions.yml:md5,d1c67dc2d4ff10a37e0f6e53e243548e" + "versions.yml:md5,1d8a6e0163231bdfeea5122e5c474f3c", + "versions.yml:md5,2d80d24be109645eb464302f580d8bad", + "versions.yml:md5,bfe0d52296a5d2adacbb4107f9389dd2", + "versions.yml:md5,d556860e4ed88ea9cb4ba539f4081f83" ] }, [ { - "FASTQ_GENERATE_STATISTICS:SEQKIT_STATS": { + "FASTQ_QC_STATS:SEQKIT_STATS": { "seqkit": "2.9.0" } }, { - "FASTQ_GENERATE_STATISTICS:SEQTK_COMP": { - "seqtk": "1.4-r122" + "FASTQ_QC_STATS:SEQFU_STATS": { + "seqfu": "1.22.3" } }, { - "FASTQ_GENERATE_STATISTICS:SEQFU_CHECK": { + "FASTQ_QC_STATS:SEQFU_CHECK": { "seqfu": "1.22.3" } }, { - "FASTQ_GENERATE_STATISTICS:SEQFU_STATS": { - "seqfu": "1.22.3" + "FASTQ_QC_STATS:SEQTK_COMP": { + "seqtk": "1.4-r122" } } ] ], "meta": { "nf-test": "0.9.3", - "nextflow": "25.10.0" + "nextflow": "25.10.2" }, - "timestamp": "2025-11-26T16:21:18.145804" + "timestamp": "2026-01-13T11:23:39.702384725" + }, + "sarscov2 - fastq - single_end - skip all": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + + ], + "7": [ + + ], + "fastqc_html": [ + + ], + "fastqc_zip": [ + + ], + "seqfu_check": [ + + ], + "seqfu_multiqc": [ + + ], + "seqfu_stats": [ + + ], + "seqkit_stats": [ + + ], + "seqtk_stats": [ + + ], + "versions": [ + + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-13T11:27:15.914381733" }, "sarscov2 - fastq - single_end": { "content": [ @@ -220,17 +279,17 @@ ] ], [ - "versions.yml:md5,5cd377fecf8c1a6bb9aa657313e882d4", - "versions.yml:md5,7518d6130f9f32acd21e5ebaaeb02c82", - "versions.yml:md5,bd055fdc48b00af8df08c1f30575bfd6", - "versions.yml:md5,d1c67dc2d4ff10a37e0f6e53e243548e" + "versions.yml:md5,1d8a6e0163231bdfeea5122e5c474f3c", + "versions.yml:md5,2d80d24be109645eb464302f580d8bad", + "versions.yml:md5,bfe0d52296a5d2adacbb4107f9389dd2", + 
"versions.yml:md5,d556860e4ed88ea9cb4ba539f4081f83" ] ], "meta": { "nf-test": "0.9.3", - "nextflow": "25.10.0" + "nextflow": "25.10.2" }, - "timestamp": "2025-11-26T16:20:36.678295" + "timestamp": "2026-01-13T11:23:18.703640016" }, "sarscov2 - fastq - paired_end": { "content": [ @@ -284,17 +343,17 @@ ] ], [ - "versions.yml:md5,5cd377fecf8c1a6bb9aa657313e882d4", - "versions.yml:md5,7518d6130f9f32acd21e5ebaaeb02c82", - "versions.yml:md5,bd055fdc48b00af8df08c1f30575bfd6", - "versions.yml:md5,d1c67dc2d4ff10a37e0f6e53e243548e" + "versions.yml:md5,1d8a6e0163231bdfeea5122e5c474f3c", + "versions.yml:md5,2d80d24be109645eb464302f580d8bad", + "versions.yml:md5,bfe0d52296a5d2adacbb4107f9389dd2", + "versions.yml:md5,d556860e4ed88ea9cb4ba539f4081f83" ] ], "meta": { "nf-test": "0.9.3", - "nextflow": "25.10.0" + "nextflow": "25.10.2" }, - "timestamp": "2025-11-26T16:20:59.399629" + "timestamp": "2026-01-13T11:23:30.115092354" }, "sarscov2 - fastq - paired_end - stub": { "content": [ @@ -363,10 +422,10 @@ ] ], "7": [ - "versions.yml:md5,5cd377fecf8c1a6bb9aa657313e882d4", - "versions.yml:md5,7518d6130f9f32acd21e5ebaaeb02c82", - "versions.yml:md5,bd055fdc48b00af8df08c1f30575bfd6", - "versions.yml:md5,d1c67dc2d4ff10a37e0f6e53e243548e" + "versions.yml:md5,1d8a6e0163231bdfeea5122e5c474f3c", + "versions.yml:md5,2d80d24be109645eb464302f580d8bad", + "versions.yml:md5,bfe0d52296a5d2adacbb4107f9389dd2", + "versions.yml:md5,d556860e4ed88ea9cb4ba539f4081f83" ], "fastqc_html": [ [ @@ -432,39 +491,39 @@ ] ], "versions": [ - "versions.yml:md5,5cd377fecf8c1a6bb9aa657313e882d4", - "versions.yml:md5,7518d6130f9f32acd21e5ebaaeb02c82", - "versions.yml:md5,bd055fdc48b00af8df08c1f30575bfd6", - "versions.yml:md5,d1c67dc2d4ff10a37e0f6e53e243548e" + "versions.yml:md5,1d8a6e0163231bdfeea5122e5c474f3c", + "versions.yml:md5,2d80d24be109645eb464302f580d8bad", + "versions.yml:md5,bfe0d52296a5d2adacbb4107f9389dd2", + "versions.yml:md5,d556860e4ed88ea9cb4ba539f4081f83" ] }, [ { - 
"FASTQ_GENERATE_STATISTICS:SEQKIT_STATS": { + "FASTQ_QC_STATS:SEQKIT_STATS": { "seqkit": "2.9.0" } }, { - "FASTQ_GENERATE_STATISTICS:SEQTK_COMP": { - "seqtk": "1.4-r122" + "FASTQ_QC_STATS:SEQFU_STATS": { + "seqfu": "1.22.3" } }, { - "FASTQ_GENERATE_STATISTICS:SEQFU_CHECK": { + "FASTQ_QC_STATS:SEQFU_CHECK": { "seqfu": "1.22.3" } }, { - "FASTQ_GENERATE_STATISTICS:SEQFU_STATS": { - "seqfu": "1.22.3" + "FASTQ_QC_STATS:SEQTK_COMP": { + "seqtk": "1.4-r122" } } ] ], "meta": { "nf-test": "0.9.3", - "nextflow": "25.10.0" + "nextflow": "25.10.2" }, - "timestamp": "2025-11-26T16:21:37.948975" + "timestamp": "2026-01-13T11:23:49.767795998" } } \ No newline at end of file From e1a7bbbed471625802285b8544bf53b8ff9b2ed6 Mon Sep 17 00:00:00 2001 From: vagkaratzas Date: Tue, 13 Jan 2026 11:54:46 +0000 Subject: [PATCH 04/31] empty skip all test working --- .../fastq_shortreads_preprocess_qc/main.nf | 18 +++++++++--------- .../tests/main.nf.test | 2 +- .../tests/main.nf.test.snap | 15 +++++++++++++++ 3 files changed, 25 insertions(+), 10 deletions(-) create mode 100644 subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test.snap diff --git a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/main.nf b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/main.nf index 46af0497252f..4f126f962448 100644 --- a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/main.nf +++ b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/main.nf @@ -1,20 +1,20 @@ // statistics -include { FASTQ_GENERATE_STATISTICS as PRE_STATS } from '../fastq_generate_statistics/main' -include { FASTQ_GENERATE_STATISTICS as POST_STATS } from '../fastq_generate_statistics/main' +include { FASTQ_QC_STATS as PRE_STATS } from '../fastq_qc_stats/main' +include { FASTQ_QC_STATS as POST_STATS } from '../fastq_qc_stats/main' // preprocessing -include { FASTQ_PREPROCESS_SEQKIT } from '../fastq_preprocess_seqkit/main' +include { FASTQ_PREPROCESS_SEQKIT } from '../fastq_preprocess_seqkit/main' // barcoding 
-include { UMITOOLS_EXTRACT } from '../../../modules/nf-core/umitools/extract/main' +include { UMITOOLS_EXTRACT } from '../../../modules/nf-core/umitools/extract/main' // adapter removal and merging -// include { FASTQ_REMOVEADAPTERS_MERGE } from '../fastq_removeadapters_merge/main' +// include { FASTQ_REMOVEADAPTERS_MERGE } from '../fastq_removeadapters_merge/main' // complexity filtering -include { PRINSEQPLUSPLUS } from '../../../modules/nf-core/prinseqplusplus/main' +include { PRINSEQPLUSPLUS } from '../../../modules/nf-core/prinseqplusplus/main' // deduplication -include { BBMAP_CLUMPIFY } from '../../../modules/nf-core/bbmap/clumpify/main' +include { BBMAP_CLUMPIFY } from '../../../modules/nf-core/bbmap/clumpify/main' // host decontamination -include { FASTQ_DECONTAMINATE_DEACON_HOSTILE } from '../fastq_decontaminate_deacon_hostile/main' +include { FASTQ_DECONTAMINATE_DEACON_HOSTILE } from '../fastq_decontaminate_deacon_hostile/main' // final concatenation -include { CAT_FASTQ } from '../../../modules/nf-core/cat/fastq/main' +include { CAT_FASTQ } from '../../../modules/nf-core/cat/fastq/main' workflow FASTQ_SHORTREADS_PREPROCESS_QC { diff --git a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test index c644f793c765..6de5e621a195 100644 --- a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test +++ b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test @@ -7,7 +7,7 @@ nextflow_workflow { tag "subworkflows" tag "subworkflows_nfcore" tag "subworkflows/fastq_shortreads_preprocess_qc" - tag "subworkflows/fastq_generate_statistics" + tag "subworkflows/fastq_qc_stats" tag "fastqc" tag "seqfu" tag "seqfu/check" diff --git a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test.snap b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test.snap new file mode 100644 index 000000000000..f3427965d842 --- 
/dev/null +++ b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test.snap @@ -0,0 +1,15 @@ +{ + "sarscov2 - fastq - skip all - single_end": { + "content": [ + "/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test_1.fastq.gz", + [ + + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-13T11:53:40.26586129" + } +} \ No newline at end of file From 4447e526a4ee0e129b5789793a1ea719000862c1 Mon Sep 17 00:00:00 2001 From: vagkaratzas Date: Tue, 13 Jan 2026 12:09:49 +0000 Subject: [PATCH 05/31] meta init --- .../fastq_shortreads_preprocess_qc/meta.yml | 85 ++++++++++++------- 1 file changed, 54 insertions(+), 31 deletions(-) diff --git a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml index 2f1adca07f65..a0ab5bd5b517 100644 --- a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml +++ b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml @@ -1,44 +1,67 @@ # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json name: "fastq_shortreads_preprocess_qc" -## TODO nf-core: Add a description of the subworkflow and list keywords -description: Sort SAM/BAM/CRAM file +description: | + Quality check and preprocessing subworkflow of Illumina short reads + that can do; quality check of input reads and generate statistics, + preprocess and validate reads, barcoding, remove adapters and merge reads, + filter complexity, deduplicate reads, remove host contamination, + concatenate reads and generate statistics for post-processing reads. 
keywords: - - sort - - bam - - sam - - cram -## TODO nf-core: Add a list of the modules and/or subworkflows used in the subworkflow + - fastq + - illumina + - short + - reads + - qc + - stats + - preprocess + - barcoding + - adapters + - merge + - complexity + - deduplication + - host + - decontamination components: - - samtools/sort - - samtools/index -## TODO nf-core: List all of the channels used as input with a description and their structure + - fastq_qc_stats + - fastqc + - seqfu/check + - seqfu/stats + - seqkit/stats + - seqtk/comp + - fastq_preprocess_seqkit + - fastq_sanitise_seqkit + - seqkit/sana + - seqkit/pair + - seqkit/seq + - seqkit/replace + - seqkit/rmdup + - umitools/extract + - prinseqplusplus + - bbmap/clumpify + - fastq_decontaminate_deacon_hostile + - fastq_index_filter_deacon + - fastq_fetch_clean_hostile + - hostile/fetch + - hostile/clean + - bowtie2/build + - deacon/filter + - deacon/index + - cat/fastq + input: - - ch_bam: + - ch_reads: type: file description: | - The input channel containing the BAM/CRAM/SAM files - Structure: [ val(meta), path(bam) ] - pattern: "*.{bam/cram/sam}" -## TODO nf-core: List all of the channels used as output with a descriptions and their structure + List of FastQ files of size 1 and 2 for single-end and paired-end data, respectively. 
+ Structure: [ val(meta), [ path(reads) ] ] + pattern: "*.fastq.gz" output: - - bam: + - reads:: type: file description: | - Channel containing BAM files - Structure: [ val(meta), path(bam) ] - pattern: "*.bam" - - bai: - type: file - description: | - Channel containing indexed BAM (BAI) files - Structure: [ val(meta), path(bai) ] - pattern: "*.bai" - - csi: - type: file - description: | - Channel containing CSI files - Structure: [ val(meta), path(csi) ] - pattern: "*.csi" + Channel containing processed short reads + Structure: [ val(meta), path(reads) ] + pattern: "*.fastq.gz" - versions: type: file description: | From 4c88066863f786573a69659c0c4ffbe4c1893b04 Mon Sep 17 00:00:00 2001 From: vagkaratzas Date: Tue, 13 Jan 2026 12:12:34 +0000 Subject: [PATCH 06/31] seqera ai filled inputs and outputs of meta --- .../fastq_shortreads_preprocess_qc/meta.yml | 162 +++++++++++++++++- 1 file changed, 161 insertions(+), 1 deletion(-) diff --git a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml index a0ab5bd5b517..4616306c916e 100644 --- a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml +++ b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml @@ -55,19 +55,179 @@ input: List of FastQ files of size 1 and 2 for single-end and paired-end data, respectively. 
Structure: [ val(meta), [ path(reads) ] ] pattern: "*.fastq.gz" + - skip_fastqc: + type: boolean + description: | + Skip FastQC quality control step + - skip_seqfu_check: + type: boolean + description: | + Skip SeqFu check step + - skip_seqfu_stats: + type: boolean + description: | + Skip SeqFu statistics step + - skip_seqkit_stats: + type: boolean + description: | + Skip SeqKit statistics step + - skip_seqtk_comp: + type: boolean + description: | + Skip SeqTk composition analysis step + - skip_seqkit_sana_pair: + type: boolean + description: | + Skip SeqKit sanitize and pair step + - skip_seqkit_seq: + type: boolean + description: | + Skip SeqKit sequence processing step + - skip_seqkit_replace: + type: boolean + description: | + Skip SeqKit replace step + - skip_seqkit_rmdup: + type: boolean + description: | + Skip SeqKit remove duplicates step + - skip_umitools_extract: + type: boolean + description: | + Skip UMI-tools extract barcoding step + - umi_discard_read: + type: integer + description: | + Discard R1 or R2 after UMI extraction (0 = keep both, 1 = discard R1, 2 = discard R2) + - skip_prinseqplusplus: + type: boolean + description: | + Skip PRINSEQ++ complexity filtering step + - skip_bbmap_clumpify: + type: boolean + description: | + Skip BBMap Clumpify deduplication step + - skip_decontamination: + type: boolean + description: | + Skip host decontamination step + - ch_fasta: + type: file + description: | + Reference genome FASTA file for decontamination (optional) + Structure: [ val(meta), [ path(fasta) ] ] + pattern: "*.{fasta,fa,fna}" + - ch_reference: + type: directory + description: | + Pre-built reference index directory for decontamination (optional) + Structure: [ val(reference_name), path(reference_dir) ] + - index_name: + type: string + description: | + Name for the decontamination index (optional) + - decontaminator: + type: string + description: | + Decontamination tool to use ('hostile' or 'deacon') + - skip_cat_fastq: + type: boolean + 
description: | + Skip final FASTQ concatenation step + output: - - reads:: + - reads: type: file description: | Channel containing processed short reads Structure: [ val(meta), path(reads) ] pattern: "*.fastq.gz" + - pre_stats_fastqc_html: + type: file + description: | + FastQC HTML reports for pre-processing reads + Structure: [ val(meta), path(html) ] + pattern: "*.html" + - pre_stats_fastqc_zip: + type: file + description: | + FastQC ZIP archives for pre-processing reads + Structure: [ val(meta), path(zip) ] + pattern: "*.zip" + - pre_stats_seqfu_check: + type: file + description: | + SeqFu check results for pre-processing reads + Structure: [ val(meta), path(check) ] + - pre_stats_seqfu_stats: + type: file + description: | + SeqFu statistics for pre-processing reads + Structure: [ val(meta), path(stats) ] + - pre_stats_seqfu_multiqc: + type: file + description: | + SeqFu MultiQC-compatible stats for pre-processing reads + Structure: [ val(meta), path(multiqc) ] + - pre_stats_seqkit_stats: + type: file + description: | + SeqKit statistics for pre-processing reads + Structure: [ val(meta), path(stats) ] + - pre_stats_seqtk_stats: + type: file + description: | + SeqTk composition statistics for pre-processing reads + Structure: [ val(meta), path(stats) ] + - post_stats_fastqc_html: + type: file + description: | + FastQC HTML reports for post-processing reads + Structure: [ val(meta), path(html) ] + pattern: "*.html" + - post_stats_fastqc_zip: + type: file + description: | + FastQC ZIP archives for post-processing reads + Structure: [ val(meta), path(zip) ] + pattern: "*.zip" + - post_stats_seqfu_check: + type: file + description: | + SeqFu check results for post-processing reads + Structure: [ val(meta), path(check) ] + - post_stats_seqfu_stats: + type: file + description: | + SeqFu statistics for post-processing reads + Structure: [ val(meta), path(stats) ] + - post_stats_seqfu_multiqc: + type: file + description: | + SeqFu MultiQC-compatible stats for 
post-processing reads + Structure: [ val(meta), path(multiqc) ] + - post_stats_seqkit_stats: + type: file + description: | + SeqKit statistics for post-processing reads + Structure: [ val(meta), path(stats) ] + - post_stats_seqtk_stats: + type: file + description: | + SeqTk composition statistics for post-processing reads + Structure: [ val(meta), path(stats) ] - versions: type: file description: | File containing software versions Structure: [ path(versions.yml) ] pattern: "versions.yml" + - multiqc_files: + type: file + description: | + MultiQC compatible files for aggregated reporting + Structure: [ path(files) ] + authors: - "@vagkaratzas" maintainers: From e37b11ecdf94e2dcf355be646000a01dbb4f56cb Mon Sep 17 00:00:00 2001 From: vagkaratzas Date: Tue, 13 Jan 2026 12:26:55 +0000 Subject: [PATCH 07/31] stub added --- .../tests/main.nf.test | 42 +++++++++++++++++++ .../tests/main.nf.test.snap | 18 ++++++++ 2 files changed, 60 insertions(+) diff --git a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test index 6de5e621a195..9ac1c8acba19 100644 --- a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test +++ b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test @@ -84,4 +84,46 @@ nextflow_workflow { ) } } + + test("sarscov2 - fastq - skip all - single_end - stub") { + + options "-stub" + + when { + workflow { + """ + input[0] = channel.of([ + [ id:'test', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ]) + input[1] = true // skip_fastqc + input[2] = true // skip_seqfu_check + input[3] = true // skip_seqfu_stats + input[4] = true // skip_seqkit_stats + input[5] = true // skip_seqtk_comp + input[6] = true // skip_seqkit_sana_pair + input[7] = true // skip_seqkit_seq + input[8] = true // skip_seqkit_replace + input[9] = true // skip_seqkit_rmdup + input[10] 
= true // skip_umitools_extract + input[11] = 0 // umi_discard_read + + input[12] = true // skip_prinseqplusplus + input[13] = true // skip_bbmap_clumpify + input[14] = true // skip_decontamination + input[15] = [] // ch_fasta + input[16] = [] // ch_reference + input[17] = [] // index_name + input[18] = "" // decontaminator + input[19] = true // skip_cat_fastq + """ + } + } + then { + assert workflow.success + assertAll( + { assert snapshot(workflow.out.reads).match() } + ) + } + } } diff --git a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test.snap b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test.snap index f3427965d842..e4f0367fb4ef 100644 --- a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test.snap +++ b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test.snap @@ -11,5 +11,23 @@ "nextflow": "25.10.2" }, "timestamp": "2026-01-13T11:53:40.26586129" + }, + "sarscov2 - fastq - skip all - single_end - stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test_1.fastq.gz" + ] + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-13T12:17:19.455574032" } } \ No newline at end of file From dd3a37454a26edb810cd0491a0a570bb4ef7fa1c Mon Sep 17 00:00:00 2001 From: vagkaratzas Date: Tue, 13 Jan 2026 13:46:57 +0000 Subject: [PATCH 08/31] sarscov2 - fastq - seqfu - seqkit - deacon - single_end nf-test added --- .../fastq_shortreads_preprocess_qc/main.nf | 14 ++- .../tests/main.nf.test | 56 +++++++++ .../tests/main.nf.test.snap | 119 ++++++++++++++++++ .../tests/nextflow.config | 26 ++++ 4 files changed, 211 insertions(+), 4 deletions(-) create mode 100644 subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/nextflow.config diff --git a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/main.nf 
b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/main.nf index 4f126f962448..e636142e8fab 100644 --- a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/main.nf +++ b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/main.nf @@ -63,7 +63,8 @@ workflow FASTQ_SHORTREADS_PREPROCESS_QC { skip_seqkit_stats, skip_seqtk_comp ) - ch_versions = ch_versions.mix(PRE_STATS.out.versions) + ch_multiqc_files = ch_multiqc_files.mix(PRE_STATS.out.seqfu_multiqc) + ch_versions = ch_versions.mix(PRE_STATS.out.versions) // preprocessing FASTQ_PREPROCESS_SEQKIT ( @@ -149,7 +150,8 @@ workflow FASTQ_SHORTREADS_PREPROCESS_QC { skip_seqkit_stats, skip_seqtk_comp ) - ch_versions = ch_versions.mix(POST_STATS.out.versions) + ch_multiqc_files = ch_multiqc_files.mix(POST_STATS.out.seqfu_multiqc) + ch_versions = ch_versions.mix(POST_STATS.out.versions) emit: reads = ch_reads // channel: [ val(meta), [ fastq ] ] @@ -159,17 +161,21 @@ workflow FASTQ_SHORTREADS_PREPROCESS_QC { pre_stats_fastqc_zip = PRE_STATS.out.fastqc_zip pre_stats_seqfu_check = PRE_STATS.out.seqfu_check pre_stats_seqfu_stats = PRE_STATS.out.seqfu_stats - pre_stats_seqfu_multiqc = PRE_STATS.out.seqfu_multiqc pre_stats_seqkit_stats = PRE_STATS.out.seqkit_stats pre_stats_seqtk_stats = PRE_STATS.out.seqtk_stats post_stats_fastqc_html = POST_STATS.out.fastqc_html post_stats_fastqc_zip = POST_STATS.out.fastqc_zip post_stats_seqfu_check = POST_STATS.out.seqfu_check post_stats_seqfu_stats = POST_STATS.out.seqfu_stats - post_stats_seqfu_multiqc = POST_STATS.out.seqfu_multiqc post_stats_seqkit_stats = POST_STATS.out.seqkit_stats post_stats_seqtk_stats = POST_STATS.out.seqtk_stats + // host decontamination + hostile_reference = FASTQ_DECONTAMINATE_DEACON_HOSTILE.out.reference + hostile_json = FASTQ_DECONTAMINATE_DEACON_HOSTILE.out.json + deacon_index = FASTQ_DECONTAMINATE_DEACON_HOSTILE.out.index + deacon_summary = FASTQ_DECONTAMINATE_DEACON_HOSTILE.out.summary + versions = ch_versions // channel: [ versions.yml ] multiqc_files = 
ch_multiqc_files } diff --git a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test index 9ac1c8acba19..51e052f4099b 100644 --- a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test +++ b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test @@ -3,6 +3,7 @@ nextflow_workflow { name "Test Subworkflow FASTQ_SHORTREADS_PREPROCESS_QC" script "../main.nf" workflow "FASTQ_SHORTREADS_PREPROCESS_QC" + config './nextflow.config' tag "subworkflows" tag "subworkflows_nfcore" @@ -42,6 +43,61 @@ nextflow_workflow { tag "cat" tag "cat/fastq" + test("sarscov2 - fastq - seqfu - seqkit - deacon - single_end") { + + when { + workflow { + """ + input[0] = channel.of([ + [ id:'test', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ]) + input[1] = true // skip_fastqc + input[2] = false // skip_seqfu_check + input[3] = false // skip_seqfu_stats + input[4] = true // skip_seqkit_stats + input[5] = true // skip_seqtk_comp + input[6] = false // skip_seqkit_sana_pair + input[7] = false // skip_seqkit_seq + input[8] = false // skip_seqkit_replace + input[9] = false // skip_seqkit_rmdup + input[10] = true // skip_umitools_extract + input[11] = 0 // umi_discard_read + + input[12] = true // skip_prinseqplusplus + input[13] = true // skip_bbmap_clumpify + input[14] = false // skip_decontamination + input[15] = channel.of( + [ + [ id:'test', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + ) // ch_fasta + input[16] = [] // ch_reference + input[17] = [] // index_name + input[18] = 'deacon' // decontaminator + input[19] = true // skip_cat_fastq + """ + } + } + then { + assert workflow.success + assertAll( + { assert snapshot( + workflow.out.reads[0][1], + workflow.out.pre_stats_seqfu_check, + 
workflow.out.pre_stats_seqfu_stats, + workflow.out.post_stats_seqfu_check, + workflow.out.post_stats_seqfu_stats, + workflow.out.deacon_index, + file(workflow.out.deacon_summary[0][1]).name, + workflow.out.multiqc_files, + workflow.out.versions.collect { path(it).yaml } + ).match() } + ) + } + } + test("sarscov2 - fastq - skip all - single_end") { when { diff --git a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test.snap b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test.snap index e4f0367fb4ef..6fdecab7894e 100644 --- a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test.snap +++ b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test.snap @@ -1,4 +1,123 @@ { + "sarscov2 - fastq - seqfu - seqkit - deacon - single_end": { + "content": [ + "test.fq.gz:md5,f3a7626275fad3775b6005fad9c13c27", + [ + [ + { + "id": "test", + "single_end": true + }, + "test.tsv:md5,4c6409169772005cfb06be9e41f2c1e2" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.tsv:md5,7573d0c83cfc9af6e1ced67a45265381" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.tsv:md5,24dd7cfbb9ae0034d0bd804f464f11fa" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.tsv:md5,35dd18aff6780370b48027fec9c7d900" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.idx:md5,84e4985c91800686db9c9dca28fabd1a" + ] + ], + "test.json", + [ + [ + { + "id": "test", + "single_end": true + }, + "test_mqc.txt:md5,1facba42f81058e557e3d85dcff2a6f3" + ], + [ + { + "id": "test", + "single_end": true + }, + "test_mqc.txt:md5,a039b8c1cc923db88d2484d3abbf00fe" + ] + ], + [ + { + "FASTQ_SHORTREADS_PREPROCESS_QC:POST_STATS:SEQFU_CHECK": { + "seqfu": "1.22.3" + } + }, + { + "FASTQ_SHORTREADS_PREPROCESS_QC:FASTQ_PREPROCESS_SEQKIT:SEQKIT_REPLACE": { + "seqkit": "2.9.0" + } + }, + { + "FASTQ_SHORTREADS_PREPROCESS_QC:FASTQ_PREPROCESS_SEQKIT:SEQKIT_RMDUP": { + "seqkit": "v2.9.0" + } + 
}, + { + "FASTQ_SHORTREADS_PREPROCESS_QC:FASTQ_PREPROCESS_SEQKIT:SEQKIT_SEQ": { + "seqkit": "v2.9.0" + } + }, + { + "FASTQ_SHORTREADS_PREPROCESS_QC:FASTQ_DECONTAMINATE_DEACON_HOSTILE:FASTQ_INDEX_FILTER_DEACON:DEACON_INDEX": { + "deacon": "0.12.0" + } + }, + { + "FASTQ_SHORTREADS_PREPROCESS_QC:POST_STATS:SEQFU_STATS": { + "seqfu": "1.22.3" + } + }, + { + "FASTQ_SHORTREADS_PREPROCESS_QC:PRE_STATS:SEQFU_CHECK": { + "seqfu": "1.22.3" + } + }, + { + "FASTQ_SHORTREADS_PREPROCESS_QC:FASTQ_PREPROCESS_SEQKIT:FASTQ_SANITISE_SEQKIT:SEQKIT_SANA": { + "seqkit": "2.10.1" + } + }, + { + "FASTQ_SHORTREADS_PREPROCESS_QC:PRE_STATS:SEQFU_STATS": { + "seqfu": "1.22.3" + } + } + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-13T13:42:12.770307912" + }, "sarscov2 - fastq - skip all - single_end": { "content": [ "/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test_1.fastq.gz", diff --git a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/nextflow.config b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/nextflow.config new file mode 100644 index 000000000000..78045d7d3e04 --- /dev/null +++ b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/nextflow.config @@ -0,0 +1,26 @@ +process { + withName: SEQKIT_SANA { + ext.prefix = { "${meta.id}_${meta.strandness}" } + } + + withName: SEQKIT_SEQ { + ext.args = [ + "--remove-gaps", + "--upper-case", + "--validate-seq", + "--min-len 30", + "--max-len 5000" + ].join(' ').trim() + ext.prefix = { "intermediate_seqkit_seq_${meta.strandness}" } + } + + withName: SEQKIT_REPLACE { + ext.args = '-p "/" -r "_"' + ext.suffix = ".fasta" + ext.prefix = { "intermediate_seqkit_replace_${meta.strandness}" } + } + + withName: SEQKIT_RMDUP { + ext.prefix = { "${meta.id}_${meta.strandness}" } + } +} From f8e0ef1b75854ae6ea4f453d92c6a63a889f1d25 Mon Sep 17 00:00:00 2001 From: vagkaratzas Date: Tue, 13 Jan 2026 15:17:55 +0000 Subject: [PATCH 09/31] prinseqplusplus added --- 
.../fastq_shortreads_preprocess_qc/main.nf | 162 +++++++++++------- .../fastq_shortreads_preprocess_qc/meta.yml | 2 +- 2 files changed, 101 insertions(+), 63 deletions(-) diff --git a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/main.nf b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/main.nf index e636142e8fab..4fb47c7a9fca 100644 --- a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/main.nf +++ b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/main.nf @@ -19,40 +19,58 @@ include { CAT_FASTQ } from '../../../modules/nf-core/ca workflow FASTQ_SHORTREADS_PREPROCESS_QC { take: - ch_reads // channel: [ val(meta), [ fastq ] ] + ch_reads // channel: [ val(meta), [ fastq ] ] // statistics - skip_fastqc // boolean - skip_seqfu_check // boolean - skip_seqfu_stats // boolean - skip_seqkit_stats // boolean - skip_seqtk_comp // boolean + skip_fastqc // boolean + skip_seqfu_check // boolean + skip_seqfu_stats // boolean + skip_seqkit_stats // boolean + skip_seqtk_comp // boolean // preprocessing - skip_seqkit_sana_pair // boolean - skip_seqkit_seq // boolean - skip_seqkit_replace // boolean - skip_seqkit_rmdup // boolean + skip_seqkit_sana_pair // boolean + skip_seqkit_seq // boolean + skip_seqkit_replace // boolean + skip_seqkit_rmdup // boolean // barcoding - skip_umitools_extract // boolean - umi_discard_read // integer: 0, 1 or 2 + skip_umitools_extract // boolean + umi_discard_read // integer: 0, 1 or 2 // adapter removal and merging - // skip_adapterremoval // boolean + // skip_adapterremoval // boolean // complexity filtering - skip_prinseqplusplus // boolean + skip_complexity_filtering // boolean // deduplication - skip_bbmap_clumpify // boolean + skip_bbmap_clumpify // boolean // host decontamination - skip_decontamination // boolean - ch_fasta // channel: [ val(meta), [ fasta ] ] (optional) - ch_reference // channel: [ val(reference_name), path(reference_dir) ] (optional) - index_name // val (optional) - decontaminator // string (enum): 'hostile' 
or 'deacon' + skip_decontamination // boolean + ch_fasta // channel: [ val(meta), [ fasta ] ] (optional) + ch_reference // channel: [ val(reference_name), path(reference_dir) ] (optional) + index_name // val (optional) + decontaminator // string (enum): 'hostile' or 'deacon' // final concatenation - skip_cat_fastq // boolean + skip_cat_fastq // boolean main: - ch_versions = channel.empty() - ch_multiqc_files = channel.empty() + ch_versions = channel.empty() + ch_multiqc_files = channel.empty() + ch_pre_stats_fastqc_html = channel.empty() + ch_pre_stats_fastqc_zip = channel.empty() + ch_pre_stats_seqfu_check = channel.empty() + ch_pre_stats_seqfu_stats = channel.empty() + ch_pre_stats_seqkit_stats = channel.empty() + ch_pre_stats_seqtk_stats = channel.empty() + ch_post_stats_fastqc_html = channel.empty() + ch_post_stats_fastqc_zip = channel.empty() + ch_post_stats_seqfu_check = channel.empty() + ch_post_stats_seqfu_stats = channel.empty() + ch_post_stats_seqkit_stats = channel.empty() + ch_post_stats_seqtk_stats = channel.empty() + ch_umi_log = channel.empty() + ch_prinseq_log = channel.empty() + ch_hostile_reference = channel.empty() + ch_hostile_json = channel.empty() + ch_deacon_index = channel.empty() + ch_deacon_summary = channel.empty() // pre-statistics PRE_STATS ( @@ -63,8 +81,14 @@ workflow FASTQ_SHORTREADS_PREPROCESS_QC { skip_seqkit_stats, skip_seqtk_comp ) - ch_multiqc_files = ch_multiqc_files.mix(PRE_STATS.out.seqfu_multiqc) - ch_versions = ch_versions.mix(PRE_STATS.out.versions) + ch_pre_stats_fastqc_html = PRE_STATS.out.fastqc_html + ch_pre_stats_fastqc_zip = PRE_STATS.out.fastqc_zip + ch_pre_stats_seqfu_check = PRE_STATS.out.seqfu_check + ch_pre_stats_seqfu_stats = PRE_STATS.out.seqfu_stats + ch_pre_stats_seqkit_stats = PRE_STATS.out.seqkit_stats + ch_pre_stats_seqtk_stats = PRE_STATS.out.seqtk_stats + ch_multiqc_files = ch_multiqc_files.mix(PRE_STATS.out.seqfu_multiqc) + ch_versions = ch_versions.mix(PRE_STATS.out.versions) // preprocessing 
FASTQ_PREPROCESS_SEQKIT ( @@ -74,29 +98,25 @@ workflow FASTQ_SHORTREADS_PREPROCESS_QC { skip_seqkit_replace, skip_seqkit_rmdup ) + ch_reads = FASTQ_PREPROCESS_SEQKIT.out.reads ch_versions = ch_versions.mix(FASTQ_PREPROCESS_SEQKIT.out.versions) - ch_reads = FASTQ_PREPROCESS_SEQKIT.out.reads - // barcoding - umi_reads = ch_reads - umi_log = channel.empty() if (!skip_umitools_extract) { UMITOOLS_EXTRACT( ch_reads ) - umi_reads = UMITOOLS_EXTRACT.out.reads - umi_log = UMITOOLS_EXTRACT.out.log + ch_umi_reads = UMITOOLS_EXTRACT.out.reads + ch_umi_log = UMITOOLS_EXTRACT.out.log ch_versions = ch_versions.mix(UMITOOLS_EXTRACT.out.versions.first()) // Discard R1 / R2 if required if (umi_discard_read in [1, 2]) { - UMITOOLS_EXTRACT.out.reads + ch_umi_reads = UMITOOLS_EXTRACT.out.reads .map { meta, reads -> meta.single_end ? [meta, reads] : [meta + ['single_end': true], reads[umi_discard_read % 2]] } - .set { umi_reads } } - ch_reads = umi_reads + ch_reads = ch_umi_reads } // adapter removal and merging @@ -106,11 +126,12 @@ workflow FASTQ_SHORTREADS_PREPROCESS_QC { // } // complexity filtering - // TODO - // if (!skip_complexity_filtering) { - // PRINSEQPLUSPLUS( ... 
) - // ch_versions = ch_versions.mix(PRINSEQPLUSPLUS.out.versions.first()) - // } + if (!skip_complexity_filtering) { + PRINSEQPLUSPLUS( ch_reads ) + ch_reads = PRINSEQPLUSPLUS.out.good_reads + ch_prinseq_log = PRINSEQPLUSPLUS.out.log + ch_versions = ch_versions.mix(PRINSEQPLUSPLUS.out.versions.first()) + } // deduplication // TODO @@ -128,9 +149,12 @@ workflow FASTQ_SHORTREADS_PREPROCESS_QC { index_name, decontaminator ) - ch_versions = ch_versions.mix(FASTQ_DECONTAMINATE_DEACON_HOSTILE.out.versions) - - ch_reads = FASTQ_DECONTAMINATE_DEACON_HOSTILE.out.fastq_filtered + ch_reads = FASTQ_DECONTAMINATE_DEACON_HOSTILE.out.fastq_filtered + ch_hostile_reference = FASTQ_DECONTAMINATE_DEACON_HOSTILE.out.reference + ch_hostile_json = FASTQ_DECONTAMINATE_DEACON_HOSTILE.out.json + ch_deacon_index = FASTQ_DECONTAMINATE_DEACON_HOSTILE.out.index + ch_deacon_summary = FASTQ_DECONTAMINATE_DEACON_HOSTILE.out.summary + ch_versions = ch_versions.mix(FASTQ_DECONTAMINATE_DEACON_HOSTILE.out.versions) } @@ -150,32 +174,46 @@ workflow FASTQ_SHORTREADS_PREPROCESS_QC { skip_seqkit_stats, skip_seqtk_comp ) - ch_multiqc_files = ch_multiqc_files.mix(POST_STATS.out.seqfu_multiqc) - ch_versions = ch_versions.mix(POST_STATS.out.versions) + ch_post_stats_fastqc_html = POST_STATS.out.fastqc_html + ch_post_stats_fastqc_zip = POST_STATS.out.fastqc_zip + ch_post_stats_seqfu_check = POST_STATS.out.seqfu_check + ch_post_stats_seqfu_stats = POST_STATS.out.seqfu_stats + ch_post_stats_seqkit_stats = POST_STATS.out.seqkit_stats + ch_post_stats_seqtk_stats = POST_STATS.out.seqtk_stats + ch_multiqc_files = ch_multiqc_files.mix(POST_STATS.out.seqfu_multiqc) + ch_versions = ch_versions.mix(POST_STATS.out.versions) emit: - reads = ch_reads // channel: [ val(meta), [ fastq ] ] + reads = ch_reads // channel: [ val(meta), [ fastq ] ] // statistics - pre_stats_fastqc_html = PRE_STATS.out.fastqc_html - pre_stats_fastqc_zip = PRE_STATS.out.fastqc_zip - pre_stats_seqfu_check = PRE_STATS.out.seqfu_check - 
pre_stats_seqfu_stats = PRE_STATS.out.seqfu_stats - pre_stats_seqkit_stats = PRE_STATS.out.seqkit_stats - pre_stats_seqtk_stats = PRE_STATS.out.seqtk_stats - post_stats_fastqc_html = POST_STATS.out.fastqc_html - post_stats_fastqc_zip = POST_STATS.out.fastqc_zip - post_stats_seqfu_check = POST_STATS.out.seqfu_check - post_stats_seqfu_stats = POST_STATS.out.seqfu_stats - post_stats_seqkit_stats = POST_STATS.out.seqkit_stats - post_stats_seqtk_stats = POST_STATS.out.seqtk_stats + pre_stats_fastqc_html = ch_pre_stats_fastqc_html + pre_stats_fastqc_zip = ch_pre_stats_fastqc_zip + pre_stats_seqfu_check = ch_pre_stats_seqfu_check + pre_stats_seqfu_stats = ch_pre_stats_seqfu_stats + pre_stats_seqkit_stats = ch_pre_stats_seqkit_stats + pre_stats_seqtk_stats = ch_pre_stats_seqtk_stats + post_stats_fastqc_html = ch_post_stats_fastqc_html + post_stats_fastqc_zip = ch_post_stats_fastqc_zip + post_stats_seqfu_check = ch_post_stats_seqfu_check + post_stats_seqfu_stats = ch_post_stats_seqfu_stats + post_stats_seqkit_stats = ch_post_stats_seqkit_stats + post_stats_seqtk_stats = ch_post_stats_seqtk_stats + + // barcoding + umi_log = ch_umi_log + + // adapter removal and merging + + // complexity filtering + prinseq_log = ch_prinseq_log // host decontamination - hostile_reference = FASTQ_DECONTAMINATE_DEACON_HOSTILE.out.reference - hostile_json = FASTQ_DECONTAMINATE_DEACON_HOSTILE.out.json - deacon_index = FASTQ_DECONTAMINATE_DEACON_HOSTILE.out.index - deacon_summary = FASTQ_DECONTAMINATE_DEACON_HOSTILE.out.summary + hostile_reference = ch_hostile_reference + hostile_json = ch_hostile_json + deacon_index = ch_deacon_index + deacon_summary = ch_deacon_summary - versions = ch_versions // channel: [ versions.yml ] + versions = ch_versions // channel: [ versions.yml ] multiqc_files = ch_multiqc_files } diff --git a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml index 4616306c916e..1f3b11c41532 100644 --- 
a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml +++ b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml @@ -99,7 +99,7 @@ input: type: integer description: | Discard R1 or R2 after UMI extraction (0 = keep both, 1 = discard R1, 2 = discard R2) - - skip_prinseqplusplus: + - skip_complexity_filtering: type: boolean description: | Skip PRINSEQ++ complexity filtering step From c04e29a149d7f2a2d14edda9c6c7027c532bda53 Mon Sep 17 00:00:00 2001 From: vagkaratzas Date: Wed, 14 Jan 2026 10:51:52 +0000 Subject: [PATCH 10/31] clumpify added, cat_fastq added, nf-test added for missing tools --- .../fastq_shortreads_preprocess_qc/main.nf | 29 ++++---- .../fastq_shortreads_preprocess_qc/meta.yml | 4 +- .../tests/main.nf.test | 68 ++++++++++++++++--- .../tests/main.nf.test.snap | 29 ++++++++ .../tests/nextflow.config | 4 ++ 5 files changed, 111 insertions(+), 23 deletions(-) diff --git a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/main.nf b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/main.nf index 4fb47c7a9fca..42805776f806 100644 --- a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/main.nf +++ b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/main.nf @@ -39,7 +39,7 @@ workflow FASTQ_SHORTREADS_PREPROCESS_QC { // complexity filtering skip_complexity_filtering // boolean // deduplication - skip_bbmap_clumpify // boolean + skip_deduplication // boolean // host decontamination skip_decontamination // boolean ch_fasta // channel: [ val(meta), [ fasta ] ] (optional) @@ -47,7 +47,7 @@ workflow FASTQ_SHORTREADS_PREPROCESS_QC { index_name // val (optional) decontaminator // string (enum): 'hostile' or 'deacon' // final concatenation - skip_cat_fastq // boolean + skip_final_concatenation // boolean main: @@ -67,6 +67,7 @@ workflow FASTQ_SHORTREADS_PREPROCESS_QC { ch_post_stats_seqtk_stats = channel.empty() ch_umi_log = channel.empty() ch_prinseq_log = channel.empty() + ch_clumpify_log = channel.empty() ch_hostile_reference = 
channel.empty() ch_hostile_json = channel.empty() ch_deacon_index = channel.empty() @@ -134,11 +135,12 @@ workflow FASTQ_SHORTREADS_PREPROCESS_QC { } // deduplication - // TODO - // if (!skip_deduplication) { - // BBMAP_CLUMPIFY( ... ) - // ch_versions = ch_versions.mix(BBMAP_CLUMPIFY.out.versions.first()) - // } + if (!skip_deduplication) { + BBMAP_CLUMPIFY( ch_reads ) + ch_reads = BBMAP_CLUMPIFY.out.reads + ch_clumpify_log = BBMAP_CLUMPIFY.out.log + ch_versions = ch_versions.mix(BBMAP_CLUMPIFY.out.versions.first()) + } // host decontamination if (!skip_decontamination) { @@ -159,11 +161,11 @@ workflow FASTQ_SHORTREADS_PREPROCESS_QC { // final concatenation - // TODO - // if (!skip_final_concatenation) { - // CAT_FASTQ( ... ) - // ch_versions = ch_versions.mix(CAT_FASTQ.out.versions.first()) - // } + if (!skip_final_concatenation) { + // CAT_FASTQ ( ch_reads.map { meta, reads -> [meta, reads.flatten()] } ) // TODO test more cases + CAT_FASTQ ( ch_reads ) + ch_reads = CAT_FASTQ.out.reads + } // post-statistics POST_STATS ( @@ -208,6 +210,9 @@ workflow FASTQ_SHORTREADS_PREPROCESS_QC { // complexity filtering prinseq_log = ch_prinseq_log + // deduplication + clumpify_log = ch_clumpify_log + // host decontamination hostile_reference = ch_hostile_reference hostile_json = ch_hostile_json diff --git a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml index 1f3b11c41532..1a725fee2d21 100644 --- a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml +++ b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml @@ -103,7 +103,7 @@ input: type: boolean description: | Skip PRINSEQ++ complexity filtering step - - skip_bbmap_clumpify: + - skip_deduplication: type: boolean description: | Skip BBMap Clumpify deduplication step @@ -130,7 +130,7 @@ input: type: string description: | Decontamination tool to use ('hostile' or 'deacon') - - skip_cat_fastq: + - skip_final_concatenation: type: boolean 
description: | Skip final FASTQ concatenation step diff --git a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test index 51e052f4099b..112660226a1c 100644 --- a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test +++ b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test @@ -64,8 +64,8 @@ nextflow_workflow { input[10] = true // skip_umitools_extract input[11] = 0 // umi_discard_read - input[12] = true // skip_prinseqplusplus - input[13] = true // skip_bbmap_clumpify + input[12] = true // skip_complexity_filtering + input[13] = true // skip_deduplication input[14] = false // skip_decontamination input[15] = channel.of( [ @@ -76,7 +76,7 @@ nextflow_workflow { input[16] = [] // ch_reference input[17] = [] // index_name input[18] = 'deacon' // decontaminator - input[19] = true // skip_cat_fastq + input[19] = true // skip_final_concatenation """ } } @@ -98,6 +98,56 @@ nextflow_workflow { } } + test("sarscov2 - fastq - umitools - prinseq - clumpify - cat - single_end") { + + when { + workflow { + """ + input[0] = channel.of([ + [ id:'test', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ]) + input[1] = true // skip_fastqc + input[2] = true // skip_seqfu_check + input[3] = true // skip_seqfu_stats + input[4] = true // skip_seqkit_stats + input[5] = true // skip_seqtk_comp + input[6] = true // skip_seqkit_sana_pair + input[7] = true // skip_seqkit_seq + input[8] = true // skip_seqkit_replace + input[9] = true // skip_seqkit_rmdup + input[10] = false // skip_umitools_extract + input[11] = 0 // umi_discard_read + + input[12] = false // skip_complexity_filtering + input[13] = false // skip_deduplication + input[14] = true // skip_decontamination + input[15] = channel.of( + [ + [ id:'test', single_end:true ], + file(params.modules_testdata_base_path + 
'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + ) // ch_fasta + input[16] = [] // ch_reference + input[17] = [] // index_name + input[18] = 'deacon' // decontaminator + input[19] = false // skip_final_concatenation + """ + } + } + then { + assert workflow.success + assertAll( + { assert snapshot( + workflow.out.reads[0][1], + path(workflow.out.umi_log[0][1]).readLines().size(), + path(workflow.out.clumpify_log[0][1]).readLines().size(), + workflow.out.versions.collect { path(it).yaml } + ).match() } + ) + } + } + test("sarscov2 - fastq - skip all - single_end") { when { @@ -119,14 +169,14 @@ nextflow_workflow { input[10] = true // skip_umitools_extract input[11] = 0 // umi_discard_read - input[12] = true // skip_prinseqplusplus - input[13] = true // skip_bbmap_clumpify + input[12] = true // skip_complexity_filtering + input[13] = true // skip_deduplication input[14] = true // skip_decontamination input[15] = [] // ch_fasta input[16] = [] // ch_reference input[17] = [] // index_name input[18] = "" // decontaminator - input[19] = true // skip_cat_fastq + input[19] = true // skip_final_concatenation """ } } @@ -164,14 +214,14 @@ nextflow_workflow { input[10] = true // skip_umitools_extract input[11] = 0 // umi_discard_read - input[12] = true // skip_prinseqplusplus - input[13] = true // skip_bbmap_clumpify + input[12] = true // skip_complexity_filtering + input[13] = true // skip_deduplication input[14] = true // skip_decontamination input[15] = [] // ch_fasta input[16] = [] // ch_reference input[17] = [] // index_name input[18] = "" // decontaminator - input[19] = true // skip_cat_fastq + input[19] = true // skip_final_concatenation """ } } diff --git a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test.snap b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test.snap index 6fdecab7894e..4784df43b159 100644 --- a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test.snap +++ 
b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test.snap @@ -131,6 +131,35 @@ }, "timestamp": "2026-01-13T11:53:40.26586129" }, + "sarscov2 - fastq - umitools - prinseq - clumpify - cat - single_end": { + "content": [ + "test.merged.fastq.gz:md5,e9a7e89c097b6f23464382b91c274013", + 51, + 36, + [ + { + "FASTQ_SHORTREADS_PREPROCESS_QC:PRINSEQPLUSPLUS": { + "prinseqplusplus": 1.2 + } + }, + { + "FASTQ_SHORTREADS_PREPROCESS_QC:BBMAP_CLUMPIFY": { + "bbmap": 39.18 + } + }, + { + "FASTQ_SHORTREADS_PREPROCESS_QC:UMITOOLS_EXTRACT": { + "umitools": "1.1.6" + } + } + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-14T10:49:09.073574264" + }, "sarscov2 - fastq - skip all - single_end - stub": { "content": [ [ diff --git a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/nextflow.config b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/nextflow.config index 78045d7d3e04..8a439363b822 100644 --- a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/nextflow.config +++ b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/nextflow.config @@ -23,4 +23,8 @@ process { withName: SEQKIT_RMDUP { ext.prefix = { "${meta.id}_${meta.strandness}" } } + + withName: UMITOOLS_EXTRACT { + ext.args = '--bc-pattern="NNNN"' + } } From 912c6b7d0ee593ec96d65687ef4a093ef30ffedc Mon Sep 17 00:00:00 2001 From: EliottBo <112384714+eliottBo@users.noreply.github.com> Date: Tue, 13 Jan 2026 15:53:51 +0100 Subject: [PATCH 11/31] Update WHATSHAP/PHASE container, topic versions and task.cpus (#9642) * Add task.cpus in bgzip command and https for container. 
Fixed the new way of getting the version with topics * Add topics to meta * Updating snapshot to correctly assess versions --- modules/nf-core/whatshap/phase/main.nf | 20 ++++++------ modules/nf-core/whatshap/phase/meta.yml | 26 ++++++++++++---- .../nf-core/whatshap/phase/tests/main.nf.test | 3 +- .../whatshap/phase/tests/main.nf.test.snap | 31 ++++++++++++------- 4 files changed, 50 insertions(+), 30 deletions(-) diff --git a/modules/nf-core/whatshap/phase/main.nf b/modules/nf-core/whatshap/phase/main.nf index 6d5729607831..502105e15fab 100644 --- a/modules/nf-core/whatshap/phase/main.nf +++ b/modules/nf-core/whatshap/phase/main.nf @@ -4,8 +4,8 @@ process WHATSHAP_PHASE { conda "${moduleDir}/environment.yml" container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container - ? 'oras://community.wave.seqera.io/library/whatshap:2.8--c3862a4b2ad0f978' - : 'community.wave.seqera.io/library/whatshap:2.8--7fe530bc624a3e5a'}" + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/d8/d837709891c2d98fc0956f6fd0dba18b0f67d96c4db74ccbae7db98fd00afe42/data' + : 'community.wave.seqera.io/library/whatshap:2.8--7fe530bc624a3e5a' }" input: tuple val(meta), path(vcf), path(tbi) @@ -13,15 +13,15 @@ process WHATSHAP_PHASE { tuple val(meta3), path(fasta), path(fai) output: - tuple val(meta), path("*.vcf.gz"), emit: vcf + tuple val(meta), path("*.vcf.gz"), emit: vcf tuple val(meta), path("*.vcf.gz.tbi"), emit: tbi - path "versions.yml", emit: versions + tuple val("${task.process}"), val('whatshap'), eval("whatshap --version"), emit: versions_whatshap, topic: versions when: task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' + def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" if ("${vcf}" == "${prefix}.vcf" || "${vcf}" == "${prefix}.vcf.gz") { @@ -36,7 +36,10 @@ process WHATSHAP_PHASE { ${vcf} \\ ${bam} - bgzip ${prefix}.vcf + bgzip \\ + -@ ${task.cpus} \\ + ${prefix}.vcf +
tabix -p vcf ${prefix}.vcf.gz cat <<-END_VERSIONS > versions.yml @@ -54,10 +57,5 @@ process WHATSHAP_PHASE { """ echo "" | gzip > ${prefix}.vcf.gz touch ${prefix}.vcf.gz.tbi - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - whatshap: \$(whatshap --version 2>&1 | sed 's/whatshap //g') - END_VERSIONS """ } diff --git a/modules/nf-core/whatshap/phase/meta.yml b/modules/nf-core/whatshap/phase/meta.yml index 89233aae3b66..a0b522e28141 100644 --- a/modules/nf-core/whatshap/phase/meta.yml +++ b/modules/nf-core/whatshap/phase/meta.yml @@ -85,13 +85,27 @@ output: pattern: "*.vcf.gz.tbi" ontologies: - edam: http://edamontology.org/format_3616 # TBI format + versions_whatshap: + - - ${task.process}: + type: string + description: The name of the process + - whatshap: + type: string + description: The name of the tool + - whatshap --version: + type: string + description: The expression to obtain the version of the tool +topics: versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" - ontologies: - - edam: http://edamontology.org/format_3750 # YAML + - - ${task.process}: + type: string + description: The name of the process + - whatshap: + type: string + description: The name of the tool + - whatshap --version: + type: eval + description: The expression to obtain the version of the tool authors: - "@haidyi" diff --git a/modules/nf-core/whatshap/phase/tests/main.nf.test b/modules/nf-core/whatshap/phase/tests/main.nf.test index 5d6ec2b8a9ec..2984a3000a30 100644 --- a/modules/nf-core/whatshap/phase/tests/main.nf.test +++ b/modules/nf-core/whatshap/phase/tests/main.nf.test @@ -56,8 +56,7 @@ nextflow_process { { assert snapshot( path(process.out.vcf.get(0).get(1)).vcf.summary, path(process.out.vcf.get(0).get(1)).vcf.variantsMD5, - process.out.versions, - path(process.out.versions[0]).yaml, + process.out.findAll { key, val -> key.startsWith("versions")}, ).match() } ) } diff --git 
a/modules/nf-core/whatshap/phase/tests/main.nf.test.snap b/modules/nf-core/whatshap/phase/tests/main.nf.test.snap index 18b00f7fb6b4..d36db70200a0 100644 --- a/modules/nf-core/whatshap/phase/tests/main.nf.test.snap +++ b/modules/nf-core/whatshap/phase/tests/main.nf.test.snap @@ -19,7 +19,11 @@ ] ], "2": [ - "versions.yml:md5,eebecfb3d7f284fc7d11c67978bf1994" + [ + "WHATSHAP_PHASE", + "whatshap", + "2.8" + ] ], "tbi": [ [ @@ -37,8 +41,12 @@ "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], - "versions": [ - "versions.yml:md5,eebecfb3d7f284fc7d11c67978bf1994" + "versions_whatshap": [ + [ + "WHATSHAP_PHASE", + "whatshap", + "2.8" + ] ] } ], @@ -46,25 +54,26 @@ "nf-test": "0.9.3", "nextflow": "25.10.2" }, - "timestamp": "2025-12-08T10:09:39.439411781" + "timestamp": "2026-01-13T11:48:26.363542049" }, "whatshap - phase - vcf": { "content": [ "VcfFile [chromosomes=[chr19:45760000-45770300], sampleCount=1, variantCount=1, phased=false, phasedAutodetect=false]", "e75d1ebbe87d6e55739cacb4e81dcd08", - [ - "versions.yml:md5,eebecfb3d7f284fc7d11c67978bf1994" - ], { - "WHATSHAP_PHASE": { - "whatshap": 2.8 - } + "versions_whatshap": [ + [ + "WHATSHAP_PHASE", + "whatshap", + "2.8" + ] + ] } ], "meta": { "nf-test": "0.9.3", "nextflow": "25.10.2" }, - "timestamp": "2025-12-08T10:09:34.591009388" + "timestamp": "2026-01-13T11:54:46.729067103" } } \ No newline at end of file From 76d01196b811cb36e578fcfcc490f04435aaf318 Mon Sep 17 00:00:00 2001 From: Alejandra Escobar Date: Tue, 13 Jan 2026 15:40:48 +0000 Subject: [PATCH 12/31] Feature/localcdsearch (#9632) * local-cd-search annotate and download modules * Fixing lint * Update localcdsearch tool metadata in meta.yml * Fix container URL selection logic in main.nf * Fix container URL for local-cd-search * Fix container URL for local-cd-search * Fix container URL for local-cd-search * Fix container URL for local-cd-search * Fix container URL for local-cd-search * Update modules/nf-core/localcdsearch/annotate/main.nf 
Co-authored-by: James A. Fellows Yates * Update modules/nf-core/localcdsearch/download/main.nf Co-authored-by: James A. Fellows Yates * Update modules/nf-core/localcdsearch/download/main.nf Co-authored-by: James A. Fellows Yates * Update modules/nf-core/localcdsearch/download/main.nf Co-authored-by: James A. Fellows Yates * Update modules/nf-core/localcdsearch/download/main.nf Co-authored-by: James A. Fellows Yates * Update modules/nf-core/localcdsearch/download/main.nf Co-authored-by: James A. Fellows Yates * Update modules/nf-core/localcdsearch/download/meta.yml Co-authored-by: James A. Fellows Yates * Update modules/nf-core/localcdsearch/download/meta.yml Co-authored-by: James A. Fellows Yates * Update modules/nf-core/localcdsearch/download/meta.yml Co-authored-by: James A. Fellows Yates * Singularity container added * edamontology added * Updating test for one input only * Updating snap * Updating test for db downloading * Update version handling in main.nf * Update version expression in meta.yml * Refactor version handling in main.nf Updated version information to use a variable for better maintainability. * Update version retrieval expression in meta.yml * Add version information to stub Added version information for the tool in the main.nf file. * Add version information to stub Added version information to the main.nf file. * Update modules/nf-core/localcdsearch/download/meta.yml Co-authored-by: Evangelos Karatzas <32259775+vagkaratzas@users.noreply.github.com> * Refactor assertions to include process success check * Refactor assertions for process success in tests * Update container URL for local-cd-search * Remove redundant success assertion in tests --------- Co-authored-by: James A.
Fellows Yates Co-authored-by: Evangelos Karatzas <32259775+vagkaratzas@users.noreply.github.com> --- .../localcdsearch/annotate/environment.yml | 7 + .../nf-core/localcdsearch/annotate/main.nf | 53 ++++++ .../nf-core/localcdsearch/annotate/meta.yml | 89 ++++++++++ .../localcdsearch/annotate/tests/main.nf.test | 126 +++++++++++++ .../annotate/tests/main.nf.test.snap | 122 +++++++++++++ .../localcdsearch/download/environment.yml | 7 + .../nf-core/localcdsearch/download/main.nf | 39 +++++ .../nf-core/localcdsearch/download/meta.yml | 59 +++++++ .../localcdsearch/download/tests/main.nf.test | 102 +++++++++++ .../download/tests/main.nf.test.snap | 165 ++++++++++++++++++ 10 files changed, 769 insertions(+) create mode 100644 modules/nf-core/localcdsearch/annotate/environment.yml create mode 100644 modules/nf-core/localcdsearch/annotate/main.nf create mode 100644 modules/nf-core/localcdsearch/annotate/meta.yml create mode 100644 modules/nf-core/localcdsearch/annotate/tests/main.nf.test create mode 100644 modules/nf-core/localcdsearch/annotate/tests/main.nf.test.snap create mode 100644 modules/nf-core/localcdsearch/download/environment.yml create mode 100644 modules/nf-core/localcdsearch/download/main.nf create mode 100644 modules/nf-core/localcdsearch/download/meta.yml create mode 100644 modules/nf-core/localcdsearch/download/tests/main.nf.test create mode 100644 modules/nf-core/localcdsearch/download/tests/main.nf.test.snap diff --git a/modules/nf-core/localcdsearch/annotate/environment.yml b/modules/nf-core/localcdsearch/annotate/environment.yml new file mode 100644 index 000000000000..cdef14c5c0d4 --- /dev/null +++ b/modules/nf-core/localcdsearch/annotate/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::local-cd-search=0.3.0" diff --git a/modules/nf-core/localcdsearch/annotate/main.nf 
b/modules/nf-core/localcdsearch/annotate/main.nf new file mode 100644 index 000000000000..4baa3bda0692 --- /dev/null +++ b/modules/nf-core/localcdsearch/annotate/main.nf @@ -0,0 +1,53 @@ +process LOCALCDSEARCH_ANNOTATE { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/e1/e1ed921c933d8eeeb0db6d72ece09ec25edab9ad441c84b070acff1592af2d54/data' : + 'biocontainers/local-cd-search:0.3.0--pyhdfd78af_0' }" + + input: + tuple val(meta), path(fasta) + path db + val sites + + output: + tuple val(meta), path("*_results.tsv"), emit: result + tuple val(meta), path("*_sites.tsv") , emit: annot_sites, optional: true + tuple val("${task.process}"), val('local-cd-search'), eval("echo ${VERSION}"), topic: versions, emit: versions_localcdsearch + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + VERSION = '0.3.0' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + def prefix = task.ext.prefix ?: "${meta.id}" + def val_flag = sites ? "--sites-output ${prefix}_sites.tsv" : '' + def is_compressed = fasta.getExtension() == "gz" + def fasta_name = is_compressed ? fasta.getBaseName() : fasta + def uncompress_input = is_compressed ? "gzip -c -d ${fasta} > ${fasta_name}" : '' + """ + $uncompress_input + + local-cd-search \\ + annotate \\ + $args \\ + $val_flag \\ + --threads $task.cpus \\ + ${fasta_name} \\ + ${prefix}_results.tsv \\ + ${db} + """ + + stub: + def args = task.ext.args ?: '' + VERSION = '0.3.0' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + def prefix = task.ext.prefix ?: "${meta.id}" + """ + echo $args + touch ${prefix}_results.tsv + ${sites ? 
"touch ${prefix}_sites.tsv" : ''} + """ +} diff --git a/modules/nf-core/localcdsearch/annotate/meta.yml b/modules/nf-core/localcdsearch/annotate/meta.yml new file mode 100644 index 000000000000..184853635ab3 --- /dev/null +++ b/modules/nf-core/localcdsearch/annotate/meta.yml @@ -0,0 +1,89 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "localcdsearch_annotate" +description: A command-line tool for local protein domain annotation using NCBI's Conserved Domain Database (CDD) +keywords: + - cdd + - rpsblast + - rpsbproc + - protein + - domain + - annotation +tools: + - "localcdsearch": + description: "Protein annotation using local PSSM databases from CDD." + homepage: "https://github.com/apcamargo/local-cd-search" + documentation: "https://github.com/apcamargo/local-cd-search" + tool_dev_url: "https://github.com/apcamargo/local-cd-search" + licence: ["MIT"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - fasta: + type: file + description: Input fasta file containing protein queries sequences + pattern: "*.{fa,fasta,fa.gz,fasta.gz}" + ontologies: + - edam: "http://edamontology.org/format_1929" # FASTA + - db: + type: directory + description: Directory containing the metadata and databse directories + pattern: "*" + - sites: + type: boolean + description: When true an extra tsv output file is generated + +output: + result: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - "*_results.tsv": + type: file + description: tab-separated file with hits filtered by CDD's curated bit-score thresholds + pattern: "*_results.tsv" + ontologies: + - edam: http://edamontology.org/format_3475 # TSV + annot_sites: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1' ]` + - "*_sites.tsv": + type: file + description: If --sites-output is specified, an additional tab-separated file is created with functional site annotations + pattern: "*_sites.tsv" + ontologies: + - edam: http://edamontology.org/format_3475 # TSV + versions_localcdsearch: + - - "${task.process}": + type: string + description: The name of the process + - "local-cd-search": + type: string + description: The name of the tool + - "echo ${VERSION}": + type: eval + description: The expression to obtain the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - local-cd-search: + type: string + description: The name of the tool + - echo ${VERSION}: + type: eval + description: The expression to obtain the version of the tool +authors: + - "@Ales-ibt" +maintainers: + - "@Ales-ibt" diff --git a/modules/nf-core/localcdsearch/annotate/tests/main.nf.test b/modules/nf-core/localcdsearch/annotate/tests/main.nf.test new file mode 100644 index 000000000000..a6c268608bb7 --- /dev/null +++ b/modules/nf-core/localcdsearch/annotate/tests/main.nf.test @@ -0,0 +1,126 @@ +nextflow_process { + + name "Test Process LOCALCDSEARCH_ANNOTATE" + script "../main.nf" + process "LOCALCDSEARCH_ANNOTATE" + + tag "modules" + tag "modules_nfcore" + tag "localcdsearch" + tag "localcdsearch/annotate" + tag "localcdsearch/download" + + setup { + run("LOCALCDSEARCH_DOWNLOAD") { + script "../../download/main.nf" + process { + """ + input[0] = ['smart'] + """ + } + } + } + + test("sarscov2 - smart") { + + when { + process { + """ + input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ] + input[1] = LOCALCDSEARCH_DOWNLOAD.out.db + input[2] = false + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out.result, + process.out.findAll { key, val -> key.startsWith("versions")} + ).match() } + ) + } + + 
} + + test("sarscov2 - smart - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ] + input[1] = LOCALCDSEARCH_DOWNLOAD.out.db + input[2] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.result, + process.out.findAll { key, val -> key.startsWith("versions")} + ).match() } + ) + } + + } + + test("sarscov2 - smart - sites") { + + when { + process { + """ + input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ] + input[1] = LOCALCDSEARCH_DOWNLOAD.out.db + input[2] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.result, + process.out.annot_sites, + process.out.findAll { key, val -> key.startsWith("versions")} + ).match() } + ) + } + + } + + test("sarscov2 - smart - sites - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ] + input[1] = LOCALCDSEARCH_DOWNLOAD.out.db + input[2] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.result, + process.out.annot_sites, + process.out.findAll { key, val -> key.startsWith("versions")} + ).match() } + ) + } + + } + +} diff --git a/modules/nf-core/localcdsearch/annotate/tests/main.nf.test.snap b/modules/nf-core/localcdsearch/annotate/tests/main.nf.test.snap new file mode 100644 index 000000000000..4643e75fd0dd --- /dev/null +++ b/modules/nf-core/localcdsearch/annotate/tests/main.nf.test.snap @@ -0,0 +1,122 @@ +{ + "sarscov2 - smart - sites": { + "content": [ + [ + [ + { + "id": "test" + }, + "test_results.tsv:md5,9b702972e196a5a0c78caf731ca8aa98" + ] + ], + [ + [ + { + "id": "test" + }, + 
"test_sites.tsv:md5,1f23b91ab52ed005e35c1fb3b07f36da" + ] + ], + { + "versions_localcdsearch": [ + [ + "LOCALCDSEARCH_ANNOTATE", + "local-cd-search", + "0.3.0" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2026-01-09T14:17:49.533079" + }, + "sarscov2 - smart": { + "content": [ + [ + [ + { + "id": "test" + }, + "test_results.tsv:md5,9b702972e196a5a0c78caf731ca8aa98" + ] + ], + { + "versions_localcdsearch": [ + [ + "LOCALCDSEARCH_ANNOTATE", + "local-cd-search", + "0.3.0" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2026-01-09T14:17:24.849122" + }, + "sarscov2 - smart - sites - stub": { + "content": [ + [ + [ + { + "id": "test" + }, + "test_results.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + [ + [ + { + "id": "test" + }, + "test_sites.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + { + "versions_localcdsearch": [ + [ + "LOCALCDSEARCH_ANNOTATE", + "local-cd-search", + "0.3.0" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2026-01-09T14:17:54.933935" + }, + "sarscov2 - smart - stub": { + "content": [ + [ + [ + { + "id": "test" + }, + "test_results.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + { + "versions_localcdsearch": [ + [ + "LOCALCDSEARCH_ANNOTATE", + "local-cd-search", + "0.3.0" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2026-01-09T14:17:30.019737" + } +} \ No newline at end of file diff --git a/modules/nf-core/localcdsearch/download/environment.yml b/modules/nf-core/localcdsearch/download/environment.yml new file mode 100644 index 000000000000..cdef14c5c0d4 --- /dev/null +++ b/modules/nf-core/localcdsearch/download/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - 
"bioconda::local-cd-search=0.3.0" diff --git a/modules/nf-core/localcdsearch/download/main.nf b/modules/nf-core/localcdsearch/download/main.nf new file mode 100644 index 000000000000..ca0e1670ceaf --- /dev/null +++ b/modules/nf-core/localcdsearch/download/main.nf @@ -0,0 +1,39 @@ +process LOCALCDSEARCH_DOWNLOAD { + tag "${databases.join(', ')}" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/e1/e1ed921c933d8eeeb0db6d72ece09ec25edab9ad441c84b070acff1592af2d54/data' : + 'biocontainers/local-cd-search:0.3.0--pyhdfd78af_0' }" + + input: + val databases + + output: + path('database/'), emit: db + tuple val("${task.process}"), val('local-cd-search'), eval("echo ${VERSION}"), topic: versions, emit: versions_localcdsearch + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + VERSION = '0.3.0' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + mkdir database/ + local-cd-search \\ + download \\ + ${args} \\ + database/ \\ + ${databases.join(' ')} + """ + + stub: + def args = task.ext.args ?: '' + VERSION = '0.3.0' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. 
+ """ + echo $args + mkdir database/ + """ +} diff --git a/modules/nf-core/localcdsearch/download/meta.yml b/modules/nf-core/localcdsearch/download/meta.yml new file mode 100644 index 000000000000..c474601c454c --- /dev/null +++ b/modules/nf-core/localcdsearch/download/meta.yml @@ -0,0 +1,59 @@ +name: "localcdsearch_download" +description: A command-line tool for downloading databases for local protein domain annotation using + NCBI's Conserved Domain Database (CDD) +keywords: + - cdd + - rpsblast + - rpsbproc + - protein + - domain + - annotation + - download +tools: + - "localcdsearch": + description: "Protein annotation using local PSSM databases from CDD." + homepage: "https://github.com/apcamargo/local-cd-search" + documentation: "https://github.com/apcamargo/local-cd-search" + tool_dev_url: "https://github.com/apcamargo/local-cd-search" + licence: ["MIT"] + identifier: "" + +input: + - databases: + type: list + description: | + List of database names to download. Can be a single database name or multiple names. 
+ Valid options: cdd, cdd_ncbi, cog, kog, pfam, prk, smart, tigr + pattern: "cdd|cdd_ncbi|cog|kog|pfam|prk|smart|tigr" +output: + db: + - "database/": + type: directory + description: Directory containing downloaded CDD databases + ontologies: + - edam: http://edamontology.org/data_1049 # The name of a directory + versions_localcdsearch: + - - ${task.process}: + type: string + description: The name of the process + - local-cd-search: + type: string + description: The name of the tool + - echo ${VERSION}: + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - local-cd-search: + type: string + description: The name of the tool + - echo ${VERSION}: + type: eval + description: The expression to obtain the version of the tool +authors: + - "@Ales-ibt" +maintainers: + - "@Ales-ibt" diff --git a/modules/nf-core/localcdsearch/download/tests/main.nf.test b/modules/nf-core/localcdsearch/download/tests/main.nf.test new file mode 100644 index 000000000000..195d92959644 --- /dev/null +++ b/modules/nf-core/localcdsearch/download/tests/main.nf.test @@ -0,0 +1,102 @@ +nextflow_process { + + name "Test Process LOCALCDSEARCH_DOWNLOAD" + script "../main.nf" + process "LOCALCDSEARCH_DOWNLOAD" + + tag "modules" + tag "modules_nfcore" + tag "localcdsearch" + tag "localcdsearch/download" + + test("Download one database - Smart") { + + when { + process { + """ + input[0] = ['smart'] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.db, + process.out.findAll { key, val -> key.startsWith("versions")} + ).match() } + ) + } + + } + + test("Download one database - Smart - stub") { + + options "-stub" + + when { + process { + """ + input[0] = ['smart'] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.db, + process.out.findAll { key, val -> key.startsWith("versions") } 
+ ).match() } + ) + } + } + + + test("Download multiple databases - Smart, Tigr") { + + when { + process { + """ + input[0] = ['smart', 'tigr'] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.db, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() } + ) + } + } + + test("Download multiple databases - Smart, Tigr - stub") { + + options "-stub" + + when { + process { + """ + input[0] = ['smart', 'tigr'] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.db, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() } + ) + } + } + +} diff --git a/modules/nf-core/localcdsearch/download/tests/main.nf.test.snap b/modules/nf-core/localcdsearch/download/tests/main.nf.test.snap new file mode 100644 index 000000000000..d92550c6b39c --- /dev/null +++ b/modules/nf-core/localcdsearch/download/tests/main.nf.test.snap @@ -0,0 +1,165 @@ +{ + "Download one database - Smart - stub": { + "content": [ + [ + [ + + ] + ], + { + "versions_localcdsearch": [ + [ + "LOCALCDSEARCH_DOWNLOAD", + "local-cd-search", + "0.3.0" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2026-01-09T14:04:01.191164" + }, + "Download one database - Smart": { + "content": [ + [ + [ + [ + "Smart.aux:md5,4676e5073e915b31726844ed20a768d2", + "Smart.freq:md5,2fa7da70eb0697574ab484d625493b46", + "Smart.loo:md5,c17ab45e4e8e7316adc9148d39e40f4b", + "Smart.pal:md5,c1e55902129ad5193a82bebb545c4563", + "Smart.pdb:md5,fb463e0703f2bd9b75068b1f0eb71a5d", + "Smart.phr:md5,acb5655696f6948baf7ba8064f7adb58", + "Smart.pin:md5,af1c29799b5c5168481bceb3c6d8cdd8", + "Smart.pos:md5,4a1c4bc186ba31897ac53ecaa2749cdf", + "Smart.pot:md5,8954644983f3cf5d0d24cbcc1a531a21", + "Smart.psq:md5,ca89b28ce23aa7e7246359293ec0a480", + "Smart.ptf:md5,36ec33f1546d8065dce4d7d646b9d707", + "Smart.pto:md5,9a94d5d06ba316923905c46eb30c76bc", + 
"Smart.rps:md5,fc24261c5717fc4b4696516a330eba5e" + ], + [ + "bitscore_specific.txt:md5,417ab1c1223f42a38cb4c0caee8165f5", + "cddannot.dat:md5,60073848cd4fac905fe5c3a2cacb0dde", + "cddannot_generic.dat:md5,7b1f2e6149469f394b7688cb8685d111", + "cddid.tbl:md5,51831ce178d98aeb2b9b3cdebe9069fe", + "cdtrack.txt:md5,dc94bb9a8e8027d40a344d78828c2661", + "family_superfamily_links:md5,3af9f63b6aa9ab822666c77059fe9c08" + ] + ] + ], + { + "versions_localcdsearch": [ + [ + "LOCALCDSEARCH_DOWNLOAD", + "local-cd-search", + "0.3.0" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2026-01-09T14:03:57.328825" + }, + "Download multiple databases - Smart, Tigr - stub": { + "content": [ + [ + [ + + ] + ], + { + "versions_localcdsearch": [ + [ + "LOCALCDSEARCH_DOWNLOAD", + "local-cd-search", + "0.3.0" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2026-01-09T14:04:44.588308" + }, + "Download multiple databases - Smart, Tigr": { + "content": [ + [ + [ + [ + "Smart.aux:md5,4676e5073e915b31726844ed20a768d2", + "Smart.freq:md5,2fa7da70eb0697574ab484d625493b46", + "Smart.loo:md5,c17ab45e4e8e7316adc9148d39e40f4b", + "Smart.pal:md5,c1e55902129ad5193a82bebb545c4563", + "Smart.pdb:md5,fb463e0703f2bd9b75068b1f0eb71a5d", + "Smart.phr:md5,acb5655696f6948baf7ba8064f7adb58", + "Smart.pin:md5,af1c29799b5c5168481bceb3c6d8cdd8", + "Smart.pos:md5,4a1c4bc186ba31897ac53ecaa2749cdf", + "Smart.pot:md5,8954644983f3cf5d0d24cbcc1a531a21", + "Smart.psq:md5,ca89b28ce23aa7e7246359293ec0a480", + "Smart.ptf:md5,36ec33f1546d8065dce4d7d646b9d707", + "Smart.pto:md5,9a94d5d06ba316923905c46eb30c76bc", + "Smart.rps:md5,fc24261c5717fc4b4696516a330eba5e" + ], + [ + "Tigr.00.aux:md5,77c6dde6261801c4931ca6887ef5f3bd", + "Tigr.00.freq:md5,d754f8d0b8f8d6fa03ab8c975f12a626", + "Tigr.00.loo:md5,95ccf669e5110f39408a84f5cbc36c9f", + "Tigr.00.pdb:md5,d24ae88caeeade489b207925e89e23cb", + "Tigr.00.phr:md5,28c3c0e455b23366a20fec10d9dad8b3", 
+ "Tigr.00.pin:md5,ede33d2dfdf358256def9c7c33ce9d3a", + "Tigr.00.pos:md5,f9eecf080eea2f4d92d1a901c4240090", + "Tigr.00.pot:md5,6a66041ed547e097f4fd3c7844cd7f30", + "Tigr.00.psq:md5,1d6a57b8138949f6e8b5da300d25a6eb", + "Tigr.00.ptf:md5,d62636ed1d2e68c73187781249850687", + "Tigr.00.pto:md5,80cf92e1c5ab21ba75f2d785b9447e22", + "Tigr.00.rps:md5,23e2b0fa0623ed2cc93a14ea2120d37c", + "Tigr.01.aux:md5,e28ca085ae533d35c80ebf61e49b8539", + "Tigr.01.freq:md5,69e42176dfed09fea8ea86a76b759920", + "Tigr.01.loo:md5,30cf041e222a3fd03eb51dde4861da8a", + "Tigr.01.pdb:md5,394e696f67d5b6157e057dcfcff601c8", + "Tigr.01.phr:md5,449af45813ae62dd542716af8ecdca4a", + "Tigr.01.pin:md5,f791ed2bec725729d48675969def0c02", + "Tigr.01.pos:md5,4209b291fb02a88cc1b4d2f137c20150", + "Tigr.01.pot:md5,6c7581b0b75d936a7ae3c2155412c200", + "Tigr.01.psq:md5,333fbb79a8929af7ecd74cacd5790ab5", + "Tigr.01.ptf:md5,8ef602b360a790032a66502e887b1aa7", + "Tigr.01.pto:md5,b25c40003d61daeaff15a49cb2fe7f59", + "Tigr.01.rps:md5,c38a0e4c93a08fd9a98aafb718768ab7", + "Tigr.pal:md5,f9dc15c5f5808ff34ff38a4a30613ee0" + ], + [ + "bitscore_specific.txt:md5,417ab1c1223f42a38cb4c0caee8165f5", + "cddannot.dat:md5,60073848cd4fac905fe5c3a2cacb0dde", + "cddannot_generic.dat:md5,7b1f2e6149469f394b7688cb8685d111", + "cddid.tbl:md5,51831ce178d98aeb2b9b3cdebe9069fe", + "cdtrack.txt:md5,dc94bb9a8e8027d40a344d78828c2661", + "family_superfamily_links:md5,3af9f63b6aa9ab822666c77059fe9c08" + ] + ] + ], + { + "versions_localcdsearch": [ + [ + "LOCALCDSEARCH_DOWNLOAD", + "local-cd-search", + "0.3.0" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2026-01-09T14:04:38.90845" + } +} \ No newline at end of file From afa52dd4a196d7265de0e41589f81bafefe8b8c0 Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Wed, 14 Jan 2026 14:07:32 +0100 Subject: [PATCH 13/31] Update RGI modules to be Nextflow strict syntax compliant (#9650) * Add new input channel for premade seqid2map file * Add extra tests for using custom seqid2map file * Apply suggestions from code review * Update modules/nf-core/kraken2/build/tests/main.nf.test Co-authored-by: Simon Pearce <24893913+SPPearce@users.noreply.github.com> * Make RGI modules syntax compliant --------- Co-authored-by: Simon Pearce <24893913+SPPearce@users.noreply.github.com> --- modules/nf-core/rgi/cardannotation/main.nf | 4 ++-- modules/nf-core/rgi/main/main.nf | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/nf-core/rgi/cardannotation/main.nf b/modules/nf-core/rgi/cardannotation/main.nf index 0386ce206d7a..fec518ece2f4 100644 --- a/modules/nf-core/rgi/cardannotation/main.nf +++ b/modules/nf-core/rgi/cardannotation/main.nf @@ -11,8 +11,8 @@ process RGI_CARDANNOTATION { output: path ("card_database_processed"), emit: db - env RGI_VERSION, emit: tool_version - env DB_VERSION, emit: db_version + env 'RGI_VERSION', emit: tool_version + env 'DB_VERSION', emit: db_version path "versions.yml", emit: versions when: diff --git a/modules/nf-core/rgi/main/main.nf b/modules/nf-core/rgi/main/main.nf index a796460c4376..744dbc92d239 100644 --- a/modules/nf-core/rgi/main/main.nf +++ b/modules/nf-core/rgi/main/main.nf @@ -16,8 +16,8 @@ process RGI_MAIN { tuple val(meta), path("*.json"), emit: json tuple val(meta), path("*.txt"), emit: tsv tuple val(meta), path("temp/"), emit: tmp - env RGI_VERSION, emit: tool_version - env DB_VERSION, emit: db_version + env 'RGI_VERSION', emit: tool_version + env 'DB_VERSION', emit: db_version path "versions.yml", emit: versions when: From 5be99dd30b8c4eb06007feda9953611cedcabe2e Mon Sep 17 00:00:00 2001 From: Delfina Terradas <155591053+delfiterradas@users.noreply.github.com> Date: Wed, 14 Jan 2026 11:11:30 -0300 Subject: [PATCH 14/31] Generate normalised matrix 
with `variancepartition/dream` (#9645) * Emit normalised matrix from dream and update tests * Update `meta.yml` * Fix linting error * Update snapshots * Bugfix --- .../nf-core/variancepartition/dream/main.nf | 1 + .../nf-core/variancepartition/dream/meta.yml | 24 ++++++-- .../variancepartition/dream/templates/dream.R | 12 ++++ .../dream/tests/main.nf.test | 36 ++++++++++++ .../dream/tests/main.nf.test.snap | 40 ++++++++++++- .../abundance_differential_filter/main.nf | 1 + .../tests/dream_voom.config | 8 +++ .../tests/main.nf.test | 56 +++++++++++++++++++ .../tests/main.nf.test.snap | 48 ++++++++++++++++ .../tests/main.nf.test.snap | 50 ++++++++--------- 10 files changed, 244 insertions(+), 32 deletions(-) create mode 100644 subworkflows/nf-core/abundance_differential_filter/tests/dream_voom.config diff --git a/modules/nf-core/variancepartition/dream/main.nf b/modules/nf-core/variancepartition/dream/main.nf index 343b10becca6..0e02941c755f 100644 --- a/modules/nf-core/variancepartition/dream/main.nf +++ b/modules/nf-core/variancepartition/dream/main.nf @@ -14,6 +14,7 @@ process VARIANCEPARTITION_DREAM { output: tuple val(meta), path("*.dream.results.tsv") , emit: results tuple val(meta), path("*.dream.model.txt") , emit: model + tuple val(meta), path("*.normalised_counts.tsv") , emit: normalised_counts, optional: true path "versions.yml" , emit: versions when: diff --git a/modules/nf-core/variancepartition/dream/meta.yml b/modules/nf-core/variancepartition/dream/meta.yml index 59c5faf09ea2..a98fe8fd98eb 100644 --- a/modules/nf-core/variancepartition/dream/meta.yml +++ b/modules/nf-core/variancepartition/dream/meta.yml @@ -1,6 +1,6 @@ name: "variancepartition_dream" -description: Runs a differential expression analysis with dream() from variancePartition - R package +description: Runs a differential expression analysis with dream() from + variancePartition R package keywords: - rnaseq - dream @@ -36,12 +36,12 @@ input: should be used to derive the target samples - 
formula: type: string - description: (Optional) R formula string used for modeling, e.g. '~ treatment - + (1 | sample_number)'. + description: (Optional) R formula string used for modeling, e.g. '~ + treatment + (1 | sample_number)'. - comparison: type: string - description: (Optional) Literal string passed to `limma::makeContrasts`, e.g. - 'treatmenthND6 - treatmentmCherry'. + description: (Optional) Literal string passed to `limma::makeContrasts`, + e.g. 'treatmenthND6 - treatmentmCherry'. - - meta2: type: map description: | @@ -80,6 +80,18 @@ output: R model description text file. pattern: "*.dream.model.txt" ontologies: [] + normalised_counts: + - - meta: + type: map + description: | + groovy array with metadata information for the contrast generated + - "*.normalised_counts.tsv": + type: file + description: normalised TSV format expression matrix with genes by row and + samples by column + pattern: "*.normalised_counts.tsv" + ontologies: + - edam: http://edamontology.org/format_3475 # TSV versions: - versions.yml: type: file diff --git a/modules/nf-core/variancepartition/dream/templates/dream.R b/modules/nf-core/variancepartition/dream/templates/dream.R index 772f081c580b..4ea67e531ccb 100644 --- a/modules/nf-core/variancepartition/dream/templates/dream.R +++ b/modules/nf-core/variancepartition/dream/templates/dream.R @@ -246,6 +246,18 @@ if (as.logical(opt\$apply_voom)) { dge <- DGEList(countMatrix) dge <- calcNormFactors(dge) vobjDream <- voomWithDreamWeights(dge, form, metadata, BPPARAM = param) + + # Write normalized counts matrix to a TSV file + normalized_counts <- vobjDream\$E + if (!is.null(opt\$round_digits)) { + normalized_counts <- apply(normalized_counts, 2, function(x) round(x, opt\$round_digits)) + } + normalized_counts_with_genes <- data.frame(gene_id = rownames(normalized_counts), normalized_counts, check.names = FALSE, row.names = NULL) + write.table(normalized_counts_with_genes, + file = paste(opt\$output_prefix, "normalised_counts.tsv", sep 
= '.'), + sep = " ", + quote = FALSE, + row.names = FALSE) } else { # Assume countMatrix roughly follows a normal distribution vobjDream <- countMatrix diff --git a/modules/nf-core/variancepartition/dream/tests/main.nf.test b/modules/nf-core/variancepartition/dream/tests/main.nf.test index 57ec1fc8d423..b6d17baa1826 100644 --- a/modules/nf-core/variancepartition/dream/tests/main.nf.test +++ b/modules/nf-core/variancepartition/dream/tests/main.nf.test @@ -46,6 +46,42 @@ nextflow_process { } } + test("RNAseq - Voom - Feature Counts - formula + comparison contrast string - interaction") { + when { + params { + module_args = "--round_digits 2 --apply_voom true" + } + process { + """ + input[0] = Channel.of([ + 'id': 'genotype_WT_KO_treatment_Control_Treated', + 'formula': '~ genotype * treatment', + 'comparison': 'genotypeWT - treatmentTreated' // should be a 'make.names() string' + ]) + .map { + tuple(it, it.variable, it.reference, it.target, it.formula, it.comparison) + } + + input[1] = Channel.of([ + [ id: 'test' ], + file("https://github.com/nf-core/test-datasets/raw/differentialabundance/modules_testdata/variancepartition_dream/metadata.tsv", checkIfExists: true), + file("https://github.com/nf-core/test-datasets/raw/differentialabundance/modules_testdata/variancepartition_dream/counts.tsv", checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.results[0][1]).getText().contains("gene_id\tlogFC\tAveExpr\tt\tP.Value\tadj.P.Val\tB") }, + { assert path(process.out.results[0][1]).getText().contains("5.07") }, + { assert path(process.out.results[0][1]).getText().contains("2.97\t3.88\t38.17") }, + { assert snapshot(process.out.model, process.out.normalised_counts, process.out.versions).match() } + ) + } + } + test("Mus musculus - expression table - contrasts + formula + comparison contrast string") { when { diff --git a/modules/nf-core/variancepartition/dream/tests/main.nf.test.snap 
b/modules/nf-core/variancepartition/dream/tests/main.nf.test.snap index ea82a90fe03f..9e80143b75ed 100644 --- a/modules/nf-core/variancepartition/dream/tests/main.nf.test.snap +++ b/modules/nf-core/variancepartition/dream/tests/main.nf.test.snap @@ -251,6 +251,9 @@ ] ], "2": [ + + ], + "3": [ "versions.yml:md5,03b686ec8c67a91501ebb2b2a5234e77" ], "model": [ @@ -262,6 +265,9 @@ }, "treatment_mCherry_hND6.dream.model.txt:md5,d41d8cd98f00b204e9800998ecf8427e" ] + ], + "normalised_counts": [ + ], "results": [ [ @@ -282,7 +288,39 @@ "nf-test": "0.9.3", "nextflow": "25.04.2" }, - "timestamp": "2025-12-23T18:58:10.585988582" + "timestamp": "2026-01-13T15:35:07.121696674" + }, + "RNAseq - Voom - Feature Counts - formula + comparison contrast string - interaction": { + "content": [ + [ + [ + { + "id": "genotype_WT_KO_treatment_Control_Treated", + "formula": "~ genotype * treatment", + "comparison": "genotypeWT - treatmentTreated" + }, + "genotype_WT_KO_treatment_Control_Treated.dream.model.txt:md5,e67d5a9c42dd9374b370b6426832d08f" + ] + ], + [ + [ + { + "id": "genotype_WT_KO_treatment_Control_Treated", + "formula": "~ genotype * treatment", + "comparison": "genotypeWT - treatmentTreated" + }, + "genotype_WT_KO_treatment_Control_Treated.normalised_counts.tsv:md5,84917b0fe7db41763ae3dc7c09e2ec4f" + ] + ], + [ + "versions.yml:md5,fc1f26eb2194018e99fc2916332676b7" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.2" + }, + "timestamp": "2026-01-13T15:53:31.743589111" }, "Mus musculus - expression table - contrasts + formula + comparison contrast string": { "content": [ diff --git a/subworkflows/nf-core/abundance_differential_filter/main.nf b/subworkflows/nf-core/abundance_differential_filter/main.nf index de64b108f43d..8fa1e00f1f16 100644 --- a/subworkflows/nf-core/abundance_differential_filter/main.nf +++ b/subworkflows/nf-core/abundance_differential_filter/main.nf @@ -173,6 +173,7 @@ workflow ABUNDANCE_DIFFERENTIAL_FILTER { ch_normalised_matrix = 
DESEQ2_NORM.out.normalised_counts .mix(LIMMA_NORM.out.normalised_counts) + .mix(VARIANCEPARTITION_DREAM.out.normalised_counts) ch_model = DESEQ2_DIFFERENTIAL.out.model .mix(LIMMA_DIFFERENTIAL.out.model) diff --git a/subworkflows/nf-core/abundance_differential_filter/tests/dream_voom.config b/subworkflows/nf-core/abundance_differential_filter/tests/dream_voom.config new file mode 100644 index 000000000000..9b2555dd7ad1 --- /dev/null +++ b/subworkflows/nf-core/abundance_differential_filter/tests/dream_voom.config @@ -0,0 +1,8 @@ +process { + withName: 'VARIANCEPARTITION_DREAM' { + ext.args = { [ + "--round_digits 2 --apply_voom true" + ].join(' ').trim() } + ext.prefix = { "${meta.id}_${meta.differential_method}_voom" } + } +} diff --git a/subworkflows/nf-core/abundance_differential_filter/tests/main.nf.test b/subworkflows/nf-core/abundance_differential_filter/tests/main.nf.test index e6affcc1a396..f6eb66a5e0bb 100644 --- a/subworkflows/nf-core/abundance_differential_filter/tests/main.nf.test +++ b/subworkflows/nf-core/abundance_differential_filter/tests/main.nf.test @@ -70,6 +70,62 @@ nextflow_workflow { } } + test("dream - voom") { + config './dream_voom.config' + tag "dream_voom" + + when { + workflow { + """ + + def testData = [ + expression_test_data_dir: params.modules_testdata_base_path + 'genomics/mus_musculus/rnaseq_expression/', + contrasts_file: 'SRP254919.contrasts.csv', + abundance_file: 'SRP254919.salmon.merged.gene_counts.top1000cov.tsv', + samplesheet_file: 'SRP254919.samplesheet.csv' + ] + + ch_samplesheet = Channel.of([ + [ id:'test' ], + file(testData.expression_test_data_dir + testData.samplesheet_file) + ]) + ch_transcript_lengths = Channel.of([ [], [] ]) + ch_control_features = Channel.of([ [], [] ]) + ch_contrasts = Channel.of(['id': 'treatment_mCherry_hND6', 'formula':'~ treatment + (1 | sample_number)', 'comparison': 'treatmentmCherry']) + .map{ + tuple([id: 'test'], it, it.variable, it.reference, it.target, it.formula, it.comparison) + } + 
.groupTuple() + ch_input = Channel.of([ + [ id:'test' ], + file(testData.expression_test_data_dir + testData.abundance_file), + 'dream', // analysis method + 1.5, // FC threshold + 0.05 // stat (adjusted p-value) threshold + ]) + + input[0] = ch_input + input[1] = ch_samplesheet + input[2] = ch_transcript_lengths + input[3] = ch_control_features + input[4] = ch_contrasts + """ + } + } + then { + assertAll( + { assert workflow.success }, + { assert path(workflow.out.results_genewise[0][1]).getText().contains("gene_id\tlogFC\tAveExpr\tt\tP.Value\tadj.P.Val\tB") }, + { assert snapshot( + workflow.out.results_genewise_filtered, + workflow.out.model, + workflow.out.normalised_matrix, + workflow.out.versions + ).match() } + ) + } + } + test("dream - complex contrast - literal contrast string comparison") { config './dream.config' tag "dream_complex" diff --git a/subworkflows/nf-core/abundance_differential_filter/tests/main.nf.test.snap b/subworkflows/nf-core/abundance_differential_filter/tests/main.nf.test.snap index ba48668eb862..a42566ee1136 100644 --- a/subworkflows/nf-core/abundance_differential_filter/tests/main.nf.test.snap +++ b/subworkflows/nf-core/abundance_differential_filter/tests/main.nf.test.snap @@ -836,6 +836,54 @@ }, "timestamp": "2025-06-17T10:39:22.686251" }, + "dream - voom": { + "content": [ + [ + [ + { + "id": "treatment_mCherry_hND6_test", + "formula": "~ treatment + (1 | sample_number)", + "comparison": "treatmentmCherry", + "differential_method": "dream", + "fc_threshold": 1.5, + "stat_threshold": 0.05 + }, + "treatment_mCherry_hND6_test_filtered.tsv:md5,6a90dfffc1dc342156968de1a45dc53e" + ] + ], + [ + [ + { + "id": "treatment_mCherry_hND6_test", + "formula": "~ treatment + (1 | sample_number)", + "comparison": "treatmentmCherry", + "differential_method": "dream" + }, + "treatment_mCherry_hND6_test_dream_voom.dream.model.txt:md5,7103206474aa480ffd9cec149263489f" + ] + ], + [ + [ + { + "id": "treatment_mCherry_hND6_test", + "formula": "~ treatment 
+ (1 | sample_number)", + "comparison": "treatmentmCherry", + "differential_method": "dream" + }, + "treatment_mCherry_hND6_test_dream_voom.normalised_counts.tsv:md5,4e9f616c34f59773e1c8611c76255d42" + ] + ], + [ + "versions.yml:md5,1c02d4e455e8f3809c8ce37bee947690", + "versions.yml:md5,736da31f06f854355d45aeb9d9c874e0" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.2" + }, + "timestamp": "2026-01-13T16:01:26.450174111" + }, "deseq2 - mouse - basic": { "content": [ [ diff --git a/subworkflows/nf-core/differential_functional_enrichment/tests/main.nf.test.snap b/subworkflows/nf-core/differential_functional_enrichment/tests/main.nf.test.snap index 23e07d8a5199..bdafe4a5aa94 100644 --- a/subworkflows/nf-core/differential_functional_enrichment/tests/main.nf.test.snap +++ b/subworkflows/nf-core/differential_functional_enrichment/tests/main.nf.test.snap @@ -128,16 +128,16 @@ ] ], [ + "versions.yml:md5,0d83547f6ab8761c0ce33dbe39d03604", + "versions.yml:md5,0d83547f6ab8761c0ce33dbe39d03604", + "versions.yml:md5,0d83547f6ab8761c0ce33dbe39d03604", + "versions.yml:md5,0d83547f6ab8761c0ce33dbe39d03604", + "versions.yml:md5,0d83547f6ab8761c0ce33dbe39d03604", + "versions.yml:md5,0d83547f6ab8761c0ce33dbe39d03604", "versions.yml:md5,25ab98049a601f4940f3e5a24aa73f55", "versions.yml:md5,25ab98049a601f4940f3e5a24aa73f55", "versions.yml:md5,25ab98049a601f4940f3e5a24aa73f55", "versions.yml:md5,25ab98049a601f4940f3e5a24aa73f55", - "versions.yml:md5,48ce57c814471f53329615d311b1f5a0", - "versions.yml:md5,48ce57c814471f53329615d311b1f5a0", - "versions.yml:md5,48ce57c814471f53329615d311b1f5a0", - "versions.yml:md5,48ce57c814471f53329615d311b1f5a0", - "versions.yml:md5,48ce57c814471f53329615d311b1f5a0", - "versions.yml:md5,48ce57c814471f53329615d311b1f5a0", "versions.yml:md5,cd9cd1563a983e586b15fd2276da8bfb", "versions.yml:md5,cd9cd1563a983e586b15fd2276da8bfb", "versions.yml:md5,f2db818ec8143f64399247548098b643", @@ -152,10 +152,10 @@ ] ], "meta": { - "nf-test": "0.9.2", - 
"nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.04.2" }, - "timestamp": "2025-09-12T13:53:45.331513742" + "timestamp": "2026-01-13T16:47:14.656830015" }, "deseq2 + gprofiler2 - mouse": { "content": [ @@ -238,19 +238,19 @@ "\"DIFFERENTIAL_FUNCTIONAL_ENRICHMENT:GPROFILER2_GOST\"": { "r-ggplot2": "3.4.3", "r-gprofiler2": "0.2.2", - "gprofiler-data": "biomart: Ensembl\nbiomart_version: '113'\ndisplay_name: Mouse\ngenebuild: GRCm39\ngprofiler_version: e113_eg59_p19_f6a03c19\norganism: mmusculus\nsources:\n CORUM:\n name: CORUM protein complexes\n version: 28.11.2022 Corum 4.1\n GO:BP:\n name: biological process\n version: |-\n annotations: BioMart\n classes: releases/2025-03-16\n GO:CC:\n name: cellular component\n version: |-\n annotations: BioMart\n classes: releases/2025-03-16\n GO:MF:\n name: molecular function\n version: |-\n annotations: BioMart\n classes: releases/2025-03-16\n HP:\n name: Human Phenotype Ontology\n version: |-\n annotations: 05.2025\n classes: None\n KEGG:\n name: Kyoto Encyclopedia of Genes and Genomes\n version: KEGG FTP Release 2024-01-22\n REAC:\n name: Reactome\n version: |-\n annotations: BioMart\n classes: 2025-5-23\n TF:\n name: Transfac\n version: |-\n annotations: TRANSFAC Release 2024.2\n classes: v2\n WP:\n name: WikiPathways\n version: '20250510'\ntaxonomy_id: '10090'\n" + "gprofiler-data": "biomart: Ensembl\nbiomart_version: '113'\ndisplay_name: Mouse\ngenebuild: GRCm39\ngprofiler_version: e113_eg59_p19_6ece67f5\norganism: mmusculus\nsources:\n CORUM:\n name: CORUM protein complexes\n version: 28.11.2022 Corum 4.1\n GO:BP:\n name: biological process\n version: |-\n annotations: BioMart\n classes: releases/2025-03-16\n GO:CC:\n name: cellular component\n version: |-\n annotations: BioMart\n classes: releases/2025-03-16\n GO:MF:\n name: molecular function\n version: |-\n annotations: BioMart\n classes: releases/2025-03-16\n HP:\n name: Human Phenotype Ontology\n version: |-\n annotations: 05.2025\n classes: None\n 
KEGG:\n name: Kyoto Encyclopedia of Genes and Genomes\n version: KEGG FTP Release 2024-01-22\n REAC:\n name: Reactome\n version: |-\n annotations: BioMart\n classes: 2025-5-23\n TF:\n name: Transfac\n version: |-\n annotations: TRANSFAC Release 2024.2\n classes: v2\n WP:\n name: WikiPathways\n version: '20250510'\ntaxonomy_id: '10090'\n" } }, [ - "versions.yml:md5,48ce57c814471f53329615d311b1f5a0", - "versions.yml:md5,48ce57c814471f53329615d311b1f5a0" + "versions.yml:md5,0d83547f6ab8761c0ce33dbe39d03604", + "versions.yml:md5,0d83547f6ab8761c0ce33dbe39d03604" ] ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.04.2" }, - "timestamp": "2025-09-12T13:50:16.438987978" + "timestamp": "2026-01-13T16:45:04.216675971" }, "propd + grea - mouse": { "content": [ @@ -340,18 +340,18 @@ "\"DIFFERENTIAL_FUNCTIONAL_ENRICHMENT:GPROFILER2_GOST\"": { "r-ggplot2": "3.4.3", "r-gprofiler2": "0.2.2", - "gprofiler-data": "biomart: Ensembl\nbiomart_version: '113'\ndisplay_name: Mouse\ngenebuild: GRCm39\ngprofiler_version: e113_eg59_p19_f6a03c19\norganism: mmusculus\nsources:\n CORUM:\n name: CORUM protein complexes\n version: 28.11.2022 Corum 4.1\n GO:BP:\n name: biological process\n version: |-\n annotations: BioMart\n classes: releases/2025-03-16\n GO:CC:\n name: cellular component\n version: |-\n annotations: BioMart\n classes: releases/2025-03-16\n GO:MF:\n name: molecular function\n version: |-\n annotations: BioMart\n classes: releases/2025-03-16\n HP:\n name: Human Phenotype Ontology\n version: |-\n annotations: 05.2025\n classes: None\n KEGG:\n name: Kyoto Encyclopedia of Genes and Genomes\n version: KEGG FTP Release 2024-01-22\n REAC:\n name: Reactome\n version: |-\n annotations: BioMart\n classes: 2025-5-23\n TF:\n name: Transfac\n version: |-\n annotations: TRANSFAC Release 2024.2\n classes: v2\n WP:\n name: WikiPathways\n version: '20250510'\ntaxonomy_id: '10090'\n" + "gprofiler-data": "biomart: Ensembl\nbiomart_version: 
'113'\ndisplay_name: Mouse\ngenebuild: GRCm39\ngprofiler_version: e113_eg59_p19_6ece67f5\norganism: mmusculus\nsources:\n CORUM:\n name: CORUM protein complexes\n version: 28.11.2022 Corum 4.1\n GO:BP:\n name: biological process\n version: |-\n annotations: BioMart\n classes: releases/2025-03-16\n GO:CC:\n name: cellular component\n version: |-\n annotations: BioMart\n classes: releases/2025-03-16\n GO:MF:\n name: molecular function\n version: |-\n annotations: BioMart\n classes: releases/2025-03-16\n HP:\n name: Human Phenotype Ontology\n version: |-\n annotations: 05.2025\n classes: None\n KEGG:\n name: Kyoto Encyclopedia of Genes and Genomes\n version: KEGG FTP Release 2024-01-22\n REAC:\n name: Reactome\n version: |-\n annotations: BioMart\n classes: 2025-5-23\n TF:\n name: Transfac\n version: |-\n annotations: TRANSFAC Release 2024.2\n classes: v2\n WP:\n name: WikiPathways\n version: '20250510'\ntaxonomy_id: '10090'\n" } }, [ - "versions.yml:md5,48ce57c814471f53329615d311b1f5a0" + "versions.yml:md5,0d83547f6ab8761c0ce33dbe39d03604" ] ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.04.2" }, - "timestamp": "2025-09-12T13:48:59.907490432" + "timestamp": "2026-01-13T16:44:24.970752" }, "deseq2 + decoupler - mouse": { "content": [ @@ -501,7 +501,7 @@ ], "8": [ - "versions.yml:md5,71d562114ad0a46a870b75ac423ab82c" + "versions.yml:md5,6cc615ac7ba96eaf49f24851c64fde49" ], "decoupler_dc_estimate": [ @@ -558,14 +558,14 @@ ], "versions": [ - "versions.yml:md5,71d562114ad0a46a870b75ac423ab82c" + "versions.yml:md5,6cc615ac7ba96eaf49f24851c64fde49" ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.04.2" }, - "timestamp": "2025-09-12T13:57:00.483231081" + "timestamp": "2026-01-13T16:48:38.618873304" } } \ No newline at end of file From 77ff8c95ff75b911bc38affbdda4c8d36fbabe50 Mon Sep 17 00:00:00 2001 From: Kateryna Nemesh <121881939+kornkv@users.noreply.github.com> Date: 
Wed, 14 Jan 2026 15:29:23 +0100 Subject: [PATCH 15/31] Add draft of FASTQ_REMOVE_ADAPTERS_AND_MERGE subworkflow with tests (#9521) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add draft of FASTQ_REMOVE_ADAPTERS_AND_MERGE subworkflow with tests * Add ontologies to tcoffee/regressive and upp/align modules (#9484) * add ontologies to tcoffee regressive * add ontologies to upp align * Add module PBMARKDUP (#9457) * Add module pbmarkdup * Fix linting * Update path to test data * Update with code review (--dup-file, log, check file name collisions) * Fix linting * Update path to test data * Update modules/nf-core/pbmarkdup/meta.yml * Fix linting * Enable complex contrast strings in DESeq2 (#9473) * Enable complex contrast strings * Update docker image * Add test case with limma contrast string * Format changes and add test with shrinkage * Declare deepvariant optional html output (#9469) * Add deepvariant optional html * update snapshot * Update modules/nf-core/deepvariant/rundeepvariant/main.nf Co-authored-by: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> * trigger html generation * revert config change --------- Co-authored-by: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> * utils_nfcore_pipeline: fix small lang server error (#9492) fix small lang server error * Fix hisat2/align to support large genome indices (.ht2l) (#9493) HISAT2 uses .ht2l extension instead of .ht2 for large genomes. Updated index detection to match both extensions. 
Related to nf-core/rnaseq#1643 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-authored-by: Claude * Update shinyngs modules to latest release (#9488) * Update semibin/singleeasybin environment (#9495) * add new ontology term to tcoffee align (#9497) * tcoffee_extractfrompdb test: sort file listing so "first" file is deterministic (#9489) * Sort file listing so "first" file is deterministic * Declare closure parameter per strict syntax Co-authored-by: Matthias Hörtenhuber --------- Co-authored-by: Matthias Hörtenhuber Co-authored-by: mashehu * Sambamba depth add region bed input (#9498) * sambamba add region bed input * fix linting * fix linting * Apply suggestions from code review Co-authored-by: Felix Lenner <52530259+fellen31@users.noreply.github.com> --------- Co-authored-by: Felix Lenner <52530259+fellen31@users.noreply.github.com> * fix fasta_index_methylseq and fastq_align_dedup workflows (#9496) * fix fasta_index_methylseq and fastq_align_dedup workflows for clarity and consistency - Updated variable names in fasta_index_methylseq to use 'channel' instead of 'Channel' for consistency. - Renamed UNTAR to UNTAR_BISMARK and UNTAR_BWAMETH for clarity in fasta_index_methylseq. - Enhanced comments and descriptions in meta.yml files for better understanding of input and output structures. - Adjusted test cases in fastq_align_dedup workflows to reflect changes in input structure from single-end to paired-end. - Updated version numbers in test snapshots to reflect recent changes. 
* fix: pre-commit lint fixes * Update test files for Glimpse (#9467) * Update glimpse * Update chunk * Update concordance * Revert changes * Fix glimpse test * Fix glimpse * Fix glimpse2 tests * Update sbwf * Remove old snapshots * Update glimpse * Update modules/nf-core/glimpse2/concordance/tests/main.nf.test Co-authored-by: Matthias Hörtenhuber * Update test --------- Co-authored-by: LouisLeNezet Co-authored-by: Matthias Hörtenhuber * meta.yml schema: add `containers` section, fix order and simplify (#9503) * fix order * add container section * simplify schema * require https for singluarity * update and add topics to snakemake module (#9454) * update and add topics * add new topics structure * add stub test and capture version in snapshot * update to 9.14.0 * fix singularity be setting cache_dir * fix stub --------- Co-authored-by: Matthias Hörtenhuber Co-authored-by: mashehu * fix sambamba depth stub version (#9509) * fix stub version * stray module * anota2seq: wrong variable name for batch assignment (#9511) rename variable * fix(anota2seq): add gene IDs and handle empty results (#9510) - Add missing gene_id_col parameter definition (defaults to 'gene_id') - Include gene IDs as first column in all results tables using configurable column name - Only write output files when there are significant results to avoid empty files - Mark all results TSV outputs as optional since they're conditionally created - Update test to use buffering results instead of empty mRNA_abundance results - Update test snapshots with new file formats including gene_id column This ensures anota2seq results are consistent with other modules and include gene identifiers for downstream analysis, while gracefully handling cases where no genes pass significance thresholds. 
Co-authored-by: Sebastian Uhrig * fix(decoupler): reorder imports and ensure environment variables are … (#9516) fix(decoupler): reorder imports and ensure environment variables are set before importing modules * Add strdrop/build (#9512) Add strdrop build * chore(deps): update infrastructural dependencies * 🔧 update image and bioconda container for VueGen to latest version (#9201) * 🔧 update image and bioconda container to latest version * ✅ update test snapshots * :bug: fix display of version of vuegen - had no command line interface option to display version, see https://github.com/Multiomics-Analytics-Group/vuegen/issues/167 * :art: display versions.yml content in snapshots * 🔧 add Dockerfile to install lastet PyPI vuegen version - does not pass hadolint(er) as of now * 🚧 add wave containers * :fire: remove README again * :fire: remove Dockerfile again * 🚧 try to follow Mahesh's advice * :bug: add explicit cache directory * 🔧 bump to Python 3.12 and remove channel prefix * :wrench: specify singularity image with https ... 
as specified in the docs: https://nf-co.re/docs/tutorials/nf-core_components/using_seqera_containers * 🚧 set user specified R libarary folder * ⏪ make docker and conda work again (using nf-core 3.5.1) * :wrench: switch again to custom docker image instead of wave - wave leads to too many custom installation issues * :bug: try to add font package * :fire: remove code moved to image - singularity runs in devcontainer * ⏪ add back conda quarto flag * :art: remove trailing whitespace * :art: format again * :art: hopefully the last trailing whitespace * :memo: document the build process and why the container is needed * Update image with nf-core one Co-authored-by: Matthias Hörtenhuber * Update container name Co-authored-by: Matthias Hörtenhuber * Apply suggestion from @mashehu * Apply suggestion from @mashehu --------- Co-authored-by: Famke Bäuerle <45968370+famosab@users.noreply.github.com> Co-authored-by: Matthias Hörtenhuber * qsv/cat: bump version (#9518) Bump qsv * Update haplogrep3 recipe to use topics (#9523) Co-authored-by: nf-core-bot * semibin/singleeasybin: bump version + migrate to topics (#9517) * Update semibin2 module * Update snapshot * Remove unneeded snapshot section --------- Co-authored-by: Matthias Hörtenhuber * remove unused folder * rename * trimmomatic revisit * cutadapt revisit * trimgalore revisit * bbduk revisit * fastp revisit * adapterremoval revisit * leehom checkpoint (#9534) * leehom checkpoint * debugged with transpose and proper collect on nf-test * leehom change order * remove ngmerge because cant deal with /1 /2 paired reads * paired end no merge test * New module: clusty (#9533) * new module clusty * patch links docs * fix prettier * remove non-mandatory input 'algorithm' * also patch stub input * Bump TRGT to 4.1.0 (#9514) * Bump TRGT to 4.1.0 * Switch to topics * Update modules/nf-core/trgt/genotype/tests/main.nf.test Co-authored-by: Matthias Hörtenhuber * remove duplicate versions --------- Co-authored-by: Matthias Hörtenhuber * 
fix missing quotes (#9535) * Fix dream to show more than 10 results (#9507) * fix: dream to show more than 10 results * test: update snapshots * test: update snapshots * feat: add round digits option and update snapshots again * fix: improve code formatting and update test snapshots * Fix tests and update snapshots * Update snapshot for `abundance_differential_filter` subworkflow * Fix tests with unstable content * Fix unstable files in test for `abundance_differential_filter` subworkflow --------- Co-authored-by: Anabella Trigila <18577080+atrigila@users.noreply.github.com> * rename to more appropriate fastq_preprocess_seqkit (#9537) * New module - TD2 (added modules for td2.longorfs & td2.predict) (#9475) * added modules for td2.longorfs & td2.predict * changes to version handling, updated snaps, and tests - resolving comments * added topics section in meta.yml * fix topics section in meta.yml * Update modules/nf-core/td2/longorfs/main.nf Co-authored-by: Simon Pearce <24893913+SPPearce@users.noreply.github.com> * Update modules/nf-core/td2/longorfs/meta.yml Co-authored-by: Simon Pearce <24893913+SPPearce@users.noreply.github.com> * Update modules/nf-core/td2/predict/main.nf Co-authored-by: Simon Pearce <24893913+SPPearce@users.noreply.github.com> * Update modules/nf-core/td2/longorfs/meta.yml Co-authored-by: Simon Pearce <24893913+SPPearce@users.noreply.github.com> * Update main.nf - stub run * Update meta.yml * resolving comments * updated emad ontologies * Update modules/nf-core/td2/predict/tests/main.nf.test.snap * Update modules/nf-core/td2/predict/main.nf * Update modules/nf-core/td2/longorfs/tests/main.nf.test.snap * Update modules/nf-core/td2/longorfs/main.nf * Update modules/nf-core/td2/predict/tests/main.nf.test.snap * Update modules/nf-core/td2/longorfs/tests/main.nf.test.snap --------- Co-authored-by: Simon Pearce <24893913+SPPearce@users.noreply.github.com> * Bump ichorCNA package build in ichorcna/createpon and ichorcna/run (#9531) * Bump ichorCNA 
package build in ichorcna/createpon and ichorcna/run At the moment, creating a PoN without data on chromosome X is bound to fail due to a bug in ichorCNA(https://github.com/GavinHaLab/ichorCNA/pull/26). It is unclear when a new version will be out, so I have patched the current bioconda package (https://github.com/bioconda/bioconda-recipes/pull/61045) and built new container images, which I want to update to in this PR. * Update some md5sums * Update test snapshot * Update test snapshot * RSeQC split_bam.py module implementation (#9536) * RSeQC split_bam.py module implementation Syntax edit Refractored version channel to topic Cleanup * Update modules/nf-core/rseqc/splitbam/main.nf Co-authored-by: Felix Lenner <52530259+fellen31@users.noreply.github.com> * Update modules/nf-core/rseqc/splitbam/tests/main.nf.test Co-authored-by: Felix Lenner <52530259+fellen31@users.noreply.github.com> * Update modules/nf-core/rseqc/splitbam/tests/main.nf.test Co-authored-by: Felix Lenner <52530259+fellen31@users.noreply.github.com> * Updated snapshot --------- Co-authored-by: Felix Lenner <52530259+fellen31@users.noreply.github.com> Co-authored-by: Nicolas Vannieuwkerke <101190534+nvnieuwk@users.noreply.github.com> * remove topics from multiqc (#9530) * remove topics from multiqc * versions_multiqc -> versions * Apply suggestion from @nvnieuwk * fix meta.yml after merge * bump to MultiQC version 1.33 (#9538) * bump to 1.33 * update snapshots --------- Co-authored-by: Matthias Hörtenhuber * Add strdrop/call (#9513) * Add strdrop build * Add strdrop/call * remove tag * fix wrong parameter name in error * align * remove duplicate versions * snaps * figure out why snapshots are not stable * different versions of trgt used in setup * Bump TRGT to 5.0.0 (#9541) 5.0.0 * Channel -> channel in some subwfs (#9542) Channel -> channel * Migrate cat/fastq to topic channel (#9543) * Migrate cat/fastq to topic channel * Remove out.versions in related subworkflows * Update snapshots (again) * Add 
module picard/collectvariantcallingmetrics (#9502) * Add module picard/collectvariantcallingmetrics * Run prettier * Fix format * Make one input tuple * Change test input structure * Add md5 checksum * Update snapshot * Update meta.yml * Support multithreading * Remove TODO Co-authored-by: Famke Bäuerle <45968370+famosab@users.noreply.github.com> --------- Co-authored-by: Famke Bäuerle <45968370+famosab@users.noreply.github.com> * fix tabix/tabix stub (#9544) * fix tabix stub * bump max shards to 16 * Unify msa modules (#9539) * add mafft_align subworkflow to prepare channels for mafft/align like all other msa modules * default output format fasta for tcoffee align and regressive to match other msa modules * [automated] Fix linting with Prettier * Update subworkflows/nf-core/mafft_align/main.nf Co-authored-by: Jose Espinosa-Carrasco --------- Co-authored-by: nf-core-bot Co-authored-by: Jose Espinosa-Carrasco * Fix missing version from subworkflow snapshot (#9548) * New module: whatshap/phase (#9431) * add whatshap phase module * change the vcf-specific test * change test * add versions * remove snapshot * update snapshot * Update modules/nf-core/whatshap/phase/tests/main.nf.test Co-authored-by: Famke Bäuerle <45968370+famosab@users.noreply.github.com> * rm unused args * add snapshot * add ontologies * update the snapshot * add versions to snapshot * fix stub issues * update the test and snap * fix format with "nextflow lint" * fix hardcoded name and file name ambiguity problem --------- Co-authored-by: Famke Bäuerle <45968370+famosab@users.noreply.github.com> * Bump version cat/cat to pigz 2.8 & rewrite nf-test & topic channel (#9549) * bump version cat/cat * reinclude cat tags for other modules * patch test name & meta.yml * Update main.nf.test * plastid metagene_generate, make_wiggle, psite (#9482) * plastid metagene_generate, make_wiggle, psite * pair BAM and BAI files * pair bam and p_offsets * omit optional arguments * metagene generate accepts various input 
formats * add meta * do not remove variable headers from output files * warning about hard-coded version * make lint happy * make lint happy #2 * plastid/make_wiggle: nf-core standards compliance - Add mapping_rule val input (enum: fiveprime, threeprime, center, fiveprime_variable) - Move output_format to ext.args (optional arg per nf-core standards) - Add validation: error if p_offsets missing with fiveprime_variable - Remove hardcoded --fiveprime_variable - Update meta.yml with mapping_rule input and enum - Update tests with mapping_rule input 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 * adapt meta.yml to new parameters * plastid: consolidate test snapshots and fix reproducibility - Consolidate multiple snapshot assertions into single snapshots per test - Remove snapshots of empty stub files (just check existence) - Exclude non-reproducible PNG from psite snapshots (matplotlib drift) - Format metagene_generate command across multiple lines 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 * plastid/make_wiggle: remove tracks from snapshot Wig files have non-reproducible md5sums across environments. Content is already validated via getText().contains('track'). 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 * plastid/psite: remove non-reproducible outputs from snapshot metagene_profiles.txt and p_offsets.txt have non-reproducible md5sums. Content is already validated via getText().contains() checks. 
🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --------- Co-authored-by: Jonathan Manning Co-authored-by: Claude Opus 4.5 * Add index and threads to trgt/merge (#9545) * Fix test path modification (#9465) * Update bcftools * Update bcftools csq * Update somalier ancestry * Update bcftools pluginfilltags * Update cnvnator * Update snap cnvnator * Fix extract somalier sbwf * Update snapshot * Update md5 * Fix linting * Upd snapshot * Upd snapshot * Upd snapshot * Upd snapshot --------- Co-authored-by: LouisLeNezet * Update xenium ranger modules and subworkflows (#9525) * update xeniumranger modules to use xeniumranger v4.0 * update xeniumranger modules to use xeniumranger v4.0 * fix: update xeniumranger import-segmentation tests and logic * update test compute resource spec to follow 10x website * remove extra file * fix: apply topic channels and update meta.yml structure for xeniumranger * fix: update meta.yml structure to satisfy schema * fix: align meta.yml with topic versions blog post example * fix: update meta.yml topics structure to satisfy schema validation * fix: automated lint fix for meta.yml topics structure * fix: use standard container definition syntax * fix: apply topic versions to rename module * fix: apply topic versions and meta.yml schema fixes for xeniumranger modules and subworkflows * untrack files * remove test config files for github workflow * remove config def in tests * remove config def in tests * Update tests and snapshots for xeniumranger modules and subworkflows * remove subworkflows for xeniumranger * remove subworkflows for xeniumranger * fix: update test template * fix relabel meta.yml * make prek hppy * make prek hppy * make prek hppy * update import-segmentation doc * update import-segmentation doc * fix oncologies * Fix snapshot file key order for rename and resegment modules * Apply suggestions from code review Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * 
comment out config * fix: make prek happy * fix typo * remove test files --------- Co-authored-by: Dongze He <32473855+DongzeHE@users.noreply.github.com> Co-authored-by: Sameesh Kher Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Version update: Modkit repair, callmods and bedmethyltobigwig (#9547) * update to v0.6 * update to v0.6 * update to v0.6 * update yml * update to v0.6 * update snapshot * update snapshot --------- Co-authored-by: ra25wog * Update `GLIMPSE` sbwf (#9524) * Update glimpse sbwf * Update test * Update filter operation * Update subworkflows/nf-core/vcf_impute_glimpse/main.nf Co-authored-by: Nicolas Vannieuwkerke <101190534+nvnieuwk@users.noreply.github.com> * Update subworkflows/nf-core/vcf_impute_glimpse/main.nf Co-authored-by: Nicolas Vannieuwkerke <101190534+nvnieuwk@users.noreply.github.com> * Update subworkflows/nf-core/vcf_impute_glimpse/main.nf Co-authored-by: Nicolas Vannieuwkerke <101190534+nvnieuwk@users.noreply.github.com> * Update subworkflows/nf-core/vcf_impute_glimpse/main.nf Co-authored-by: Nicolas Vannieuwkerke <101190534+nvnieuwk@users.noreply.github.com> * Update snapshot --------- Co-authored-by: LouisLeNezet Co-authored-by: Nicolas Vannieuwkerke <101190534+nvnieuwk@users.noreply.github.com> * Add quilt imputation subworkflow (#9443) * Add sbwf quilt * Fix tags * Update meta * Use modules repo files * Update glimpse2 imputation subworkflow (#9434) * Update glimpse2 sbwf * Update test * Update subworkflows/nf-core/bam_vcf_impute_glimpse2/main.nf Co-authored-by: Jonathan Manning * Update subworkflows/nf-core/bam_vcf_impute_glimpse2/tests/main.nf.test Co-authored-by: Jonathan Manning * Add deprecation * Change ifEmpty --------- Co-authored-by: LouisLeNezet Co-authored-by: Jonathan Manning * Add `BEAGLE5` imputation subworkflow (#9550) * Update glimpse2 sbwf * Update test * Add region to beagle5 * Add subworkflow * Fix linting * Fix linting * Fix linting * Update 
subworkflows/nf-core/vcf_impute_beagle5/main.nf Co-authored-by: Nicolas Vannieuwkerke <101190534+nvnieuwk@users.noreply.github.com> * Add comment * Update grouping and test * Remove tag * Revert change glimpse2 reference * Revert change glimpse2 sbwf * Revert change glimpse2 sbwf * Revert change glimpse2 sbwf --------- Co-authored-by: LouisLeNezet Co-authored-by: Nicolas Vannieuwkerke <101190534+nvnieuwk@users.noreply.github.com> * Add minimac4 imputation subworkfllow (#9451) * Add vcf_impute_minimac4 * Update linting * Update test * Fix linting * Update minimac4 sbwf * Remove tag * Remove tag * Fix linting * Add comment * Update snapshot * Fix nf-test * Add BBSplit stats to MultiQC in fastq_qc_trim_filter_setstrandedness subworkflow (#9559) Add BBSplit stats to MultiQC files in fastq_qc_trim_filter_setstrandedness Pass BBSplit stats output to MultiQC for visualization of read binning statistics. MultiQC 1.33+ includes support for parsing BBSplit stats.txt files and displaying per-sample read distribution across reference genomes. 
🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-authored-by: Claude Opus 4.5 * Update cutadapt (#9551) * proper stub for gz and stub test added * topic output syntax and tests update * meta yml updated with topics and ontologies * meta file curated * version bump to latest * update nf-tests properly * adding self to maintainers * removed Z flag as is deprecated after v4.10 * conda bug with different pre-built python version fixed * added cutadapt to stub now that stub gz is properly created, and removed cutadapt versions, since it is now ported to topics * single-end test with tool skips * Standarize and alignment for all imputation and alignment modules (#9566) * Standarize and alignment * Fix glimpse2 sbwf test * Fix test * Add comment * Update snapshot --------- Co-authored-by: LouisLeNezet * Update Infrastructural dependencies * Remove .view() (#9567) * Bump strdrop to 0.3.1 (#9565) * Remove unecessary tags (#9568) * Remove .view() * Remove unecessary tags * Update trimgalore (#9570) * testing solo trim-galore container, without adding extra cutadapt and pigz * Syntax updates and topic version for manta modules (#9556) * update manta germline * topics convertinversion * topics convertinversion * topics manta/somatic * topics manta/tumoronly * Syntax updates and topics of jasminesv (#9554) syntax updates and topics of jasminesv * Update `Modkit pileup` (#9553) * update yaml * update main.nf * modified test runs * update bedmethyltobigwig tests * update main * update snapshot * fix linting * update snapshots * remove config * update module_args * [automated] Fix linting with Prettier * changed name * update main --------- Co-authored-by: ra25wog Co-authored-by: nf-core-bot * Standarize and alignment for all imputation and alignment modules (#9566) * Standarize and alignment * Fix glimpse2 sbwf test * Fix test * Add comment * Update snapshot --------- Co-authored-by: LouisLeNezet * Update Infrastructural dependencies * Remove .view() (#9567) * 
Bump strdrop to 0.3.1 (#9565) * Remove unecessary tags (#9568) * Remove .view() * Remove unecessary tags * latest container, with cutadapt 5.2 * new output syntax, nf-tests updated, meta updated * meta yml lint fixed * trying to fix lint * lint fix with nf-core tools 3.6.0dev * removing TRIMGALORE versions output from the FASTQ_FASTQC_UMITOOLS_TRIMGALORE subworkflow --------- Co-authored-by: Nicolas Vannieuwkerke <101190534+nvnieuwk@users.noreply.github.com> Co-authored-by: Jinn <155078830+jkh00@users.noreply.github.com> Co-authored-by: ra25wog Co-authored-by: nf-core-bot Co-authored-by: Louis Le Nézet <58640615+LouisLeNezet@users.noreply.github.com> Co-authored-by: LouisLeNezet Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> Co-authored-by: Felix Lenner <52530259+fellen31@users.noreply.github.com> * trimgalore output versions removed * structure for subworkflow outputs in meta.yml file * Update subworkflows/nf-core/fastq_removeadapters_merge/main.nf Co-authored-by: James A. Fellows Yates * Update subworkflows/nf-core/fastq_removeadapters_merge/main.nf Co-authored-by: James A. Fellows Yates * main and meta updated with new one-tool logic * nf-tests updated * var name change * paired_interleaved dropped * adapterremoval merge logic update similar to eager * Update subworkflows/nf-core/fastq_removeadapters_merge/main.nf Co-authored-by: James A. Fellows Yates * update snapshot --------- Co-authored-by: Júlia Mir Pedrol Co-authored-by: Hanh Hoang <134130358+sainsachiko@users.noreply.github.com> Co-authored-by: Delfina Terradas <155591053+delfiterradas@users.noreply.github.com> Co-authored-by: Peter Pruisscher <57712924+peterpru@users.noreply.github.com> Co-authored-by: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Co-authored-by: Matthias De Smet <11850640+matthdsm@users.noreply.github.com> Co-authored-by: Jonathan Manning Co-authored-by: Claude Co-authored-by: Diego Alvarez S. 
Co-authored-by: Nathan Weeks <1800812+nathanweeks@users.noreply.github.com> Co-authored-by: Matthias Hörtenhuber Co-authored-by: mashehu Co-authored-by: Felix Lenner <52530259+fellen31@users.noreply.github.com> Co-authored-by: Sateesh_Peri <33637490+sateeshperi@users.noreply.github.com> Co-authored-by: Louis Le Nézet <58640615+LouisLeNezet@users.noreply.github.com> Co-authored-by: LouisLeNezet Co-authored-by: Maxime U Garcia Co-authored-by: Sebastian Uhrig Co-authored-by: Anabella Trigila <18577080+atrigila@users.noreply.github.com> Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> Co-authored-by: Henry Webel Co-authored-by: Famke Bäuerle <45968370+famosab@users.noreply.github.com> Co-authored-by: nf-core-bot Co-authored-by: vagkaratzas Co-authored-by: Evangelos Karatzas <32259775+vagkaratzas@users.noreply.github.com> Co-authored-by: Joon Klaps Co-authored-by: Sameesh Kher Co-authored-by: Simon Pearce <24893913+SPPearce@users.noreply.github.com> Co-authored-by: Luca Beltrame Co-authored-by: Rayan Hassaïne Co-authored-by: Nicolas Vannieuwkerke <101190534+nvnieuwk@users.noreply.github.com> Co-authored-by: Friederike Hanssen Co-authored-by: Adrien Coulier Co-authored-by: Georgia Kesisoglou Co-authored-by: Jose Espinosa-Carrasco Co-authored-by: HD Yi Co-authored-by: Dongze He <171858310+an-altosian@users.noreply.github.com> Co-authored-by: Dongze He <32473855+DongzeHE@users.noreply.github.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: Jinn <155078830+jkh00@users.noreply.github.com> Co-authored-by: ra25wog Co-authored-by: Jonathan Manning Co-authored-by: James A. 
Fellows Yates --- setup-nextflow | 1 - .../fastq_removeadapters_merge/main.nf | 140 ++++++++ .../fastq_removeadapters_merge/meta.yml | 92 ++++++ .../tests/main.nf.test | 304 ++++++++++++++++++ .../tests/main.nf.test.snap | 263 +++++++++++++++ .../tests/nextflow.config | 9 + .../tests/nextflow_PE.config | 9 + .../tests/nextflow_SE.config | 9 + 8 files changed, 826 insertions(+), 1 deletion(-) delete mode 160000 setup-nextflow create mode 100644 subworkflows/nf-core/fastq_removeadapters_merge/main.nf create mode 100644 subworkflows/nf-core/fastq_removeadapters_merge/meta.yml create mode 100644 subworkflows/nf-core/fastq_removeadapters_merge/tests/main.nf.test create mode 100644 subworkflows/nf-core/fastq_removeadapters_merge/tests/main.nf.test.snap create mode 100644 subworkflows/nf-core/fastq_removeadapters_merge/tests/nextflow.config create mode 100644 subworkflows/nf-core/fastq_removeadapters_merge/tests/nextflow_PE.config create mode 100644 subworkflows/nf-core/fastq_removeadapters_merge/tests/nextflow_SE.config diff --git a/setup-nextflow b/setup-nextflow deleted file mode 160000 index 6c2e22b4d901..000000000000 --- a/setup-nextflow +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 6c2e22b4d901f0c42ca66c5069f8026df026d165 diff --git a/subworkflows/nf-core/fastq_removeadapters_merge/main.nf b/subworkflows/nf-core/fastq_removeadapters_merge/main.nf new file mode 100644 index 000000000000..5d3dd3c995ed --- /dev/null +++ b/subworkflows/nf-core/fastq_removeadapters_merge/main.nf @@ -0,0 +1,140 @@ +// both SE and PE +include { TRIMMOMATIC } from '../../../modules/nf-core/trimmomatic/main' +include { CUTADAPT } from '../../../modules/nf-core/cutadapt/main' +include { TRIMGALORE } from '../../../modules/nf-core/trimgalore/main' +include { BBMAP_BBDUK } from '../../../modules/nf-core/bbmap/bbduk/main' +include { LEEHOM } from '../../../modules/nf-core/leehom/main' +// both SE and PE, plus merging +include { FASTP } from '../../../modules/nf-core/fastp/main' +include { 
ADAPTERREMOVAL as ADAPTERREMOVAL_SE } from '../../../modules/nf-core/adapterremoval/main'
+include { ADAPTERREMOVAL as ADAPTERREMOVAL_PE } from '../../../modules/nf-core/adapterremoval/main'
+// helper module for concatenating adapterremoval paired-end processed reads
+include { CAT_FASTQ } from '../../../modules/nf-core/cat/fastq/main'
+
+// Removes adapters from short reads, and optionally merges read pairs, with the one tool
+// named by `val_adapter_tool`. Each branch below copies that tool's outputs into the shared
+// result channels; a result channel the chosen tool does not feed is emitted empty.
+// An unrecognised tool name stops the run with an error that lists the accepted names.
+workflow FASTQ_REMOVEADAPTERS_MERGE {
+
+    take:
+    ch_input_reads                 // channel: [mandatory] meta, reads
+    val_adapter_tool               // string: [mandatory] tool_name // choose from: ["trimmomatic", "cutadapt", "trimgalore", "bbduk", "leehom", "fastp", "adapterremoval"]
+    ch_custom_adapters_file        // channel: [optional] {fasta,txt} // fasta, for bbduk or fastp, or txt, for adapterremoval
+    val_save_merged                // boolean: [mandatory] if true, will return the merged reads instead, for fastp and adapterremoval
+    val_fastp_discard_trimmed_pass // boolean: [mandatory] // only for fastp
+    val_fastp_save_trimmed_fail    // boolean: [mandatory] // only for fastp
+
+    main:
+
+    // Optional results start empty; only the branch that runs adds to them.
+    ch_discarded_reads = channel.empty() // from trimmomatic, trimgalore, leehom, fastp, adapterremoval
+    ch_log             = channel.empty() // from trimmomatic, trimgalore, fastp
+    ch_report          = channel.empty() // from trimmomatic, trimgalore, fastp
+    ch_versions        = channel.empty()
+    ch_multiqc_files   = channel.empty() // from trimmomatic, cutadapt, bbduk, leehom, fastp, adapterremoval
+
+    if (val_adapter_tool == "trimmomatic") {
+        TRIMMOMATIC( ch_input_reads )
+
+        ch_processed_reads = TRIMMOMATIC.out.trimmed_reads
+        ch_discarded_reads = ch_discarded_reads.mix(TRIMMOMATIC.out.unpaired_reads.transpose()) // .transpose() because paired reads will output 2 unpaired files in an array
+        ch_log             = TRIMMOMATIC.out.trim_log
+        ch_report          = TRIMMOMATIC.out.summary
+        ch_versions        = ch_versions.mix(TRIMMOMATIC.out.versions.first())
+        ch_multiqc_files   = ch_multiqc_files.mix(TRIMMOMATIC.out.out_log)
+    } else if (val_adapter_tool == "cutadapt") {
+        CUTADAPT( ch_input_reads )
+
+        ch_processed_reads = CUTADAPT.out.reads
+        ch_multiqc_files   = ch_multiqc_files.mix(CUTADAPT.out.log)
+    } else if (val_adapter_tool == "trimgalore") {
+        TRIMGALORE( ch_input_reads )
+
+        ch_processed_reads = TRIMGALORE.out.reads
+        ch_discarded_reads = ch_discarded_reads.mix(TRIMGALORE.out.unpaired)
+        ch_log             = TRIMGALORE.out.log
+        ch_report          = TRIMGALORE.out.html.mix(TRIMGALORE.out.zip)
+    } else if (val_adapter_tool == "bbduk") {
+        BBMAP_BBDUK( ch_input_reads, ch_custom_adapters_file )
+
+        ch_processed_reads = BBMAP_BBDUK.out.reads
+        ch_versions        = ch_versions.mix(BBMAP_BBDUK.out.versions.first())
+        ch_multiqc_files   = ch_multiqc_files.mix(BBMAP_BBDUK.out.log)
+    } else if (val_adapter_tool == "leehom") {
+        LEEHOM( ch_input_reads )
+
+        // `remainder: true` keeps a sample even when a joined output has no entry for it (that slot
+        // is then null). Single-end samples take the first value, paired-end ones the two unmerged mates.
+        ch_processed_reads = LEEHOM.out.fq_pass
+            .join(LEEHOM.out.unmerged_r1_fq_pass, by: 0, remainder: true)
+            .join(LEEHOM.out.unmerged_r2_fq_pass, by: 0, remainder: true)
+            .map { meta, pass, r1, r2 ->
+                meta.single_end ? [meta, pass] : [meta, [r1, r2]]
+            }
+        ch_discarded_reads = ch_discarded_reads.mix(LEEHOM.out.fq_fail, LEEHOM.out.unmerged_r1_fq_fail, LEEHOM.out.unmerged_r2_fq_fail)
+        ch_versions        = ch_versions.mix(LEEHOM.out.versions.first())
+        ch_multiqc_files   = ch_multiqc_files.mix(LEEHOM.out.log)
+    } else if (val_adapter_tool == "fastp") {
+        // NOTE(review): `ch_custom_adapters_file` is placed inside every input tuple as a plain value,
+        // so this presumably only works when the caller passes a path or [] rather than a channel - confirm.
+        FASTP(
+            ch_input_reads.map { meta, reads -> [ meta, reads, ch_custom_adapters_file ] },
+            val_fastp_discard_trimmed_pass,
+            val_fastp_save_trimmed_fail,
+            val_save_merged
+        )
+
+        ch_processed_reads = val_save_merged ? FASTP.out.reads_merged : FASTP.out.reads
+        ch_discarded_reads = ch_discarded_reads.mix(FASTP.out.reads_fail.transpose()) // .transpose() because paired reads have 3 fail files in an array
+        ch_log             = FASTP.out.log
+        ch_report          = FASTP.out.html
+        ch_versions        = ch_versions.mix(FASTP.out.versions.first())
+        ch_multiqc_files   = ch_multiqc_files.mix(FASTP.out.json)
+    } else if (val_adapter_tool == "adapterremoval") {
+        // single-end and paired-end samples are routed to separate aliases of the same module
+        ch_reads_by_layout = ch_input_reads
+            .branch { meta, _reads ->
+                single: meta.single_end
+                paired: !meta.single_end
+            }
+
+        ADAPTERREMOVAL_SE( ch_reads_by_layout.single, ch_custom_adapters_file )
+        ADAPTERREMOVAL_PE( ch_reads_by_layout.paired, ch_custom_adapters_file )
+
+        if (val_save_merged) { // merge
+            // The collapsed, collapsed-truncated and singleton outputs of each paired-end sample are
+            // pooled: their meta copy is flagged single_end, then they are grouped by meta for CAT_FASTQ.
+            ch_merged_parts = ADAPTERREMOVAL_PE.out.collapsed
+                .mix(
+                    ADAPTERREMOVAL_PE.out.collapsed_truncated,
+                    ADAPTERREMOVAL_PE.out.singles_truncated,
+                )
+                .map { meta, reads -> [meta + [single_end: true], reads] }
+                .groupTuple()
+                // Paired-end reads cause a nested tuple during grouping.
+                // We want to present a flat list of files to `CAT_FASTQ`.
+                .map { meta, fastq -> [meta, fastq.flatten()] }
+
+            CAT_FASTQ( ch_merged_parts )
+
+            ch_processed_reads = CAT_FASTQ.out.reads.mix(ADAPTERREMOVAL_SE.out.singles_truncated)
+        } else { // no merge
+            ch_processed_reads = ADAPTERREMOVAL_PE.out.paired_truncated.mix(ADAPTERREMOVAL_SE.out.singles_truncated)
+        }
+        ch_discarded_reads = ch_discarded_reads.mix(ADAPTERREMOVAL_SE.out.discarded, ADAPTERREMOVAL_PE.out.discarded)
+        ch_versions        = ch_versions.mix(ADAPTERREMOVAL_SE.out.versions.first(), ADAPTERREMOVAL_PE.out.versions.first())
+        ch_multiqc_files   = ch_multiqc_files.mix(ADAPTERREMOVAL_PE.out.settings, ADAPTERREMOVAL_SE.out.settings)
+    } else {
+        // no branch matched the requested tool name: abort and list the accepted values
+        error('Please choose one of the available adapter removal and merging tools: ["trimmomatic", "cutadapt", "trimgalore", "bbduk", "leehom", "fastp", "adapterremoval"]')
+    }
+
+    emit:
+    processed_reads = ch_processed_reads // channel: [ val(meta), [ fastq.gz ] ]
+    discarded_reads = ch_discarded_reads // channel: [ val(meta), [ fastq.gz ] ]
+    logfile         = ch_log             // channel: [ val(meta), [ {log,txt} ] ]
+    report          = ch_report          // channel: [ val(meta), [ {summary,html,zip} ] ]
+    versions        = ch_versions        // channel: [ versions.yml ]
+    multiqc_files   = ch_multiqc_files   // channel: [ val(meta), file ]
+}
diff --git a/subworkflows/nf-core/fastq_removeadapters_merge/meta.yml
b/subworkflows/nf-core/fastq_removeadapters_merge/meta.yml new file mode 100644 index 000000000000..d45c8cb8766b --- /dev/null +++ b/subworkflows/nf-core/fastq_removeadapters_merge/meta.yml @@ -0,0 +1,92 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "fastq_removeadapters_merge" +description: Remove adapters and merge reads based on various module choices +keywords: + - adapters + - removal + - short reads + - merge + - trim +components: + - trimmomatic + - cutadapt + - trimgalore + - bbmap/bbduk + - leehom + - fastp + - adapterremoval + - cat/fastq +input: + - ch_input_reads: + type: file + description: | + List of FastQ files of size 1 and 2 for single-end and paired-end data, respectively. + Structure: [ val(meta), [ path(reads) ] ] + - val_adapter_tool: + type: string + description: | + Choose one of the available adapter removal and/or merging tools + enum: ["trimmomatic", "cutadapt", "trimgalore", "bbduk", "leehom", "fastp", "adapterremoval"] + - ch_custom_adapters_file: + type: file + description: | + Optional reference files, containing adapter and/or contaminant sequences for removal. + In fasta format for bbmap/bbduk and fastp, or in text format for AdapterRemoval (one adapter per line). + - val_save_merged: + type: boolean + description: | + Specify true to output merged reads instead + Used by fastp and adapterremoval + - val_fastp_discard_trimmed_pass: + type: boolean + description: | + Used only by fastp. + Specify true to not write any reads that pass trimming thresholds from the fastp process. + This can be used to use fastp for the output report only. + - val_fastp_save_trimmed_fail: + type: boolean + description: | + Used only by fastp. 
+ Specify true to save files that failed to pass fastp trimming thresholds +output: + - processed_reads: + type: file + description: | + Structure: [ val(meta), path(fastq.gz) ] + The trimmed/modified single or paired end or merged fastq reads + pattern: "*.fastq.gz" + - discarded_reads: + type: file + description: | + Structure: [ val(meta), path(fastq.gz) ] + The discarded reads + pattern: "*.fastq.gz" + - logfile: + type: file + description: | + Execution log file + (trimmomatic {log}, trimgalore {txt}, fastp {log}) + pattern: "*.{log,txt}" + - report: + type: file + description: | + Execution report + (trimmomatic {summary}, trimgalore {html,zip}, fastp {html}) + pattern: "*.{summary,html,zip}" + - versions: + type: file + description: | + File containing software versions + Structure: [ path(versions.yml) ] + pattern: "versions.yml" + - multiqc_files: + type: file + description: | + MultiQC-compatible output files from tools used in preprocessing + (trimmomatic, cutadapt, bbduk, leehom, fastp, adapterremoval) +authors: + - "@kornkv" + - "@vagkaratzas" +maintainers: + - "@kornkv" + - "@vagkaratzas" diff --git a/subworkflows/nf-core/fastq_removeadapters_merge/tests/main.nf.test b/subworkflows/nf-core/fastq_removeadapters_merge/tests/main.nf.test new file mode 100644 index 000000000000..cafbf5c56ec5 --- /dev/null +++ b/subworkflows/nf-core/fastq_removeadapters_merge/tests/main.nf.test @@ -0,0 +1,304 @@
+nextflow_workflow { // nf-test suite: one real run per adapter tool, plus one stub run
+
+    name "Test Subworkflow FASTQ_REMOVEADAPTERS_MERGE"
+    script "../main.nf"
+    workflow "FASTQ_REMOVEADAPTERS_MERGE"
+
+    tag "subworkflows"
+    tag "subworkflows_nfcore"
+    tag "subworkflows/fastq_removeadapters_merge"
+    tag "trimmomatic"
+    tag "cutadapt"
+    tag "trimgalore"
+    tag "bbmap"
+    tag "bbmap/bbduk"
+    tag "leehom"
+    tag "fastp"
+    tag "adapterremoval"
+    tag "cat"
+    tag "cat/fastq"
+
+    test("sarscov2 - fastq - trimmomatic - single-end") { // snapshots reads, log, summary, MultiQC file names and versions
+        config "./nextflow_SE.config"
+        when {
+            params {
+                save_merged = false
+                adapterremoval_args = save_merged ? "--collapse" : ""
+            }
+            workflow {
+                """
+                input[0] = channel.of([
+                    [ id:'test', single_end:true ],
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
+                ])
+                input[1] = "trimmomatic" // val_adapter_tool
+                input[2] = [] // ch_custom_adapters_file
+                input[3] = params.save_merged // val_save_merged
+                input[4] = false // val_fastp_discard_trimmed_pass
+                input[5] = false // val_fastp_save_trimmed_fail
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert workflow.success },
+                { assert snapshot(
+                    workflow.out.processed_reads[0][1],
+                    workflow.out.logfile[0][1],
+                    workflow.out.report[0][1],
+                    workflow.out.multiqc_files.collect { file(it[1]).name },
+                    workflow.out.versions.collect { path(it).yaml }
+                ).match()}
+            )
+        }
+    }
+
+    test("sarscov2 - fastq - cutadapt - paired-end") { // snapshots the processed reads and the MultiQC file names
+        config "./nextflow.config"
+        when {
+            params {
+                save_merged = false
+                adapterremoval_args = save_merged ? "--collapse" : ""
+            }
+            workflow {
+                """
+                input[0] = channel.of([
+                    [ id:'test', single_end:false ],
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+                        file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)
+                    ]
+                ])
+                input[1] = "cutadapt" // val_adapter_tool
+                input[2] = [] // ch_custom_adapters_file
+                input[3] = params.save_merged // val_save_merged
+                input[4] = false // val_fastp_discard_trimmed_pass
+                input[5] = false // val_fastp_save_trimmed_fail
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert workflow.success },
+                { assert snapshot(
+                    workflow.out.processed_reads[0][1],
+                    workflow.out.multiqc_files.collect { file(it[1]).name }
+                ).match()}
+            )
+        }
+    }
+
+    test("sarscov2 - fastq - trimgalore - single-end") { // the log is compared by line count only, not by content
+        when {
+            params {
+                save_merged = false
+                adapterremoval_args = save_merged ? "--collapse" : ""
+            }
+            workflow {
+                """
+                input[0] = channel.of([
+                    [ id:'test', single_end:true ],
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
+                ])
+                input[1] = "trimgalore" // val_adapter_tool
+                input[2] = [] // ch_custom_adapters_file
+                input[3] = params.save_merged // val_save_merged
+                input[4] = false // val_fastp_discard_trimmed_pass
+                input[5] = false // val_fastp_save_trimmed_fail
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert workflow.success },
+                { assert snapshot(
+                    workflow.out.processed_reads[0][1],
+                    path(workflow.out.logfile[0][1]).readLines().size()
+                ).match()}
+            )
+        }
+    }
+
+    test("sarscov2 - fastq - bbduk - paired-end") { // snapshots the processed reads and the MultiQC file names
+        config "./nextflow.config"
+        when {
+            params {
+                save_merged = false
+                adapterremoval_args = save_merged ? "--collapse" : ""
+            }
+            workflow {
+                """
+                input[0] = channel.of([
+                    [ id:'test_bbduk', single_end:false ],
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+                        file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)
+                    ]
+                ])
+                input[1] = "bbduk" // val_adapter_tool
+                input[2] = [] // ch_custom_adapters_file
+                input[3] = params.save_merged // val_save_merged
+                input[4] = false // val_fastp_discard_trimmed_pass
+                input[5] = false // val_fastp_save_trimmed_fail
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert workflow.success },
+                { assert snapshot(
+                    workflow.out.processed_reads[0][1],
+                    workflow.out.multiqc_files.collect { file(it[1]).name }
+                ).match()}
+            )
+        }
+    }
+
+    test("sarscov2 - fastq - leehom - single-end") { // discarded reads and MultiQC files are compared by file name
+        when {
+            params {
+                save_merged = false
+                adapterremoval_args = save_merged ? "--collapse" : ""
+            }
+            workflow {
+                """
+                input[0] = channel.of([
+                    [ id:'test', single_end:true ],
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
+                ])
+                input[1] = "leehom" // val_adapter_tool
+                input[2] = [] // ch_custom_adapters_file
+                input[3] = params.save_merged // val_save_merged
+                input[4] = false // val_fastp_discard_trimmed_pass
+                input[5] = false // val_fastp_save_trimmed_fail
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert workflow.success },
+                { assert snapshot(
+                    workflow.out.processed_reads[0][1],
+                    workflow.out.discarded_reads.collect { file(it[1]).name },
+                    workflow.out.multiqc_files.collect { file(it[1]).name },
+                    workflow.out.versions.collect { path(it).yaml }
+                ).match()}
+            )
+        }
+    }
+
+    test("sarscov2 - fastq - fastp - single-end") { // the only case that sets val_fastp_save_trimmed_fail to true
+        when {
+            params {
+                save_merged = false
+                adapterremoval_args = save_merged ? "--collapse" : ""
+            }
+            workflow {
+                """
+                input[0] = channel.of([
+                    [ id:'test', single_end:true ],
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
+                ])
+                input[1] = "fastp" // val_adapter_tool
+                input[2] = [] // ch_custom_adapters_file
+                input[3] = params.save_merged // val_save_merged
+                input[4] = false // val_fastp_discard_trimmed_pass
+                input[5] = true // val_fastp_save_trimmed_fail
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert workflow.success },
+                { assert snapshot(
+                    workflow.out.processed_reads[0][1],
+                    workflow.out.discarded_reads[0][1],
+                    path(workflow.out.logfile[0][1]).readLines().size(),
+                    path(workflow.out.report[0][1]).readLines().size(),
+                    workflow.out.multiqc_files.collect { file(it[1]).name },
+                    workflow.out.versions.collect { path(it).yaml }
+                ).match()}
+            )
+        }
+    }
+
+    test("sarscov2 - fastq - adapterremoval - paired-end - merge") { // the only case with save_merged = true; merged reads compared by line count
+        config "./nextflow_PE.config"
+        when {
+            params {
+                save_merged = true
+                adapterremoval_args = save_merged ? "--collapse" : ""
+            }
+            workflow {
+                """
+                input[0] = channel.of([
+                    [ id:'test', single_end:false ],
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+                        file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)
+                    ]
+                ])
+                input[1] = "adapterremoval" // val_adapter_tool
+                input[2] = [] // ch_custom_adapters_file
+                input[3] = params.save_merged // val_save_merged
+                input[4] = false // val_fastp_discard_trimmed_pass
+                input[5] = false // val_fastp_save_trimmed_fail
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert workflow.success },
+                { assert snapshot(
+                    path(workflow.out.processed_reads[0][1]).linesGzip.size(),
+                    workflow.out.discarded_reads.collect { file(it[1]).name },
+                    workflow.out.versions.collect { path(it).yaml },
+                    workflow.out.multiqc_files.collect { file(it[1]).name }
+                ).match()}
+            )
+        }
+    }
+
+    test("sarscov2 - fastq - trimmomatic - paired-end - stub") { // stub run: the whole workflow.out is snapshotted
+        config "./nextflow_PE.config"
+        options "-stub"
+
+        when {
+            params {
+                save_merged = false
+                adapterremoval_args = save_merged ? "--collapse" : ""
+            }
+            workflow {
+                """
+                input[0] = channel.of([
+                    [ id:'test', single_end:false ],
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+                        file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)
+                    ]
+                ])
+                input[1] = "trimmomatic" // val_adapter_tool
+                input[2] = [] // ch_custom_adapters_file
+                input[3] = params.save_merged // val_save_merged
+                input[4] = false // val_fastp_discard_trimmed_pass
+                input[5] = false // val_fastp_save_trimmed_fail
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert workflow.success },
+                { assert snapshot(workflow.out).match() }
+            )
+        }
+    }
+
+}
diff --git a/subworkflows/nf-core/fastq_removeadapters_merge/tests/main.nf.test.snap b/subworkflows/nf-core/fastq_removeadapters_merge/tests/main.nf.test.snap new file mode 100644 index 000000000000..a50ea253c64f --- /dev/null +++ b/subworkflows/nf-core/fastq_removeadapters_merge/tests/main.nf.test.snap @@ -0,0 +1,263 @@ +{ + "sarscov2 - fastq - trimmomatic - single-end": { + "content": [ + "test.SE.paired.trim.fastq.gz:md5,e68abbd3b88f7ec12940a4f5c2b8bfb9", + "test_trim.log:md5,e4c3f619e9b0e26847f8f3e3d9af319b", + "test.summary:md5,24c973237557a1439c775ca19a5deaa5", + [ + "test_out.log" + ], + [ + { + "FASTQ_REMOVEADAPTERS_MERGE:TRIMMOMATIC": { + "trimmomatic": 0.39 + } + } + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-19T11:19:19.104684739" + }, + "sarscov2 - fastq - leehom - single-end": { + "content": [ + "test.fq.gz:md5,304af6f5f6bb58c70abf7924eacfa175", + [ + "test.fail.fq.gz" + ], + [ + "test.log" + ], + [ + { + "FASTQ_REMOVEADAPTERS_MERGE:LEEHOM": { + "leehom": "1.2.15" + } + } + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-19T11:48:35.122105584" + }, + "sarscov2 - fastq - trimmomatic - paired-end - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", +
"single_end": false + }, + [ + "test.paired.trim_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.paired.trim_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.unpaired.trim_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ], + [ + { + "id": "test", + "single_end": false + }, + "test.unpaired.trim_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test_trim.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.summary:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + "versions.yml:md5,134eb815a4e39fddbfa930d8023aafaa" + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "discarded_reads": [ + [ + { + "id": "test", + "single_end": false + }, + "test.unpaired.trim_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ], + [ + { + "id": "test", + "single_end": false + }, + "test.unpaired.trim_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "logfile": [ + [ + { + "id": "test", + "single_end": false + }, + "test_trim.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "multiqc_files": [ + [ + { + "id": "test", + "single_end": false + }, + "test_out.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "processed_reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.paired.trim_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.paired.trim_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "report": [ + [ + { + "id": "test", + "single_end": false + }, + "test.summary:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,134eb815a4e39fddbfa930d8023aafaa" + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-12T12:06:23.539591046" + }, + "sarscov2 
- fastq - cutadapt - paired-end": { + "content": [ + [ + "test_1.trim.fastq.gz:md5,682dab8e982563cffac2bd60bf7444f4", + "test_2.trim.fastq.gz:md5,1a1e8ab23af40e6474be0cdf82fab907" + ], + [ + "test.cutadapt.log" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-19T11:19:27.959952445" + }, + "sarscov2 - fastq - bbduk - paired-end": { + "content": [ + [ + "test_bbduk_1.fastq.gz:md5,4161df271f9bfcd25d5845a1e220dbec", + "test_bbduk_2.fastq.gz:md5,2ebae722295ea66d84075a3b042e2b42" + ], + [ + "test_bbduk.bbduk.log" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-19T11:19:41.651709543" + }, + "sarscov2 - fastq - trimgalore - single-end": { + "content": [ + "test_trimmed.fq.gz:md5,566d44cca0d22c522d6cf0e50c7165dc", + 60 + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-19T11:19:34.98214978" + }, + "sarscov2 - fastq - adapterremoval - paired-end - merge": { + "content": [ + 336, + [ + "test.discarded.fastq.gz" + ], + [ + { + "FASTQ_REMOVEADAPTERS_MERGE:ADAPTERREMOVAL_PE": { + "adapterremoval": "2.3.2" + } + } + ], + [ + "test.settings" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-14T14:17:56.133855749" + }, + "sarscov2 - fastq - fastp - single-end": { + "content": [ + "test.fastp.fastq.gz:md5,67b2bbae47f073e05a97a9c2edce23c7", + "test.fail.fastq.gz:md5,3e4aaadb66a5b8fc9b881bf39c227abd", + 32, + 2394, + [ + "test.fastp.json" + ], + [ + { + "FASTQ_REMOVEADAPTERS_MERGE:FASTP": { + "fastp": "1.0.1" + } + } + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-19T11:49:02.995746975" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/fastq_removeadapters_merge/tests/nextflow.config b/subworkflows/nf-core/fastq_removeadapters_merge/tests/nextflow.config new file mode 100644 index 000000000000..e1badf49b21a --- /dev/null +++ 
b/subworkflows/nf-core/fastq_removeadapters_merge/tests/nextflow.config @@ -0,0 +1,9 @@ +process { + withName: 'CUTADAPT' { + ext.args = '-q 25' + } + + withName: 'BBMAP_BBDUK' { + ext.args = 'trimq=10 qtrim=r' + } +} diff --git a/subworkflows/nf-core/fastq_removeadapters_merge/tests/nextflow_PE.config b/subworkflows/nf-core/fastq_removeadapters_merge/tests/nextflow_PE.config new file mode 100644 index 000000000000..dc4eef5d6025 --- /dev/null +++ b/subworkflows/nf-core/fastq_removeadapters_merge/tests/nextflow_PE.config @@ -0,0 +1,9 @@ +process { + withName: 'TRIMMOMATIC' { + ext.args = 'ILLUMINACLIP:TruSeq3-PE.fa:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36' + } + + withName: 'ADAPTERREMOVAL_PE' { + ext.args = params.adapterremoval_args + } +} diff --git a/subworkflows/nf-core/fastq_removeadapters_merge/tests/nextflow_SE.config b/subworkflows/nf-core/fastq_removeadapters_merge/tests/nextflow_SE.config new file mode 100644 index 000000000000..82c186711dab --- /dev/null +++ b/subworkflows/nf-core/fastq_removeadapters_merge/tests/nextflow_SE.config @@ -0,0 +1,9 @@ +process { + withName: 'TRIMMOMATIC' { + ext.args = 'ILLUMINACLIP:TruSeq3-SE:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36' + } + + withName: 'ADAPTERREMOVAL_SE' { + ext.args = params.adapterremoval_args + } +} From 32102cc584f4dee55caaec850cc3fdc74363832e Mon Sep 17 00:00:00 2001 From: vagkaratzas Date: Thu, 15 Jan 2026 11:07:16 +0000 Subject: [PATCH 16/31] FASTQ_REMOVEADAPTERS_MERGE added --- .../fastq_shortreads_preprocess_qc/main.nf | 125 ++++++++------ .../fastq_shortreads_preprocess_qc/meta.yml | 7 +- .../tests/main.nf.test | 161 ++++++++++-------- 3 files changed, 169 insertions(+), 124 deletions(-) diff --git a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/main.nf b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/main.nf index 42805776f806..e94aef0fd6ed 100644 --- a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/main.nf +++ 
b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/main.nf @@ -6,7 +6,7 @@ include { FASTQ_PREPROCESS_SEQKIT } from '../fastq_preprocess_seqkit/ // barcoding include { UMITOOLS_EXTRACT } from '../../../modules/nf-core/umitools/extract/main' // adapter removal and merging -// include { FASTQ_REMOVEADAPTERS_MERGE } from '../fastq_removeadapters_merge/main' +include { FASTQ_REMOVEADAPTERS_MERGE } from '../fastq_removeadapters_merge/main' // complexity filtering include { PRINSEQPLUSPLUS } from '../../../modules/nf-core/prinseqplusplus/main' // deduplication @@ -19,59 +19,67 @@ include { CAT_FASTQ } from '../../../modules/nf-core/ca workflow FASTQ_SHORTREADS_PREPROCESS_QC { take: - ch_reads // channel: [ val(meta), [ fastq ] ] + ch_reads // channel: [ val(meta), [ fastq ] ] // statistics - skip_fastqc // boolean - skip_seqfu_check // boolean - skip_seqfu_stats // boolean - skip_seqkit_stats // boolean - skip_seqtk_comp // boolean + skip_fastqc // boolean + skip_seqfu_check // boolean + skip_seqfu_stats // boolean + skip_seqkit_stats // boolean + skip_seqtk_comp // boolean // preprocessing - skip_seqkit_sana_pair // boolean - skip_seqkit_seq // boolean - skip_seqkit_replace // boolean - skip_seqkit_rmdup // boolean + skip_seqkit_sana_pair // boolean + skip_seqkit_seq // boolean + skip_seqkit_replace // boolean + skip_seqkit_rmdup // boolean // barcoding - skip_umitools_extract // boolean - umi_discard_read // integer: 0, 1 or 2 + skip_umitools_extract // boolean + val_umi_discard_read // integer: 0, 1 or 2 // adapter removal and merging - // skip_adapterremoval // boolean + skip_adapterremoval // boolean + val_adapter_tool // string: [mandatory] tool_name // choose from: ["trimmomatic", "cutadapt", "trimgalore", "bbduk", "leehom", "fastp", "adapterremoval"] + ch_custom_adapters_file // channel: [optional] {fasta,txt} // fasta, for bbduk or fastp, or txt, for adapterremoval + val_save_merged // boolean: [mandatory] if true, will return the merged reads instead, for 
fastp and adapterremoval + val_fastp_discard_trimmed_pass // boolean: [mandatory] // only for fastp + val_fastp_save_trimmed_fail // boolean: [mandatory] // only for fastp // complexity filtering - skip_complexity_filtering // boolean + skip_complexity_filtering // boolean // deduplication - skip_deduplication // boolean + skip_deduplication // boolean // host decontamination - skip_decontamination // boolean - ch_fasta // channel: [ val(meta), [ fasta ] ] (optional) - ch_reference // channel: [ val(reference_name), path(reference_dir) ] (optional) - index_name // val (optional) - decontaminator // string (enum): 'hostile' or 'deacon' + skip_decontamination // boolean + ch_fasta // channel: [ val(meta), [ fasta ] ] (optional) + ch_reference // channel: [ val(reference_name), path(reference_dir) ] (optional) + val_index_name // val (optional) + val_decontaminator // string (enum): 'hostile' or 'deacon' // final concatenation - skip_final_concatenation // boolean + skip_final_concatenation // boolean main: - ch_versions = channel.empty() - ch_multiqc_files = channel.empty() - ch_pre_stats_fastqc_html = channel.empty() - ch_pre_stats_fastqc_zip = channel.empty() - ch_pre_stats_seqfu_check = channel.empty() - ch_pre_stats_seqfu_stats = channel.empty() - ch_pre_stats_seqkit_stats = channel.empty() - ch_pre_stats_seqtk_stats = channel.empty() - ch_post_stats_fastqc_html = channel.empty() - ch_post_stats_fastqc_zip = channel.empty() - ch_post_stats_seqfu_check = channel.empty() - ch_post_stats_seqfu_stats = channel.empty() - ch_post_stats_seqkit_stats = channel.empty() - ch_post_stats_seqtk_stats = channel.empty() - ch_umi_log = channel.empty() - ch_prinseq_log = channel.empty() - ch_clumpify_log = channel.empty() - ch_hostile_reference = channel.empty() - ch_hostile_json = channel.empty() - ch_deacon_index = channel.empty() - ch_deacon_summary = channel.empty() + ch_versions = channel.empty() + ch_multiqc_files = channel.empty() + ch_pre_stats_fastqc_html = 
channel.empty() + ch_pre_stats_fastqc_zip = channel.empty() + ch_pre_stats_seqfu_check = channel.empty() + ch_pre_stats_seqfu_stats = channel.empty() + ch_pre_stats_seqkit_stats = channel.empty() + ch_pre_stats_seqtk_stats = channel.empty() + ch_post_stats_fastqc_html = channel.empty() + ch_post_stats_fastqc_zip = channel.empty() + ch_post_stats_seqfu_check = channel.empty() + ch_post_stats_seqfu_stats = channel.empty() + ch_post_stats_seqkit_stats = channel.empty() + ch_post_stats_seqtk_stats = channel.empty() + ch_umi_log = channel.empty() + ch_adapterremoval_discarded_reads = channel.empty() + ch_adapterremoval_logfile = channel.empty() + ch_adapterremoval_report = channel.empty() + ch_prinseq_log = channel.empty() + ch_clumpify_log = channel.empty() + ch_hostile_reference = channel.empty() + ch_hostile_json = channel.empty() + ch_deacon_index = channel.empty() + ch_deacon_summary = channel.empty() // pre-statistics PRE_STATS ( @@ -110,10 +118,10 @@ workflow FASTQ_SHORTREADS_PREPROCESS_QC { ch_versions = ch_versions.mix(UMITOOLS_EXTRACT.out.versions.first()) // Discard R1 / R2 if required - if (umi_discard_read in [1, 2]) { + if (val_umi_discard_read in [1, 2]) { ch_umi_reads = UMITOOLS_EXTRACT.out.reads .map { meta, reads -> - meta.single_end ? [meta, reads] : [meta + ['single_end': true], reads[umi_discard_read % 2]] + meta.single_end ? 
[meta, reads] : [meta + ['single_end': true], reads[val_umi_discard_read % 2]] } } @@ -121,10 +129,22 @@ workflow FASTQ_SHORTREADS_PREPROCESS_QC { } // adapter removal and merging - // TODO - // if (!skip_adapterremoval) { - - // } + if (!skip_adapterremoval) { + FASTQ_REMOVEADAPTERS_MERGE ( + ch_reads, + val_adapter_tool, + ch_custom_adapters_file, + val_save_merged, + val_fastp_discard_trimmed_pass, + val_fastp_save_trimmed_fail + ) + ch_adapterremoval_discarded_reads = FASTQ_REMOVEADAPTERS_MERGE.out.discarded_reads + ch_adapterremoval_logfile = FASTQ_REMOVEADAPTERS_MERGE.out.ch_log + ch_adapterremoval_report = FASTQ_REMOVEADAPTERS_MERGE.out.ch_report + ch_reads = FASTQ_REMOVEADAPTERS_MERGE.out.processed_reads + ch_multiqc_files = ch_multiqc_files.mix(FASTQ_REMOVEADAPTERS_MERGE.out.multiqc_files) + ch_versions = ch_versions.mix(FASTQ_REMOVEADAPTERS_MERGE.out.versions) + } // complexity filtering if (!skip_complexity_filtering) { @@ -148,8 +168,8 @@ workflow FASTQ_SHORTREADS_PREPROCESS_QC { ch_reads, ch_fasta, ch_reference, - index_name, - decontaminator + val_index_name, + val_decontaminator ) ch_reads = FASTQ_DECONTAMINATE_DEACON_HOSTILE.out.fastq_filtered ch_hostile_reference = FASTQ_DECONTAMINATE_DEACON_HOSTILE.out.reference @@ -206,6 +226,9 @@ workflow FASTQ_SHORTREADS_PREPROCESS_QC { umi_log = ch_umi_log // adapter removal and merging + adapterremoval_discarded_reads = ch_adapterremoval_discarded_reads + adapterremoval_logfile = ch_adapterremoval_logfile + adapterremoval_report = ch_adapterremoval_report // complexity filtering prinseq_log = ch_prinseq_log diff --git a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml index 1a725fee2d21..0d043c66794f 100644 --- a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml +++ b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml @@ -36,6 +36,7 @@ components: - seqkit/replace - seqkit/rmdup - umitools/extract + - 
fastq_removeadapters_merge - prinseqplusplus - bbmap/clumpify - fastq_decontaminate_deacon_hostile @@ -95,7 +96,7 @@ input: type: boolean description: | Skip UMI-tools extract barcoding step - - umi_discard_read: + - val_umi_discard_read: type: integer description: | Discard R1 or R2 after UMI extraction (0 = keep both, 1 = discard R1, 2 = discard R2) @@ -122,11 +123,11 @@ input: description: | Pre-built reference index directory for decontamination (optional) Structure: [ val(reference_name), path(reference_dir) ] - - index_name: + - val_index_name: type: string description: | Name for the decontamination index (optional) - - decontaminator: + - val_decontaminator: type: string description: | Decontamination tool to use ('hostile' or 'deacon') diff --git a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test index 112660226a1c..d115fdee289f 100644 --- a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test +++ b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test @@ -27,6 +27,7 @@ nextflow_workflow { tag "seqkit/rmdup" tag "umitools" tag "umitools/extract" + tag "subworkflows/fastq_removeadapters_merge" tag "prinseqplusplus" tag "bbmap" tag "bbmap/clumpify" @@ -62,21 +63,26 @@ nextflow_workflow { input[8] = false // skip_seqkit_replace input[9] = false // skip_seqkit_rmdup input[10] = true // skip_umitools_extract - input[11] = 0 // umi_discard_read - - input[12] = true // skip_complexity_filtering - input[13] = true // skip_deduplication - input[14] = false // skip_decontamination - input[15] = channel.of( + input[11] = 0 // val_umi_discard_read + input[12] = true // skip_adapterremoval + input[13] = "" // val_adapter_tool + input[14] = [] // ch_custom_adapters_file + input[15] = false // val_save_merged + input[16] = false // val_fastp_discard_trimmed_pass + input[17] = false // val_fastp_save_trimmed_fail + input[18] = true // 
skip_complexity_filtering + input[19] = true // skip_deduplication + input[20] = false // skip_decontamination + input[21] = channel.of( [ [ id:'test', single_end:true ], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] ) // ch_fasta - input[16] = [] // ch_reference - input[17] = [] // index_name - input[18] = 'deacon' // decontaminator - input[19] = true // skip_final_concatenation + input[22] = [] // ch_reference + input[23] = [] // val_index_name + input[24] = 'deacon' // val_decontaminator + input[25] = true // skip_final_concatenation """ } } @@ -107,31 +113,36 @@ nextflow_workflow { [ id:'test', single_end:true ], file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ]) - input[1] = true // skip_fastqc - input[2] = true // skip_seqfu_check - input[3] = true // skip_seqfu_stats - input[4] = true // skip_seqkit_stats - input[5] = true // skip_seqtk_comp - input[6] = true // skip_seqkit_sana_pair - input[7] = true // skip_seqkit_seq - input[8] = true // skip_seqkit_replace - input[9] = true // skip_seqkit_rmdup - input[10] = false // skip_umitools_extract - input[11] = 0 // umi_discard_read - - input[12] = false // skip_complexity_filtering - input[13] = false // skip_deduplication - input[14] = true // skip_decontamination - input[15] = channel.of( + input[1] = true // skip_fastqc + input[2] = true // skip_seqfu_check + input[3] = true // skip_seqfu_stats + input[4] = true // skip_seqkit_stats + input[5] = true // skip_seqtk_comp + input[6] = true // skip_seqkit_sana_pair + input[7] = true // skip_seqkit_seq + input[8] = true // skip_seqkit_replace + input[9] = true // skip_seqkit_rmdup + input[10] = false // skip_umitools_extract + input[11] = 0 // val_umi_discard_read + input[12] = true // skip_adapterremoval + input[13] = "" // val_adapter_tool + input[14] = [] // ch_custom_adapters_file + input[15] = false // val_save_merged + input[16] = 
false // val_fastp_discard_trimmed_pass + input[17] = false // val_fastp_save_trimmed_fail + input[18] = false // skip_complexity_filtering + input[19] = false // skip_deduplication + input[20] = true // skip_decontamination + input[21] = channel.of( [ [ id:'test', single_end:true ], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] ) // ch_fasta - input[16] = [] // ch_reference - input[17] = [] // index_name - input[18] = 'deacon' // decontaminator - input[19] = false // skip_final_concatenation + input[22] = [] // ch_reference + input[23] = [] // val_index_name + input[24] = 'deacon' // val_decontaminator + input[25] = false // skip_final_concatenation """ } } @@ -157,26 +168,31 @@ nextflow_workflow { [ id:'test', single_end:true ], file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ]) - input[1] = true // skip_fastqc - input[2] = true // skip_seqfu_check - input[3] = true // skip_seqfu_stats - input[4] = true // skip_seqkit_stats - input[5] = true // skip_seqtk_comp - input[6] = true // skip_seqkit_sana_pair - input[7] = true // skip_seqkit_seq - input[8] = true // skip_seqkit_replace - input[9] = true // skip_seqkit_rmdup - input[10] = true // skip_umitools_extract - input[11] = 0 // umi_discard_read - - input[12] = true // skip_complexity_filtering - input[13] = true // skip_deduplication - input[14] = true // skip_decontamination - input[15] = [] // ch_fasta - input[16] = [] // ch_reference - input[17] = [] // index_name - input[18] = "" // decontaminator - input[19] = true // skip_final_concatenation + input[1] = true // skip_fastqc + input[2] = true // skip_seqfu_check + input[3] = true // skip_seqfu_stats + input[4] = true // skip_seqkit_stats + input[5] = true // skip_seqtk_comp + input[6] = true // skip_seqkit_sana_pair + input[7] = true // skip_seqkit_seq + input[8] = true // skip_seqkit_replace + input[9] = true // skip_seqkit_rmdup + 
input[10] = true // skip_umitools_extract + input[11] = 0 // val_umi_discard_read + input[12] = true // skip_adapterremoval + input[13] = "" // val_adapter_tool + input[14] = [] // ch_custom_adapters_file + input[15] = false // val_save_merged + input[16] = false // val_fastp_discard_trimmed_pass + input[17] = false // val_fastp_save_trimmed_fail + input[18] = true // skip_complexity_filtering + input[19] = true // skip_deduplication + input[20] = true // skip_decontamination + input[21] = [] // ch_fasta + input[22] = [] // ch_reference + input[23] = [] // val_index_name + input[24] = "" // val_decontaminator + input[25] = true // skip_final_concatenation """ } } @@ -202,26 +218,31 @@ nextflow_workflow { [ id:'test', single_end:true ], file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ]) - input[1] = true // skip_fastqc - input[2] = true // skip_seqfu_check - input[3] = true // skip_seqfu_stats - input[4] = true // skip_seqkit_stats - input[5] = true // skip_seqtk_comp - input[6] = true // skip_seqkit_sana_pair - input[7] = true // skip_seqkit_seq - input[8] = true // skip_seqkit_replace - input[9] = true // skip_seqkit_rmdup - input[10] = true // skip_umitools_extract - input[11] = 0 // umi_discard_read - - input[12] = true // skip_complexity_filtering - input[13] = true // skip_deduplication - input[14] = true // skip_decontamination - input[15] = [] // ch_fasta - input[16] = [] // ch_reference - input[17] = [] // index_name - input[18] = "" // decontaminator - input[19] = true // skip_final_concatenation + input[1] = true // skip_fastqc + input[2] = true // skip_seqfu_check + input[3] = true // skip_seqfu_stats + input[4] = true // skip_seqkit_stats + input[5] = true // skip_seqtk_comp + input[6] = true // skip_seqkit_sana_pair + input[7] = true // skip_seqkit_seq + input[8] = true // skip_seqkit_replace + input[9] = true // skip_seqkit_rmdup + input[10] = true // skip_umitools_extract + input[11] = 
0 // val_umi_discard_read + input[12] = true // skip_adapterremoval + input[13] = "" // val_adapter_tool + input[14] = [] // ch_custom_adapters_file + input[15] = false // val_save_merged + input[16] = false // val_fastp_discard_trimmed_pass + input[17] = false // val_fastp_save_trimmed_fail + input[18] = true // skip_complexity_filtering + input[19] = true // skip_deduplication + input[20] = true // skip_decontamination + input[21] = [] // ch_fasta + input[22] = [] // ch_reference + input[23] = [] // val_index_name + input[24] = "" // val_decontaminator + input[25] = true // skip_final_concatenation """ } } From b215d3e5328ee1760cf2281c6e6041240fba4a71 Mon Sep 17 00:00:00 2001 From: vagkaratzas Date: Thu, 15 Jan 2026 11:24:46 +0000 Subject: [PATCH 17/31] paired-end, adapterremoval test added --- .../fastq_shortreads_preprocess_qc/main.nf | 4 +- .../tests/main.nf.test | 61 +++++++++++++++++ .../tests/main.nf.test.snap | 65 +++++++++++++++++++ 3 files changed, 128 insertions(+), 2 deletions(-) diff --git a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/main.nf b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/main.nf index e94aef0fd6ed..86f152f82ae7 100644 --- a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/main.nf +++ b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/main.nf @@ -139,8 +139,8 @@ workflow FASTQ_SHORTREADS_PREPROCESS_QC { val_fastp_save_trimmed_fail ) ch_adapterremoval_discarded_reads = FASTQ_REMOVEADAPTERS_MERGE.out.discarded_reads - ch_adapterremoval_logfile = FASTQ_REMOVEADAPTERS_MERGE.out.ch_log - ch_adapterremoval_report = FASTQ_REMOVEADAPTERS_MERGE.out.ch_report + ch_adapterremoval_logfile = FASTQ_REMOVEADAPTERS_MERGE.out.logfile + ch_adapterremoval_report = FASTQ_REMOVEADAPTERS_MERGE.out.report ch_reads = FASTQ_REMOVEADAPTERS_MERGE.out.processed_reads ch_multiqc_files = ch_multiqc_files.mix(FASTQ_REMOVEADAPTERS_MERGE.out.multiqc_files) ch_versions = ch_versions.mix(FASTQ_REMOVEADAPTERS_MERGE.out.versions) diff --git 
a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test index d115fdee289f..5658f1793e02 100644 --- a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test +++ b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test @@ -159,6 +159,67 @@ nextflow_workflow { } } + test("sarscov2 - fastq - fastqc - seqkit - cutadapt - clumpify - cat - paired_end") { + + when { + workflow { + """ + input[0] = channel.of([ + [ id:'test', single_end:false ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = false // skip_fastqc + input[2] = true // skip_seqfu_check + input[3] = true // skip_seqfu_stats + input[4] = true // skip_seqkit_stats + input[5] = true // skip_seqtk_comp + input[6] = false // skip_seqkit_sana_pair + input[7] = false // skip_seqkit_seq + input[8] = false // skip_seqkit_replace + input[9] = false // skip_seqkit_rmdup + input[10] = true // skip_umitools_extract + input[11] = 0 // val_umi_discard_read + input[12] = false // skip_adapterremoval + input[13] = "cutadapt" // val_adapter_tool + input[14] = [] // ch_custom_adapters_file + input[15] = false // val_save_merged + input[16] = false // val_fastp_discard_trimmed_pass + input[17] = false // val_fastp_save_trimmed_fail + input[18] = true // skip_complexity_filtering + input[19] = false // skip_deduplication + input[20] = true // skip_decontamination + input[21] = [] // ch_fasta + input[22] = [] // ch_reference + input[23] = [] // val_index_name + input[24] = '' // val_decontaminator + input[25] = false // skip_final_concatenation + """ + } + } + then { + assert workflow.success + assertAll( + { assert snapshot( + workflow.out.reads, + file(workflow.out.pre_stats_fastqc_html[0][1][0]).name, 
+ file(workflow.out.pre_stats_fastqc_html[0][1][1]).name, + file(workflow.out.post_stats_fastqc_html[0][1][0]).name, + file(workflow.out.post_stats_fastqc_html[0][1][1]).name, + file(workflow.out.pre_stats_fastqc_zip[0][1][0]).name, + file(workflow.out.pre_stats_fastqc_zip[0][1][1]).name, + file(workflow.out.post_stats_fastqc_zip[0][1][0]).name, + file(workflow.out.post_stats_fastqc_zip[0][1][1]).name, + workflow.out.adapterremoval_discarded_reads.collect { file(it[1]).name }, + path(workflow.out.clumpify_log[0][1]).readLines().size(), + workflow.out.versions.collect { path(it).yaml } + ).match() } + ) + } + } + test("sarscov2 - fastq - skip all - single_end") { when { diff --git a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test.snap b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test.snap index 4784df43b159..a0ad7f7f45a8 100644 --- a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test.snap +++ b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test.snap @@ -118,6 +118,71 @@ }, "timestamp": "2026-01-13T13:42:12.770307912" }, + "sarscov2 - fastq - fastqc - seqkit - cutadapt - clumpify - cat - paired_end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,42fed2ac405007f67eb6aee7888aa161", + "test_2.merged.fastq.gz:md5,84ea538be7792467d2a8d8201a8d5da7" + ] + ] + ], + "test_1_fastqc.html", + "test_2_fastqc.html", + "test_1_fastqc.html", + "test_2_fastqc.html", + "test_1_fastqc.zip", + "test_2_fastqc.zip", + "test_1_fastqc.zip", + "test_2_fastqc.zip", + [ + + ], + 36, + [ + { + "FASTQ_SHORTREADS_PREPROCESS_QC:FASTQ_PREPROCESS_SEQKIT:SEQKIT_REPLACE": { + "seqkit": "2.9.0" + } + }, + { + "FASTQ_SHORTREADS_PREPROCESS_QC:BBMAP_CLUMPIFY": { + "bbmap": 39.18 + } + }, + { + "FASTQ_SHORTREADS_PREPROCESS_QC:FASTQ_PREPROCESS_SEQKIT:SEQKIT_RMDUP": { + "seqkit": "v2.9.0" + } + }, + { + 
"FASTQ_SHORTREADS_PREPROCESS_QC:FASTQ_PREPROCESS_SEQKIT:SEQKIT_SEQ": { + "seqkit": "v2.9.0" + } + }, + { + "FASTQ_SHORTREADS_PREPROCESS_QC:FASTQ_PREPROCESS_SEQKIT:FASTQ_SANITISE_SEQKIT:SEQKIT_SANA": { + "seqkit": "2.10.1" + } + }, + { + "FASTQ_SHORTREADS_PREPROCESS_QC:FASTQ_PREPROCESS_SEQKIT:FASTQ_SANITISE_SEQKIT:SEQKIT_PAIR": { + "seqkit": "2.9.0" + } + } + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-15T11:19:16.402935285" + }, "sarscov2 - fastq - skip all - single_end": { "content": [ "/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test_1.fastq.gz", From a92870964c535dbf7fd6b871bda11f89ebd3299d Mon Sep 17 00:00:00 2001 From: vagkaratzas Date: Thu, 15 Jan 2026 11:58:17 +0000 Subject: [PATCH 18/31] adapterremoval test --- .../tests/main.nf.test | 90 ++++++++++++++++++- .../tests/main.nf.test.snap | 39 ++++---- .../tests/nextflow.config | 18 ++-- 3 files changed, 125 insertions(+), 22 deletions(-) diff --git a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test index 5658f1793e02..1453993a0caf 100644 --- a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test +++ b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test @@ -47,6 +47,10 @@ nextflow_workflow { test("sarscov2 - fastq - seqfu - seqkit - deacon - single_end") { when { + params { + save_merged = false + adapterremoval_args = save_merged ? "--collapse" : "" + } workflow { """ input[0] = channel.of([ @@ -107,6 +111,10 @@ nextflow_workflow { test("sarscov2 - fastq - umitools - prinseq - clumpify - cat - single_end") { when { + params { + save_merged = false + adapterremoval_args = save_merged ? 
"--collapse" : "" + } workflow { """ input[0] = channel.of([ @@ -162,6 +170,10 @@ nextflow_workflow { test("sarscov2 - fastq - fastqc - seqkit - cutadapt - clumpify - cat - paired_end") { when { + params { + save_merged = false + adapterremoval_args = save_merged ? "--collapse" : "" + } workflow { """ input[0] = channel.of([ @@ -203,7 +215,8 @@ nextflow_workflow { assert workflow.success assertAll( { assert snapshot( - workflow.out.reads, + path(workflow.out.reads[0][1][0]).linesGzip.size(), + path(workflow.out.reads[0][1][1]).linesGzip.size(), file(workflow.out.pre_stats_fastqc_html[0][1][0]).name, file(workflow.out.pre_stats_fastqc_html[0][1][1]).name, file(workflow.out.post_stats_fastqc_html[0][1][0]).name, @@ -212,7 +225,6 @@ nextflow_workflow { file(workflow.out.pre_stats_fastqc_zip[0][1][1]).name, file(workflow.out.post_stats_fastqc_zip[0][1][0]).name, file(workflow.out.post_stats_fastqc_zip[0][1][1]).name, - workflow.out.adapterremoval_discarded_reads.collect { file(it[1]).name }, path(workflow.out.clumpify_log[0][1]).readLines().size(), workflow.out.versions.collect { path(it).yaml } ).match() } @@ -220,9 +232,79 @@ nextflow_workflow { } } + test("sarscov2 - fastq - adapterremoval - merge - cat more files - paired_end") { + + when { + params { + save_merged = true + adapterremoval_args = save_merged ? 
"--collapse" : "" + } + workflow { + """ + input[0] = channel.of( + [ + [ id:'test', single_end:false ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ], + [ + [ id:'test2', single_end:false ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true) + ] + ] + ) + input[1] = true // skip_fastqc + input[2] = true // skip_seqfu_check + input[3] = true // skip_seqfu_stats + input[4] = true // skip_seqkit_stats + input[5] = true // skip_seqtk_comp + input[6] = true // skip_seqkit_sana_pair + input[7] = true // skip_seqkit_seq + input[8] = true // skip_seqkit_replace + input[9] = true // skip_seqkit_rmdup + input[10] = true // skip_umitools_extract + input[11] = 0 // val_umi_discard_read + input[12] = false // skip_adapterremoval + input[13] = "adapterremoval" // val_adapter_tool + input[14] = [] // ch_custom_adapters_file + input[15] = params.save_merged // val_save_merged + input[16] = false // val_fastp_discard_trimmed_pass + input[17] = false // val_fastp_save_trimmed_fail + input[18] = true // skip_complexity_filtering + input[19] = true // skip_deduplication + input[20] = true // skip_decontamination + input[21] = [] // ch_fasta + input[22] = [] // ch_reference + input[23] = [] // val_index_name + input[24] = '' // val_decontaminator + input[25] = false // skip_final_concatenation + """ + } + } + then { + assert workflow.success + assertAll( + { assert snapshot( + workflow.out.reads[0][1], + workflow.out.reads[1][1], + workflow.out.adapterremoval_discarded_reads.collect { file(it[1]).name }, + workflow.out.versions.collect { path(it).yaml } + ).match() } + ) + } + } + test("sarscov2 - fastq - 
skip all - single_end") { when { + params { + save_merged = false + adapterremoval_args = save_merged ? "--collapse" : "" + } workflow { """ input[0] = channel.of([ @@ -273,6 +355,10 @@ nextflow_workflow { options "-stub" when { + params { + save_merged = false + adapterremoval_args = save_merged ? "--collapse" : "" + } workflow { """ input[0] = channel.of([ diff --git a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test.snap b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test.snap index a0ad7f7f45a8..74b155711f62 100644 --- a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test.snap +++ b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test.snap @@ -118,20 +118,32 @@ }, "timestamp": "2026-01-13T13:42:12.770307912" }, - "sarscov2 - fastq - fastqc - seqkit - cutadapt - clumpify - cat - paired_end": { + "sarscov2 - fastq - adapterremoval - merge - cat more files - paired_end": { "content": [ + "test.merged.fastq.gz:md5,369452751050a7f1e31b839702d61417", + "test2.merged.fastq.gz:md5,369452751050a7f1e31b839702d61417", [ - [ - { - "id": "test", - "single_end": false - }, - [ - "test_1.merged.fastq.gz:md5,42fed2ac405007f67eb6aee7888aa161", - "test_2.merged.fastq.gz:md5,84ea538be7792467d2a8d8201a8d5da7" - ] - ] + "test.discarded.fastq.gz", + "test2.discarded.fastq.gz" ], + [ + { + "FASTQ_SHORTREADS_PREPROCESS_QC:FASTQ_REMOVEADAPTERS_MERGE:ADAPTERREMOVAL_PE": { + "adapterremoval": "2.3.2" + } + } + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-15T11:56:56.943674841" + }, + "sarscov2 - fastq - fastqc - seqkit - cutadapt - clumpify - cat - paired_end": { + "content": [ + 400, + 400, "test_1_fastqc.html", "test_2_fastqc.html", "test_1_fastqc.html", @@ -140,9 +152,6 @@ "test_2_fastqc.zip", "test_1_fastqc.zip", "test_2_fastqc.zip", - [ - - ], 36, [ { @@ -181,7 +190,7 @@ "nf-test": "0.9.3", "nextflow": "25.10.2" }, - "timestamp": 
"2026-01-15T11:19:16.402935285" + "timestamp": "2026-01-15T11:39:36.442733591" }, "sarscov2 - fastq - skip all - single_end": { "content": [ diff --git a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/nextflow.config b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/nextflow.config index 8a439363b822..e7aff186186e 100644 --- a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/nextflow.config +++ b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/nextflow.config @@ -1,9 +1,9 @@ process { - withName: SEQKIT_SANA { + withName: 'SEQKIT_SANA' { ext.prefix = { "${meta.id}_${meta.strandness}" } } - withName: SEQKIT_SEQ { + withName: 'SEQKIT_SEQ' { ext.args = [ "--remove-gaps", "--upper-case", @@ -14,17 +14,25 @@ process { ext.prefix = { "intermediate_seqkit_seq_${meta.strandness}" } } - withName: SEQKIT_REPLACE { + withName: 'SEQKIT_REPLACE' { ext.args = '-p "/" -r "_"' ext.suffix = ".fasta" ext.prefix = { "intermediate_seqkit_replace_${meta.strandness}" } } - withName: SEQKIT_RMDUP { + withName: 'SEQKIT_RMDUP' { ext.prefix = { "${meta.id}_${meta.strandness}" } } - withName: UMITOOLS_EXTRACT { + withName: 'UMITOOLS_EXTRACT' { ext.args = '--bc-pattern="NNNN"' } + + withName: 'CUTADAPT' { + ext.args = '-q 25' + } + + withName: 'ADAPTERREMOVAL_PE' { + ext.args = params.adapterremoval_args + } } From c33293189a3a996e4930a0372c820ace7d08017f Mon Sep 17 00:00:00 2001 From: vagkaratzas Date: Thu, 15 Jan 2026 12:00:49 +0000 Subject: [PATCH 19/31] comment removed --- subworkflows/nf-core/fastq_shortreads_preprocess_qc/main.nf | 1 - 1 file changed, 1 deletion(-) diff --git a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/main.nf b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/main.nf index 86f152f82ae7..83aaee3fafe1 100644 --- a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/main.nf +++ b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/main.nf @@ -182,7 +182,6 @@ workflow FASTQ_SHORTREADS_PREPROCESS_QC { // final 
concatenation if (!skip_final_concatenation) { - // CAT_FASTQ ( ch_reads.map { meta, reads -> [meta, reads.flatten()] } ) // TODO test more cases CAT_FASTQ ( ch_reads ) ch_reads = CAT_FASTQ.out.reads } From cc64bd2eee0c00eadac97361dfd92b0b0f25c081 Mon Sep 17 00:00:00 2001 From: Evangelos Karatzas <32259775+vagkaratzas@users.noreply.github.com> Date: Tue, 20 Jan 2026 14:29:49 +0000 Subject: [PATCH 20/31] Update subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml Co-authored-by: James A. Fellows Yates --- subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml index 0d043c66794f..90c504d85047 100644 --- a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml +++ b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml @@ -13,7 +13,7 @@ keywords: - reads - qc - stats - - preprocess + - preprocessing - barcoding - adapters - merge From c6c5edc891fe0bc4c5884aa678c6040a79673539 Mon Sep 17 00:00:00 2001 From: Evangelos Karatzas <32259775+vagkaratzas@users.noreply.github.com> Date: Tue, 20 Jan 2026 14:30:05 +0000 Subject: [PATCH 21/31] Update subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml Co-authored-by: James A. 
Fellows Yates --- subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml index 90c504d85047..0e63f5b21644 100644 --- a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml +++ b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml @@ -2,7 +2,7 @@ name: "fastq_shortreads_preprocess_qc" description: | Quality check and preprocessing subworkflow of Illumina short reads - that can do; quality check of input reads and generate statistics, + that can do: quality check of input reads and generate statistics, preprocess and validate reads, barcoding, remove adapters and merge reads, filter complexity, deduplicate reads, remove host contamination, concatenate reads and generate statistics for post-processing reads. From 774df05cea41da8cb2af3a3d08ed32b4c99aa6e0 Mon Sep 17 00:00:00 2001 From: Evangelos Karatzas <32259775+vagkaratzas@users.noreply.github.com> Date: Tue, 20 Jan 2026 14:30:25 +0000 Subject: [PATCH 22/31] Update subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml Co-authored-by: James A. 
Fellows Yates --- subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml index 0e63f5b21644..74e4e27d97c2 100644 --- a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml +++ b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml @@ -3,7 +3,7 @@ name: "fastq_shortreads_preprocess_qc" description: | Quality check and preprocessing subworkflow of Illumina short reads that can do: quality check of input reads and generate statistics, - preprocess and validate reads, barcoding, remove adapters and merge reads, + preprocess and validate reads, barcode removal, remove adapters and merge reads, filter complexity, deduplicate reads, remove host contamination, concatenate reads and generate statistics for post-processing reads. keywords: From d808b21eb5dc9cfbed669d2491af5acf847209f5 Mon Sep 17 00:00:00 2001 From: Evangelos Karatzas <32259775+vagkaratzas@users.noreply.github.com> Date: Tue, 20 Jan 2026 14:30:39 +0000 Subject: [PATCH 23/31] Update subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml Co-authored-by: James A. 
Fellows Yates --- subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml index 74e4e27d97c2..254c3c2a20b8 100644 --- a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml +++ b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml @@ -4,7 +4,7 @@ description: | Quality check and preprocessing subworkflow of Illumina short reads that can do: quality check of input reads and generate statistics, preprocess and validate reads, barcode removal, remove adapters and merge reads, - filter complexity, deduplicate reads, remove host contamination, + filter by sequence complexity, deduplicate reads, remove host contamination, concatenate reads and generate statistics for post-processing reads. keywords: - fastq From 9a16ec13afbac7ee554353e0f9a66c5a26a32a09 Mon Sep 17 00:00:00 2001 From: vagkaratzas Date: Tue, 20 Jan 2026 14:35:49 +0000 Subject: [PATCH 24/31] adapter removal and merge params added to meta.yml --- .../fastq_shortreads_preprocess_qc/meta.yml | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml index 254c3c2a20b8..628f77eab61c 100644 --- a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml +++ b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml @@ -100,6 +100,36 @@ input: type: integer description: | Discard R1 or R2 after UMI extraction (0 = keep both, 1 = discard R1, 2 = discard R2) + - skip_adapterremoval: + type: boolean + description: | + Skip the adapter removal and merge subworkflow completely + - val_adapter_tool: + type: string + description: | + Choose one of the available adapter removal and/or merging tools + enum: ["trimmomatic", "cutadapt", "trimgalore", "bbduk", "leehom", 
"fastp", "adapterremoval"] + - ch_custom_adapters_file: + type: file + description: | + Optional reference files, containing adapter and/or contaminant sequences for removal. + In fasta format for bbmap/bbduk and fastp, or in text format for AdapterRemoval (one adapter per line). + - val_save_merged: + type: boolean + description: | + Specify true to output merged reads instead + Used by fastp and adapterremoval + - val_fastp_discard_trimmed_pass: + type: boolean + description: | + Used only by fastp. + Specify true to not write any reads that pass trimming thresholds from the fastp process. + This can be used to use fastp for the output report only. + - val_fastp_save_trimmed_fail: + type: boolean + description: | + Used only by fastp. + Specify true to save files that failed to pass fastp trimming thresholds - skip_complexity_filtering: type: boolean description: | From de55b836952a4542c41ef5e32deafbd35abd1e75 Mon Sep 17 00:00:00 2001 From: vagkaratzas Date: Tue, 20 Jan 2026 14:43:24 +0000 Subject: [PATCH 25/31] comment updated --- subworkflows/nf-core/fastq_shortreads_preprocess_qc/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/main.nf b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/main.nf index 83aaee3fafe1..8a5356ab0d3f 100644 --- a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/main.nf +++ b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/main.nf @@ -37,7 +37,7 @@ workflow FASTQ_SHORTREADS_PREPROCESS_QC { // adapter removal and merging skip_adapterremoval // boolean val_adapter_tool // string: [mandatory] tool_name // choose from: ["trimmomatic", "cutadapt", "trimgalore", "bbduk", "leehom", "fastp", "adapterremoval"] - ch_custom_adapters_file // channel: [optional] {fasta,txt} // fasta, for bbduk or fastp, or txt, for adapterremoval + ch_custom_adapters_file // channel: [optional] [ {fasta,txt} ] // fasta, for bbduk or fastp, or txt, for adapterremoval 
val_save_merged // boolean: [mandatory] if true, will return the merged reads instead, for fastp and adapterremoval val_fastp_discard_trimmed_pass // boolean: [mandatory] // only for fastp val_fastp_save_trimmed_fail // boolean: [mandatory] // only for fastp From a1ff0e274a81a84a94a5a6c1ba8c1ce7e6f21b61 Mon Sep 17 00:00:00 2001 From: vagkaratzas Date: Thu, 22 Jan 2026 10:55:14 +0000 Subject: [PATCH 26/31] fastq_complexity_filter added, nf-tests udpated, meta.yml updated --- .../fastq_shortreads_preprocess_qc/main.nf | 45 +++--- .../fastq_shortreads_preprocess_qc/meta.yml | 82 +++++++++- .../nextflow.config | 22 +++ .../tests/main.nf.test | 142 ++++++++++-------- .../tests/main.nf.test.snap | 10 +- .../tests/nextflow.config | 19 +++ 6 files changed, 226 insertions(+), 94 deletions(-) create mode 100644 subworkflows/nf-core/fastq_shortreads_preprocess_qc/nextflow.config diff --git a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/main.nf b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/main.nf index 8a5356ab0d3f..ba487f644a7b 100644 --- a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/main.nf +++ b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/main.nf @@ -8,7 +8,7 @@ include { UMITOOLS_EXTRACT } from '../../../modules/nf-core/um // adapter removal and merging include { FASTQ_REMOVEADAPTERS_MERGE } from '../fastq_removeadapters_merge/main' // complexity filtering -include { PRINSEQPLUSPLUS } from '../../../modules/nf-core/prinseqplusplus/main' +include { FASTQ_COMPLEXITY_FILTER } from '../fastq_complexity_filter/main' // deduplication include { BBMAP_CLUMPIFY } from '../../../modules/nf-core/bbmap/clumpify/main' // host decontamination @@ -43,6 +43,7 @@ workflow FASTQ_SHORTREADS_PREPROCESS_QC { val_fastp_save_trimmed_fail // boolean: [mandatory] // only for fastp // complexity filtering skip_complexity_filtering // boolean + val_complexity_filter_tool // string: [mandatory] tool_name // choose from: ["prinseqplusplus", "bbduk", "fastp"] // 
deduplication skip_deduplication // boolean // host decontamination @@ -74,7 +75,8 @@ workflow FASTQ_SHORTREADS_PREPROCESS_QC { ch_adapterremoval_discarded_reads = channel.empty() ch_adapterremoval_logfile = channel.empty() ch_adapterremoval_report = channel.empty() - ch_prinseq_log = channel.empty() + ch_complexity_filter_log = channel.empty() + ch_complexity_filter_report = channel.empty() ch_clumpify_log = channel.empty() ch_hostile_reference = channel.empty() ch_hostile_json = channel.empty() @@ -148,10 +150,12 @@ workflow FASTQ_SHORTREADS_PREPROCESS_QC { // complexity filtering if (!skip_complexity_filtering) { - PRINSEQPLUSPLUS( ch_reads ) - ch_reads = PRINSEQPLUSPLUS.out.good_reads - ch_prinseq_log = PRINSEQPLUSPLUS.out.log - ch_versions = ch_versions.mix(PRINSEQPLUSPLUS.out.versions.first()) + FASTQ_COMPLEXITY_FILTER( ch_reads, val_complexity_filter_tool ) + ch_reads = FASTQ_COMPLEXITY_FILTER.out.filtered_reads + ch_complexity_filter_log = FASTQ_COMPLEXITY_FILTER.out.logfile + ch_complexity_filter_report = FASTQ_COMPLEXITY_FILTER.out.report + ch_multiqc_files = ch_multiqc_files.mix(FASTQ_COMPLEXITY_FILTER.out.multiqc_files) + ch_versions = ch_versions.mix(FASTQ_COMPLEXITY_FILTER.out.versions) } // deduplication @@ -208,18 +212,18 @@ workflow FASTQ_SHORTREADS_PREPROCESS_QC { reads = ch_reads // channel: [ val(meta), [ fastq ] ] // statistics - pre_stats_fastqc_html = ch_pre_stats_fastqc_html - pre_stats_fastqc_zip = ch_pre_stats_fastqc_zip - pre_stats_seqfu_check = ch_pre_stats_seqfu_check - pre_stats_seqfu_stats = ch_pre_stats_seqfu_stats - pre_stats_seqkit_stats = ch_pre_stats_seqkit_stats - pre_stats_seqtk_stats = ch_pre_stats_seqtk_stats - post_stats_fastqc_html = ch_post_stats_fastqc_html - post_stats_fastqc_zip = ch_post_stats_fastqc_zip - post_stats_seqfu_check = ch_post_stats_seqfu_check - post_stats_seqfu_stats = ch_post_stats_seqfu_stats - post_stats_seqkit_stats = ch_post_stats_seqkit_stats - post_stats_seqtk_stats = ch_post_stats_seqtk_stats + 
pre_stats_fastqc_html = ch_pre_stats_fastqc_html + pre_stats_fastqc_zip = ch_pre_stats_fastqc_zip + pre_stats_seqfu_check = ch_pre_stats_seqfu_check + pre_stats_seqfu_stats = ch_pre_stats_seqfu_stats + pre_stats_seqkit_stats = ch_pre_stats_seqkit_stats + pre_stats_seqtk_stats = ch_pre_stats_seqtk_stats + post_stats_fastqc_html = ch_post_stats_fastqc_html + post_stats_fastqc_zip = ch_post_stats_fastqc_zip + post_stats_seqfu_check = ch_post_stats_seqfu_check + post_stats_seqfu_stats = ch_post_stats_seqfu_stats + post_stats_seqkit_stats = ch_post_stats_seqkit_stats + post_stats_seqtk_stats = ch_post_stats_seqtk_stats // barcoding umi_log = ch_umi_log @@ -230,7 +234,8 @@ workflow FASTQ_SHORTREADS_PREPROCESS_QC { adapterremoval_report = ch_adapterremoval_report // complexity filtering - prinseq_log = ch_prinseq_log + complexity_filter_log = ch_complexity_filter_log + complexity_filter_report = ch_complexity_filter_report // deduplication clumpify_log = ch_clumpify_log @@ -241,6 +246,6 @@ workflow FASTQ_SHORTREADS_PREPROCESS_QC { deacon_index = ch_deacon_index deacon_summary = ch_deacon_summary - versions = ch_versions // channel: [ versions.yml ] multiqc_files = ch_multiqc_files + versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml index 628f77eab61c..42aaf47fa7f0 100644 --- a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml +++ b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml @@ -37,6 +37,15 @@ components: - seqkit/rmdup - umitools/extract - fastq_removeadapters_merge + - trimmomatic + - cutadapt + - trimgalore + - bbmap/bbduk + - leehom + - fastp + - adapterremoval + - cat/fastq + - fastq_complexity_filter - prinseqplusplus - bbmap/clumpify - fastq_decontaminate_deacon_hostile @@ -47,7 +56,6 @@ components: - bowtie2/build - deacon/filter - deacon/index - - cat/fastq input: - ch_reads: @@ -134,6 +142,11 @@ 
input: type: boolean description: | Skip PRINSEQ++ complexity filtering step + - val_complexity_filter_tool: + type: string + description: | + Complexity filtering tool to use. + Must be one of: 'prinseqplusplus', 'bbduk', or 'fastp'. - skip_deduplication: type: boolean description: | @@ -247,17 +260,76 @@ output: description: | SeqTk composition statistics for post-processing reads Structure: [ val(meta), path(stats) ] - - versions: + - umi_log: type: file description: | - File containing software versions - Structure: [ path(versions.yml) ] - pattern: "versions.yml" + UMI-tools extract log file + Structure: [ val(meta), path(log) ] + - adapterremoval_discarded_reads: + type: file + description: | + Reads discarded during adapter removal or merging + Structure: [ val(meta), path(fastq) ] + pattern: "*.fastq.gz" + - adapterremoval_logfile: + type: file + description: | + Adapter removal execution log file + (trimmomatic {log}, trimgalore {txt}, fastp {log}) + Structure: [ val(meta), path({log,txt}) ] + - adapterremoval_report: + type: file + description: | + Adapter removal report + (trimmomatic {summary}, trimgalore {html,zip}, fastp {html}) + Structure: [ val(meta), path({summary,html,zip}) ] + - complexity_filter_log: + type: file + description: | + Log file from complexity filtering + Structure: [ val(meta), path(log) ] + - complexity_filter_report: + type: file + description: | + Report generated by complexity filtering + HTML report generated by fastp. Empty for other tools. 
+ Structure: [ val(meta), path(html) ] + - clumpify_log: + type: file + description: | + BBMap Clumpify log file + Structure: [ val(meta), path(log) ] + - hostile_reference: + type: file + description: | + Hostile reference files used for decontamination + Structure: [ val(reference_name), path(reference_dir) ] + - hostile_json: + type: file + description: | + Hostile JSON report + Structure: [ val(meta), path(json) ] + - deacon_index: + type: directory + description: | + Deacon index directory + Structure: [ val(meta), path(index) ] + - deacon_summary: + type: file + description: | + Deacon decontamination summary file + Structure: [ val(meta), path(log) ] - multiqc_files: type: file description: | MultiQC compatible files for aggregated reporting Structure: [ path(files) ] + - versions: + type: file + description: | + File containing software versions + Structure: [ path(versions.yml) ] + pattern: "versions.yml" authors: - "@vagkaratzas" diff --git a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/nextflow.config b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/nextflow.config new file mode 100644 index 000000000000..78f02d1c519b --- /dev/null +++ b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/nextflow.config @@ -0,0 +1,22 @@ +// IMPORTANT: Add this configuration to your modules.config + +process { + withName: ".*:FASTQ_COMPLEXITY_FILTER:BBMAP_BBDUK" { + ext.args = [ + 'entropy=0.7', // needed to turn on complexity filtering + 'minlength=0' // needed, to not discard reads shorter than this + ].join(' ') + ext.prefix = { "${meta.id}.trim" } + } + + // need FASTP to only do complexity filtering + withName: ".*:FASTQ_COMPLEXITY_FILTER:FASTP" { + ext.args = [ + '--low_complexity_filter', + '--disable_adapter_trimming', + '--disable_trim_poly_g', + '--disable_quality_filtering', + '--disable_length_filtering' + ].join(' ') + } +} diff --git a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test 
b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test index 1453993a0caf..753f30b1f5f2 100644 --- a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test +++ b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test @@ -28,8 +28,18 @@ nextflow_workflow { tag "umitools" tag "umitools/extract" tag "subworkflows/fastq_removeadapters_merge" - tag "prinseqplusplus" + tag "trimmomatic" + tag "cutadapt" + tag "trimgalore" tag "bbmap" + tag "bbmap/bbduk" + tag "leehom" + tag "fastp" + tag "adapterremoval" + tag "cat" + tag "cat/fastq" + tag "subworkflows/fastq_complexity_filter" + tag "prinseqplusplus" tag "bbmap/clumpify" tag "subworkflows/fastq_decontaminate_deacon_hostile" tag "subworkflows/fastq_index_filter_deacon" @@ -41,8 +51,6 @@ nextflow_workflow { tag "deacon" tag "deacon/filter" tag "deacon/index" - tag "cat" - tag "cat/fastq" test("sarscov2 - fastq - seqfu - seqkit - deacon - single_end") { @@ -75,18 +83,19 @@ nextflow_workflow { input[16] = false // val_fastp_discard_trimmed_pass input[17] = false // val_fastp_save_trimmed_fail input[18] = true // skip_complexity_filtering - input[19] = true // skip_deduplication - input[20] = false // skip_decontamination - input[21] = channel.of( + input[19] = "" // val_complexity_filter_tool + input[20] = true // skip_deduplication + input[21] = false // skip_decontamination + input[22] = channel.of( [ [ id:'test', single_end:true ], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] ) // ch_fasta - input[22] = [] // ch_reference - input[23] = [] // val_index_name - input[24] = 'deacon' // val_decontaminator - input[25] = true // skip_final_concatenation + input[23] = [] // ch_reference + input[24] = [] // val_index_name + input[25] = 'deacon' // val_decontaminator + input[26] = true // skip_final_concatenation """ } } @@ -121,36 +130,37 @@ nextflow_workflow { [ id:'test', single_end:true ], 
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ]) - input[1] = true // skip_fastqc - input[2] = true // skip_seqfu_check - input[3] = true // skip_seqfu_stats - input[4] = true // skip_seqkit_stats - input[5] = true // skip_seqtk_comp - input[6] = true // skip_seqkit_sana_pair - input[7] = true // skip_seqkit_seq - input[8] = true // skip_seqkit_replace - input[9] = true // skip_seqkit_rmdup - input[10] = false // skip_umitools_extract - input[11] = 0 // val_umi_discard_read - input[12] = true // skip_adapterremoval - input[13] = "" // val_adapter_tool - input[14] = [] // ch_custom_adapters_file - input[15] = false // val_save_merged - input[16] = false // val_fastp_discard_trimmed_pass - input[17] = false // val_fastp_save_trimmed_fail - input[18] = false // skip_complexity_filtering - input[19] = false // skip_deduplication - input[20] = true // skip_decontamination - input[21] = channel.of( + input[1] = true // skip_fastqc + input[2] = true // skip_seqfu_check + input[3] = true // skip_seqfu_stats + input[4] = true // skip_seqkit_stats + input[5] = true // skip_seqtk_comp + input[6] = true // skip_seqkit_sana_pair + input[7] = true // skip_seqkit_seq + input[8] = true // skip_seqkit_replace + input[9] = true // skip_seqkit_rmdup + input[10] = false // skip_umitools_extract + input[11] = 0 // val_umi_discard_read + input[12] = true // skip_adapterremoval + input[13] = "" // val_adapter_tool + input[14] = [] // ch_custom_adapters_file + input[15] = false // val_save_merged + input[16] = false // val_fastp_discard_trimmed_pass + input[17] = false // val_fastp_save_trimmed_fail + input[18] = false // skip_complexity_filtering + input[19] = 'prinseqplusplus' // val_complexity_filter_tool + input[20] = false // skip_deduplication + input[21] = true // skip_decontamination + input[22] = channel.of( [ [ id:'test', single_end:true ], file(params.modules_testdata_base_path + 
'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] - ) // ch_fasta - input[22] = [] // ch_reference - input[23] = [] // val_index_name - input[24] = 'deacon' // val_decontaminator - input[25] = false // skip_final_concatenation + ) // ch_fasta + input[23] = [] // ch_reference + input[24] = [] // val_index_name + input[25] = 'deacon' // val_decontaminator + input[26] = false // skip_final_concatenation """ } } @@ -201,13 +211,14 @@ nextflow_workflow { input[16] = false // val_fastp_discard_trimmed_pass input[17] = false // val_fastp_save_trimmed_fail input[18] = true // skip_complexity_filtering - input[19] = false // skip_deduplication - input[20] = true // skip_decontamination - input[21] = [] // ch_fasta - input[22] = [] // ch_reference - input[23] = [] // val_index_name - input[24] = '' // val_decontaminator - input[25] = false // skip_final_concatenation + input[19] = "" // val_complexity_filter_tool + input[20] = false // skip_deduplication + input[21] = true // skip_decontamination + input[22] = [] // ch_fasta + input[23] = [] // ch_reference + input[24] = [] // val_index_name + input[25] = '' // val_decontaminator + input[26] = false // skip_final_concatenation """ } } @@ -275,13 +286,14 @@ nextflow_workflow { input[16] = false // val_fastp_discard_trimmed_pass input[17] = false // val_fastp_save_trimmed_fail input[18] = true // skip_complexity_filtering - input[19] = true // skip_deduplication - input[20] = true // skip_decontamination - input[21] = [] // ch_fasta - input[22] = [] // ch_reference - input[23] = [] // val_index_name - input[24] = '' // val_decontaminator - input[25] = false // skip_final_concatenation + input[19] = "" // val_complexity_filter_tool + input[20] = true // skip_deduplication + input[21] = true // skip_decontamination + input[22] = [] // ch_fasta + input[23] = [] // ch_reference + input[24] = [] // val_index_name + input[25] = '' // val_decontaminator + input[26] = false // skip_final_concatenation """ } } @@ -329,13 
+341,14 @@ nextflow_workflow { input[16] = false // val_fastp_discard_trimmed_pass input[17] = false // val_fastp_save_trimmed_fail input[18] = true // skip_complexity_filtering - input[19] = true // skip_deduplication - input[20] = true // skip_decontamination - input[21] = [] // ch_fasta - input[22] = [] // ch_reference - input[23] = [] // val_index_name - input[24] = "" // val_decontaminator - input[25] = true // skip_final_concatenation + input[19] = "" // val_complexity_filter_tool + input[20] = true // skip_deduplication + input[21] = true // skip_decontamination + input[22] = [] // ch_fasta + input[23] = [] // ch_reference + input[24] = [] // val_index_name + input[25] = "" // val_decontaminator + input[26] = true // skip_final_concatenation """ } } @@ -383,13 +396,14 @@ nextflow_workflow { input[16] = false // val_fastp_discard_trimmed_pass input[17] = false // val_fastp_save_trimmed_fail input[18] = true // skip_complexity_filtering - input[19] = true // skip_deduplication - input[20] = true // skip_decontamination - input[21] = [] // ch_fasta - input[22] = [] // ch_reference - input[23] = [] // val_index_name - input[24] = "" // val_decontaminator - input[25] = true // skip_final_concatenation + input[19] = "" // val_complexity_filter_tool + input[20] = true // skip_deduplication + input[21] = true // skip_decontamination + input[22] = [] // ch_fasta + input[23] = [] // ch_reference + input[24] = [] // val_index_name + input[25] = "" // val_decontaminator + input[26] = true // skip_final_concatenation """ } } diff --git a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test.snap b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test.snap index 74b155711f62..bf301f7ca11d 100644 --- a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test.snap +++ b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test.snap @@ -212,13 +212,13 @@ 36, [ { - "FASTQ_SHORTREADS_PREPROCESS_QC:PRINSEQPLUSPLUS": { 
- "prinseqplusplus": 1.2 + "FASTQ_SHORTREADS_PREPROCESS_QC:BBMAP_CLUMPIFY": { + "bbmap": 39.18 } }, { - "FASTQ_SHORTREADS_PREPROCESS_QC:BBMAP_CLUMPIFY": { - "bbmap": 39.18 + "FASTQ_SHORTREADS_PREPROCESS_QC:FASTQ_COMPLEXITY_FILTER:PRINSEQPLUSPLUS": { + "prinseqplusplus": 1.2 } }, { @@ -232,7 +232,7 @@ "nf-test": "0.9.3", "nextflow": "25.10.2" }, - "timestamp": "2026-01-14T10:49:09.073574264" + "timestamp": "2026-01-22T10:34:19.712126932" }, "sarscov2 - fastq - skip all - single_end - stub": { "content": [ diff --git a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/nextflow.config b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/nextflow.config index e7aff186186e..0d216555b8c7 100644 --- a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/nextflow.config +++ b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/nextflow.config @@ -35,4 +35,23 @@ process { withName: 'ADAPTERREMOVAL_PE' { ext.args = params.adapterremoval_args } + + withName: ".*:FASTQ_COMPLEXITY_FILTER:BBMAP_BBDUK" { + ext.args = [ + 'entropy=0.7', // needed to turn on complexity filtering + 'minlength=0' // needed, to not discard reads shorter than this + ].join(' ') + ext.prefix = { "${meta.id}.trim" } + } + + // need FASTP to only do complexity filtering + withName: ".*:FASTQ_COMPLEXITY_FILTER:FASTP" { + ext.args = [ + '--low_complexity_filter', + '--disable_adapter_trimming', + '--disable_trim_poly_g', + '--disable_quality_filtering', + '--disable_length_filtering' + ].join(' ') + } } From 72fa54e0a6ce3f629c905f5554a573eecc81f253 Mon Sep 17 00:00:00 2001 From: vagkaratzas Date: Tue, 27 Jan 2026 12:56:44 +0000 Subject: [PATCH 27/31] variable renames --- .../fastq_shortreads_preprocess_qc/main.nf | 16 +++---- .../fastq_shortreads_preprocess_qc/meta.yml | 8 ++-- .../tests/main.nf.test | 48 +++++++++---------- 3 files changed, 36 insertions(+), 36 deletions(-) diff --git a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/main.nf 
b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/main.nf index ba487f644a7b..2641ed7fadaa 100644 --- a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/main.nf +++ b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/main.nf @@ -48,10 +48,10 @@ workflow FASTQ_SHORTREADS_PREPROCESS_QC { skip_deduplication // boolean // host decontamination skip_decontamination // boolean - ch_fasta // channel: [ val(meta), [ fasta ] ] (optional) - ch_reference // channel: [ val(reference_name), path(reference_dir) ] (optional) - val_index_name // val (optional) - val_decontaminator // string (enum): 'hostile' or 'deacon' + ch_decontamination_fasta // channel: [ val(meta), [ fasta ] ] (optional) + ch_decontamination_reference // channel: [ val(reference_name), path(reference_dir) ] (optional) + val_decontamination_index_name // val (optional) + val_decontamination_tool // string (enum): 'hostile' or 'deacon' // final concatenation skip_final_concatenation // boolean @@ -170,10 +170,10 @@ workflow FASTQ_SHORTREADS_PREPROCESS_QC { if (!skip_decontamination) { FASTQ_DECONTAMINATE_DEACON_HOSTILE ( ch_reads, - ch_fasta, - ch_reference, - val_index_name, - val_decontaminator + ch_decontamination_fasta, + ch_decontamination_reference, + val_decontamination_index_name, + val_decontamination_tool ) ch_reads = FASTQ_DECONTAMINATE_DEACON_HOSTILE.out.fastq_filtered ch_hostile_reference = FASTQ_DECONTAMINATE_DEACON_HOSTILE.out.reference diff --git a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml index 42aaf47fa7f0..7249143ee0c5 100644 --- a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml +++ b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml @@ -155,22 +155,22 @@ input: type: boolean description: | Skip host decontamination step - - ch_fasta: + - ch_decontamination_fasta: type: file description: | Reference genome FASTA file for decontamination (optional) Structure: [ val(meta), [ 
path(fasta) ] ] pattern: "*.{fasta,fa,fna}" - - ch_reference: + - ch_decontamination_reference: type: directory description: | Pre-built reference index directory for decontamination (optional) Structure: [ val(reference_name), path(reference_dir) ] - - val_index_name: + - val_decontamination_index_name: type: string description: | Name for the decontamination index (optional) - - val_decontaminator: + - val_decontamination_tool: type: string description: | Decontamination tool to use ('hostile' or 'deacon') diff --git a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test index 753f30b1f5f2..d92bf7eb2951 100644 --- a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test +++ b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test @@ -91,10 +91,10 @@ nextflow_workflow { [ id:'test', single_end:true ], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] - ) // ch_fasta - input[23] = [] // ch_reference - input[24] = [] // val_index_name - input[25] = 'deacon' // val_decontaminator + ) // ch_decontamination_fasta + input[23] = [] // ch_decontamination_reference + input[24] = [] // val_decontamination_index_name + input[25] = 'deacon' // val_decontamination_tool input[26] = true // skip_final_concatenation """ } @@ -156,10 +156,10 @@ nextflow_workflow { [ id:'test', single_end:true ], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] - ) // ch_fasta - input[23] = [] // ch_reference - input[24] = [] // val_index_name - input[25] = 'deacon' // val_decontaminator + ) // ch_decontamination_fasta + input[23] = [] // ch_decontamination_reference + input[24] = [] // val_decontamination_index_name + input[25] = 'deacon' // val_decontamination_tool input[26] = false // skip_final_concatenation """ } @@ -214,10 +214,10 @@ nextflow_workflow { input[19] 
= "" // val_complexity_filter_tool input[20] = false // skip_deduplication input[21] = true // skip_decontamination - input[22] = [] // ch_fasta - input[23] = [] // ch_reference - input[24] = [] // val_index_name - input[25] = '' // val_decontaminator + input[22] = [] // ch_decontamination_fasta + input[23] = [] // ch_decontamination_reference + input[24] = [] // val_decontamination_index_name + input[25] = '' // val_decontamination_tool input[26] = false // skip_final_concatenation """ } @@ -289,10 +289,10 @@ nextflow_workflow { input[19] = "" // val_complexity_filter_tool input[20] = true // skip_deduplication input[21] = true // skip_decontamination - input[22] = [] // ch_fasta - input[23] = [] // ch_reference - input[24] = [] // val_index_name - input[25] = '' // val_decontaminator + input[22] = [] // ch_decontamination_fasta + input[23] = [] // ch_decontamination_reference + input[24] = [] // val_decontamination_index_name + input[25] = '' // val_decontamination_tool input[26] = false // skip_final_concatenation """ } @@ -344,10 +344,10 @@ nextflow_workflow { input[19] = "" // val_complexity_filter_tool input[20] = true // skip_deduplication input[21] = true // skip_decontamination - input[22] = [] // ch_fasta - input[23] = [] // ch_reference - input[24] = [] // val_index_name - input[25] = "" // val_decontaminator + input[22] = [] // ch_decontamination_fasta + input[23] = [] // ch_decontamination_reference + input[24] = [] // val_decontamination_index_name + input[25] = "" // val_decontamination_tool input[26] = true // skip_final_concatenation """ } @@ -399,10 +399,10 @@ nextflow_workflow { input[19] = "" // val_complexity_filter_tool input[20] = true // skip_deduplication input[21] = true // skip_decontamination - input[22] = [] // ch_fasta - input[23] = [] // ch_reference - input[24] = [] // val_index_name - input[25] = "" // val_decontaminator + input[22] = [] // ch_decontamination_fasta + input[23] = [] // ch_decontamination_reference + input[24] = [] // 
val_decontamination_index_name + input[25] = "" // val_decontamination_tool input[26] = true // skip_final_concatenation """ } From f33e1e03505a16a5a4d460645a73eeea7c720a40 Mon Sep 17 00:00:00 2001 From: vagkaratzas Date: Tue, 27 Jan 2026 15:42:41 +0000 Subject: [PATCH 28/31] dedupe flag for clumpify --- .../nextflow.config | 4 ++++ .../tests/main.nf.test.snap | 10 +++++----- .../tests/nextflow.config | 18 +++++++++++------- 3 files changed, 20 insertions(+), 12 deletions(-) diff --git a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/nextflow.config b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/nextflow.config index 78f02d1c519b..b81471fa0600 100644 --- a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/nextflow.config +++ b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/nextflow.config @@ -19,4 +19,8 @@ process { '--disable_length_filtering' ].join(' ') } + + withName: "BBMAP_CLUMPIFY" { + ext.args = 'dedupe=t' + } } diff --git a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test.snap b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test.snap index bf301f7ca11d..c0a18fbad674 100644 --- a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test.snap +++ b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test.snap @@ -152,7 +152,7 @@ "test_2_fastqc.zip", "test_1_fastqc.zip", "test_2_fastqc.zip", - 36, + 41, [ { "FASTQ_SHORTREADS_PREPROCESS_QC:FASTQ_PREPROCESS_SEQKIT:SEQKIT_REPLACE": { @@ -190,7 +190,7 @@ "nf-test": "0.9.3", "nextflow": "25.10.2" }, - "timestamp": "2026-01-15T11:39:36.442733591" + "timestamp": "2026-01-27T15:36:35.200576192" }, "sarscov2 - fastq - skip all - single_end": { "content": [ @@ -207,9 +207,9 @@ }, "sarscov2 - fastq - umitools - prinseq - clumpify - cat - single_end": { "content": [ - "test.merged.fastq.gz:md5,e9a7e89c097b6f23464382b91c274013", + "test.merged.fastq.gz:md5,21afcb176709adf3cad5d5afe4bd8ddc", 51, - 36, + 41, [ { 
"FASTQ_SHORTREADS_PREPROCESS_QC:BBMAP_CLUMPIFY": { @@ -232,7 +232,7 @@ "nf-test": "0.9.3", "nextflow": "25.10.2" }, - "timestamp": "2026-01-22T10:34:19.712126932" + "timestamp": "2026-01-27T15:34:20.817099383" }, "sarscov2 - fastq - skip all - single_end - stub": { "content": [ diff --git a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/nextflow.config b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/nextflow.config index 0d216555b8c7..f30477b3b19d 100644 --- a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/nextflow.config +++ b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/nextflow.config @@ -1,9 +1,9 @@ process { - withName: 'SEQKIT_SANA' { + withName: "SEQKIT_SANA" { ext.prefix = { "${meta.id}_${meta.strandness}" } } - withName: 'SEQKIT_SEQ' { + withName: "SEQKIT_SEQ" { ext.args = [ "--remove-gaps", "--upper-case", @@ -14,25 +14,25 @@ process { ext.prefix = { "intermediate_seqkit_seq_${meta.strandness}" } } - withName: 'SEQKIT_REPLACE' { + withName: "SEQKIT_REPLACE" { ext.args = '-p "/" -r "_"' ext.suffix = ".fasta" ext.prefix = { "intermediate_seqkit_replace_${meta.strandness}" } } - withName: 'SEQKIT_RMDUP' { + withName: "SEQKIT_RMDUP" { ext.prefix = { "${meta.id}_${meta.strandness}" } } - withName: 'UMITOOLS_EXTRACT' { + withName: "UMITOOLS_EXTRACT" { ext.args = '--bc-pattern="NNNN"' } - withName: 'CUTADAPT' { + withName: "CUTADAPT" { ext.args = '-q 25' } - withName: 'ADAPTERREMOVAL_PE' { + withName: "ADAPTERREMOVAL_PE" { ext.args = params.adapterremoval_args } @@ -54,4 +54,8 @@ process { '--disable_length_filtering' ].join(' ') } + + withName: "BBMAP_CLUMPIFY" { + ext.args = 'dedupe=t' + } } From e3b50c5d75957f91ad3dc41a142969445caab658 Mon Sep 17 00:00:00 2001 From: vagkaratzas Date: Tue, 27 Jan 2026 15:52:30 +0000 Subject: [PATCH 29/31] check if deterministic output --- .../fastq_shortreads_preprocess_qc/tests/main.nf.test.snap | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test.snap b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test.snap index c0a18fbad674..8fbb1219151f 100644 --- a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test.snap +++ b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test.snap @@ -207,7 +207,7 @@ }, "sarscov2 - fastq - umitools - prinseq - clumpify - cat - single_end": { "content": [ - "test.merged.fastq.gz:md5,21afcb176709adf3cad5d5afe4bd8ddc", + "test.merged.fastq.gz:md5,d128e30915f03eb4425eb811cefbf265", 51, 41, [ @@ -232,7 +232,7 @@ "nf-test": "0.9.3", "nextflow": "25.10.2" }, - "timestamp": "2026-01-27T15:34:20.817099383" + "timestamp": "2026-01-27T15:49:36.127268089" }, "sarscov2 - fastq - skip all - single_end - stub": { "content": [ From 38c59f0a0844fcb0eab516937fc50bf6a4060bb8 Mon Sep 17 00:00:00 2001 From: vagkaratzas Date: Tue, 27 Jan 2026 16:04:25 +0000 Subject: [PATCH 30/31] updating non-deterministic snapshot --- .../nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test | 2 +- .../fastq_shortreads_preprocess_qc/tests/main.nf.test.snap | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test index d92bf7eb2951..563ec899cffd 100644 --- a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test +++ b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test @@ -168,7 +168,7 @@ nextflow_workflow { assert workflow.success assertAll( { assert snapshot( - workflow.out.reads[0][1], + path(workflow.out.reads[0][1]).linesGzip.size(), path(workflow.out.umi_log[0][1]).readLines().size(), path(workflow.out.clumpify_log[0][1]).readLines().size(), workflow.out.versions.collect { path(it).yaml } diff --git a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test.snap 
b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test.snap index 8fbb1219151f..e0ad50793e06 100644 --- a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test.snap +++ b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/tests/main.nf.test.snap @@ -207,7 +207,7 @@ }, "sarscov2 - fastq - umitools - prinseq - clumpify - cat - single_end": { "content": [ - "test.merged.fastq.gz:md5,d128e30915f03eb4425eb811cefbf265", + 400, 51, 41, [ @@ -232,7 +232,7 @@ "nf-test": "0.9.3", "nextflow": "25.10.2" }, - "timestamp": "2026-01-27T15:49:36.127268089" + "timestamp": "2026-01-27T16:03:31.345098636" }, "sarscov2 - fastq - skip all - single_end - stub": { "content": [ From b42e76033b7437110ed8d07c4625f675017932bc Mon Sep 17 00:00:00 2001 From: Evangelos Karatzas <32259775+vagkaratzas@users.noreply.github.com> Date: Tue, 27 Jan 2026 16:12:07 +0000 Subject: [PATCH 31/31] Update subworkflows/nf-core/fastq_shortreads_preprocess_qc/main.nf Co-authored-by: Simon Pearce <24893913+SPPearce@users.noreply.github.com> --- .../nf-core/fastq_shortreads_preprocess_qc/main.nf | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/main.nf b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/main.nf index 2641ed7fadaa..0a8cc10b3b8b 100644 --- a/subworkflows/nf-core/fastq_shortreads_preprocess_qc/main.nf +++ b/subworkflows/nf-core/fastq_shortreads_preprocess_qc/main.nf @@ -59,18 +59,6 @@ workflow FASTQ_SHORTREADS_PREPROCESS_QC { ch_versions = channel.empty() ch_multiqc_files = channel.empty() - ch_pre_stats_fastqc_html = channel.empty() - ch_pre_stats_fastqc_zip = channel.empty() - ch_pre_stats_seqfu_check = channel.empty() - ch_pre_stats_seqfu_stats = channel.empty() - ch_pre_stats_seqkit_stats = channel.empty() - ch_pre_stats_seqtk_stats = channel.empty() - ch_post_stats_fastqc_html = channel.empty() - ch_post_stats_fastqc_zip = channel.empty() - ch_post_stats_seqfu_check = channel.empty() - 
ch_post_stats_seqfu_stats = channel.empty() - ch_post_stats_seqkit_stats = channel.empty() - ch_post_stats_seqtk_stats = channel.empty() ch_umi_log = channel.empty() ch_adapterremoval_discarded_reads = channel.empty() ch_adapterremoval_logfile = channel.empty()