diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000..6963219 --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,10 @@ +**What problems does this PR solve?** +Provide a short description or reference to the relevant issue, explaining what problems this PR solves. + +**An outline of the validation procedure for this feature** +In addition to automatic tests, has any manual testing been carried out? + +**Risk analysis - Reasons for careful code review** +If any of the boxes below are checked, extra careful code review should be initiated. + + - [ ] This PR contains code that could remove data diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index ffb81dc..f1eb519 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -26,7 +26,7 @@ jobs: env: CAPSULE_LOG: none run: | - curl -s https://get.nextflow.io | bash + curl -fsSL https://github.com/nextflow-io/nextflow/releases/download/v24.10.4/nextflow -o nextflow | bash sudo mv nextflow /usr/local/bin/ - name: Make Nextflow binary executable @@ -35,11 +35,12 @@ jobs: - name: Set up python uses: actions/setup-python@v2 with: - python-version: 3.9 + python-version: 3.11 architecture: x64 - name: Install test requirements - run: pip install -r requirements-dev.txt + run: | + pip install -r requirements-dev.txt - name: Run tests run: pytest tests diff --git a/bin/get_metadata.py b/bin/get_metadata.py index 282b37d..202b47b 100644 --- a/bin/get_metadata.py +++ b/bin/get_metadata.py @@ -3,6 +3,8 @@ import xmltodict from collections import OrderedDict import re +import glob +import csv import argparse import os import json @@ -12,11 +14,10 @@ class RunfolderInfo: - def __init__(self, runfolder, bcl2fastq_outdir): + def __init__(self, runfolder): self.runfolder = runfolder self.run_info = self.read_run_info() self.run_parameters = self.read_run_parameters() - self.stats_json = 
self.read_stats_json(bcl2fastq_outdir) self.description_and_identifier = OrderedDict() self.run_parameters_tags = { "RunId": "Run ID", @@ -79,27 +80,11 @@ def find_flowcell_type_novaseqx(self): return None return {"Flowcell type": flowcell_type} - def read_stats_json(self, bcl2fastq_outdir): - stats_json_path = os.path.join( - self.runfolder, bcl2fastq_outdir, "Stats/Stats.json" - ) - if os.path.exists(stats_json_path): - with open(stats_json_path) as f: - return json.load(f) - else: - return None - - def get_bcl2fastq_version(self, runfolder): - with open(os.path.join(runfolder, "bcl2fastq_version")) as f: - bcl2fastq_str = f.read() - return bcl2fastq_str.split("v")[1].strip() - def get_software_version(self, runfolder): - with open( - Path(runfolder) - / "pipeline_info" - / "nf_core_pipeline_software_mqc_versions.yml" - ) as f: + pipeline_dir = Path(runfolder) / "pipeline_info" + pipeline_info_filename = next(pipeline_dir.glob("*_software_mqc_versions.yml")) + + with open(pipeline_info_filename) as f: return { software: version for software_dict in yaml.safe_load(f).values() @@ -151,15 +136,6 @@ def get_info(self): return results def get_demultiplexing_info(self): - try: - return { - "Demultiplexing": { - "bcl2fastq": self.get_bcl2fastq_version(self.runfolder) - } - } - except FileNotFoundError: - pass - try: return {"Demultiplexing": self.get_software_version(self.runfolder)} except FileNotFoundError: @@ -173,18 +149,11 @@ def get_demultiplexing_info(self): parser.add_argument( "--runfolder", type=str, required=True, help="Path to runfolder" ) - parser.add_argument( - "--bcl2fastq-outdir", - type=str, - default="Data/Intensities/BaseCalls", - help="Path to bcl2fastq output folder relative to the runfolder", - ) args = parser.parse_args() runfolder = args.runfolder - bcl2fastq_outdir = args.bcl2fastq_outdir - runfolder_info = RunfolderInfo(runfolder, bcl2fastq_outdir) + runfolder_info = RunfolderInfo(runfolder) info = runfolder_info.get_info() print( diff --git 
a/config/nextflow_config/singularity.config b/config/nextflow_config/singularity.config index 3e37f21..f963b7b 100644 --- a/config/nextflow_config/singularity.config +++ b/config/nextflow_config/singularity.config @@ -5,25 +5,25 @@ singularity { process { withName: 'FASTQC' { - container = 'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--hdfd78af_1' + container = 'https://depot.galaxyproject.org/singularity/fastqc:0.12.1--hdfd78af_0' } withName: 'FASTQ_SCREEN' { - container = 'https://depot.galaxyproject.org/singularity/fastq-screen:0.14.0--pl5262hdfd78af_1' + container = 'https://depot.galaxyproject.org/singularity/fastq-screen:0.16.0--pl5321hdfd78af_0' } withName: 'GET_QC_THRESHOLDS' { - container = 'https://depot.galaxyproject.org/singularity/checkqc:3.6.6--pyhdfd78af_0' + container = 'https://depot.galaxyproject.org/singularity/checkqc:4.0.7--pyhdfd78af_0' } withName: 'GET_METADATA' { - container = 'https://depot.galaxyproject.org/singularity/checkqc:3.6.6--pyhdfd78af_0' + container = 'https://depot.galaxyproject.org/singularity/checkqc:4.0.7--pyhdfd78af_0' } withName: 'INTEROP_SUMMARY' { - container = 'https://depot.galaxyproject.org/singularity/illumina-interop:1.2.4--hdbdd923_2' + container = 'https://depot.galaxyproject.org/singularity/illumina-interop:1.5.0--h503566f_0' } withName: 'MULTIQC_PER_FLOWCELL' { - container = 'https://depot.galaxyproject.org/singularity/multiqc:1.21--pyhdfd78af_0' + container = 'https://depot.galaxyproject.org/singularity/multiqc:1.32--pyhdfd78af_1' } withName: 'MULTIQC_PER_PROJECT' { - container = 'https://depot.galaxyproject.org/singularity/multiqc:1.21--pyhdfd78af_0' + container = 'https://depot.galaxyproject.org/singularity/multiqc:1.32--pyhdfd78af_1' } } diff --git a/config/nextflow_config/test_bclconvert.config b/config/nextflow_config/test_bclconvert.config new file mode 100644 index 0000000..77f0865 --- /dev/null +++ b/config/nextflow_config/test_bclconvert.config @@ -0,0 +1,19 @@ +/* 
+======================================================================================== + Nextflow config file for running minimal tests +======================================================================================== + Defines input files and everything required to run a fast and simple pipeline test. + Use as follows: + nextflow run main.nf -profile dev,test_bclconvert,singularity --demultiplexer bclconvert + + + This config takes inspiration from https://github.com/nf-core/rnaseq +---------------------------------------------------------------------------------------- +*/ + +params { + run_folder = "$baseDir/test_data/230825_M04034_0043_000000000-L6NVV" + fastqscreen_databases = "$baseDir/test_data/Test_FastQ_Screen_Genomes" + checkqc_config = "$baseDir/test_data/checkqc_config.yaml" + config_dir = "$baseDir/test_data/test_config" +} diff --git a/config/tool_config/multiqc_flowcell_config.yaml b/config/tool_config/multiqc_flowcell_config.yaml index fd2f775..3c6205e 100644 --- a/config/tool_config/multiqc_flowcell_config.yaml +++ b/config/tool_config/multiqc_flowcell_config.yaml @@ -4,6 +4,7 @@ run_modules: - fastqc - fastq_screen - bcl2fastq + - bclconvert - interop - custom_content diff --git a/main.nf b/main.nf index 8bb3cf0..06f57b4 100644 --- a/main.nf +++ b/main.nf @@ -12,7 +12,9 @@ params.run_folder = "/path/to/run_folder" params.result_dir = "results" fastqscreen_default_databases = "FastQ_Screen_Genomes" params.fastqscreen_databases = fastqscreen_default_databases -params.bcl2fastq_outdir = "Unaligned" +params.demultiplexer = "bcl2fastq" +params.demultiplexer_outdir = "Unaligned" + params.checkqc_config = "" // See: https://github.com/Molmed/checkQC params.assets_dir = "$baseDir/assets" params.config_dir = "$baseDir/config/tool_config" @@ -47,11 +49,12 @@ def helpMessage() { Optional parameters: --result_dir Path to write results (default: results) - --bcl2fastq_outdir Folder name to check for fastq.gz files and demultiplexing stats (default: Unaligned) + --demultiplexer_outdir 
Folder name to check for fastq.gz files and demultiplexing stats (default: Unaligned) --checkqc_config Configuration file for CheckQC --assets_dir Location of project assests (default: "\$baseDir/assets"). --config_dir Location of tool configuration files (default: "\$baseDir/config/tool_config"). --script_dir Location of project scripts (default: "\$baseDir/bin") + --demultiplexer Name of demultiplexer used e.g 'bcl2fastq' or 'bclconvert' --help Print this help message. @@ -61,7 +64,7 @@ def helpMessage() { """ } -if (params.help || !params.run_folder){ +if (params.help || !params.run_folder || !params.demultiplexer){ helpMessage() exit 0 } @@ -72,7 +75,8 @@ workflow { Channel.fromPath(params.run_folder,checkIfExists:true) .ifEmpty { "Error: No run folder (--run_folder) given."; exit 1 } .set {run_folder} - CHECK_RUN_QUALITY(run_folder) + Channel.value(params.demultiplexer).set {demultiplexer} + CHECK_RUN_QUALITY(run_folder, demultiplexer) } @@ -83,13 +87,13 @@ workflow.onComplete { def get_project_and_reads(run_folder) { Channel - .fromPath("${run_folder}/${params.bcl2fastq_outdir}/**.fastq.gz" ) + .fromPath("${run_folder}/${params.demultiplexer_outdir}/**.fastq.gz" ) .filter( ~/.*_[^I]\d_001\.fastq\.gz$/ ) .ifEmpty { "Error: No fastq files found under ${run_folder}/ !\n"; exit 1 } .map { it.toString().indexOf('Undetermined') > 0 ? 
['NoProject', it] : - [(it.toString() =~ /^.*\/${params.bcl2fastq_outdir}\/([^\/]+)\/.*\.fastq\.gz$/)[0][1],it] + [(it.toString() =~ /^.*\/${params.demultiplexer_outdir}\/([^\/]+)\/.*\.fastq\.gz$/)[0][1],it] } } @@ -121,8 +125,21 @@ workflow CHECK_RUN_QUALITY { take: run_folder + demultiplexer main: + if (params.demultiplexer == 'bclconvert') { + Channel.fromPath([ + "${params.run_folder}/${params.demultiplexer_outdir}/Reports/*.csv", + "${params.run_folder}/RunInfo.xml"]) + .collect().ifEmpty([]) + .set { demux_stats } + } else { + Channel.fromPath("${params.run_folder}/${params.demultiplexer_outdir}/Stats/Stats.json") + .collect().ifEmpty([]) + .set { demux_stats } + } + INTEROP_SUMMARY(run_folder) GET_QC_THRESHOLDS(run_folder) GET_METADATA(run_folder) @@ -132,14 +149,15 @@ workflow CHECK_RUN_QUALITY { FASTQ_SCREEN(project_and_reads, params.config_dir, params.fastqscreen_databases) - MULTIQC_PER_FLOWCELL( params.run_folder, + MULTIQC_PER_FLOWCELL( + params.run_folder, FASTQC.out.map{ it[1] }.collect(), FASTQ_SCREEN.out.results.map{ it[1] }.collect(), FASTQ_SCREEN.out.tsv.map{ it[1] }.collectFile(keepHeader:true,skip:1,sort:true), INTEROP_SUMMARY.out.collect(), GET_QC_THRESHOLDS.out.collect().ifEmpty([]), GET_METADATA.out.collect(), - Channel.fromPath("${params.run_folder}/${params.bcl2fastq_outdir}/Stats/Stats.json").collect().ifEmpty([]), + demux_stats, params.assets_dir, params.config_dir) MULTIQC_PER_PROJECT( params.run_folder, @@ -239,14 +257,9 @@ process GET_METADATA { path 'sequencing_metadata_mqc.yaml' script: - if ( params.bcl2fastq_outdir ){ - bcl2fastq_outdir_section = "--bcl2fastq-outdir ${params.bcl2fastq_outdir}" - } else { - bcl2fastq_outdir_section = "" - } """ python ${params.script_dir}/get_metadata.py --runfolder $runfolder \\ - $bcl2fastq_outdir_section &> sequencing_metadata_mqc.yaml + &> sequencing_metadata_mqc.yaml """ } @@ -277,7 +290,7 @@ process MULTIQC_PER_FLOWCELL { path ('Interop_summary/*') // Interop log path qc_thresholds // Quality 
check thresholds (optional) path sequencing_metadata // Sequencing meta data ( custom content data ) - path bcl2fastq_stats // Bcl2Fastq logs + path demux_stats // demux logs path assets // Staged copy of assets folder path config_dir // Staged copy of config folder diff --git a/nextflow.config b/nextflow.config index d0c673c..0f1fe50 100644 --- a/nextflow.config +++ b/nextflow.config @@ -62,5 +62,9 @@ profiles { test { includeConfig 'config/nextflow_config/test.config' } + + test_bclconvert { + includeConfig 'config/nextflow_config/test_bclconvert.config' + } } diff --git a/requirements-dev.txt b/requirements-dev.txt index 3fc5417..5e23c1f 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,5 +1,5 @@ pytest==6.2.5 black==24.3.0 beautifulsoup4==4.10.0 -checkqc==3.6.6 +checkqc==4.1.1rc1 lxml==4.9.2 diff --git a/test_data/230825_M04034_0043_000000000-L6NVV/InterOp/ControlMetricsOut.bin b/test_data/230825_M04034_0043_000000000-L6NVV/InterOp/ControlMetricsOut.bin new file mode 100755 index 0000000..9b2e9da Binary files /dev/null and b/test_data/230825_M04034_0043_000000000-L6NVV/InterOp/ControlMetricsOut.bin differ diff --git a/test_data/230825_M04034_0043_000000000-L6NVV/InterOp/CorrectedIntMetricsOut.bin b/test_data/230825_M04034_0043_000000000-L6NVV/InterOp/CorrectedIntMetricsOut.bin new file mode 100755 index 0000000..914d339 Binary files /dev/null and b/test_data/230825_M04034_0043_000000000-L6NVV/InterOp/CorrectedIntMetricsOut.bin differ diff --git a/test_data/230825_M04034_0043_000000000-L6NVV/InterOp/ErrorMetricsOut.bin b/test_data/230825_M04034_0043_000000000-L6NVV/InterOp/ErrorMetricsOut.bin new file mode 100755 index 0000000..2c121e7 Binary files /dev/null and b/test_data/230825_M04034_0043_000000000-L6NVV/InterOp/ErrorMetricsOut.bin differ diff --git a/test_data/230825_M04034_0043_000000000-L6NVV/InterOp/ExtractionMetricsOut.bin b/test_data/230825_M04034_0043_000000000-L6NVV/InterOp/ExtractionMetricsOut.bin new file mode 100755 index 
0000000..abf5d68 Binary files /dev/null and b/test_data/230825_M04034_0043_000000000-L6NVV/InterOp/ExtractionMetricsOut.bin differ diff --git a/test_data/230825_M04034_0043_000000000-L6NVV/InterOp/IndexMetricsOut.bin b/test_data/230825_M04034_0043_000000000-L6NVV/InterOp/IndexMetricsOut.bin new file mode 100755 index 0000000..c66f85c Binary files /dev/null and b/test_data/230825_M04034_0043_000000000-L6NVV/InterOp/IndexMetricsOut.bin differ diff --git a/test_data/230825_M04034_0043_000000000-L6NVV/InterOp/QMetrics2030Out.bin b/test_data/230825_M04034_0043_000000000-L6NVV/InterOp/QMetrics2030Out.bin new file mode 100755 index 0000000..abbad46 Binary files /dev/null and b/test_data/230825_M04034_0043_000000000-L6NVV/InterOp/QMetrics2030Out.bin differ diff --git a/test_data/230825_M04034_0043_000000000-L6NVV/InterOp/QMetricsByLaneOut.bin b/test_data/230825_M04034_0043_000000000-L6NVV/InterOp/QMetricsByLaneOut.bin new file mode 100755 index 0000000..53018aa Binary files /dev/null and b/test_data/230825_M04034_0043_000000000-L6NVV/InterOp/QMetricsByLaneOut.bin differ diff --git a/test_data/230825_M04034_0043_000000000-L6NVV/InterOp/QMetricsOut.bin b/test_data/230825_M04034_0043_000000000-L6NVV/InterOp/QMetricsOut.bin new file mode 100755 index 0000000..a9a7301 Binary files /dev/null and b/test_data/230825_M04034_0043_000000000-L6NVV/InterOp/QMetricsOut.bin differ diff --git a/test_data/230825_M04034_0043_000000000-L6NVV/InterOp/TileMetricsOut.bin b/test_data/230825_M04034_0043_000000000-L6NVV/InterOp/TileMetricsOut.bin new file mode 100755 index 0000000..6602480 Binary files /dev/null and b/test_data/230825_M04034_0043_000000000-L6NVV/InterOp/TileMetricsOut.bin differ diff --git a/test_data/230825_M04034_0043_000000000-L6NVV/RunInfo.xml b/test_data/230825_M04034_0043_000000000-L6NVV/RunInfo.xml new file mode 100755 index 0000000..8234561 --- /dev/null +++ b/test_data/230825_M04034_0043_000000000-L6NVV/RunInfo.xml @@ -0,0 +1,15 @@ + + + + 000000000-L6NVV + M04034 + 
230825 + + + + + + + + + \ No newline at end of file diff --git a/test_data/230825_M04034_0043_000000000-L6NVV/RunParameters.xml b/test_data/230825_M04034_0043_000000000-L6NVV/RunParameters.xml new file mode 100755 index 0000000..bc54725 --- /dev/null +++ b/test_data/230825_M04034_0043_000000000-L6NVV/RunParameters.xml @@ -0,0 +1,92 @@ + + + MiSeq_1_2 + false + + 000000000-L6NVV + 15028382 + 2024-06-15T00:00:00 + 20764717 + + + MS7903868-00PR2 + 15041807 + 2024-06-10T00:00:00 + 20764159 + + + MS3500199-300V2 + 15033572 + 2024-05-08T00:00:00 + 20770331 + + true + + Post-Run Wash + + true + 4.0.0.1769 + 14 + 1 + 1 + MiSeq Control Software + + 230825_M04034_0043_000000000-L6NVV + M04034 + 43 + 9.5.12 + 4.0.0.1769 + 1.18.54.4 + 000000000-L6NVV + MS7903868-00PR2 + 15033572 + Version2 + MS3500199-300V2 + + + Pillar MultiCancer DNA RNA 25AUG2023 + Pillar MC DNA and RNA panel using TSO Comprehensive control DNA as an input + Custom + Custom + Amplicon + kfortmann@illumina.com + false + + + + + + + D:\Illumina\MiSeqTemp\230825_M04034_0043_000000000-L6NVV + D:\Illumina\RTATemp\230825_M04034_0043_000000000-L6NVV + D:\Illumina\MiSeqAnalysis\230825_M04034_0043_000000000-L6NVV + 230825 + MissedPostRunWash + C:\ProgramData\Illumina\MiSeq Control Software\Recipe + C:\program files\Illumina\MiSeq Control Software\Recipe + MS3500199-300V2 + D:\Illumina\MiSeq Control Software\SampleSheets + C:\Users\sbsadmin\Desktop\MiSeq-SampleSheet2023_08_25.csv + D:\Illumina\MiSeq Control Software\Manifests + D:\230825_M04034_0043_000000000-L6NVV + AutoFocus + Both + false + true + SampleSheet + 264827563 + 264827563 + Pillar MultiCancer DNA RNA 25AUG2023 + GenerateFastQWorkflow + 3.0.0.738 + 43044 + + + IpdRequested RunMonitoringRequested RunStorageRequested + kfortmann@illumina.com + false + false + false + 1A83A4EC1FB7986F + 43044 + \ No newline at end of file diff --git a/test_data/230825_M04034_0043_000000000-L6NVV/SampleSheet.csv b/test_data/230825_M04034_0043_000000000-L6NVV/SampleSheet.csv 
new file mode 100755 index 0000000..a7707e6 --- /dev/null +++ b/test_data/230825_M04034_0043_000000000-L6NVV/SampleSheet.csv @@ -0,0 +1,29 @@ +[Header],, +FileFormatVersion,2, +RunDescription,Pillar Multi-Cancer with CNV and RNA Fusion panel using TSO Comprehensive DNA and RNA controls as inputs, +RunName,Pillar Multi-Cancer with CNV and RNA Fusion Panel, +,, +[Reads],, +Index1Cycles,8, +Index2Cycles,8, +Read1Cycles,151, +Read2Cycles,151, +,, +[BCLConvert_Settings],, +MaskShortReads,35, +FastqCompressionFormat,gzip, +,, +[BCLConvert_Data],, +Lane,Sample_ID,index,index2,Sample_Project,OverrideCycles,custom_Description +1,Sample_TSOCDNA-25ng-MultiCancerDNA-rep1,ATTACTCG,CCTATCCT,Y151;I8;I8;Y151,AB-1234,LIBRARY_NAME:Pool1 +1,Sample_TSOCDNA-25ng-MultiCancerDNA-rep2,TCCGGAGA,CCTATCCT,Y151;I8;I8;Y151,AB-1234,LIBRARY_NAME:Pool1 +1,Sample_TSOCDNA-25ng-MultiCancerDNA-rep3,CGCTCATT,CCTATCCT,Y151;I8;I8;Y151,AB-1234,LIBRARY_NAME:Pool1 +1,Sample_TSOCDNA-50ng-MultiCancerDNA-rep1,GAGATTCC,CCTATCCT,Y151;I8;I8;Y151,AB-1234,LIBRARY_NAME:Pool1 +1,Sample_TSOCDNA-50ng-MultiCancerDNA-rep2,ATTCAGAA,CCTATCCT,Y151;I8;I8;Y151,AB-1234,LIBRARY_NAME:Pool1 +1,Sample_TSOCDNA-50ng-MultiCancerDNA-rep3,GAATTCGT,CCTATCCT,Y151;I8;I8;Y151,AB-1234,LIBRARY_NAME:Pool1 +1,Sample_TSOCRNA-25ng-MultiCancerRNA-rep1,ATTACTCG,GGCTCTGA,Y151;I8;I8;Y151,AB-1234,LIBRARY_NAME:Pool1 +1,Sample_TSOCRNA-25ng-MultiCancerRNA-rep2,TCCGGAGA,GGCTCTGA,Y151;I8;I8;Y151,AB-1234,LIBRARY_NAME:Pool1 +1,Sample_TSOCRNA-25ng-MultiCancerRNA-rep3,CGCTCATT,GGCTCTGA,Y151;I8;I8;Y151,AB-1234,LIBRARY_NAME:Pool1 +1,Sample_TSOCRNA-50ng-MultiCancerRNA-rep1,GAGATTCC,GGCTCTGA,Y151;I8;I8;Y151,AB-1234,LIBRARY_NAME:Pool1 +1,Sample_TSOCRNA-50ng-MultiCancerRNA-rep2,ATTCAGAA,GGCTCTGA,Y151;I8;I8;Y151,AB-1234,LIBRARY_NAME:Pool1 +1,Sample_TSOCRNA-50ng-MultiCancerRNA-rep3,GAATTCGT,GGCTCTGA,Y151;I8;I8;Y151,AB-1234,LIBRARY_NAME:Pool1 \ No newline at end of file diff --git a/test_data/230825_M04034_0043_000000000-L6NVV/Unaligned/Reports/Adapter_Metrics.csv 
b/test_data/230825_M04034_0043_000000000-L6NVV/Unaligned/Reports/Adapter_Metrics.csv new file mode 100644 index 0000000..c64bf1a --- /dev/null +++ b/test_data/230825_M04034_0043_000000000-L6NVV/Unaligned/Reports/Adapter_Metrics.csv @@ -0,0 +1,27 @@ +Lane,Sample_ID,index,index2,ReadNumber,AdapterBases,SampleBases,% Adapter Bases +1,Sample_TSOCDNA-25ng-MultiCancerDNA-rep1,ATTACTCG,CCTATCCT,1,0,11328926,0.000 +1,Sample_TSOCDNA-25ng-MultiCancerDNA-rep1,ATTACTCG,CCTATCCT,2,0,11328926,0.000 +1,Sample_TSOCDNA-25ng-MultiCancerDNA-rep2,TCCGGAG,CCTATCCT,1,0,15529595,0.000 +1,Sample_TSOCDNA-25ng-MultiCancerDNA-rep2,TCCGGAG,CCTATCCT,2,0,15529595,0.000 +1,Sample_TSOCDNA-25ng-MultiCancerDNA-rep3,CGCTCAT,CCTATCCT,1,0,15800489,0.000 +1,Sample_TSOCDNA-25ng-MultiCancerDNA-rep3,CGCTCAT,CCTATCCT,2,0,15800489,0.000 +1,Sample_TSOCDNA-50ng-MultiCancerDNA-rep1,GAGATTCC,CCTATCCT,1,0,19429321,0.000 +1,Sample_TSOCDNA-50ng-MultiCancerDNA-rep1,GAGATTCC,CCTATCCT,2,0,19429321,0.000 +1,Sample_TSOCDNA-50ng-MultiCancerDNA-rep2,ATTCAGAA,CCTATCCT,1,0,14657872,0.000 +1,Sample_TSOCDNA-50ng-MultiCancerDNA-rep2,ATTCAGAA,CCTATCCT,2,0,14657872,0.000 +1,Sample_TSOCDNA-50ng-MultiCancerDNA-rep3,GAATTCGT,CCTATCCT,1,0,10804201,0.000 +1,Sample_TSOCDNA-50ng-MultiCancerDNA-rep3,GAATTCGT,CCTATCCT,2,0,10804201,0.000 +1,Sample_TSOCRNA-25ng-MultiCancerRNA-rep1,ATTACTCT,GGCTCTG,1,0,957642,0.000 +1,Sample_TSOCRNA-25ng-MultiCancerRNA-rep1,ATTACTCT,GGCTCTG,2,0,957642,0.000 +1,Sample_TSOCRNA-25ng-MultiCancerRNA-rep2,TCCGGAGA,GGCTCTGA,1,0,1232160,0.000 +1,Sample_TSOCRNA-25ng-MultiCancerRNA-rep2,TCCGGAGA,GGCTCTGA,2,0,1232160,0.000 +1,Sample_TSOCRNA-25ng-MultiCancerRNA-rep3,CGCTCATT,GGCTCTGA,1,0,1524798,0.000 +1,Sample_TSOCRNA-25ng-MultiCancerRNA-rep3,CGCTCATT,GGCTCTGA,2,0,1524798,0.000 +1,Sample_TSOCRNA-50ng-MultiCancerRNA-rep1,GAGATTCC,GGCTCTGA,1,0,1425591,0.000 +1,Sample_TSOCRNA-50ng-MultiCancerRNA-rep1,GAGATTCC,GGCTCTGA,2,0,1425591,0.000 +1,Sample_TSOCRNA-50ng-MultiCancerRNA-rep2,ATTCAGAA,GGCTCTGA,1,0,1479498,0.000 
+1,Sample_TSOCRNA-50ng-MultiCancerRNA-rep2,ATTCAGAA,GGCTCTGA,2,0,1479498,0.000 +1,Sample_TSOCRNA-50ng-MultiCancerRNA-rep3,GAATTCGT,GGCTCTGA,1,0,1167532,0.000 +1,Sample_TSOCRNA-50ng-MultiCancerRNA-rep3,GAATTCGT,GGCTCTGA,2,0,1167532,0.000 +1,Undetermined,,,1,0,20586736,0.000 +1,Undetermined,,,2,0,20586736,0.000 diff --git a/test_data/230825_M04034_0043_000000000-L6NVV/Unaligned/Reports/Demultiplex_Stats.csv b/test_data/230825_M04034_0043_000000000-L6NVV/Unaligned/Reports/Demultiplex_Stats.csv new file mode 100644 index 0000000..f7b48ad --- /dev/null +++ b/test_data/230825_M04034_0043_000000000-L6NVV/Unaligned/Reports/Demultiplex_Stats.csv @@ -0,0 +1,14 @@ +Lane,SampleID,Index,# Reads,# Perfect Index Reads,# One Mismatch Index Reads,# Two Mismatch Index Reads,% Reads,% Perfect Index Reads,% One Mismatch Index Reads,% Two Mismatch Index Reads +1,Sample_TSOCDNA-25ng-MultiCancerDNA-rep1,ATTACTCG-CCTATCCT,75026,73648,1378,0,0.0977,0.9816,0.0184,0.0000 +1,Sample_TSOCDNA-25ng-MultiCancerDNA-rep2,TCCGGAGA-CCTATCCT,102724,101344,1380,0,0.1338,0.9866,0.0134,0.0000 +1,Sample_TSOCDNA-25ng-MultiCancerDNA-rep3,CGCTCATT-CCTATCCT,104581,102912,1669,0,0.1362,0.9840,0.0160,0.0000 +1,Sample_TSOCDNA-50ng-MultiCancerDNA-rep1,GAGATTCC-CCTATCCT,128671,126908,1763,0,0.1676,0.9863,0.0137,0.0000 +1,Sample_TSOCDNA-50ng-MultiCancerDNA-rep2,ATTCAGAA-CCTATCCT,97072,95813,1259,0,0.1264,0.9870,0.0130,0.0000 +1,Sample_TSOCDNA-50ng-MultiCancerDNA-rep3,GAATTCGT-CCTATCCT,71551,70250,1301,0,0.0932,0.9818,0.0182,0.0000 +1,Sample_TSOCRNA-25ng-MultiCancerRNA-rep1,ATTACTCG-GGCTCTGA,6369,6119,250,0,0.0083,0.9607,0.0393,0.0000 +1,Sample_TSOCRNA-25ng-MultiCancerRNA-rep2,TCCGGAGA-GGCTCTGA,8160,7923,237,0,0.0106,0.9710,0.0290,0.0000 +1,Sample_TSOCRNA-25ng-MultiCancerRNA-rep3,CGCTCATT-GGCTCTGA,10098,9777,321,0,0.0132,0.9682,0.0318,0.0000 +1,Sample_TSOCRNA-50ng-MultiCancerRNA-rep1,GAGATTCC-GGCTCTGA,9441,9146,295,0,0.0123,0.9688,0.0312,0.0000 
+1,Sample_TSOCRNA-50ng-MultiCancerRNA-rep2,ATTCAGAA-GGCTCTGA,9798,9537,261,0,0.0128,0.9734,0.0266,0.0000 +1,Sample_TSOCRNA-50ng-MultiCancerRNA-rep3,GAATTCGT-GGCTCTGA,7732,7462,270,0,0.0101,0.9651,0.0349,0.0000 +1,Undetermined,,136488,136488,0,0,0.1778,1.0000,0.0000,0.0000 diff --git a/test_data/230825_M04034_0043_000000000-L6NVV/Unaligned/Reports/Quality_Metrics.csv b/test_data/230825_M04034_0043_000000000-L6NVV/Unaligned/Reports/Quality_Metrics.csv new file mode 100644 index 0000000..7bf11ec --- /dev/null +++ b/test_data/230825_M04034_0043_000000000-L6NVV/Unaligned/Reports/Quality_Metrics.csv @@ -0,0 +1,27 @@ +Lane,SampleID,index,index2,ReadNumber,Yield,YieldQ30,QualityScoreSum,Mean Quality Score (PF),% Q30 +1,Sample_TSOCDNA-25ng-MultiCancerDNA-rep1,ATTACTCG,CCTATCCT,1,11328926,10785245,417203701,36.83,0.95 +1,Sample_TSOCDNA-25ng-MultiCancerDNA-rep1,ATTACTCG,CCTATCCT,2,11328926,10672112,414993706,36.63,0.94 +1,Sample_TSOCDNA-25ng-MultiCancerDNA-rep2,TCCGGAGA,CCTATCCT,1,15511324,14712802,570224419,36.76,0.95 +1,Sample_TSOCDNA-25ng-MultiCancerDNA-rep2,TCCGGAGA,CCTATCCT,2,15511324,14020052,552227865,35.60,0.90 +1,Sample_TSOCDNA-25ng-MultiCancerDNA-rep3,CGCTCATT,CCTATCCT,1,15791731,14984220,580688312,36.77,0.95 +1,Sample_TSOCDNA-25ng-MultiCancerDNA-rep3,CGCTCATT,CCTATCCT,2,15791731,14829761,577799233,36.59,0.94 +1,Sample_TSOCDNA-50ng-MultiCancerDNA-rep1,GAGATTCC,CCTATCCT,1,19429321,18513191,716480168,36.88,0.95 +1,Sample_TSOCDNA-50ng-MultiCancerDNA-rep1,GAGATTCC,CCTATCCT,2,19429321,18559221,719309887,37.02,0.96 +1,Sample_TSOCDNA-50ng-MultiCancerDNA-rep2,ATTCAGAA,CCTATCCT,1,14657872,13963605,540130466,36.85,0.95 +1,Sample_TSOCDNA-50ng-MultiCancerDNA-rep2,ATTCAGAA,CCTATCCT,2,14657872,13319397,523356633,35.70,0.91 +1,Sample_TSOCDNA-50ng-MultiCancerDNA-rep3,GAATTCGT,CCTATCCT,1,10804201,10296552,398233671,36.86,0.95 +1,Sample_TSOCDNA-50ng-MultiCancerDNA-rep3,GAATTCGT,CCTATCCT,2,10804201,10223490,397013622,36.75,0.95 
+1,Sample_TSOCRNA-25ng-MultiCancerRNA-rep1,ATTACTCG,GGCTCTGA,1,961719,894390,34891320,36.28,0.93 +1,Sample_TSOCRNA-25ng-MultiCancerRNA-rep1,ATTACTCG,GGCTCTGA,2,961719,838346,33606784,34.94,0.87 +1,Sample_TSOCRNA-25ng-MultiCancerRNA-rep2,TCCGGAGA,GGCTCTGA,1,1232160,1108845,43830583,35.57,0.90 +1,Sample_TSOCRNA-25ng-MultiCancerRNA-rep2,TCCGGAGA,GGCTCTGA,2,1232160,1004704,41279669,33.50,0.82 +1,Sample_TSOCRNA-25ng-MultiCancerRNA-rep3,CGCTCATT,GGCTCTGA,1,1524798,1410366,55120939,36.15,0.92 +1,Sample_TSOCRNA-25ng-MultiCancerRNA-rep3,CGCTCATT,GGCTCTGA,2,1524798,1326032,53209191,34.90,0.87 +1,Sample_TSOCRNA-50ng-MultiCancerRNA-rep1,GAGATTCC,GGCTCTGA,1,1425591,1322906,51619293,36.21,0.93 +1,Sample_TSOCRNA-50ng-MultiCancerRNA-rep1,GAGATTCC,GGCTCTGA,2,1425591,1251623,50095564,35.14,0.88 +1,Sample_TSOCRNA-50ng-MultiCancerRNA-rep2,ATTCAGAA,GGCTCTGA,1,1479498,1365894,53434892,36.12,0.92 +1,Sample_TSOCRNA-50ng-MultiCancerRNA-rep2,ATTCAGAA,GGCTCTGA,2,1479498,1200631,49402041,33.39,0.81 +1,Sample_TSOCRNA-50ng-MultiCancerRNA-rep3,GAATTCGT,GGCTCTGA,1,1167532,1085821,42349972,36.27,0.93 +1,Sample_TSOCRNA-50ng-MultiCancerRNA-rep3,GAATTCGT,GGCTCTGA,2,1167532,1018714,40850803,34.99,0.87 +1,Undetermined,,,1,20609688,19714280,761042950,36.93,0.96 +1,Undetermined,,,2,20609688,18401701,729509339,35.40,0.89 diff --git a/test_data/230825_M04034_0043_000000000-L6NVV/Unaligned/Reports/RunInfo.xml b/test_data/230825_M04034_0043_000000000-L6NVV/Unaligned/Reports/RunInfo.xml new file mode 100755 index 0000000..8234561 --- /dev/null +++ b/test_data/230825_M04034_0043_000000000-L6NVV/Unaligned/Reports/RunInfo.xml @@ -0,0 +1,15 @@ + + + + 000000000-L6NVV + M04034 + 230825 + + + + + + + + + \ No newline at end of file diff --git a/test_data/230825_M04034_0043_000000000-L6NVV/Unaligned/Undetermined_S0_L001_R1_001.fastq.gz b/test_data/230825_M04034_0043_000000000-L6NVV/Unaligned/Undetermined_S0_L001_R1_001.fastq.gz new file mode 100644 index 0000000..e61edd6 Binary files /dev/null and 
b/test_data/230825_M04034_0043_000000000-L6NVV/Unaligned/Undetermined_S0_L001_R1_001.fastq.gz differ diff --git a/test_data/230825_M04034_0043_000000000-L6NVV/Unaligned/Undetermined_S0_L001_R2_001.fastq.gz b/test_data/230825_M04034_0043_000000000-L6NVV/Unaligned/Undetermined_S0_L001_R2_001.fastq.gz new file mode 100644 index 0000000..4f90717 Binary files /dev/null and b/test_data/230825_M04034_0043_000000000-L6NVV/Unaligned/Undetermined_S0_L001_R2_001.fastq.gz differ diff --git a/test_data/230825_M04034_0043_000000000-L6NVV/pipeline_info/nf_core_demultiplex_software_mqc_versions.yml b/test_data/230825_M04034_0043_000000000-L6NVV/pipeline_info/nf_core_demultiplex_software_mqc_versions.yml new file mode 100644 index 0000000..980c4d4 --- /dev/null +++ b/test_data/230825_M04034_0043_000000000-L6NVV/pipeline_info/nf_core_demultiplex_software_mqc_versions.yml @@ -0,0 +1,5 @@ +BCLCONVERT: + bclconvert: 4.3.13 +Workflow: + nf-core/demultiplex: v1.6.1 + Nextflow: 24.10.5 diff --git a/test_data/test_config/multiqc_flowcell_config.yaml b/test_data/test_config/multiqc_flowcell_config.yaml deleted file mode 120000 index 5ed8412..0000000 --- a/test_data/test_config/multiqc_flowcell_config.yaml +++ /dev/null @@ -1 +0,0 @@ -../../config/tool_config/multiqc_flowcell_config.yaml \ No newline at end of file diff --git a/test_data/test_config/multiqc_flowcell_config.yaml b/test_data/test_config/multiqc_flowcell_config.yaml new file mode 100644 index 0000000..5ed8412 --- /dev/null +++ b/test_data/test_config/multiqc_flowcell_config.yaml @@ -0,0 +1 @@ +../../config/tool_config/multiqc_flowcell_config.yaml \ No newline at end of file diff --git a/tests/integration_tests/test_validate_output.py b/tests/integration_tests/test_validate_output.py index 42bf27f..65751f2 100644 --- a/tests/integration_tests/test_validate_output.py +++ b/tests/integration_tests/test_validate_output.py @@ -4,36 +4,35 @@ import os.path import subprocess from bs4 import BeautifulSoup -import itertools # Run 
pipeline in test mode, this is done once per test session @pytest.fixture(scope="session", autouse=True) -def run_pipeline(tmpdir_factory): +def result_dir(request, tmpdir_factory): + demultiplexer = request.param + result_dir = tmpdir_factory.mktemp("results") + extra_profile = "test_bclconvert" if demultiplexer == "bclconvert" else "test" + subprocess.run( [ "nextflow", "run", "main.nf", "-profile", - "dev,test,singularity", + f"dev,{extra_profile},singularity", + "--demultiplexer", + demultiplexer, "--result_dir", result_dir, ], check=True, ) - yield result_dir - -# Returns directory where pipeline results have been written. -# All tests use this folder as input, veryfing that reports -# have been generated as expected. -@pytest.fixture -def result_dir(run_pipeline): - return run_pipeline + yield result_dir +@pytest.mark.parametrize("result_dir", ["bcl2fastq"], indirect=True) def test_results_dirs_exist(result_dir): flowcell_dir = os.path.join(result_dir, "flowcell_report") projects_dir = os.path.join(result_dir, "projects") @@ -42,6 +41,7 @@ def test_results_dirs_exist(result_dir): assert os.path.isdir(projects_dir) +@pytest.mark.parametrize("result_dir", ["bcl2fastq"], indirect=True) def test_project_dirs_exist(result_dir): projects_dir = os.path.join(result_dir, "projects") projects = ["Zymo", "Qiagen", "NoProject"] @@ -50,6 +50,7 @@ def test_project_dirs_exist(result_dir): assert os.path.isdir(os.path.join(projects_dir, project)) +@pytest.mark.parametrize("result_dir", ["bcl2fastq"], indirect=True) def test_flowcell_report_exist(result_dir): flowcell_dir = os.path.join(result_dir, "flowcell_report") report_path = os.path.join( @@ -59,6 +60,7 @@ def test_flowcell_report_exist(result_dir): assert os.path.isfile(report_path) +@pytest.mark.parametrize("result_dir", ["bcl2fastq"], indirect=True) def test_project_reports_exist(result_dir): projects_dir = os.path.join(result_dir, "projects") projects = ["Zymo", "Qiagen", "NoProject"] @@ -72,6 +74,7 @@ def 
test_project_reports_exist(result_dir): assert os.path.isfile(report_path) +@pytest.mark.parametrize("result_dir", ["bcl2fastq"], indirect=True) def check_sections_in_report(report_path, sections): with open(report_path, "r") as html_file: parser = BeautifulSoup(html_file.read(), "lxml") @@ -80,6 +83,7 @@ def check_sections_in_report(report_path, sections): assert len(hits) > 0 +@pytest.mark.parametrize("result_dir", ["bcl2fastq"], indirect=True) def test_all_sections_included_in_flowcell_report(result_dir): flowcell_dir = os.path.join(result_dir, "flowcell_report") report_path = os.path.join( @@ -98,6 +102,26 @@ def test_all_sections_included_in_flowcell_report(result_dir): check_sections_in_report(report_path, sections) +@pytest.mark.parametrize("result_dir", ["bclconvert"], indirect=True) +def test_all_sections_included_in_bclcovert_flowcell_report(result_dir): + flowcell_dir = os.path.join(result_dir, "flowcell_report") + report_path = os.path.join( + flowcell_dir, "230825_M04034_0043_000000000-L6NVV_multiqc_report.html" + ) + sections = [ + "general_stats", + "rrna", + "sequencing_metadata", + "bclconvert", + "interop", + "fastq_screen", + "fastqc", + ] + + check_sections_in_report(report_path, sections) + + +@pytest.mark.parametrize("result_dir", ["bcl2fastq"], indirect=True) def test_all_sections_included_in_project_reports(result_dir): projects_dir = os.path.join(result_dir, "projects") projects = ["Zymo", "Qiagen", "NoProject"] diff --git a/tests/unit_tests/test_get_metadata.py b/tests/unit_tests/test_get_metadata.py index 49e034c..f5ee44e 100644 --- a/tests/unit_tests/test_get_metadata.py +++ b/tests/unit_tests/test_get_metadata.py @@ -13,7 +13,7 @@ @pytest.fixture def runfolder_info(): - return RunfolderInfo("test_data/210510_M03910_0104_000000000-JHGJL", "Unaligned") + return RunfolderInfo("test_data/210510_M03910_0104_000000000-JHGJL") @pytest.fixture @@ -46,18 +46,6 @@ def test_read_run_parameters(runfolder_info): assert 
len(run_parameters["RunParameters"]) == 63 -def test_read_stats_json(runfolder_info): - stats_json = runfolder_info.read_stats_json("Unaligned") - assert len(stats_json) == 6 - - -def test_bcl2fastq_version(runfolder_info): - bcl2fastq_version = runfolder_info.get_bcl2fastq_version( - "test_data/210510_M03910_0104_000000000-JHGJL" - ) - assert bcl2fastq_version == "2.20.0.422" - - def test_get_software_version(runfolder_info): software_versions = runfolder_info.get_software_version( "test_data/210510_M03910_0104_000000000-JHGJL" @@ -76,9 +64,7 @@ def test_get_run_parameters(runfolder_info): def test_run_parameters_novaseq_x(): - runfolder_info = RunfolderInfo( - "test_data/20230125_lh00103_0036_A222VGWLT3", "Unaligned" - ) + runfolder_info = RunfolderInfo("test_data/20230125_lh00103_0036_A222VGWLT3") filtered_run_parameters = runfolder_info.get_run_parameters() assert filtered_run_parameters["Instrument type"] == "NovaSeqXPlus" assert filtered_run_parameters["Control software"] == "control-software" @@ -86,14 +72,10 @@ def test_run_parameters_novaseq_x(): def test_find_flowcell_type_novaseqx(): - runfolder_info = RunfolderInfo( - "test_data/20230125_lh00103_0036_A222VGWLT3", "Unaligned" - ) + runfolder_info = RunfolderInfo("test_data/20230125_lh00103_0036_A222VGWLT3") flowcell_type = runfolder_info.find_flowcell_type_novaseqx() assert flowcell_type["Flowcell type"] == "10B" - runfolder_info = RunfolderInfo( - "test_data/210510_M03910_0104_000000000-JHGJL", "Unaligned" - ) + runfolder_info = RunfolderInfo("test_data/210510_M03910_0104_000000000-JHGJL") flowcell_type = runfolder_info.find_flowcell_type_novaseqx() assert flowcell_type is None