diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
new file mode 100644
index 0000000..6963219
--- /dev/null
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -0,0 +1,10 @@
+**What problems does this PR solve?**
+Provide a short description or reference to the relevant issue, explaining what problems this PR solves.
+
+**An outline of the validation procedure for this feature**
+In addition to automatic tests, has any manual testing been carried out?
+
+**Risk analysis - Reasons for careful code review**
+If any of the boxes below are checked, extra careful code review should be initiated.
+
+ - [ ] This PR contains code that could remove data
diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml
index ffb81dc..f1eb519 100644
--- a/.github/workflows/run_tests.yml
+++ b/.github/workflows/run_tests.yml
@@ -26,7 +26,7 @@ jobs:
env:
CAPSULE_LOG: none
run: |
- curl -s https://get.nextflow.io | bash
+ curl -fsSL https://github.com/nextflow-io/nextflow/releases/download/v24.10.4/nextflow -o nextflow | bash
sudo mv nextflow /usr/local/bin/
- name: Make Nextflow binary executable
@@ -35,11 +35,12 @@ jobs:
- name: Set up python
uses: actions/setup-python@v2
with:
- python-version: 3.9
+ python-version: 3.11
architecture: x64
- name: Install test requirements
- run: pip install -r requirements-dev.txt
+ run: |
+ pip install -r requirements-dev.txt
- name: Run tests
run: pytest tests
diff --git a/bin/get_metadata.py b/bin/get_metadata.py
index 282b37d..202b47b 100644
--- a/bin/get_metadata.py
+++ b/bin/get_metadata.py
@@ -3,6 +3,8 @@
import xmltodict
from collections import OrderedDict
import re
+import glob
+import csv
import argparse
import os
import json
@@ -12,11 +14,10 @@
class RunfolderInfo:
- def __init__(self, runfolder, bcl2fastq_outdir):
+ def __init__(self, runfolder):
self.runfolder = runfolder
self.run_info = self.read_run_info()
self.run_parameters = self.read_run_parameters()
- self.stats_json = self.read_stats_json(bcl2fastq_outdir)
self.description_and_identifier = OrderedDict()
self.run_parameters_tags = {
"RunId": "Run ID",
@@ -79,27 +80,11 @@ def find_flowcell_type_novaseqx(self):
return None
return {"Flowcell type": flowcell_type}
- def read_stats_json(self, bcl2fastq_outdir):
- stats_json_path = os.path.join(
- self.runfolder, bcl2fastq_outdir, "Stats/Stats.json"
- )
- if os.path.exists(stats_json_path):
- with open(stats_json_path) as f:
- return json.load(f)
- else:
- return None
-
- def get_bcl2fastq_version(self, runfolder):
- with open(os.path.join(runfolder, "bcl2fastq_version")) as f:
- bcl2fastq_str = f.read()
- return bcl2fastq_str.split("v")[1].strip()
-
def get_software_version(self, runfolder):
- with open(
- Path(runfolder)
- / "pipeline_info"
- / "nf_core_pipeline_software_mqc_versions.yml"
- ) as f:
+ pipeline_dir = Path(runfolder) / "pipeline_info"
+ pipeline_info_filename = next(pipeline_dir.glob("*_software_mqc_versions.yml"))
+
+ with open(pipeline_info_filename) as f:
return {
software: version
for software_dict in yaml.safe_load(f).values()
@@ -151,15 +136,6 @@ def get_info(self):
return results
def get_demultiplexing_info(self):
- try:
- return {
- "Demultiplexing": {
- "bcl2fastq": self.get_bcl2fastq_version(self.runfolder)
- }
- }
- except FileNotFoundError:
- pass
-
try:
return {"Demultiplexing": self.get_software_version(self.runfolder)}
except FileNotFoundError:
@@ -173,18 +149,11 @@ def get_demultiplexing_info(self):
parser.add_argument(
"--runfolder", type=str, required=True, help="Path to runfolder"
)
- parser.add_argument(
- "--bcl2fastq-outdir",
- type=str,
- default="Data/Intensities/BaseCalls",
- help="Path to bcl2fastq output folder relative to the runfolder",
- )
args = parser.parse_args()
runfolder = args.runfolder
- bcl2fastq_outdir = args.bcl2fastq_outdir
- runfolder_info = RunfolderInfo(runfolder, bcl2fastq_outdir)
+ runfolder_info = RunfolderInfo(runfolder)
info = runfolder_info.get_info()
print(
diff --git a/config/nextflow_config/singularity.config b/config/nextflow_config/singularity.config
index 3e37f21..f963b7b 100644
--- a/config/nextflow_config/singularity.config
+++ b/config/nextflow_config/singularity.config
@@ -5,25 +5,25 @@ singularity {
process {
withName: 'FASTQC' {
- container = 'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--hdfd78af_1'
+ container = 'https://depot.galaxyproject.org/singularity/fastqc:0.12.1--hdfd78af_0'
}
withName: 'FASTQ_SCREEN' {
- container = 'https://depot.galaxyproject.org/singularity/fastq-screen:0.14.0--pl5262hdfd78af_1'
+ container = 'https://depot.galaxyproject.org/singularity/fastq-screen:0.16.0--pl5321hdfd78af_0'
}
withName: 'GET_QC_THRESHOLDS' {
- container = 'https://depot.galaxyproject.org/singularity/checkqc:3.6.6--pyhdfd78af_0'
+ container = 'https://depot.galaxyproject.org/singularity/checkqc:4.0.7--pyhdfd78af_0'
}
withName: 'GET_METADATA' {
- container = 'https://depot.galaxyproject.org/singularity/checkqc:3.6.6--pyhdfd78af_0'
+ container = 'https://depot.galaxyproject.org/singularity/checkqc:4.0.7--pyhdfd78af_0'
}
withName: 'INTEROP_SUMMARY' {
- container = 'https://depot.galaxyproject.org/singularity/illumina-interop:1.2.4--hdbdd923_2'
+ container = 'https://depot.galaxyproject.org/singularity/illumina-interop:1.5.0--h503566f_0'
}
withName: 'MULTIQC_PER_FLOWCELL' {
- container = 'https://depot.galaxyproject.org/singularity/multiqc:1.21--pyhdfd78af_0'
+ container = 'https://depot.galaxyproject.org/singularity/multiqc:1.32--pyhdfd78af_1'
}
withName: 'MULTIQC_PER_PROJECT' {
- container = 'https://depot.galaxyproject.org/singularity/multiqc:1.21--pyhdfd78af_0'
+ container = 'https://depot.galaxyproject.org/singularity/multiqc:1.32--pyhdfd78af_1'
}
}
diff --git a/config/nextflow_config/test_bclconvert.config b/config/nextflow_config/test_bclconvert.config
new file mode 100644
index 0000000..77f0865
--- /dev/null
+++ b/config/nextflow_config/test_bclconvert.config
@@ -0,0 +1,19 @@
+/*
+========================================================================================
+ Nextflow config file for running minimal tests
+========================================================================================
+ Defines input files and everything required to run a fast and simple pipeline test.
+ Use as follows:
+ nextflow run main.nf -profile dev,test_bclconvert,singularity --demultiplexer bclconvert
+
+
+ This config takes inspiration from https://github.com/nf-core/rnaseq
+----------------------------------------------------------------------------------------
+*/
+
+params {
+ run_folder = "$baseDir/test_data/230825_M04034_0043_000000000-L6NVV"
+ fastqscreen_databases = "$baseDir/test_data/Test_FastQ_Screen_Genomes"
+ checkqc_config = "$baseDir/test_data/checkqc_config.yaml"
+ config_dir = "$baseDir/test_data/test_config"
+}
diff --git a/config/tool_config/multiqc_flowcell_config.yaml b/config/tool_config/multiqc_flowcell_config.yaml
index fd2f775..3c6205e 100644
--- a/config/tool_config/multiqc_flowcell_config.yaml
+++ b/config/tool_config/multiqc_flowcell_config.yaml
@@ -4,6 +4,7 @@ run_modules:
- fastqc
- fastq_screen
- bcl2fastq
+ - bclconvert
- interop
- custom_content
diff --git a/main.nf b/main.nf
index 8bb3cf0..06f57b4 100644
--- a/main.nf
+++ b/main.nf
@@ -12,7 +12,9 @@ params.run_folder = "/path/to/run_folder"
params.result_dir = "results"
fastqscreen_default_databases = "FastQ_Screen_Genomes"
params.fastqscreen_databases = fastqscreen_default_databases
-params.bcl2fastq_outdir = "Unaligned"
+params.demultiplexer = "bcl2fastq"
+params.demultiplexer_outdir = "Unaligned"
+
params.checkqc_config = "" // See: https://github.com/Molmed/checkQC
params.assets_dir = "$baseDir/assets"
params.config_dir = "$baseDir/config/tool_config"
@@ -47,11 +49,12 @@ def helpMessage() {
Optional parameters:
--result_dir Path to write results (default: results)
- --bcl2fastq_outdir Folder name to check for fastq.gz files and demultiplexing stats (default: Unaligned)
+ --demultiplexer_outdir Folder name to check for fastq.gz files and demultiplexing stats (default: Unaligned)
--checkqc_config Configuration file for CheckQC
--assets_dir Location of project assests (default: "\$baseDir/assets").
--config_dir Location of tool configuration files (default: "\$baseDir/config/tool_config").
--script_dir Location of project scripts (default: "\$baseDir/bin")
+ --demultiplexer Name of demultiplexer used e.g 'bcl2fastq' or 'bclconvert'
--help Print this help message.
@@ -61,7 +64,7 @@ def helpMessage() {
"""
}
-if (params.help || !params.run_folder){
+if (params.help || !params.run_folder || !params.demultiplexer){
helpMessage()
exit 0
}
@@ -72,7 +75,8 @@ workflow {
Channel.fromPath(params.run_folder,checkIfExists:true)
.ifEmpty { "Error: No run folder (--run_folder) given."; exit 1 }
.set {run_folder}
- CHECK_RUN_QUALITY(run_folder)
+ Channel.value(params.demultiplexer).set {demultiplexer}
+ CHECK_RUN_QUALITY(run_folder, demultiplexer)
}
@@ -83,13 +87,13 @@ workflow.onComplete {
def get_project_and_reads(run_folder) {
Channel
- .fromPath("${run_folder}/${params.bcl2fastq_outdir}/**.fastq.gz" )
+ .fromPath("${run_folder}/${params.demultiplexer_outdir}/**.fastq.gz" )
.filter( ~/.*_[^I]\d_001\.fastq\.gz$/ )
.ifEmpty { "Error: No fastq files found under ${run_folder}/ !\n"; exit 1 }
.map {
it.toString().indexOf('Undetermined') > 0 ?
['NoProject', it] :
- [(it.toString() =~ /^.*\/${params.bcl2fastq_outdir}\/([^\/]+)\/.*\.fastq\.gz$/)[0][1],it]
+ [(it.toString() =~ /^.*\/${params.demultiplexer_outdir}\/([^\/]+)\/.*\.fastq\.gz$/)[0][1],it]
}
}
@@ -121,8 +125,21 @@ workflow CHECK_RUN_QUALITY {
take:
run_folder
+ demultiplexer
main:
+ if (params.demultiplexer == 'bclconvert') {
+ Channel.fromPath([
+ "${params.run_folder}/${params.demultiplexer_outdir}/Reports/*.csv",
+ "${params.run_folder}/RunInfo.xml"])
+ .collect().ifEmpty([])
+ .set { demux_stats }
+ } else {
+ Channel.fromPath("${params.run_folder}/${params.demultiplexer_outdir}/Stats/Stats.json")
+ .collect().ifEmpty([])
+ .set { demux_stats }
+ }
+
INTEROP_SUMMARY(run_folder)
GET_QC_THRESHOLDS(run_folder)
GET_METADATA(run_folder)
@@ -132,14 +149,15 @@ workflow CHECK_RUN_QUALITY {
FASTQ_SCREEN(project_and_reads,
params.config_dir,
params.fastqscreen_databases)
- MULTIQC_PER_FLOWCELL( params.run_folder,
+ MULTIQC_PER_FLOWCELL(
+ params.run_folder,
FASTQC.out.map{ it[1] }.collect(),
FASTQ_SCREEN.out.results.map{ it[1] }.collect(),
FASTQ_SCREEN.out.tsv.map{ it[1] }.collectFile(keepHeader:true,skip:1,sort:true),
INTEROP_SUMMARY.out.collect(),
GET_QC_THRESHOLDS.out.collect().ifEmpty([]),
GET_METADATA.out.collect(),
- Channel.fromPath("${params.run_folder}/${params.bcl2fastq_outdir}/Stats/Stats.json").collect().ifEmpty([]),
+ demux_stats,
params.assets_dir,
params.config_dir)
MULTIQC_PER_PROJECT( params.run_folder,
@@ -239,14 +257,9 @@ process GET_METADATA {
path 'sequencing_metadata_mqc.yaml'
script:
- if ( params.bcl2fastq_outdir ){
- bcl2fastq_outdir_section = "--bcl2fastq-outdir ${params.bcl2fastq_outdir}"
- } else {
- bcl2fastq_outdir_section = ""
- }
"""
python ${params.script_dir}/get_metadata.py --runfolder $runfolder \\
- $bcl2fastq_outdir_section &> sequencing_metadata_mqc.yaml
+ &> sequencing_metadata_mqc.yaml
"""
}
@@ -277,7 +290,7 @@ process MULTIQC_PER_FLOWCELL {
path ('Interop_summary/*') // Interop log
path qc_thresholds // Quality check thresholds (optional)
path sequencing_metadata // Sequencing meta data ( custom content data )
- path bcl2fastq_stats // Bcl2Fastq logs
+ path demux_stats // demux logs
path assets // Staged copy of assets folder
path config_dir // Staged copy of config folder
diff --git a/nextflow.config b/nextflow.config
index d0c673c..0f1fe50 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -62,5 +62,9 @@ profiles {
test {
includeConfig 'config/nextflow_config/test.config'
}
+
+ test_bclconvert {
+ includeConfig 'config/nextflow_config/test_bclconvert.config'
+ }
}
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 3fc5417..5e23c1f 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -1,5 +1,5 @@
pytest==6.2.5
black==24.3.0
beautifulsoup4==4.10.0
-checkqc==3.6.6
+checkqc==4.1.1rc1
lxml==4.9.2
diff --git a/test_data/230825_M04034_0043_000000000-L6NVV/InterOp/ControlMetricsOut.bin b/test_data/230825_M04034_0043_000000000-L6NVV/InterOp/ControlMetricsOut.bin
new file mode 100755
index 0000000..9b2e9da
Binary files /dev/null and b/test_data/230825_M04034_0043_000000000-L6NVV/InterOp/ControlMetricsOut.bin differ
diff --git a/test_data/230825_M04034_0043_000000000-L6NVV/InterOp/CorrectedIntMetricsOut.bin b/test_data/230825_M04034_0043_000000000-L6NVV/InterOp/CorrectedIntMetricsOut.bin
new file mode 100755
index 0000000..914d339
Binary files /dev/null and b/test_data/230825_M04034_0043_000000000-L6NVV/InterOp/CorrectedIntMetricsOut.bin differ
diff --git a/test_data/230825_M04034_0043_000000000-L6NVV/InterOp/ErrorMetricsOut.bin b/test_data/230825_M04034_0043_000000000-L6NVV/InterOp/ErrorMetricsOut.bin
new file mode 100755
index 0000000..2c121e7
Binary files /dev/null and b/test_data/230825_M04034_0043_000000000-L6NVV/InterOp/ErrorMetricsOut.bin differ
diff --git a/test_data/230825_M04034_0043_000000000-L6NVV/InterOp/ExtractionMetricsOut.bin b/test_data/230825_M04034_0043_000000000-L6NVV/InterOp/ExtractionMetricsOut.bin
new file mode 100755
index 0000000..abf5d68
Binary files /dev/null and b/test_data/230825_M04034_0043_000000000-L6NVV/InterOp/ExtractionMetricsOut.bin differ
diff --git a/test_data/230825_M04034_0043_000000000-L6NVV/InterOp/IndexMetricsOut.bin b/test_data/230825_M04034_0043_000000000-L6NVV/InterOp/IndexMetricsOut.bin
new file mode 100755
index 0000000..c66f85c
Binary files /dev/null and b/test_data/230825_M04034_0043_000000000-L6NVV/InterOp/IndexMetricsOut.bin differ
diff --git a/test_data/230825_M04034_0043_000000000-L6NVV/InterOp/QMetrics2030Out.bin b/test_data/230825_M04034_0043_000000000-L6NVV/InterOp/QMetrics2030Out.bin
new file mode 100755
index 0000000..abbad46
Binary files /dev/null and b/test_data/230825_M04034_0043_000000000-L6NVV/InterOp/QMetrics2030Out.bin differ
diff --git a/test_data/230825_M04034_0043_000000000-L6NVV/InterOp/QMetricsByLaneOut.bin b/test_data/230825_M04034_0043_000000000-L6NVV/InterOp/QMetricsByLaneOut.bin
new file mode 100755
index 0000000..53018aa
Binary files /dev/null and b/test_data/230825_M04034_0043_000000000-L6NVV/InterOp/QMetricsByLaneOut.bin differ
diff --git a/test_data/230825_M04034_0043_000000000-L6NVV/InterOp/QMetricsOut.bin b/test_data/230825_M04034_0043_000000000-L6NVV/InterOp/QMetricsOut.bin
new file mode 100755
index 0000000..a9a7301
Binary files /dev/null and b/test_data/230825_M04034_0043_000000000-L6NVV/InterOp/QMetricsOut.bin differ
diff --git a/test_data/230825_M04034_0043_000000000-L6NVV/InterOp/TileMetricsOut.bin b/test_data/230825_M04034_0043_000000000-L6NVV/InterOp/TileMetricsOut.bin
new file mode 100755
index 0000000..6602480
Binary files /dev/null and b/test_data/230825_M04034_0043_000000000-L6NVV/InterOp/TileMetricsOut.bin differ
diff --git a/test_data/230825_M04034_0043_000000000-L6NVV/RunInfo.xml b/test_data/230825_M04034_0043_000000000-L6NVV/RunInfo.xml
new file mode 100755
index 0000000..8234561
--- /dev/null
+++ b/test_data/230825_M04034_0043_000000000-L6NVV/RunInfo.xml
@@ -0,0 +1,15 @@
+
+
+
+ 000000000-L6NVV
+ M04034
+ 230825
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/test_data/230825_M04034_0043_000000000-L6NVV/RunParameters.xml b/test_data/230825_M04034_0043_000000000-L6NVV/RunParameters.xml
new file mode 100755
index 0000000..bc54725
--- /dev/null
+++ b/test_data/230825_M04034_0043_000000000-L6NVV/RunParameters.xml
@@ -0,0 +1,92 @@
+
+
+ MiSeq_1_2
+ false
+
+ 000000000-L6NVV
+ 15028382
+ 2024-06-15T00:00:00
+ 20764717
+
+
+ MS7903868-00PR2
+ 15041807
+ 2024-06-10T00:00:00
+ 20764159
+
+
+ MS3500199-300V2
+ 15033572
+ 2024-05-08T00:00:00
+ 20770331
+
+ true
+
+ Post-Run Wash
+
+ true
+ 4.0.0.1769
+ 14
+ 1
+ 1
+ MiSeq Control Software
+
+ 230825_M04034_0043_000000000-L6NVV
+ M04034
+ 43
+ 9.5.12
+ 4.0.0.1769
+ 1.18.54.4
+ 000000000-L6NVV
+ MS7903868-00PR2
+ 15033572
+ Version2
+ MS3500199-300V2
+
+
+ Pillar MultiCancer DNA RNA 25AUG2023
+ Pillar MC DNA and RNA panel using TSO Comprehensive control DNA as an input
+ Custom
+ Custom
+ Amplicon
+ kfortmann@illumina.com
+ false
+
+
+
+
+
+
+ D:\Illumina\MiSeqTemp\230825_M04034_0043_000000000-L6NVV
+ D:\Illumina\RTATemp\230825_M04034_0043_000000000-L6NVV
+ D:\Illumina\MiSeqAnalysis\230825_M04034_0043_000000000-L6NVV
+ 230825
+ MissedPostRunWash
+ C:\ProgramData\Illumina\MiSeq Control Software\Recipe
+ C:\program files\Illumina\MiSeq Control Software\Recipe
+ MS3500199-300V2
+ D:\Illumina\MiSeq Control Software\SampleSheets
+ C:\Users\sbsadmin\Desktop\MiSeq-SampleSheet2023_08_25.csv
+ D:\Illumina\MiSeq Control Software\Manifests
+ D:\230825_M04034_0043_000000000-L6NVV
+ AutoFocus
+ Both
+ false
+ true
+ SampleSheet
+ 264827563
+ 264827563
+ Pillar MultiCancer DNA RNA 25AUG2023
+ GenerateFastQWorkflow
+ 3.0.0.738
+ 43044
+
+
+ IpdRequested RunMonitoringRequested RunStorageRequested
+ kfortmann@illumina.com
+ false
+ false
+ false
+ 1A83A4EC1FB7986F
+ 43044
+
\ No newline at end of file
diff --git a/test_data/230825_M04034_0043_000000000-L6NVV/SampleSheet.csv b/test_data/230825_M04034_0043_000000000-L6NVV/SampleSheet.csv
new file mode 100755
index 0000000..a7707e6
--- /dev/null
+++ b/test_data/230825_M04034_0043_000000000-L6NVV/SampleSheet.csv
@@ -0,0 +1,29 @@
+[Header],,
+FileFormatVersion,2,
+RunDescription,Pillar Multi-Cancer with CNV and RNA Fusion panel using TSO Comprehensive DNA and RNA controls as inputs,
+RunName,Pillar Multi-Cancer with CNV and RNA Fusion Panel,
+,,
+[Reads],,
+Index1Cycles,8,
+Index2Cycles,8,
+Read1Cycles,151,
+Read2Cycles,151,
+,,
+[BCLConvert_Settings],,
+MaskShortReads,35,
+FastqCompressionFormat,gzip,
+,,
+[BCLConvert_Data],,
+Lane,Sample_ID,index,index2,Sample_Project,OverrideCycles,custom_Description
+1,Sample_TSOCDNA-25ng-MultiCancerDNA-rep1,ATTACTCG,CCTATCCT,Y151;I8;I8;Y151,AB-1234,LIBRARY_NAME:Pool1
+1,Sample_TSOCDNA-25ng-MultiCancerDNA-rep2,TCCGGAGA,CCTATCCT,Y151;I8;I8;Y151,AB-1234,LIBRARY_NAME:Pool1
+1,Sample_TSOCDNA-25ng-MultiCancerDNA-rep3,CGCTCATT,CCTATCCT,Y151;I8;I8;Y151,AB-1234,LIBRARY_NAME:Pool1
+1,Sample_TSOCDNA-50ng-MultiCancerDNA-rep1,GAGATTCC,CCTATCCT,Y151;I8;I8;Y151,AB-1234,LIBRARY_NAME:Pool1
+1,Sample_TSOCDNA-50ng-MultiCancerDNA-rep2,ATTCAGAA,CCTATCCT,Y151;I8;I8;Y151,AB-1234,LIBRARY_NAME:Pool1
+1,Sample_TSOCDNA-50ng-MultiCancerDNA-rep3,GAATTCGT,CCTATCCT,Y151;I8;I8;Y151,AB-1234,LIBRARY_NAME:Pool1
+1,Sample_TSOCRNA-25ng-MultiCancerRNA-rep1,ATTACTCG,GGCTCTGA,Y151;I8;I8;Y151,AB-1234,LIBRARY_NAME:Pool1
+1,Sample_TSOCRNA-25ng-MultiCancerRNA-rep2,TCCGGAGA,GGCTCTGA,Y151;I8;I8;Y151,AB-1234,LIBRARY_NAME:Pool1
+1,Sample_TSOCRNA-25ng-MultiCancerRNA-rep3,CGCTCATT,GGCTCTGA,Y151;I8;I8;Y151,AB-1234,LIBRARY_NAME:Pool1
+1,Sample_TSOCRNA-50ng-MultiCancerRNA-rep1,GAGATTCC,GGCTCTGA,Y151;I8;I8;Y151,AB-1234,LIBRARY_NAME:Pool1
+1,Sample_TSOCRNA-50ng-MultiCancerRNA-rep2,ATTCAGAA,GGCTCTGA,Y151;I8;I8;Y151,AB-1234,LIBRARY_NAME:Pool1
+1,Sample_TSOCRNA-50ng-MultiCancerRNA-rep3,GAATTCGT,GGCTCTGA,Y151;I8;I8;Y151,AB-1234,LIBRARY_NAME:Pool1
\ No newline at end of file
diff --git a/test_data/230825_M04034_0043_000000000-L6NVV/Unaligned/Reports/Adapter_Metrics.csv b/test_data/230825_M04034_0043_000000000-L6NVV/Unaligned/Reports/Adapter_Metrics.csv
new file mode 100644
index 0000000..c64bf1a
--- /dev/null
+++ b/test_data/230825_M04034_0043_000000000-L6NVV/Unaligned/Reports/Adapter_Metrics.csv
@@ -0,0 +1,27 @@
+Lane,Sample_ID,index,index2,ReadNumber,AdapterBases,SampleBases,% Adapter Bases
+1,Sample_TSOCDNA-25ng-MultiCancerDNA-rep1,ATTACTCG,CCTATCCT,1,0,11328926,0.000
+1,Sample_TSOCDNA-25ng-MultiCancerDNA-rep1,ATTACTCG,CCTATCCT,2,0,11328926,0.000
+1,Sample_TSOCDNA-25ng-MultiCancerDNA-rep2,TCCGGAG,CCTATCCT,1,0,15529595,0.000
+1,Sample_TSOCDNA-25ng-MultiCancerDNA-rep2,TCCGGAG,CCTATCCT,2,0,15529595,0.000
+1,Sample_TSOCDNA-25ng-MultiCancerDNA-rep3,CGCTCAT,CCTATCCT,1,0,15800489,0.000
+1,Sample_TSOCDNA-25ng-MultiCancerDNA-rep3,CGCTCAT,CCTATCCT,2,0,15800489,0.000
+1,Sample_TSOCDNA-50ng-MultiCancerDNA-rep1,GAGATTCC,CCTATCCT,1,0,19429321,0.000
+1,Sample_TSOCDNA-50ng-MultiCancerDNA-rep1,GAGATTCC,CCTATCCT,2,0,19429321,0.000
+1,Sample_TSOCDNA-50ng-MultiCancerDNA-rep2,ATTCAGAA,CCTATCCT,1,0,14657872,0.000
+1,Sample_TSOCDNA-50ng-MultiCancerDNA-rep2,ATTCAGAA,CCTATCCT,2,0,14657872,0.000
+1,Sample_TSOCDNA-50ng-MultiCancerDNA-rep3,GAATTCGT,CCTATCCT,1,0,10804201,0.000
+1,Sample_TSOCDNA-50ng-MultiCancerDNA-rep3,GAATTCGT,CCTATCCT,2,0,10804201,0.000
+1,Sample_TSOCRNA-25ng-MultiCancerRNA-rep1,ATTACTCT,GGCTCTG,1,0,957642,0.000
+1,Sample_TSOCRNA-25ng-MultiCancerRNA-rep1,ATTACTCT,GGCTCTG,2,0,957642,0.000
+1,Sample_TSOCRNA-25ng-MultiCancerRNA-rep2,TCCGGAGA,GGCTCTGA,1,0,1232160,0.000
+1,Sample_TSOCRNA-25ng-MultiCancerRNA-rep2,TCCGGAGA,GGCTCTGA,2,0,1232160,0.000
+1,Sample_TSOCRNA-25ng-MultiCancerRNA-rep3,CGCTCATT,GGCTCTGA,1,0,1524798,0.000
+1,Sample_TSOCRNA-25ng-MultiCancerRNA-rep3,CGCTCATT,GGCTCTGA,2,0,1524798,0.000
+1,Sample_TSOCRNA-50ng-MultiCancerRNA-rep1,GAGATTCC,GGCTCTGA,1,0,1425591,0.000
+1,Sample_TSOCRNA-50ng-MultiCancerRNA-rep1,GAGATTCC,GGCTCTGA,2,0,1425591,0.000
+1,Sample_TSOCRNA-50ng-MultiCancerRNA-rep2,ATTCAGAA,GGCTCTGA,1,0,1479498,0.000
+1,Sample_TSOCRNA-50ng-MultiCancerRNA-rep2,ATTCAGAA,GGCTCTGA,2,0,1479498,0.000
+1,Sample_TSOCRNA-50ng-MultiCancerRNA-rep3,GAATTCGT,GGCTCTGA,1,0,1167532,0.000
+1,Sample_TSOCRNA-50ng-MultiCancerRNA-rep3,GAATTCGT,GGCTCTGA,2,0,1167532,0.000
+1,Undetermined,,,1,0,20586736,0.000
+1,Undetermined,,,2,0,20586736,0.000
diff --git a/test_data/230825_M04034_0043_000000000-L6NVV/Unaligned/Reports/Demultiplex_Stats.csv b/test_data/230825_M04034_0043_000000000-L6NVV/Unaligned/Reports/Demultiplex_Stats.csv
new file mode 100644
index 0000000..f7b48ad
--- /dev/null
+++ b/test_data/230825_M04034_0043_000000000-L6NVV/Unaligned/Reports/Demultiplex_Stats.csv
@@ -0,0 +1,14 @@
+Lane,SampleID,Index,# Reads,# Perfect Index Reads,# One Mismatch Index Reads,# Two Mismatch Index Reads,% Reads,% Perfect Index Reads,% One Mismatch Index Reads,% Two Mismatch Index Reads
+1,Sample_TSOCDNA-25ng-MultiCancerDNA-rep1,ATTACTCG-CCTATCCT,75026,73648,1378,0,0.0977,0.9816,0.0184,0.0000
+1,Sample_TSOCDNA-25ng-MultiCancerDNA-rep2,TCCGGAGA-CCTATCCT,102724,101344,1380,0,0.1338,0.9866,0.0134,0.0000
+1,Sample_TSOCDNA-25ng-MultiCancerDNA-rep3,CGCTCATT-CCTATCCT,104581,102912,1669,0,0.1362,0.9840,0.0160,0.0000
+1,Sample_TSOCDNA-50ng-MultiCancerDNA-rep1,GAGATTCC-CCTATCCT,128671,126908,1763,0,0.1676,0.9863,0.0137,0.0000
+1,Sample_TSOCDNA-50ng-MultiCancerDNA-rep2,ATTCAGAA-CCTATCCT,97072,95813,1259,0,0.1264,0.9870,0.0130,0.0000
+1,Sample_TSOCDNA-50ng-MultiCancerDNA-rep3,GAATTCGT-CCTATCCT,71551,70250,1301,0,0.0932,0.9818,0.0182,0.0000
+1,Sample_TSOCRNA-25ng-MultiCancerRNA-rep1,ATTACTCG-GGCTCTGA,6369,6119,250,0,0.0083,0.9607,0.0393,0.0000
+1,Sample_TSOCRNA-25ng-MultiCancerRNA-rep2,TCCGGAGA-GGCTCTGA,8160,7923,237,0,0.0106,0.9710,0.0290,0.0000
+1,Sample_TSOCRNA-25ng-MultiCancerRNA-rep3,CGCTCATT-GGCTCTGA,10098,9777,321,0,0.0132,0.9682,0.0318,0.0000
+1,Sample_TSOCRNA-50ng-MultiCancerRNA-rep1,GAGATTCC-GGCTCTGA,9441,9146,295,0,0.0123,0.9688,0.0312,0.0000
+1,Sample_TSOCRNA-50ng-MultiCancerRNA-rep2,ATTCAGAA-GGCTCTGA,9798,9537,261,0,0.0128,0.9734,0.0266,0.0000
+1,Sample_TSOCRNA-50ng-MultiCancerRNA-rep3,GAATTCGT-GGCTCTGA,7732,7462,270,0,0.0101,0.9651,0.0349,0.0000
+1,Undetermined,,136488,136488,0,0,0.1778,1.0000,0.0000,0.0000
diff --git a/test_data/230825_M04034_0043_000000000-L6NVV/Unaligned/Reports/Quality_Metrics.csv b/test_data/230825_M04034_0043_000000000-L6NVV/Unaligned/Reports/Quality_Metrics.csv
new file mode 100644
index 0000000..7bf11ec
--- /dev/null
+++ b/test_data/230825_M04034_0043_000000000-L6NVV/Unaligned/Reports/Quality_Metrics.csv
@@ -0,0 +1,27 @@
+Lane,SampleID,index,index2,ReadNumber,Yield,YieldQ30,QualityScoreSum,Mean Quality Score (PF),% Q30
+1,Sample_TSOCDNA-25ng-MultiCancerDNA-rep1,ATTACTCG,CCTATCCT,1,11328926,10785245,417203701,36.83,0.95
+1,Sample_TSOCDNA-25ng-MultiCancerDNA-rep1,ATTACTCG,CCTATCCT,2,11328926,10672112,414993706,36.63,0.94
+1,Sample_TSOCDNA-25ng-MultiCancerDNA-rep2,TCCGGAGA,CCTATCCT,1,15511324,14712802,570224419,36.76,0.95
+1,Sample_TSOCDNA-25ng-MultiCancerDNA-rep2,TCCGGAGA,CCTATCCT,2,15511324,14020052,552227865,35.60,0.90
+1,Sample_TSOCDNA-25ng-MultiCancerDNA-rep3,CGCTCATT,CCTATCCT,1,15791731,14984220,580688312,36.77,0.95
+1,Sample_TSOCDNA-25ng-MultiCancerDNA-rep3,CGCTCATT,CCTATCCT,2,15791731,14829761,577799233,36.59,0.94
+1,Sample_TSOCDNA-50ng-MultiCancerDNA-rep1,GAGATTCC,CCTATCCT,1,19429321,18513191,716480168,36.88,0.95
+1,Sample_TSOCDNA-50ng-MultiCancerDNA-rep1,GAGATTCC,CCTATCCT,2,19429321,18559221,719309887,37.02,0.96
+1,Sample_TSOCDNA-50ng-MultiCancerDNA-rep2,ATTCAGAA,CCTATCCT,1,14657872,13963605,540130466,36.85,0.95
+1,Sample_TSOCDNA-50ng-MultiCancerDNA-rep2,ATTCAGAA,CCTATCCT,2,14657872,13319397,523356633,35.70,0.91
+1,Sample_TSOCDNA-50ng-MultiCancerDNA-rep3,GAATTCGT,CCTATCCT,1,10804201,10296552,398233671,36.86,0.95
+1,Sample_TSOCDNA-50ng-MultiCancerDNA-rep3,GAATTCGT,CCTATCCT,2,10804201,10223490,397013622,36.75,0.95
+1,Sample_TSOCRNA-25ng-MultiCancerRNA-rep1,ATTACTCG,GGCTCTGA,1,961719,894390,34891320,36.28,0.93
+1,Sample_TSOCRNA-25ng-MultiCancerRNA-rep1,ATTACTCG,GGCTCTGA,2,961719,838346,33606784,34.94,0.87
+1,Sample_TSOCRNA-25ng-MultiCancerRNA-rep2,TCCGGAGA,GGCTCTGA,1,1232160,1108845,43830583,35.57,0.90
+1,Sample_TSOCRNA-25ng-MultiCancerRNA-rep2,TCCGGAGA,GGCTCTGA,2,1232160,1004704,41279669,33.50,0.82
+1,Sample_TSOCRNA-25ng-MultiCancerRNA-rep3,CGCTCATT,GGCTCTGA,1,1524798,1410366,55120939,36.15,0.92
+1,Sample_TSOCRNA-25ng-MultiCancerRNA-rep3,CGCTCATT,GGCTCTGA,2,1524798,1326032,53209191,34.90,0.87
+1,Sample_TSOCRNA-50ng-MultiCancerRNA-rep1,GAGATTCC,GGCTCTGA,1,1425591,1322906,51619293,36.21,0.93
+1,Sample_TSOCRNA-50ng-MultiCancerRNA-rep1,GAGATTCC,GGCTCTGA,2,1425591,1251623,50095564,35.14,0.88
+1,Sample_TSOCRNA-50ng-MultiCancerRNA-rep2,ATTCAGAA,GGCTCTGA,1,1479498,1365894,53434892,36.12,0.92
+1,Sample_TSOCRNA-50ng-MultiCancerRNA-rep2,ATTCAGAA,GGCTCTGA,2,1479498,1200631,49402041,33.39,0.81
+1,Sample_TSOCRNA-50ng-MultiCancerRNA-rep3,GAATTCGT,GGCTCTGA,1,1167532,1085821,42349972,36.27,0.93
+1,Sample_TSOCRNA-50ng-MultiCancerRNA-rep3,GAATTCGT,GGCTCTGA,2,1167532,1018714,40850803,34.99,0.87
+1,Undetermined,,,1,20609688,19714280,761042950,36.93,0.96
+1,Undetermined,,,2,20609688,18401701,729509339,35.40,0.89
diff --git a/test_data/230825_M04034_0043_000000000-L6NVV/Unaligned/Reports/RunInfo.xml b/test_data/230825_M04034_0043_000000000-L6NVV/Unaligned/Reports/RunInfo.xml
new file mode 100755
index 0000000..8234561
--- /dev/null
+++ b/test_data/230825_M04034_0043_000000000-L6NVV/Unaligned/Reports/RunInfo.xml
@@ -0,0 +1,15 @@
+
+
+
+ 000000000-L6NVV
+ M04034
+ 230825
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/test_data/230825_M04034_0043_000000000-L6NVV/Unaligned/Undetermined_S0_L001_R1_001.fastq.gz b/test_data/230825_M04034_0043_000000000-L6NVV/Unaligned/Undetermined_S0_L001_R1_001.fastq.gz
new file mode 100644
index 0000000..e61edd6
Binary files /dev/null and b/test_data/230825_M04034_0043_000000000-L6NVV/Unaligned/Undetermined_S0_L001_R1_001.fastq.gz differ
diff --git a/test_data/230825_M04034_0043_000000000-L6NVV/Unaligned/Undetermined_S0_L001_R2_001.fastq.gz b/test_data/230825_M04034_0043_000000000-L6NVV/Unaligned/Undetermined_S0_L001_R2_001.fastq.gz
new file mode 100644
index 0000000..4f90717
Binary files /dev/null and b/test_data/230825_M04034_0043_000000000-L6NVV/Unaligned/Undetermined_S0_L001_R2_001.fastq.gz differ
diff --git a/test_data/230825_M04034_0043_000000000-L6NVV/pipeline_info/nf_core_demultiplex_software_mqc_versions.yml b/test_data/230825_M04034_0043_000000000-L6NVV/pipeline_info/nf_core_demultiplex_software_mqc_versions.yml
new file mode 100644
index 0000000..980c4d4
--- /dev/null
+++ b/test_data/230825_M04034_0043_000000000-L6NVV/pipeline_info/nf_core_demultiplex_software_mqc_versions.yml
@@ -0,0 +1,5 @@
+BCLCONVERT:
+ bclconvert: 4.3.13
+Workflow:
+ nf-core/demultiplex: v1.6.1
+ Nextflow: 24.10.5
diff --git a/test_data/test_config/multiqc_flowcell_config.yaml b/test_data/test_config/multiqc_flowcell_config.yaml
deleted file mode 120000
index 5ed8412..0000000
--- a/test_data/test_config/multiqc_flowcell_config.yaml
+++ /dev/null
@@ -1 +0,0 @@
-../../config/tool_config/multiqc_flowcell_config.yaml
\ No newline at end of file
diff --git a/test_data/test_config/multiqc_flowcell_config.yaml b/test_data/test_config/multiqc_flowcell_config.yaml
new file mode 100644
index 0000000..5ed8412
--- /dev/null
+++ b/test_data/test_config/multiqc_flowcell_config.yaml
@@ -0,0 +1 @@
+../../config/tool_config/multiqc_flowcell_config.yaml
\ No newline at end of file
diff --git a/tests/integration_tests/test_validate_output.py b/tests/integration_tests/test_validate_output.py
index 42bf27f..65751f2 100644
--- a/tests/integration_tests/test_validate_output.py
+++ b/tests/integration_tests/test_validate_output.py
@@ -4,36 +4,35 @@
import os.path
import subprocess
from bs4 import BeautifulSoup
-import itertools
# Run pipeline in test mode, this is done once per test session
@pytest.fixture(scope="session", autouse=True)
-def run_pipeline(tmpdir_factory):
+def result_dir(request, tmpdir_factory):
+ demultiplexer = request.param
+
result_dir = tmpdir_factory.mktemp("results")
+ extra_profile = "test_bclconvert" if demultiplexer == "bclconvert" else "test"
+
subprocess.run(
[
"nextflow",
"run",
"main.nf",
"-profile",
- "dev,test,singularity",
+ f"dev,{extra_profile},singularity",
+ "--demultiplexer",
+ demultiplexer,
"--result_dir",
result_dir,
],
check=True,
)
- yield result_dir
-
-# Returns directory where pipeline results have been written.
-# All tests use this folder as input, veryfing that reports
-# have been generated as expected.
-@pytest.fixture
-def result_dir(run_pipeline):
- return run_pipeline
+ yield result_dir
+@pytest.mark.parametrize("result_dir", ["bcl2fastq"], indirect=True)
def test_results_dirs_exist(result_dir):
flowcell_dir = os.path.join(result_dir, "flowcell_report")
projects_dir = os.path.join(result_dir, "projects")
@@ -42,6 +41,7 @@ def test_results_dirs_exist(result_dir):
assert os.path.isdir(projects_dir)
+@pytest.mark.parametrize("result_dir", ["bcl2fastq"], indirect=True)
def test_project_dirs_exist(result_dir):
projects_dir = os.path.join(result_dir, "projects")
projects = ["Zymo", "Qiagen", "NoProject"]
@@ -50,6 +50,7 @@ def test_project_dirs_exist(result_dir):
assert os.path.isdir(os.path.join(projects_dir, project))
+@pytest.mark.parametrize("result_dir", ["bcl2fastq"], indirect=True)
def test_flowcell_report_exist(result_dir):
flowcell_dir = os.path.join(result_dir, "flowcell_report")
report_path = os.path.join(
@@ -59,6 +60,7 @@ def test_flowcell_report_exist(result_dir):
assert os.path.isfile(report_path)
+@pytest.mark.parametrize("result_dir", ["bcl2fastq"], indirect=True)
def test_project_reports_exist(result_dir):
projects_dir = os.path.join(result_dir, "projects")
projects = ["Zymo", "Qiagen", "NoProject"]
@@ -72,6 +74,7 @@ def test_project_reports_exist(result_dir):
assert os.path.isfile(report_path)
+@pytest.mark.parametrize("result_dir", ["bcl2fastq"], indirect=True)
def check_sections_in_report(report_path, sections):
with open(report_path, "r") as html_file:
parser = BeautifulSoup(html_file.read(), "lxml")
@@ -80,6 +83,7 @@ def check_sections_in_report(report_path, sections):
assert len(hits) > 0
+@pytest.mark.parametrize("result_dir", ["bcl2fastq"], indirect=True)
def test_all_sections_included_in_flowcell_report(result_dir):
flowcell_dir = os.path.join(result_dir, "flowcell_report")
report_path = os.path.join(
@@ -98,6 +102,26 @@ def test_all_sections_included_in_flowcell_report(result_dir):
check_sections_in_report(report_path, sections)
+@pytest.mark.parametrize("result_dir", ["bclconvert"], indirect=True)
+def test_all_sections_included_in_bclcovert_flowcell_report(result_dir):
+ flowcell_dir = os.path.join(result_dir, "flowcell_report")
+ report_path = os.path.join(
+ flowcell_dir, "230825_M04034_0043_000000000-L6NVV_multiqc_report.html"
+ )
+ sections = [
+ "general_stats",
+ "rrna",
+ "sequencing_metadata",
+ "bclconvert",
+ "interop",
+ "fastq_screen",
+ "fastqc",
+ ]
+
+ check_sections_in_report(report_path, sections)
+
+
+@pytest.mark.parametrize("result_dir", ["bcl2fastq"], indirect=True)
def test_all_sections_included_in_project_reports(result_dir):
projects_dir = os.path.join(result_dir, "projects")
projects = ["Zymo", "Qiagen", "NoProject"]
diff --git a/tests/unit_tests/test_get_metadata.py b/tests/unit_tests/test_get_metadata.py
index 49e034c..f5ee44e 100644
--- a/tests/unit_tests/test_get_metadata.py
+++ b/tests/unit_tests/test_get_metadata.py
@@ -13,7 +13,7 @@
@pytest.fixture
def runfolder_info():
- return RunfolderInfo("test_data/210510_M03910_0104_000000000-JHGJL", "Unaligned")
+ return RunfolderInfo("test_data/210510_M03910_0104_000000000-JHGJL")
@pytest.fixture
@@ -46,18 +46,6 @@ def test_read_run_parameters(runfolder_info):
assert len(run_parameters["RunParameters"]) == 63
-def test_read_stats_json(runfolder_info):
- stats_json = runfolder_info.read_stats_json("Unaligned")
- assert len(stats_json) == 6
-
-
-def test_bcl2fastq_version(runfolder_info):
- bcl2fastq_version = runfolder_info.get_bcl2fastq_version(
- "test_data/210510_M03910_0104_000000000-JHGJL"
- )
- assert bcl2fastq_version == "2.20.0.422"
-
-
def test_get_software_version(runfolder_info):
software_versions = runfolder_info.get_software_version(
"test_data/210510_M03910_0104_000000000-JHGJL"
@@ -76,9 +64,7 @@ def test_get_run_parameters(runfolder_info):
def test_run_parameters_novaseq_x():
- runfolder_info = RunfolderInfo(
- "test_data/20230125_lh00103_0036_A222VGWLT3", "Unaligned"
- )
+ runfolder_info = RunfolderInfo("test_data/20230125_lh00103_0036_A222VGWLT3")
filtered_run_parameters = runfolder_info.get_run_parameters()
assert filtered_run_parameters["Instrument type"] == "NovaSeqXPlus"
assert filtered_run_parameters["Control software"] == "control-software"
@@ -86,14 +72,10 @@ def test_run_parameters_novaseq_x():
def test_find_flowcell_type_novaseqx():
- runfolder_info = RunfolderInfo(
- "test_data/20230125_lh00103_0036_A222VGWLT3", "Unaligned"
- )
+ runfolder_info = RunfolderInfo("test_data/20230125_lh00103_0036_A222VGWLT3")
flowcell_type = runfolder_info.find_flowcell_type_novaseqx()
assert flowcell_type["Flowcell type"] == "10B"
- runfolder_info = RunfolderInfo(
- "test_data/210510_M03910_0104_000000000-JHGJL", "Unaligned"
- )
+ runfolder_info = RunfolderInfo("test_data/210510_M03910_0104_000000000-JHGJL")
flowcell_type = runfolder_info.find_flowcell_type_novaseqx()
assert flowcell_type is None