From b067d60ae53424c3b6e3d74229b78e17bf4d2246 Mon Sep 17 00:00:00 2001 From: johnegarza Date: Wed, 2 Sep 2020 21:38:02 -0500 Subject: [PATCH 1/7] beginning to modify somatic_exome pipeline for bam_readcount cram support --- .../sequence_align_and_tag_adapter.cwl | 6 +-- definitions/subworkflows/sequence_to_bqsr.cwl | 15 ++++---- definitions/tools/merge_crams.cwl | 37 +++++++++++++++++++ definitions/tools/name_sort_samtools.cwl | 30 +++++++++++++++ definitions/tools/sequence_align_and_tag.cwl | 8 ++-- 5 files changed, 82 insertions(+), 14 deletions(-) create mode 100644 definitions/tools/merge_crams.cwl create mode 100644 definitions/tools/name_sort_samtools.cwl diff --git a/definitions/subworkflows/sequence_align_and_tag_adapter.cwl b/definitions/subworkflows/sequence_align_and_tag_adapter.cwl index b9ba863bc..bb2e7b536 100644 --- a/definitions/subworkflows/sequence_align_and_tag_adapter.cwl +++ b/definitions/subworkflows/sequence_align_and_tag_adapter.cwl @@ -22,9 +22,9 @@ inputs: secondaryFiles: [.amb, .ann, .bwt, .pac, .sa] doc: 'bwa-indexed reference file' outputs: - aligned_bam: + aligned_cram: type: File - outputSource: align_and_tag/aligned_bam + outputSource: align_and_tag/aligned_cram steps: align_and_tag: run: ../tools/sequence_align_and_tag.cwl @@ -44,4 +44,4 @@ steps: valueFrom: $(self.readgroup) out: - [aligned_bam] + [aligned_cram] diff --git a/definitions/subworkflows/sequence_to_bqsr.cwl b/definitions/subworkflows/sequence_to_bqsr.cwl index 2337ac552..e65bbb3ff 100644 --- a/definitions/subworkflows/sequence_to_bqsr.cwl +++ b/definitions/subworkflows/sequence_to_bqsr.cwl @@ -50,20 +50,21 @@ steps: unaligned: unaligned reference: reference out: - [aligned_bam] + [aligned_cram] merge: - run: ../tools/merge_bams_samtools.cwl + run: ../tools/merge_crams.cwl in: - bams: align/aligned_bam + crams: align/aligned_cram name: final_name + reference: reference out: - [merged_bam] + [merged_cram] name_sort: - run: ../tools/name_sort.cwl + run: 
../tools/name_sort_samtools.cwl in: - bam: merge/merged_bam + cram: merge/merged_cram out: - [name_sorted_bam] + [name_sorted_cram] mark_duplicates_and_sort: run: ../tools/mark_duplicates_and_sort.cwl in: diff --git a/definitions/tools/merge_crams.cwl b/definitions/tools/merge_crams.cwl new file mode 100644 index 000000000..08faf52d5 --- /dev/null +++ b/definitions/tools/merge_crams.cwl @@ -0,0 +1,37 @@ +#!/usr/bin/env cwl-runner + +cwlVersion: v1.0 +class: CommandLineTool +label: "Samtools: merge" +baseCommand: ["/opt/samtools/bin/samtools", "merge"] +requirements: + - class: ResourceRequirement + ramMin: 8000 + coresMin: 4 + - class: DockerRequirement + dockerPull: "mgibio/samtools-cwl:1.0.0" + - class: SchemaDefRequirement + types: + - $import: ../types/sequence_data.yml +arguments: ["$(inputs.name).merged.cram", { prefix: "--threads", valueFrom: $(runtime.cores) }, { prefix: '-O', valueFrom: "CRAM"}] +inputs: + crams: + type: File[] + inputBinding: + position: 1 + name: + type: string + reference: + type: + - string + - File + inputBinding: + position: 2 + prefix: '--reference' + +outputs: + merged_cram: + type: File + outputBinding: + glob: "$(inputs.name).merged.cram" + diff --git a/definitions/tools/name_sort_samtools.cwl b/definitions/tools/name_sort_samtools.cwl new file mode 100644 index 000000000..de8c83cfb --- /dev/null +++ b/definitions/tools/name_sort_samtools.cwl @@ -0,0 +1,30 @@ +#!/usr/bin/env cwl-runner + +cwlVersion: v1.0 +class: CommandLineTool +baseCommand: ["/opt/samtools/bin/samtools", "sort", "-n"] +requirements: + - class: ResourceRequirement + ramMin: 26000 + coresMin: 8 + - class: DockerRequirement + dockerPull: "mgibio/samtools-cwl:1.0.0" + +arguments: + - prefix: -o + valueFrom: $(inputs.cram.nameroot).NameSorted.cram + - prefix: -@ + valueFrom: $(runtime.cores) + +inputs: + cram: + type: File + inputBinding: + position: 1 + +outputs: + sorted_cram: + type: File + outputBinding: + glob: $(inputs.cram.nameroot).NameSorted.cram + diff 
--git a/definitions/tools/sequence_align_and_tag.cwl b/definitions/tools/sequence_align_and_tag.cwl index 529baf21d..a7c0e8535 100644 --- a/definitions/tools/sequence_align_and_tag.cwl +++ b/definitions/tools/sequence_align_and_tag.cwl @@ -49,12 +49,12 @@ requirements: done if [[ "$MODE" == 'fastq' ]]; then - /usr/local/bin/bwa mem -K 100000000 -t "$NTHREADS" -Y -R "$READGROUP" "$REFERENCE" "$FASTQ1" "$FASTQ2" | /usr/local/bin/samblaster -a --addMateTags | /opt/samtools/bin/samtools view -b -S /dev/stdin + /usr/local/bin/bwa mem -K 100000000 -t "$NTHREADS" -Y -R "$READGROUP" "$REFERENCE" "$FASTQ1" "$FASTQ2" | /usr/local/bin/samblaster -a --addMateTags | /opt/samtools/bin/samtools view -C -T "$REFERENCE" -S /dev/stdin fi if [[ "$MODE" == 'bam' ]]; then - /usr/bin/java -Xmx4g -jar /opt/picard/picard.jar SamToFastq I="$BAM" INTERLEAVE=true INCLUDE_NON_PF_READS=true FASTQ=/dev/stdout | /usr/local/bin/bwa mem -K 100000000 -t "$NTHREADS" -Y -p -R "$READGROUP" "$REFERENCE" /dev/stdin | /usr/local/bin/samblaster -a --addMateTags | /opt/samtools/bin/samtools view -b -S /dev/stdin + /usr/bin/java -Xmx4g -jar /opt/picard/picard.jar SamToFastq I="$BAM" INTERLEAVE=true INCLUDE_NON_PF_READS=true FASTQ=/dev/stdout | /usr/local/bin/bwa mem -K 100000000 -t "$NTHREADS" -Y -p -R "$READGROUP" "$REFERENCE" /dev/stdin | /usr/local/bin/samblaster -a --addMateTags | /opt/samtools/bin/samtools view -C -T "$REFERENCE" -S /dev/stdin fi -stdout: "refAlign.bam" +stdout: "refAlign.cram" arguments: - valueFrom: $(runtime.cores) position: 5 @@ -86,5 +86,5 @@ inputs: prefix: '-r' doc: 'bwa-indexed reference file' outputs: - aligned_bam: + aligned_cram: type: stdout From 275b5c191556c4595463b5b1f4cb32b78f3f1dbb Mon Sep 17 00:00:00 2001 From: John Garza Date: Fri, 4 Sep 2020 17:40:41 -0500 Subject: [PATCH 2/7] Update all parts of workflow to accept cram inputs when possible --- definitions/pipelines/alignment_exome.cwl | 10 +-- definitions/pipelines/detect_variants.cwl | 34 +++++------ 
definitions/pipelines/somatic_exome.cwl | 61 +++++-------------- definitions/subworkflows/docm_cle.cwl | 16 ++--- definitions/subworkflows/filter_vcf.cwl | 6 +- definitions/subworkflows/fp_filter.cwl | 13 +++- definitions/subworkflows/hs_metrics.cwl | 10 +-- definitions/subworkflows/mutect.cwl | 14 ++--- definitions/subworkflows/pindel.cwl | 14 ++--- definitions/subworkflows/pindel_cat.cwl | 26 ++++++-- definitions/subworkflows/qc_exome.cwl | 24 +++++--- definitions/subworkflows/sequence_to_bqsr.cwl | 24 ++++---- .../strelka_and_post_processing.cwl | 14 ++--- definitions/subworkflows/varscan.cwl | 12 ++-- .../varscan_pre_and_post_processing.cwl | 14 ++--- definitions/tools/apply_bqsr.cwl | 12 ++-- definitions/tools/bam_readcount.cwl | 4 +- definitions/tools/bqsr.cwl | 4 +- definitions/tools/cnvkit_batch.cwl | 26 ++++---- .../collect_alignment_summary_metrics.cwl | 8 +-- definitions/tools/collect_hs_metrics.cwl | 16 ++--- .../tools/collect_insert_size_metrics.cwl | 12 ++-- definitions/tools/concordance.cwl | 12 ++-- .../tools/docm_gatk_haplotype_caller.cwl | 22 +++---- definitions/tools/filter_vcf_docm.cwl | 4 +- definitions/tools/filter_vcf_mapq0.cwl | 4 +- .../tools/mark_duplicates_and_sort.cwl | 32 +++++----- definitions/tools/mutect.cwl | 16 ++--- definitions/tools/name_sort_samtools.cwl | 10 ++- definitions/tools/samtools_flagstat.cwl | 12 +++- definitions/tools/strelka.cwl | 8 +-- definitions/tools/varscan_somatic.cwl | 18 +++--- 32 files changed, 265 insertions(+), 247 deletions(-) diff --git a/definitions/pipelines/alignment_exome.cwl b/definitions/pipelines/alignment_exome.cwl index 829adeabf..ed305ea71 100644 --- a/definitions/pipelines/alignment_exome.cwl +++ b/definitions/pipelines/alignment_exome.cwl @@ -55,10 +55,10 @@ inputs: qc_minimum_base_quality: type: int? 
outputs: - bam: + cram: type: File - outputSource: alignment/final_bam - secondaryFiles: [.bai, ^.bai] + outputSource: alignment/final_cram + secondaryFiles: [.crai, ^.crai] mark_duplicates_metrics: type: File outputSource: alignment/mark_duplicates_metrics_file @@ -110,11 +110,11 @@ steps: dbsnp_vcf: dbsnp_vcf bqsr_intervals: bqsr_intervals final_name: final_name - out: [final_bam,mark_duplicates_metrics_file] + out: [final_cram,mark_duplicates_metrics_file] qc: run: ../subworkflows/qc_exome.cwl in: - bam: alignment/final_bam + cram: alignment/final_cram reference: reference bait_intervals: bait_intervals target_intervals: target_intervals diff --git a/definitions/pipelines/detect_variants.cwl b/definitions/pipelines/detect_variants.cwl index 63c14d0b9..be32c3bcf 100644 --- a/definitions/pipelines/detect_variants.cwl +++ b/definitions/pipelines/detect_variants.cwl @@ -16,12 +16,12 @@ inputs: - string - File secondaryFiles: [.fai, ^.dict] - tumor_bam: + tumor_cram: type: File - secondaryFiles: [.bai,^.bai] - normal_bam: + secondaryFiles: [.crai,^.crai] + normal_cram: type: File - secondaryFiles: [.bai,^.bai] + secondaryFiles: [.crai,^.crai] roi_intervals: type: File label: "roi_intervals: regions of interest in which variants will be called" @@ -189,8 +189,8 @@ steps: run: ../subworkflows/mutect.cwl in: reference: reference - tumor_bam: tumor_bam - normal_bam: normal_bam + tumor_cram: tumor_cram + normal_cram: normal_cram interval_list: roi_intervals scatter_count: mutect_scatter_count tumor_sample_name: tumor_sample_name @@ -200,8 +200,8 @@ steps: run: ../subworkflows/strelka_and_post_processing.cwl in: reference: reference - tumor_bam: tumor_bam - normal_bam: normal_bam + tumor_cram: tumor_cram + normal_cram: normal_cram interval_list: roi_intervals exome_mode: strelka_exome_mode cpu_reserved: strelka_cpu_reserved @@ -213,8 +213,8 @@ steps: run: ../subworkflows/varscan_pre_and_post_processing.cwl in: reference: reference - tumor_bam: tumor_bam - normal_bam: 
normal_bam + tumor_cram: tumor_cram + normal_cram: normal_cram interval_list: roi_intervals strand_filter: varscan_strand_filter min_coverage: varscan_min_coverage @@ -229,8 +229,8 @@ steps: run: ../subworkflows/pindel.cwl in: reference: reference - tumor_bam: tumor_bam - normal_bam: normal_bam + tumor_cram: tumor_cram + normal_cram: normal_cram interval_list: roi_intervals insert_size: pindel_insert_size tumor_sample_name: tumor_sample_name @@ -241,8 +241,8 @@ steps: run: ../subworkflows/docm_cle.cwl in: reference: reference - tumor_bam: tumor_bam - normal_bam: normal_bam + tumor_cram: tumor_cram + normal_cram: normal_cram docm_vcf: docm_vcf interval_list: roi_intervals filter_docm_variants: filter_docm_variants @@ -300,7 +300,7 @@ steps: vcf: annotate_variants/annotated_vcf sample: tumor_sample_name reference_fasta: reference - bam: tumor_bam + cram: tumor_cram min_base_quality: readcount_minimum_base_quality min_mapping_quality: readcount_minimum_mapping_quality out: @@ -311,7 +311,7 @@ steps: vcf: annotate_variants/annotated_vcf sample: normal_sample_name reference_fasta: reference - bam: normal_bam + cram: normal_cram min_base_quality: readcount_minimum_base_quality min_mapping_quality: readcount_minimum_mapping_quality out: @@ -352,7 +352,7 @@ steps: filter_mapq0_threshold: filter_mapq0_threshold filter_somatic_llr_threshold: filter_somatic_llr_threshold filter_minimum_depth: filter_minimum_depth - tumor_bam: tumor_bam + tumor_cram: tumor_cram do_cle_vcf_filter: cle_vcf_filter reference: reference normal_sample_name: normal_sample_name diff --git a/definitions/pipelines/somatic_exome.cwl b/definitions/pipelines/somatic_exome.cwl index 30424d3d6..7dd6d3d84 100644 --- a/definitions/pipelines/somatic_exome.cwl +++ b/definitions/pipelines/somatic_exome.cwl @@ -266,7 +266,7 @@ inputs: outputs: tumor_cram: type: File - outputSource: tumor_index_cram/indexed_cram + outputSource: tumor_alignment_and_qc/cram tumor_mark_duplicates_metrics: type: File outputSource: 
tumor_alignment_and_qc/mark_duplicates_metrics @@ -305,7 +305,7 @@ outputs: outputSource: tumor_alignment_and_qc/verify_bam_id_depth normal_cram: type: File - outputSource: normal_index_cram/indexed_cram + outputSource: normal_alignment_and_qc/cram normal_mark_duplicates_metrics: type: File outputSource: normal_alignment_and_qc/mark_duplicates_metrics @@ -483,11 +483,9 @@ steps: picard_metric_accumulation_level: picard_metric_accumulation_level qc_minimum_mapping_quality: qc_minimum_mapping_quality qc_minimum_base_quality: qc_minimum_base_quality - final_name: - source: tumor_name - valueFrom: "$(self).bam" + final_name: tumor_name out: - [bam, mark_duplicates_metrics, insert_size_metrics, alignment_summary_metrics, hs_metrics, per_target_coverage_metrics, per_target_hs_metrics, per_base_coverage_metrics, per_base_hs_metrics, summary_hs_metrics, flagstats, verify_bam_id_metrics, verify_bam_id_depth] + [cram, mark_duplicates_metrics, insert_size_metrics, alignment_summary_metrics, hs_metrics, per_target_coverage_metrics, per_target_hs_metrics, per_base_coverage_metrics, per_base_hs_metrics, summary_hs_metrics, flagstats, verify_bam_id_metrics, verify_bam_id_depth] normal_alignment_and_qc: run: alignment_exome.cwl in: @@ -507,17 +505,15 @@ steps: picard_metric_accumulation_level: picard_metric_accumulation_level qc_minimum_mapping_quality: qc_minimum_mapping_quality qc_minimum_base_quality: qc_minimum_base_quality - final_name: - source: normal_name - valueFrom: "$(self).bam" + final_name: normal_name out: - [bam, mark_duplicates_metrics, insert_size_metrics, alignment_summary_metrics, hs_metrics, per_target_coverage_metrics, per_target_hs_metrics, per_base_coverage_metrics, per_base_hs_metrics, summary_hs_metrics, flagstats, verify_bam_id_metrics, verify_bam_id_depth] + [cram, mark_duplicates_metrics, insert_size_metrics, alignment_summary_metrics, hs_metrics, per_target_coverage_metrics, per_target_hs_metrics, per_base_coverage_metrics, per_base_hs_metrics, 
summary_hs_metrics, flagstats, verify_bam_id_metrics, verify_bam_id_depth] concordance: run: ../tools/concordance.cwl in: reference: reference - bam_1: tumor_alignment_and_qc/bam - bam_2: normal_alignment_and_qc/bam + cram_1: tumor_alignment_and_qc/cram + cram_2: normal_alignment_and_qc/cram vcf: somalier_vcf out: [somalier_pairs, somalier_samples] @@ -532,8 +528,8 @@ steps: run: detect_variants.cwl in: reference: reference - tumor_bam: tumor_alignment_and_qc/bam - normal_bam: normal_alignment_and_qc/bam + tumor_cram: tumor_alignment_and_qc/cram + normal_cram: normal_alignment_and_qc/cram roi_intervals: pad_target_intervals/expanded_interval_list strelka_exome_mode: default: true @@ -567,14 +563,14 @@ steps: cnvkit: run: ../tools/cnvkit_batch.cwl in: - tumor_bam: tumor_alignment_and_qc/bam + tumor_cram: tumor_alignment_and_qc/cram reference: - source: [normal_alignment_and_qc/bam, reference] + source: [normal_alignment_and_qc/cram, reference] valueFrom: | ${ var normal = self[0]; var fasta = self[1]; - return {'normal_bam': normal, 'fasta_file': fasta}; + return {'normal_cram': normal, 'fasta_file': fasta}; } bait_intervals: bait_intervals out: @@ -582,37 +578,12 @@ steps: manta: run: ../tools/manta_somatic.cwl in: - normal_bam: normal_alignment_and_qc/bam - tumor_bam: tumor_alignment_and_qc/bam + normal_bam: normal_alignment_and_qc/cram + tumor_bam: tumor_alignment_and_qc/cram reference: reference call_regions: manta_call_regions non_wgs: manta_non_wgs output_contigs: manta_output_contigs out: [diploid_variants, somatic_variants, all_candidates, small_candidates, tumor_only_variants] - tumor_bam_to_cram: - run: ../tools/bam_to_cram.cwl - in: - bam: tumor_alignment_and_qc/bam - reference: reference - out: - [cram] - tumor_index_cram: - run: ../tools/index_cram.cwl - in: - cram: tumor_bam_to_cram/cram - out: - [indexed_cram] - normal_bam_to_cram: - run: ../tools/bam_to_cram.cwl - in: - bam: normal_alignment_and_qc/bam - reference: reference - out: - [cram] - 
normal_index_cram: - run: ../tools/index_cram.cwl - in: - cram: normal_bam_to_cram/cram - out: - [indexed_cram] + diff --git a/definitions/subworkflows/docm_cle.cwl b/definitions/subworkflows/docm_cle.cwl index c54817a63..bd00f209d 100644 --- a/definitions/subworkflows/docm_cle.cwl +++ b/definitions/subworkflows/docm_cle.cwl @@ -11,12 +11,12 @@ inputs: - string - File secondaryFiles: [.fai, ^.dict] - tumor_bam: + tumor_cram: type: File - secondaryFiles: [^.bai] - normal_bam: + secondaryFiles: [^.crai] + normal_cram: type: File - secondaryFiles: [^.bai] + secondaryFiles: [^.crai] docm_vcf: type: File secondaryFiles: [.tbi] @@ -34,8 +34,8 @@ steps: run: ../tools/docm_gatk_haplotype_caller.cwl in: reference: reference - bam: tumor_bam - normal_bam: normal_bam + cram: tumor_cram + normal_cram: normal_cram docm_vcf: docm_vcf interval_list: interval_list out: @@ -62,8 +62,8 @@ steps: run: ../tools/filter_vcf_docm.cwl in: docm_raw_variants: decompose/decomposed_vcf - normal_bam: normal_bam - tumor_bam: tumor_bam + normal_cram: normal_cram + tumor_cram: tumor_cram filter_docm_variants: filter_docm_variants out: [docm_filtered_variants] diff --git a/definitions/subworkflows/filter_vcf.cwl b/definitions/subworkflows/filter_vcf.cwl index daf7931bf..5d710ae54 100644 --- a/definitions/subworkflows/filter_vcf.cwl +++ b/definitions/subworkflows/filter_vcf.cwl @@ -17,9 +17,9 @@ inputs: type: float gnomad_field_name: type: string - tumor_bam: + tumor_cram: type: File - secondaryFiles: [.bai] + secondaryFiles: [.crai] do_cle_vcf_filter: type: boolean filter_somatic_llr_threshold: @@ -63,7 +63,7 @@ steps: run: ../tools/filter_vcf_mapq0.cwl in: vcf: filter_vcf_gnomADe_allele_freq/filtered_vcf - tumor_bam: tumor_bam + tumor_cram: tumor_cram threshold: filter_mapq0_threshold reference: reference out: diff --git a/definitions/subworkflows/fp_filter.cwl b/definitions/subworkflows/fp_filter.cwl index 0b9a96bca..3f3ef1bb3 100644 --- a/definitions/subworkflows/fp_filter.cwl +++ 
b/definitions/subworkflows/fp_filter.cwl @@ -7,9 +7,9 @@ requirements: - class: SubworkflowFeatureRequirement - class: StepInputExpressionRequirement inputs: - bam: + cram: type: File - secondaryFiles: [.bai,^.bai] + secondaryFiles: [.crai,^.crai] reference: type: - string @@ -59,11 +59,18 @@ steps: vcf: decompose_variants/decomposed_vcf out: [indexed_vcf] + cram_to_bam: + run: cram_to_bam_and_index.cwl + in: + cram: cram + reference: reference + out: + [bam] fp_filter: run: ../tools/fp_filter.cwl in: reference: reference - bam: bam + bam: cram_to_bam/bam vcf: index/indexed_vcf sample_name: sample_name min_var_freq: min_var_freq diff --git a/definitions/subworkflows/hs_metrics.cwl b/definitions/subworkflows/hs_metrics.cwl index d89348bf3..c5d663ce5 100644 --- a/definitions/subworkflows/hs_metrics.cwl +++ b/definitions/subworkflows/hs_metrics.cwl @@ -11,9 +11,9 @@ requirements: - class: StepInputExpressionRequirement - class: SubworkflowFeatureRequirement inputs: - bam: + cram: type: File - secondaryFiles: [^.bai] + secondaryFiles: [^.crai] minimum_mapping_quality: type: int? 
minimum_base_quality: @@ -51,7 +51,7 @@ steps: scatter: [bait_intervals, target_intervals, output_prefix] scatterMethod: dotproduct in: - bam: bam + cram: cram reference: reference metric_accumulation_level: valueFrom: "ALL_READS" @@ -77,7 +77,7 @@ steps: scatter: [bait_intervals, target_intervals, output_prefix] scatterMethod: dotproduct in: - bam: bam + cram: cram reference: reference metric_accumulation_level: valueFrom: "ALL_READS" @@ -103,7 +103,7 @@ steps: scatter: [bait_intervals, target_intervals, output_prefix] scatterMethod: dotproduct in: - bam: bam + cram: cram reference: reference metric_accumulation_level: valueFrom: "ALL_READS" diff --git a/definitions/subworkflows/mutect.cwl b/definitions/subworkflows/mutect.cwl index c35818127..4f4b7b660 100644 --- a/definitions/subworkflows/mutect.cwl +++ b/definitions/subworkflows/mutect.cwl @@ -14,12 +14,12 @@ inputs: - string - File secondaryFiles: [.fai, ^.dict] - tumor_bam: + tumor_cram: type: File - secondaryFiles: [^.bai, .bai] - normal_bam: + secondaryFiles: [^.crai, .crai] + normal_cram: type: File? 
- secondaryFiles: [^.bai] + secondaryFiles: [^.crai] interval_list: type: File scatter_count: @@ -47,8 +47,8 @@ steps: run: ../tools/mutect.cwl in: reference: reference - tumor_bam: tumor_bam - normal_bam: normal_bam + tumor_cram: tumor_cram + normal_cram: normal_cram interval_list: split_interval_list/split_interval_lists out: [vcf] @@ -68,7 +68,7 @@ steps: run: fp_filter.cwl in: reference: reference - bam: tumor_bam + cram: tumor_cram vcf: index/indexed_vcf variant_caller: valueFrom: "mutect" diff --git a/definitions/subworkflows/pindel.cwl b/definitions/subworkflows/pindel.cwl index a6d761228..cce3a790e 100644 --- a/definitions/subworkflows/pindel.cwl +++ b/definitions/subworkflows/pindel.cwl @@ -14,12 +14,12 @@ inputs: - string - File secondaryFiles: [.fai, ^.dict] - tumor_bam: + tumor_cram: type: File - secondaryFiles: [.bai, ^.bai] - normal_bam: + secondaryFiles: [.crai, ^.crai] + normal_cram: type: File - secondaryFiles: [^.bai] + secondaryFiles: [^.crai] interval_list: type: File insert_size: @@ -54,8 +54,8 @@ steps: run: pindel_cat.cwl in: reference: reference - tumor_bam: tumor_bam - normal_bam: normal_bam + tumor_cram: tumor_cram + normal_cram: normal_cram region_file: split_interval_list_to_bed/split_beds insert_size: insert_size tumor_sample_name: tumor_sample_name @@ -103,7 +103,7 @@ steps: run: fp_filter.cwl in: reference: reference - bam: tumor_bam + cram: tumor_cram vcf: reindex/indexed_vcf variant_caller: valueFrom: "pindel" diff --git a/definitions/subworkflows/pindel_cat.cwl b/definitions/subworkflows/pindel_cat.cwl index a7f19aae6..6f9d4bb5c 100644 --- a/definitions/subworkflows/pindel_cat.cwl +++ b/definitions/subworkflows/pindel_cat.cwl @@ -12,12 +12,12 @@ inputs: - string - File secondaryFiles: [.fai, ^.dict] - tumor_bam: + tumor_cram: type: File - secondaryFiles: ["^.bai"] - normal_bam: + secondaryFiles: ["^.crai"] + normal_cram: type: File - secondaryFiles: ["^.bai"] + secondaryFiles: ["^.crai"] region_file: type: File insert_size: @@ 
-32,12 +32,26 @@ outputs: type: File outputSource: cat/pindel_out steps: + tumor_cram_to_bam: + run: cram_to_bam_and_index.cwl + in: + reference: reference + cram: tumor_cram + out: + [bam] + normal_cram_to_bam: + run: cram_to_bam_and_index.cwl + in: + reference: reference + cram: normal_cram + out: + [bam] pindel: run: ../tools/pindel.cwl in: reference: reference - tumor_bam: tumor_bam - normal_bam: normal_bam + tumor_bam: tumor_cram_to_bam/bam + normal_bam: normal_cram_to_bam/bam insert_size: insert_size region_file: region_file tumor_sample_name: tumor_sample_name diff --git a/definitions/subworkflows/qc_exome.cwl b/definitions/subworkflows/qc_exome.cwl index 54dc21dcb..dee450275 100644 --- a/definitions/subworkflows/qc_exome.cwl +++ b/definitions/subworkflows/qc_exome.cwl @@ -10,9 +10,9 @@ requirements: - class: StepInputExpressionRequirement - class: SubworkflowFeatureRequirement inputs: - bam: + cram: type: File - secondaryFiles: [^.bai] + secondaryFiles: [^.crai] reference: type: - string @@ -79,7 +79,7 @@ steps: collect_insert_size_metrics: run: ../tools/collect_insert_size_metrics.cwl in: - bam: bam + cram: cram reference: reference metric_accumulation_level: picard_metric_accumulation_level out: @@ -87,7 +87,7 @@ steps: collect_alignment_summary_metrics: run: ../tools/collect_alignment_summary_metrics.cwl in: - bam: bam + cram: cram reference: reference metric_accumulation_level: picard_metric_accumulation_level out: @@ -95,7 +95,7 @@ steps: collect_roi_hs_metrics: run: ../tools/collect_hs_metrics.cwl in: - bam: bam + cram: cram reference: reference metric_accumulation_level: valueFrom: "ALL_READS" @@ -114,7 +114,7 @@ steps: collect_detailed_hs_metrics: run: hs_metrics.cwl in: - bam: bam + cram: cram minimum_mapping_quality: minimum_mapping_quality minimum_base_quality: minimum_base_quality per_base_intervals: per_base_intervals @@ -126,7 +126,8 @@ steps: samtools_flagstat: run: ../tools/samtools_flagstat.cwl in: - bam: bam + cram: cram + reference: 
reference out: [flagstats] select_variants: run: ../tools/select_variants.cwl @@ -136,10 +137,17 @@ steps: interval_list: target_intervals out: [filtered_vcf] + cram_to_bam: + run: cram_to_bam_and_index.cwl + in: + cram: cram + reference: reference + out: + [bam] verify_bam_id: run: ../tools/verify_bam_id.cwl in: - bam: bam + bam: cram_to_bam/bam vcf: select_variants/filtered_vcf out: [verify_bam_id_metrics, verify_bam_id_depth] diff --git a/definitions/subworkflows/sequence_to_bqsr.cwl b/definitions/subworkflows/sequence_to_bqsr.cwl index d6e02586f..6c51edbe4 100644 --- a/definitions/subworkflows/sequence_to_bqsr.cwl +++ b/definitions/subworkflows/sequence_to_bqsr.cwl @@ -39,9 +39,9 @@ inputs: type: File secondaryFiles: [.tbi] outputs: - final_bam: + final_cram: type: File - outputSource: index_bam/indexed_bam + outputSource: index_cram/indexed_cram secondaryFiles: [.bai, ^.bai] mark_duplicates_metrics_file: type: File @@ -69,19 +69,21 @@ steps: run: ../tools/name_sort_samtools.cwl in: cram: merge/merged_cram + reference: reference out: [name_sorted_cram] mark_duplicates_and_sort: run: ../tools/mark_duplicates_and_sort.cwl in: - bam: name_sort/name_sorted_bam + cram: name_sort/name_sorted_cram + reference: reference out: - [sorted_bam, metrics_file] + [sorted_cram, metrics_file] bqsr: run: ../tools/bqsr.cwl in: reference: reference - bam: mark_duplicates_and_sort/sorted_bam + cram: mark_duplicates_and_sort/sorted_cram intervals: bqsr_intervals known_sites: [dbsnp_vcf, mills, known_indels] out: @@ -90,14 +92,14 @@ steps: run: ../tools/apply_bqsr.cwl in: reference: reference - bam: mark_duplicates_and_sort/sorted_bam + cram: mark_duplicates_and_sort/sorted_cram bqsr_table: bqsr/bqsr_table output_name: final_name out: - [bqsr_bam] - index_bam: - run: ../tools/index_bam.cwl + [bqsr_cram] + index_cram: + run: ../tools/index_cram.cwl in: - bam: apply_bqsr/bqsr_bam + cram: apply_bqsr/bqsr_cram out: - [indexed_bam] + [indexed_cram] diff --git 
a/definitions/subworkflows/strelka_and_post_processing.cwl b/definitions/subworkflows/strelka_and_post_processing.cwl index d877dfe0a..65c1d1a7c 100644 --- a/definitions/subworkflows/strelka_and_post_processing.cwl +++ b/definitions/subworkflows/strelka_and_post_processing.cwl @@ -9,12 +9,12 @@ requirements: - class: MultipleInputFeatureRequirement - class: StepInputExpressionRequirement inputs: - tumor_bam: + tumor_cram: type: File - secondaryFiles: [.bai,^.bai] - normal_bam: + secondaryFiles: [.crai,^.crai] + normal_cram: type: File - secondaryFiles: [.bai,^.bai] + secondaryFiles: [.crai,^.crai] reference: type: - string @@ -44,8 +44,8 @@ steps: strelka: run: ../tools/strelka.cwl in: - tumor_bam: tumor_bam - normal_bam: normal_bam + tumor_cram: tumor_cram + normal_cram: normal_cram reference: reference exome_mode: exome_mode cpu_reserved: cpu_reserved @@ -100,7 +100,7 @@ steps: run: fp_filter.cwl in: reference: reference - bam: tumor_bam + cram: tumor_cram vcf: region_filter/filtered_vcf variant_caller: valueFrom: "strelka" diff --git a/definitions/subworkflows/varscan.cwl b/definitions/subworkflows/varscan.cwl index 22d72a3a6..8df4ed5e6 100644 --- a/definitions/subworkflows/varscan.cwl +++ b/definitions/subworkflows/varscan.cwl @@ -9,12 +9,12 @@ inputs: - string - File secondaryFiles: [.fai, ^.dict] - tumor_bam: + tumor_cram: type: File - secondaryFiles: [^.bai] - normal_bam: + secondaryFiles: [^.crai] + normal_cram: type: File - secondaryFiles: [^.bai] + secondaryFiles: [^.crai] roi_bed: type: File? 
strand_filter: @@ -75,8 +75,8 @@ steps: run: ../tools/varscan_somatic.cwl in: reference: reference - normal_bam: normal_bam - tumor_bam: tumor_bam + normal_cram: normal_cram + tumor_cram: tumor_cram roi_bed: roi_bed strand_filter: strand_filter min_coverage: min_coverage diff --git a/definitions/subworkflows/varscan_pre_and_post_processing.cwl b/definitions/subworkflows/varscan_pre_and_post_processing.cwl index a1bc4fda2..a5cea8c60 100644 --- a/definitions/subworkflows/varscan_pre_and_post_processing.cwl +++ b/definitions/subworkflows/varscan_pre_and_post_processing.cwl @@ -13,12 +13,12 @@ inputs: - string - File secondaryFiles: [.fai, ^.dict] - tumor_bam: + tumor_cram: type: File - secondaryFiles: [^.bai, .bai] - normal_bam: + secondaryFiles: [^.crai, .crai] + normal_cram: type: File - secondaryFiles: [^.bai] + secondaryFiles: [^.crai] interval_list: type: File strand_filter: @@ -59,8 +59,8 @@ steps: run: varscan.cwl in: reference: reference - tumor_bam: tumor_bam - normal_bam: normal_bam + tumor_cram: tumor_cram + normal_cram: normal_cram roi_bed: intervals_to_bed/interval_bed strand_filter: strand_filter min_coverage: min_coverage @@ -155,7 +155,7 @@ steps: run: fp_filter.cwl in: reference: reference - bam: tumor_bam + cram: tumor_cram vcf: index/indexed_vcf min_var_freq: min_var_freq variant_caller: diff --git a/definitions/tools/apply_bqsr.cwl b/definitions/tools/apply_bqsr.cwl index 7d8590ea4..b47543784 100644 --- a/definitions/tools/apply_bqsr.cwl +++ b/definitions/tools/apply_bqsr.cwl @@ -5,7 +5,7 @@ class: CommandLineTool label: 'apply BQSR' baseCommand: ["/usr/bin/java", "-Xmx16g", "-jar", "/opt/GenomeAnalysisTK.jar", "-T", "PrintReads"] arguments: - ["-o", { valueFrom: $(runtime.outdir)/$(inputs.output_name).bam }, + ["-o", { valueFrom: $(runtime.outdir)/$(inputs.output_name).cram }, "-preserveQ", "6", "-SQQ", "10", "-SQQ", "20", @@ -26,12 +26,12 @@ inputs: inputBinding: prefix: "-R" position: 1 - bam: + cram: type: File inputBinding: prefix: "-I" 
position: 2 - secondaryFiles: [.bai] + secondaryFiles: [.crai] bqsr_table: type: File inputBinding: @@ -41,8 +41,8 @@ inputs: type: string? default: 'final' outputs: - bqsr_bam: + bqsr_cram: type: File outputBinding: - glob: $(inputs.output_name).bam - secondaryFiles: [^.bai] + glob: $(inputs.output_name).cram + secondaryFiles: [^.crai] diff --git a/definitions/tools/bam_readcount.cwl b/definitions/tools/bam_readcount.cwl index 8e6948fcc..a6c049849 100644 --- a/definitions/tools/bam_readcount.cwl +++ b/definitions/tools/bam_readcount.cwl @@ -143,11 +143,11 @@ inputs: secondaryFiles: [.fai, ^.dict] inputBinding: position: -6 - bam: + cram: type: File inputBinding: position: -5 - secondaryFiles: [.bai] + secondaryFiles: [.crai] prefix: type: string? default: 'NOPREFIX' diff --git a/definitions/tools/bqsr.cwl b/definitions/tools/bqsr.cwl index 897fc61ec..bd8d18529 100644 --- a/definitions/tools/bqsr.cwl +++ b/definitions/tools/bqsr.cwl @@ -25,12 +25,12 @@ inputs: inputBinding: prefix: "-R" position: 2 - bam: + cram: type: File inputBinding: prefix: "-I" position: 3 - secondaryFiles: [.bai] + secondaryFiles: [.crai] known_sites: type: type: array diff --git a/definitions/tools/cnvkit_batch.cwl b/definitions/tools/cnvkit_batch.cwl index d8118d171..e111143cb 100644 --- a/definitions/tools/cnvkit_batch.cwl +++ b/definitions/tools/cnvkit_batch.cwl @@ -15,7 +15,7 @@ requirements: - class: InlineJavascriptRequirement arguments: [{ valueFrom: "$((inputs.reference.hasOwnProperty('cnn_file'))? null : '--normal')" }] inputs: - tumor_bam: + tumor_cram: type: File inputBinding: position: -1 @@ -45,11 +45,11 @@ inputs: inputBinding: position: 2 prefix: "--fasta" - normal_bam: + normal_cram: type: File? inputBinding: position: 1 - doc: "Normal samples (.bam) used to construct the pooled, paired, or flat reference. If this option is used but no filenames are given, a 'flat' reference will be built. Otherwise, all filenames following this option will be used." 
+ doc: "Normal samples (.cram) used to construct the pooled, paired, or flat reference. If this option is used but no filenames are given, a 'flat' reference will be built. Otherwise, all filenames following this option will be used." access: type: File? inputBinding: @@ -125,8 +125,8 @@ outputs: glob: | ${ var glob_base = ".antitargetcoverage.cnn"; - if (inputs.normal_bam) { - glob_base = inputs.normal_bam.nameroot + glob_base; + if (inputs.normal_cram) { + glob_base = inputs.normal_cram.nameroot + glob_base; } return glob_base; } @@ -136,8 +136,8 @@ outputs: glob: | ${ var glob_base = ".targetcoverage.cnn"; - if (inputs.normal_bam) { - glob_base = inputs.normal_bam.nameroot + glob_base; + if (inputs.normal_cram) { + glob_base = inputs.normal_cram.nameroot + glob_base; } return glob_base; } @@ -148,24 +148,24 @@ outputs: cn_diagram: type: File? outputBinding: - glob: $(inputs.tumor_bam.nameroot)-diagram.pdf + glob: $(inputs.tumor_cram.nameroot)-diagram.pdf cn_scatter_plot: type: File? outputBinding: - glob: $(inputs.tumor_bam.nameroot)-scatter.pdf + glob: $(inputs.tumor_cram.nameroot)-scatter.pdf tumor_antitarget_coverage: type: File outputBinding: - glob: $(inputs.tumor_bam.nameroot).antitargetcoverage.cnn + glob: $(inputs.tumor_cram.nameroot).antitargetcoverage.cnn tumor_target_coverage: type: File outputBinding: - glob: $(inputs.tumor_bam.nameroot).targetcoverage.cnn + glob: $(inputs.tumor_cram.nameroot).targetcoverage.cnn tumor_bin_level_ratios: type: File outputBinding: - glob: $(inputs.tumor_bam.nameroot).cnr + glob: $(inputs.tumor_cram.nameroot).cnr tumor_segmented_ratios: type: File outputBinding: - glob: $(inputs.tumor_bam.nameroot).cns + glob: $(inputs.tumor_cram.nameroot).cns diff --git a/definitions/tools/collect_alignment_summary_metrics.cwl b/definitions/tools/collect_alignment_summary_metrics.cwl index 200106c40..bef6011e2 100644 --- a/definitions/tools/collect_alignment_summary_metrics.cwl +++ 
b/definitions/tools/collect_alignment_summary_metrics.cwl @@ -5,18 +5,18 @@ class: CommandLineTool label: "collect alignment summary metrics" baseCommand: ["/usr/bin/java", "-Xmx16g", "-jar", "/usr/picard/picard.jar", "CollectAlignmentSummaryMetrics"] arguments: - ["OUTPUT=", { valueFrom: $(runtime.outdir)/$(inputs.bam.nameroot).AlignmentSummaryMetrics.txt }] + ["OUTPUT=", { valueFrom: $(runtime.outdir)/$(inputs.cram.nameroot).AlignmentSummaryMetrics.txt }] requirements: - class: ResourceRequirement ramMin: 18000 - class: DockerRequirement dockerPull: "mgibio/picard-cwl:2.18.1" inputs: - bam: + cram: type: File inputBinding: prefix: "INPUT=" - secondaryFiles: [^.bai] + secondaryFiles: [^.crai] reference: type: - string @@ -32,4 +32,4 @@ outputs: alignment_summary_metrics: type: File outputBinding: - glob: "$(inputs.bam.nameroot).AlignmentSummaryMetrics.txt" + glob: "$(inputs.cram.nameroot).AlignmentSummaryMetrics.txt" diff --git a/definitions/tools/collect_hs_metrics.cwl b/definitions/tools/collect_hs_metrics.cwl index a58b80f47..ef2524ce4 100644 --- a/definitions/tools/collect_hs_metrics.cwl +++ b/definitions/tools/collect_hs_metrics.cwl @@ -5,7 +5,7 @@ class: CommandLineTool label: "collect HS metrics" baseCommand: ["/usr/bin/java", "-Xmx48g", "-jar", "/usr/picard/picard.jar", "CollectHsMetrics"] arguments: - ["O=", { valueFrom: $(runtime.outdir)/$(inputs.bam.nameroot).$(inputs.output_prefix)-HsMetrics.txt }] + ["O=", { valueFrom: $(runtime.outdir)/$(inputs.cram.nameroot).$(inputs.output_prefix)-HsMetrics.txt }] requirements: - class: ResourceRequirement ramMin: 52000 @@ -14,11 +14,11 @@ requirements: dockerPull: "mgibio/picard-cwl:2.18.1" - class: StepInputExpressionRequirement inputs: - bam: + cram: type: File inputBinding: prefix: "I=" - secondaryFiles: [^.bai] + secondaryFiles: [^.crai] reference: type: - string @@ -45,7 +45,7 @@ inputs: valueFrom: | ${ if(self) { - return inputs.bam.nameroot + "." 
+ inputs.output_prefix + "-PerTargetCoverage.txt" + return inputs.cram.nameroot + "." + inputs.output_prefix + "-PerTargetCoverage.txt" } else { return false; } @@ -57,7 +57,7 @@ inputs: valueFrom: | ${ if(self) { - return inputs.bam.nameroot + "." + inputs.output_prefix + "-PerBaseCoverage.txt" + return inputs.cram.nameroot + "." + inputs.output_prefix + "-PerBaseCoverage.txt" } else { return false; } @@ -79,12 +79,12 @@ outputs: hs_metrics: type: File outputBinding: - glob: "$(inputs.bam.nameroot).$(inputs.output_prefix)-HsMetrics.txt" + glob: "$(inputs.cram.nameroot).$(inputs.output_prefix)-HsMetrics.txt" per_target_coverage_metrics: type: File? outputBinding: - glob: "$(inputs.bam.nameroot).$(inputs.output_prefix)-PerTargetCoverage.txt" + glob: "$(inputs.cram.nameroot).$(inputs.output_prefix)-PerTargetCoverage.txt" per_base_coverage_metrics: type: File? outputBinding: - glob: "$(inputs.bam.nameroot).$(inputs.output_prefix)-PerBaseCoverage.txt" + glob: "$(inputs.cram.nameroot).$(inputs.output_prefix)-PerBaseCoverage.txt" diff --git a/definitions/tools/collect_insert_size_metrics.cwl b/definitions/tools/collect_insert_size_metrics.cwl index f86eec32a..a3370bb34 100644 --- a/definitions/tools/collect_insert_size_metrics.cwl +++ b/definitions/tools/collect_insert_size_metrics.cwl @@ -5,19 +5,19 @@ class: CommandLineTool label: "collect insert size metrics" baseCommand: ["/usr/bin/java", "-Xmx16g", "-jar", "/usr/picard/picard.jar", "CollectInsertSizeMetrics"] arguments: - ["O=", { valueFrom: $(runtime.outdir)/$(inputs.bam.nameroot).InsertSizeMetrics.txt }, - "H=", { valueFrom: $(runtime.outdir)/$(inputs.bam.nameroot).InsertSizeHistogram.pdf }] + ["O=", { valueFrom: $(runtime.outdir)/$(inputs.cram.nameroot).InsertSizeMetrics.txt }, + "H=", { valueFrom: $(runtime.outdir)/$(inputs.cram.nameroot).InsertSizeHistogram.pdf }] requirements: - class: ResourceRequirement ramMin: 18000 - class: DockerRequirement dockerPull: "mgibio/picard-cwl:2.18.1" inputs: - bam: + cram: 
type: File inputBinding: prefix: "I=" - secondaryFiles: [^.bai] + secondaryFiles: [^.crai] reference: type: - string @@ -33,8 +33,8 @@ outputs: insert_size_metrics: type: File outputBinding: - glob: "$(inputs.bam.nameroot).InsertSizeMetrics.txt" + glob: "$(inputs.cram.nameroot).InsertSizeMetrics.txt" insert_size_histogram: type: File outputBinding: - glob: "$(inputs.bam.nameroot).InsertSizeHistogram.pdf" + glob: "$(inputs.cram.nameroot).InsertSizeHistogram.pdf" diff --git a/definitions/tools/concordance.cwl b/definitions/tools/concordance.cwl index 7f03cf1bd..591bfab9c 100644 --- a/definitions/tools/concordance.cwl +++ b/definitions/tools/concordance.cwl @@ -26,21 +26,21 @@ inputs: inputBinding: prefix: "-f" position: 2 - bam_1: + cram_1: type: File inputBinding: position: 3 - secondaryFiles: [.bai] - bam_2: + secondaryFiles: [.crai] + cram_2: type: File inputBinding: position: 4 - secondaryFiles: [.bai] - bam_3: + secondaryFiles: [.crai] + cram_3: type: File? inputBinding: position: 5 - secondaryFiles: [.bai] + secondaryFiles: [.crai] outputs: somalier_pairs: type: File diff --git a/definitions/tools/docm_gatk_haplotype_caller.cwl b/definitions/tools/docm_gatk_haplotype_caller.cwl index 0eb3cf5e5..599660e52 100644 --- a/definitions/tools/docm_gatk_haplotype_caller.cwl +++ b/definitions/tools/docm_gatk_haplotype_caller.cwl @@ -17,26 +17,26 @@ requirements: set -o errexit # Running haplotype caller using the newly created interval list - if [[ "$#" == 5 ]];then # If normal_bam is passed. + if [[ "$#" == 5 ]];then # If normal_cram is passed. 
# explicitly capturing variables reference=$1 - normal_bam=$2 - tumor_bam=$3 + normal_cram=$2 + tumor_cram=$3 docm_vcf=$4 interval_list=$5 # Chaning the interval_list to a new docm_interval_list that spans the docm regions by 200bp cat $interval_list | grep '^@' > docm.interval_list # Extracting the header from the interval_list zcat $docm_vcf | grep ^chr | awk '{FS = "\t";OFS = "\t";print $1,$2-100,$2+100,"+",$1"_"$2-100"_"$2+100}' >> docm.interval_list # Extracting the docm regions with a 100bp flanking region on both directions - /gatk/gatk HaplotypeCaller --java-options "-Xmx8g" -R $reference -I $normal_bam -I $tumor_bam --alleles $docm_vcf -L docm.interval_list --genotyping-mode GENOTYPE_GIVEN_ALLELES -O docm_raw_variants.vcf - else # If normal_bam is not passed + /gatk/gatk HaplotypeCaller --java-options "-Xmx8g" -R $reference -I $normal_cram -I $tumor_cram --alleles $docm_vcf -L docm.interval_list --genotyping-mode GENOTYPE_GIVEN_ALLELES -O docm_raw_variants.vcf + else # If normal_cram is not passed reference=$1 - tumor_bam=$2 + tumor_cram=$2 docm_vcf=$3 interval_list=$4 # Chaning the interval_list to a new docm_interval_list that spans the docm regions by 200bp cat $interval_list | grep '^@' > docm.interval_list # Extracting the header from the interval_list zcat $docm_vcf | grep ^chr | awk '{FS = "\t";OFS = "\t";print $1,$2-100,$2+100,"+",$1"_"$2-100"_"$2+100}' >> docm.interval_list # Extracting the docm regions with a 100bp flanking region on both directions - /gatk/gatk HaplotypeCaller --java-options "-Xmx8g" -R $reference -I $tumor_bam --alleles $docm_vcf -L docm.interval_list --genotyping-mode GENOTYPE_GIVEN_ALLELES -O docm_raw_variants.vcf + /gatk/gatk HaplotypeCaller --java-options "-Xmx8g" -R $reference -I $tumor_cram --alleles $docm_vcf -L docm.interval_list --genotyping-mode GENOTYPE_GIVEN_ALLELES -O docm_raw_variants.vcf fi inputs: @@ -47,16 +47,16 @@ inputs: secondaryFiles: [.fai, ^.dict] inputBinding: position: 1 - normal_bam: + normal_cram: 
type: File? inputBinding: position: 2 - secondaryFiles: [^.bai] - bam: + secondaryFiles: [^.crai] + cram: type: File inputBinding: position: 3 - secondaryFiles: [^.bai] + secondaryFiles: [^.crai] docm_vcf: type: File inputBinding: diff --git a/definitions/tools/filter_vcf_docm.cwl b/definitions/tools/filter_vcf_docm.cwl index 84911a700..8ec453687 100644 --- a/definitions/tools/filter_vcf_docm.cwl +++ b/definitions/tools/filter_vcf_docm.cwl @@ -107,11 +107,11 @@ inputs: type: File inputBinding: position: -4 - normal_bam: + normal_cram: type: File inputBinding: position: -3 - tumor_bam: + tumor_cram: type: File inputBinding: position: -2 diff --git a/definitions/tools/filter_vcf_mapq0.cwl b/definitions/tools/filter_vcf_mapq0.cwl index 964b5b83a..b2aa7a947 100644 --- a/definitions/tools/filter_vcf_mapq0.cwl +++ b/definitions/tools/filter_vcf_mapq0.cwl @@ -17,11 +17,11 @@ inputs: type: File inputBinding: position: 1 - tumor_bam: + tumor_cram: type: File inputBinding: position: 2 - secondaryFiles: [.bai] + secondaryFiles: [.crai] reference: type: - string diff --git a/definitions/tools/mark_duplicates_and_sort.cwl b/definitions/tools/mark_duplicates_and_sort.cwl index 79097442c..ab35017cb 100644 --- a/definitions/tools/mark_duplicates_and_sort.cwl +++ b/definitions/tools/mark_duplicates_and_sort.cwl @@ -10,7 +10,7 @@ requirements: coresMin: 8 ramMin: 40000 - class: DockerRequirement - dockerPull: "mgibio/mark_duplicates-cwl:1.0.1" + dockerPull: "mgibio/mark_duplicates-cwl:2.0.0" - class: InitialWorkDirRequirement listing: - entryname: 'markduplicates_helper.sh' @@ -18,20 +18,15 @@ requirements: set -o pipefail set -o errexit - declare MD_BARCODE_TAG - if [ ! 
-z "$6" ]; then - MD_BARCODE_TAG="BARCODE_TAG=$6" - /usr/bin/java -Xmx16g -jar /opt/picard/picard.jar MarkDuplicates I=$1 O=/dev/stdout ASSUME_SORT_ORDER=$5 METRICS_FILE=$4 QUIET=true COMPRESSION_LEVEL=0 VALIDATION_STRINGENCY=LENIENT "$MD_BARCODE_TAG" | /usr/bin/sambamba sort -t $2 -m 18G -o $3 /dev/stdin - else - /usr/bin/java -Xmx16g -jar /opt/picard/picard.jar MarkDuplicates I=$1 O=/dev/stdout ASSUME_SORT_ORDER=$5 METRICS_FILE=$4 QUIET=true COMPRESSION_LEVEL=0 VALIDATION_STRINGENCY=LENIENT | /usr/bin/sambamba sort -t $2 -m 18G -o $3 /dev/stdin - fi + /usr/bin/java -Xmx16g -jar /opt/picard/picard.jar MarkDuplicates I=$1 O=/dev/stdout ASSUME_SORT_ORDER=$5 METRICS_FILE=$4 QUIET=true COMPRESSION_LEVEL=0 VALIDATION_STRINGENCY=LENIENT REFERENCE_SEQUENCE=$6 | /opt/samtools/bin/samtools sort -@ $2 -m 4G --reference "$6" -o "$3" -O cram /dev/stdin + arguments: - position: 2 valueFrom: "$(runtime.cores)" - position: 4 - valueFrom: "$(inputs.bam.nameroot).mark_dups_metrics.txt" + valueFrom: "$(inputs.cram.nameroot).mark_dups_metrics.txt" inputs: - bam: + cram: type: File inputBinding: position: 1 @@ -42,17 +37,24 @@ inputs: position: 5 output_name: type: string? 
- default: 'MarkedSorted.bam' + default: 'MarkedSorted' inputBinding: position: 3 + reference: + type: + - string + - File + secondaryFiles: [.fai] + inputBinding: + position: 6 outputs: - sorted_bam: + sorted_cram: type: File outputBinding: - glob: $(inputs.output_name) - secondaryFiles: [.bai] + glob: $(inputs.output_name).cram + secondaryFiles: [.crai] metrics_file: type: File outputBinding: - glob: "$(inputs.bam.nameroot).mark_dups_metrics.txt" + glob: "$(inputs.cram.nameroot).mark_dups_metrics.txt" diff --git a/definitions/tools/mutect.cwl b/definitions/tools/mutect.cwl index 4077eb434..b36813f56 100644 --- a/definitions/tools/mutect.cwl +++ b/definitions/tools/mutect.cwl @@ -19,11 +19,11 @@ requirements: set -o pipefail set -o errexit - export tumor_bam="$3" - export normal_bam="$4" + export tumor_cram="$3" + export normal_cram="$4" - TUMOR=`perl -e 'my $header_str = qx(samtools view -H $ENV{tumor_bam}); my ($sample_name) = $header_str =~ /SM:([ -~]+)/; print $sample_name'` #Extracting the sample name from the TUMOR bam. - NORMAL=`perl -e 'my $header_str = qx(samtools view -H $ENV{normal_bam}); my ($sample_name) = $header_str =~ /SM:([ -~]+)/; print $sample_name'` #Extracting the sample name from the NORMAL bam. + TUMOR=`perl -e 'my $header_str = qx(samtools view -H $ENV{tumor_cram}); my ($sample_name) = $header_str =~ /SM:([ -~]+)/; print $sample_name'` #Extracting the sample name from the TUMOR cram. + NORMAL=`perl -e 'my $header_str = qx(samtools view -H $ENV{normal_cram}); my ($sample_name) = $header_str =~ /SM:([ -~]+)/; print $sample_name'` #Extracting the sample name from the NORMAL cram. /gatk/gatk Mutect2 --java-options "-Xmx20g" -O $1 -R $2 -I $3 -tumor "$TUMOR" -I $4 -normal "$NORMAL" -L $5 #Running Mutect2. /gatk/gatk FilterMutectCalls -R $2 -V mutect.vcf.gz -O mutect.filtered.vcf.gz #Running FilterMutectCalls on the output vcf. 
@@ -39,16 +39,16 @@ inputs: secondaryFiles: [.fai, ^.dict] inputBinding: position: 2 - tumor_bam: + tumor_cram: type: File inputBinding: position: 3 - secondaryFiles: [.bai] - normal_bam: + secondaryFiles: [.crai] + normal_cram: type: File? inputBinding: position: 4 - secondaryFiles: [.bai] + secondaryFiles: [.crai] interval_list: type: File inputBinding: diff --git a/definitions/tools/name_sort_samtools.cwl b/definitions/tools/name_sort_samtools.cwl index de8c83cfb..08c364c76 100644 --- a/definitions/tools/name_sort_samtools.cwl +++ b/definitions/tools/name_sort_samtools.cwl @@ -17,10 +17,18 @@ arguments: valueFrom: $(runtime.cores) inputs: + reference: + type: + - string + - File + secondaryFiles: [.fai] + inputBinding: + position: 1 + prefix: '--reference' cram: type: File inputBinding: - position: 1 + position: 2 outputs: sorted_cram: diff --git a/definitions/tools/samtools_flagstat.cwl b/definitions/tools/samtools_flagstat.cwl index a70931de9..65d426c27 100644 --- a/definitions/tools/samtools_flagstat.cwl +++ b/definitions/tools/samtools_flagstat.cwl @@ -9,13 +9,19 @@ requirements: ramMin: 4000 - class: DockerRequirement dockerPull: "mgibio/samtools-cwl:1.0.0" -stdout: "$(inputs.bam.basename).flagstat" +stdout: "$(inputs.cram.basename).flagstat" inputs: - bam: + reference: type: File + secondaryFiles: [.fai] inputBinding: position: 1 - secondaryFiles: [^.bai] + prefix: '--reference' + cram: + type: File + inputBinding: + position: 2 + secondaryFiles: [^.crai] outputs: flagstats: type: stdout diff --git a/definitions/tools/strelka.cwl b/definitions/tools/strelka.cwl index 51603b1d5..3728d2d70 100644 --- a/definitions/tools/strelka.cwl +++ b/definitions/tools/strelka.cwl @@ -14,20 +14,20 @@ arguments: [ { valueFrom: $(inputs.cpu_reserved), position: 1 }, { valueFrom: $(runtime.outdir), position: 2 }] inputs: - tumor_bam: + tumor_cram: type: File inputBinding: prefix: '--tumorBam=' separate: false position: 3 - secondaryFiles: [.bai,^.bai] - normal_bam: + 
secondaryFiles: [.crai,^.crai] + normal_cram: type: File inputBinding: prefix: '--normalBam=' separate: false position: 4 - secondaryFiles: [.bai,^.bai] + secondaryFiles: [.crai,^.crai] reference: type: - string diff --git a/definitions/tools/varscan_somatic.cwl b/definitions/tools/varscan_somatic.cwl index 6aadf6554..ff98fe7d8 100644 --- a/definitions/tools/varscan_somatic.cwl +++ b/definitions/tools/varscan_somatic.cwl @@ -21,12 +21,12 @@ requirements: if [ $# -lt 7 ] then - echo "Usage: $0 [TUMOR_BAM] [NORMAL_BAM] [REFERENCE] [STRAND_FILTER] [MIN_COVERAGE] [MIN_VAR_FREQ] [P_VALUE] [roi_bed?]" + echo "Usage: $0 [TUMOR_CRAM] [NORMAL_CRAM] [REFERENCE] [STRAND_FILTER] [MIN_COVERAGE] [MIN_VAR_FREQ] [P_VALUE] [roi_bed?]" exit 1 fi - TUMOR_BAM="$1" - NORMAL_BAM="$2" + TUMOR_CRAM="$1" + NORMAL_CRAM="$2" REFERENCE="$3" STRAND_FILTER="$4" MIN_COVERAGE="$5" @@ -38,7 +38,7 @@ requirements: then #run without ROI java -jar /opt/varscan/VarScan.jar somatic \ - <(/opt/samtools/bin/samtools mpileup --no-baq -f "$REFERENCE" "$NORMAL_BAM" "$TUMOR_BAM") \ + <(/opt/samtools/bin/samtools mpileup --no-baq -f "$REFERENCE" "$NORMAL_CRAM" "$TUMOR_CRAM") \ $OUTPUT \ --strand-filter $STRAND_FILTER \ --min-coverage $MIN_COVERAGE \ @@ -49,7 +49,7 @@ requirements: else ROI_BED="$8" java -jar /opt/varscan/VarScan.jar somatic \ - <(/opt/samtools/bin/samtools mpileup --no-baq -l "$ROI_BED" -f "$REFERENCE" "$NORMAL_BAM" "$TUMOR_BAM") \ + <(/opt/samtools/bin/samtools mpileup --no-baq -l "$ROI_BED" -f "$REFERENCE" "$NORMAL_CRAM" "$TUMOR_CRAM") \ $OUTPUT \ --strand-filter $STRAND_FILTER \ --min-coverage $MIN_COVERAGE \ @@ -60,16 +60,16 @@ requirements: fi inputs: - tumor_bam: + tumor_cram: type: File inputBinding: position: 1 - secondaryFiles: [^.bai] - normal_bam: + secondaryFiles: [^.crai] + normal_cram: type: File inputBinding: position: 2 - secondaryFiles: [^.bai] + secondaryFiles: [^.crai] reference: type: - string From b4d02c5321abce6d54567ce152760d2e63413eb9 Mon Sep 17 00:00:00 2001 From: 
John Garza Date: Fri, 4 Sep 2020 18:33:05 -0500 Subject: [PATCH 3/7] bugfixes --- definitions/subworkflows/fp_filter.cwl | 2 +- definitions/subworkflows/pindel_cat.cwl | 1 + definitions/tools/name_sort_samtools.cwl | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/definitions/subworkflows/fp_filter.cwl b/definitions/subworkflows/fp_filter.cwl index 3f3ef1bb3..128002a3c 100644 --- a/definitions/subworkflows/fp_filter.cwl +++ b/definitions/subworkflows/fp_filter.cwl @@ -60,7 +60,7 @@ steps: out: [indexed_vcf] cram_to_bam: - run: cram_to_bam_and_index + run: cram_to_bam_and_index.cwl in: cram: cram reference: reference diff --git a/definitions/subworkflows/pindel_cat.cwl b/definitions/subworkflows/pindel_cat.cwl index 6f9d4bb5c..b53acafe6 100644 --- a/definitions/subworkflows/pindel_cat.cwl +++ b/definitions/subworkflows/pindel_cat.cwl @@ -6,6 +6,7 @@ class: Workflow label: "Per-region pindel" requirements: - class: MultipleInputFeatureRequirement + - class: SubworkflowFeatureRequirement inputs: reference: type: diff --git a/definitions/tools/name_sort_samtools.cwl b/definitions/tools/name_sort_samtools.cwl index 08c364c76..c59ed058c 100644 --- a/definitions/tools/name_sort_samtools.cwl +++ b/definitions/tools/name_sort_samtools.cwl @@ -31,7 +31,7 @@ inputs: position: 2 outputs: - sorted_cram: + name_sorted_cram: type: File outputBinding: glob: $(inputs.cram.nameroot).NameSorted.cram From b16fbd519c55804d7f74bfb6f7dbd8c070199731 Mon Sep 17 00:00:00 2001 From: johnegarza Date: Fri, 4 Dec 2020 11:51:56 -0600 Subject: [PATCH 4/7] Samtools packaged with mutect is old and can't open cram files --- definitions/pipelines/detect_variants.cwl | 1 + definitions/subworkflows/mutect.cwl | 4 ++++ definitions/tools/mutect.cwl | 15 +++++++++------ 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/definitions/pipelines/detect_variants.cwl b/definitions/pipelines/detect_variants.cwl index 3780b94ea..fc51099a7 100644 --- 
a/definitions/pipelines/detect_variants.cwl +++ b/definitions/pipelines/detect_variants.cwl @@ -205,6 +205,7 @@ steps: interval_list: roi_intervals scatter_count: mutect_scatter_count tumor_sample_name: tumor_sample_name + normal_sample_name: normal_sample_name out: [unfiltered_vcf, filtered_vcf] strelka: diff --git a/definitions/subworkflows/mutect.cwl b/definitions/subworkflows/mutect.cwl index 4f4b7b660..7c0f9dd5b 100644 --- a/definitions/subworkflows/mutect.cwl +++ b/definitions/subworkflows/mutect.cwl @@ -26,6 +26,8 @@ inputs: type: int tumor_sample_name: type: string + normal_sample_name: + type: string outputs: unfiltered_vcf: type: File @@ -50,6 +52,8 @@ steps: tumor_cram: tumor_cram normal_cram: normal_cram interval_list: split_interval_list/split_interval_lists + tumor_sample_name: tumor_sample_name + normal_sample_name: normal_sample_name out: [vcf] merge: diff --git a/definitions/tools/mutect.cwl b/definitions/tools/mutect.cwl index b36813f56..fc1591028 100644 --- a/definitions/tools/mutect.cwl +++ b/definitions/tools/mutect.cwl @@ -19,12 +19,7 @@ requirements: set -o pipefail set -o errexit - export tumor_cram="$3" - export normal_cram="$4" - - TUMOR=`perl -e 'my $header_str = qx(samtools view -H $ENV{tumor_cram}); my ($sample_name) = $header_str =~ /SM:([ -~]+)/; print $sample_name'` #Extracting the sample name from the TUMOR cram. - NORMAL=`perl -e 'my $header_str = qx(samtools view -H $ENV{normal_cram}); my ($sample_name) = $header_str =~ /SM:([ -~]+)/; print $sample_name'` #Extracting the sample name from the NORMAL cram. - /gatk/gatk Mutect2 --java-options "-Xmx20g" -O $1 -R $2 -I $3 -tumor "$TUMOR" -I $4 -normal "$NORMAL" -L $5 #Running Mutect2. + /gatk/gatk Mutect2 --java-options "-Xmx20g" -O $1 -R $2 -I $3 -tumor "$6" -I $4 -normal "$7" -L $5 #Running Mutect2. /gatk/gatk FilterMutectCalls -R $2 -V mutect.vcf.gz -O mutect.filtered.vcf.gz #Running FilterMutectCalls on the output vcf. 
arguments: @@ -53,6 +48,14 @@ inputs: type: File inputBinding: position: 5 + tumor_sample_name: + type: string + inputBinding: + position: 6 + normal_sample_name: + type: string + inputBinding: + position: 7 outputs: vcf: From b77344d2439127bd4b9c66eb5115ab9f19edd361 Mon Sep 17 00:00:00 2001 From: johnegarza Date: Fri, 4 Dec 2020 11:56:05 -0600 Subject: [PATCH 5/7] Use custom patched version of cnvkit that supports opening crams with a local reference fasta --- definitions/tools/cnvkit_batch.cwl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/definitions/tools/cnvkit_batch.cwl b/definitions/tools/cnvkit_batch.cwl index e111143cb..25c930916 100644 --- a/definitions/tools/cnvkit_batch.cwl +++ b/definitions/tools/cnvkit_batch.cwl @@ -2,12 +2,12 @@ cwlVersion: v1.0 class: CommandLineTool -baseCommand: ["/usr/bin/python", "/usr/local/bin/cnvkit.py", "batch"] +baseCommand: ["/usr/bin/python3", "/git/cnvkit/cnvkit.py", "batch"] requirements: - class: ShellCommandRequirement - class: DockerRequirement - dockerPull: "etal/cnvkit:0.9.5" + dockerPull: "johnegarza/cnvkit:0.9.8" - class: ResourceRequirement coresMin: 1 ramMin: 4000 From dbdbb3b084423f97b4cfc8c89124e8d3d16d83ab Mon Sep 17 00:00:00 2001 From: johnegarza Date: Fri, 4 Dec 2020 11:56:38 -0600 Subject: [PATCH 6/7] Use patched version of bam-readcount that supports crams --- definitions/tools/bam_readcount.cwl | 114 +--------------------------- 1 file changed, 2 insertions(+), 112 deletions(-) diff --git a/definitions/tools/bam_readcount.cwl b/definitions/tools/bam_readcount.cwl index a6c049849..d690db971 100644 --- a/definitions/tools/bam_readcount.cwl +++ b/definitions/tools/bam_readcount.cwl @@ -4,124 +4,14 @@ cwlVersion: v1.0 class: CommandLineTool label: "run bam-readcount" -baseCommand: ["/usr/bin/python", "bam_readcount_helper.py"] +baseCommand: ["python", "/usr/bin/bam_readcount_helper.py"] requirements: - class: ShellCommandRequirement - class: DockerRequirement - dockerPull: 
"mgibio/bam_readcount_helper-cwl:1.1.1" + dockerPull: "seqfu/bam_readcount_helper-cwl:1.1.1-samtools-1.10" - class: ResourceRequirement ramMin: 16000 - class: InlineJavascriptRequirement - - class: InitialWorkDirRequirement - listing: - - entryname: 'bam_readcount_helper.py' - entry: | - #!/usr/bin/env python - - import sys - import os - from cyvcf2 import VCF - import tempfile - import csv - from subprocess import Popen, PIPE - - def generate_region_list(hash): - fh = tempfile.NamedTemporaryFile('w', delete=False) - writer = csv.writer(fh, delimiter='\t') - for chr, positions in hash.items(): - for pos in sorted(positions.keys()): - writer.writerow([chr, pos, pos]) - fh.close() - return fh.name - - def filter_sites_in_hash(region_list, bam_file, ref_fasta, prefixed_sample, output_dir, insertion_centric, map_qual, base_qual): - bam_readcount_cmd = ['/usr/bin/bam-readcount', '-f', ref_fasta, '-l', region_list, '-w', '0', '-b', str(base_qual), '-q', str(map_qual)] - if insertion_centric: - bam_readcount_cmd.append('-i') - output_file = os.path.join(output_dir, prefixed_sample + '_bam_readcount_indel.tsv') - else: - output_file = os.path.join(output_dir, prefixed_sample + '_bam_readcount_snv.tsv') - bam_readcount_cmd.append(bam_file) - execution = Popen(bam_readcount_cmd, stdout=PIPE, stderr=PIPE) - stdout, stderr = execution.communicate() - if execution.returncode == 0: - with open(output_file, 'wb') as output_fh: - output_fh.write(stdout) - else: - sys.exit(stderr) - - #initializing these with default values - min_base_qual = 20 - min_mapping_qual = 0 - - if len(sys.argv) == 7: - (script_name, vcf_filename, sample, ref_fasta, bam_file, prefix, output_dir)= sys.argv - elif len(sys.argv) == 8: - (script_name, vcf_filename, sample, ref_fasta, bam_file, prefix, output_dir, min_base_qual)= sys.argv - elif len(sys.argv) == 9: #elif instead of else for explicit safety - (script_name, vcf_filename, sample, ref_fasta, bam_file, prefix, output_dir, min_base_qual, 
min_mapping_qual)= sys.argv - - if prefix == 'NOPREFIX': - prefixed_sample = sample - else: - prefixed_sample = '_'.join([prefix, sample]) - - vcf_file = VCF(vcf_filename) - sample_index = vcf_file.samples.index(sample) - - rc_for_indel = {} - rc_for_snp = {} - for variant in vcf_file: - ref = variant.REF - chr = variant.CHROM - start = variant.start - end = variant.end - pos = variant.POS - for var in variant.ALT: - if len(ref) > 1 or len(var) > 1: - #it's an indel or mnp - if len(ref) == len(var) or (len(ref) > 1 and len(var) > 1): - sys.stderr.write("Complex variant or MNP will be skipped: %s\t%s\t%s\t%s\n" % (chr, pos, ref , var)) - continue - elif len(ref) > len(var): - #it's a deletion - pos += 1 - unmodified_ref = ref - ref = unmodified_ref[1] - var = "-%s" % unmodified_ref[1:] - else: - #it's an insertion - var = "+%s" % var[1:] - if chr not in rc_for_indel: - rc_for_indel[chr] = {} - if pos not in rc_for_indel[chr]: - rc_for_indel[chr][pos] = {} - if ref not in rc_for_indel[chr][pos]: - rc_for_indel[chr][pos][ref] = {} - rc_for_indel[chr][pos][ref] = variant - else: - #it's a SNP - if chr not in rc_for_snp: - rc_for_snp[chr] = {} - if pos not in rc_for_snp[chr]: - rc_for_snp[chr][pos] = {} - if ref not in rc_for_snp[chr][pos]: - rc_for_snp[chr][pos][ref] = {} - rc_for_snp[chr][pos][ref] = variant - - if len(rc_for_snp.keys()) > 0: - region_file = generate_region_list(rc_for_snp) - filter_sites_in_hash(region_file, bam_file, ref_fasta, prefixed_sample, output_dir, False, min_mapping_qual, min_base_qual) - else: - output_file = os.path.join(output_dir, prefixed_sample + '_bam_readcount_snv.tsv') - open(output_file, 'w').close() - - if len(rc_for_indel.keys()) > 0: - region_file = generate_region_list(rc_for_indel) - filter_sites_in_hash(region_file, bam_file, ref_fasta, prefixed_sample, output_dir, True, min_mapping_qual, min_base_qual) - else: - output_file = os.path.join(output_dir, prefixed_sample + '_bam_readcount_indel.tsv') - open(output_file, 
'w').close() arguments: [ { valueFrom: $(runtime.outdir), position: -3 } From 0505632b607f612cd5625aaa8497eb947ee9bfc2 Mon Sep 17 00:00:00 2001 From: johnegarza Date: Fri, 4 Dec 2020 12:05:14 -0600 Subject: [PATCH 7/7] cleanup --- definitions/tools/apply_bqsr.cwl | 1 - definitions/tools/filter_vcf_mapq0.cwl | 2 +- definitions/tools/mark_duplicates_and_sort.cwl | 6 +++--- definitions/tools/merge_crams.cwl | 3 --- definitions/tools/samtools_flagstat.cwl | 8 +------- 5 files changed, 5 insertions(+), 15 deletions(-) diff --git a/definitions/tools/apply_bqsr.cwl b/definitions/tools/apply_bqsr.cwl index 4f4c24123..fb63b4b55 100644 --- a/definitions/tools/apply_bqsr.cwl +++ b/definitions/tools/apply_bqsr.cwl @@ -43,4 +43,3 @@ outputs: type: File outputBinding: glob: $(inputs.output_name).cram - secondaryFiles: [^.crai] diff --git a/definitions/tools/filter_vcf_mapq0.cwl b/definitions/tools/filter_vcf_mapq0.cwl index b2aa7a947..de84b5650 100644 --- a/definitions/tools/filter_vcf_mapq0.cwl +++ b/definitions/tools/filter_vcf_mapq0.cwl @@ -5,7 +5,7 @@ class: CommandLineTool label: "filter vcf for variants with high percentage of mapq0 reads" requirements: - class: DockerRequirement - dockerPull: mgibio/mapq0-filter:v0.3.1 + dockerPull: mgibio/mapq0-filter:v0.4.1 - class: ResourceRequirement ramMin: 8000 tmpdirMin: 10000 diff --git a/definitions/tools/mark_duplicates_and_sort.cwl b/definitions/tools/mark_duplicates_and_sort.cwl index ab35017cb..811d8618a 100644 --- a/definitions/tools/mark_duplicates_and_sort.cwl +++ b/definitions/tools/mark_duplicates_and_sort.cwl @@ -18,7 +18,7 @@ requirements: set -o pipefail set -o errexit - /usr/bin/java -Xmx16g -jar /opt/picard/picard.jar MarkDuplicates I=$1 O=/dev/stdout ASSUME_SORT_ORDER=$5 METRICS_FILE=$4 QUIET=true COMPRESSION_LEVEL=0 VALIDATION_STRINGENCY=LENIENT REFERENCE_SEQUENCE=$6 | /opt/samtools/bin/samtools sort -@ $2 -m 4G --reference "$6" -o "$3" -O cram /dev/stdin + /usr/bin/java -Xmx16g -jar /opt/picard/picard.jar 
MarkDuplicates I=$1 O=/dev/stdout ASSUME_SORT_ORDER=$5 METRICS_FILE=$4 QUIET=true COMPRESSION_LEVEL=0 VALIDATION_STRINGENCY=LENIENT REFERENCE_SEQUENCE=$6 | /opt/samtools/bin/samtools sort -@ $2 -m 4G --reference "$6" -o "$3" -O cram /dev/stdin && /opt/samtools/bin/samtools index "$3" arguments: - position: 2 @@ -37,7 +37,7 @@ inputs: position: 5 output_name: type: string? - default: 'MarkedSorted' + default: 'MarkedSorted.cram' inputBinding: position: 3 reference: @@ -52,7 +52,7 @@ outputs: sorted_cram: type: File outputBinding: - glob: $(inputs.output_name).cram + glob: $(inputs.output_name) secondaryFiles: [.crai] metrics_file: type: File diff --git a/definitions/tools/merge_crams.cwl b/definitions/tools/merge_crams.cwl index 08faf52d5..6c087e3a5 100644 --- a/definitions/tools/merge_crams.cwl +++ b/definitions/tools/merge_crams.cwl @@ -10,9 +10,6 @@ requirements: coresMin: 4 - class: DockerRequirement dockerPull: "mgibio/samtools-cwl:1.0.0" - - class: SchemaDefRequirement - types: - - $import: ../types/sequence_data.yml arguments: ["$(inputs.name).merged.cram", { prefix: "--threads", valueFrom: $(runtime.cores) }, { prefix: '-O', valueFrom: "CRAM"}] inputs: crams: diff --git a/definitions/tools/samtools_flagstat.cwl b/definitions/tools/samtools_flagstat.cwl index 65d426c27..743de907d 100644 --- a/definitions/tools/samtools_flagstat.cwl +++ b/definitions/tools/samtools_flagstat.cwl @@ -11,16 +11,10 @@ requirements: dockerPull: "mgibio/samtools-cwl:1.0.0" stdout: "$(inputs.cram.basename).flagstat" inputs: - reference: - type: File - secondaryFiles: [.fai] - inputBinding: - position: 1 - prefix: '--reference' cram: type: File inputBinding: - position: 2 + position: 1 secondaryFiles: [^.crai] outputs: flagstats: