diff --git a/definitions/pipelines/alignment_exome.cwl b/definitions/pipelines/alignment_exome.cwl index 94e9e5943..88865c41b 100644 --- a/definitions/pipelines/alignment_exome.cwl +++ b/definitions/pipelines/alignment_exome.cwl @@ -50,10 +50,10 @@ inputs: qc_minimum_base_quality: type: int? outputs: - bam: + cram: type: File - outputSource: alignment/final_bam - secondaryFiles: [.bai, ^.bai] + outputSource: alignment/final_cram + secondaryFiles: [.crai, ^.crai] mark_duplicates_metrics: type: File outputSource: alignment/mark_duplicates_metrics_file @@ -103,11 +103,11 @@ steps: bqsr_known_sites: bqsr_known_sites bqsr_intervals: bqsr_intervals final_name: final_name - out: [final_bam,mark_duplicates_metrics_file] + out: [final_cram,mark_duplicates_metrics_file] qc: run: ../subworkflows/qc_exome.cwl in: - bam: alignment/final_bam + cram: alignment/final_cram reference: reference bait_intervals: bait_intervals target_intervals: target_intervals diff --git a/definitions/pipelines/detect_variants.cwl b/definitions/pipelines/detect_variants.cwl index cee5d8a4b..45ce3a385 100644 --- a/definitions/pipelines/detect_variants.cwl +++ b/definitions/pipelines/detect_variants.cwl @@ -16,12 +16,12 @@ inputs: - string - File secondaryFiles: [.fai, ^.dict] - tumor_bam: + tumor_cram: type: File - secondaryFiles: [.bai,^.bai] - normal_bam: + secondaryFiles: [.crai,^.crai] + normal_cram: type: File - secondaryFiles: [.bai,^.bai] + secondaryFiles: [.crai,^.crai] roi_intervals: type: File label: "roi_intervals: regions of interest in which variants will be called" @@ -201,19 +201,20 @@ steps: run: ../subworkflows/mutect.cwl in: reference: reference - tumor_bam: tumor_bam - normal_bam: normal_bam + tumor_cram: tumor_cram + normal_cram: normal_cram interval_list: roi_intervals scatter_count: scatter_count tumor_sample_name: tumor_sample_name + normal_sample_name: normal_sample_name out: [unfiltered_vcf, filtered_vcf] strelka: run: ../subworkflows/strelka_and_post_processing.cwl in: 
reference: reference - tumor_bam: tumor_bam - normal_bam: normal_bam + tumor_cram: tumor_cram + normal_cram: normal_cram interval_list: roi_intervals exome_mode: strelka_exome_mode cpu_reserved: strelka_cpu_reserved @@ -225,8 +226,8 @@ steps: run: ../subworkflows/varscan_pre_and_post_processing.cwl in: reference: reference - tumor_bam: tumor_bam - normal_bam: normal_bam + tumor_cram: tumor_cram + normal_cram: normal_cram interval_list: roi_intervals scatter_count: scatter_count strand_filter: varscan_strand_filter @@ -242,8 +243,8 @@ steps: run: ../subworkflows/pindel.cwl in: reference: reference - tumor_bam: tumor_bam - normal_bam: normal_bam + tumor_cram: tumor_cram + normal_cram: normal_cram interval_list: roi_intervals scatter_count: scatter_count insert_size: pindel_insert_size @@ -255,8 +256,8 @@ steps: run: ../subworkflows/docm_cle.cwl in: reference: reference - tumor_bam: tumor_bam - normal_bam: normal_bam + tumor_cram: tumor_cram + normal_cram: normal_cram docm_vcf: docm_vcf interval_list: roi_intervals filter_docm_variants: filter_docm_variants @@ -314,7 +315,7 @@ steps: vcf: annotate_variants/annotated_vcf sample: tumor_sample_name reference_fasta: reference - bam: tumor_bam + cram: tumor_cram min_base_quality: readcount_minimum_base_quality min_mapping_quality: readcount_minimum_mapping_quality out: @@ -325,7 +326,7 @@ steps: vcf: annotate_variants/annotated_vcf sample: normal_sample_name reference_fasta: reference - bam: normal_bam + cram: normal_cram min_base_quality: readcount_minimum_base_quality min_mapping_quality: readcount_minimum_mapping_quality out: @@ -368,7 +369,7 @@ steps: filter_somatic_llr_tumor_purity: filter_somatic_llr_tumor_purity filter_somatic_llr_normal_contamination_rate: filter_somatic_llr_normal_contamination_rate filter_minimum_depth: filter_minimum_depth - tumor_bam: tumor_bam + tumor_cram: tumor_cram do_cle_vcf_filter: cle_vcf_filter reference: reference normal_sample_name: normal_sample_name diff --git 
a/definitions/pipelines/somatic_exome.cwl b/definitions/pipelines/somatic_exome.cwl index 93633fee8..ba70efa46 100644 --- a/definitions/pipelines/somatic_exome.cwl +++ b/definitions/pipelines/somatic_exome.cwl @@ -252,7 +252,7 @@ inputs: outputs: tumor_cram: type: File - outputSource: tumor_index_cram/indexed_cram + outputSource: tumor_alignment_and_qc/cram tumor_mark_duplicates_metrics: type: File outputSource: tumor_alignment_and_qc/mark_duplicates_metrics @@ -291,7 +291,7 @@ outputs: outputSource: tumor_alignment_and_qc/verify_bam_id_depth normal_cram: type: File - outputSource: normal_index_cram/indexed_cram + outputSource: normal_alignment_and_qc/cram normal_mark_duplicates_metrics: type: File outputSource: normal_alignment_and_qc/mark_duplicates_metrics @@ -467,11 +467,9 @@ steps: picard_metric_accumulation_level: picard_metric_accumulation_level qc_minimum_mapping_quality: qc_minimum_mapping_quality qc_minimum_base_quality: qc_minimum_base_quality - final_name: - source: tumor_name - valueFrom: "$(self).bam" + final_name: tumor_name out: - [bam, mark_duplicates_metrics, insert_size_metrics, alignment_summary_metrics, hs_metrics, per_target_coverage_metrics, per_target_hs_metrics, per_base_coverage_metrics, per_base_hs_metrics, summary_hs_metrics, flagstats, verify_bam_id_metrics, verify_bam_id_depth] + [cram, mark_duplicates_metrics, insert_size_metrics, alignment_summary_metrics, hs_metrics, per_target_coverage_metrics, per_target_hs_metrics, per_base_coverage_metrics, per_base_hs_metrics, summary_hs_metrics, flagstats, verify_bam_id_metrics, verify_bam_id_depth] normal_alignment_and_qc: run: alignment_exome.cwl in: @@ -489,17 +487,15 @@ steps: picard_metric_accumulation_level: picard_metric_accumulation_level qc_minimum_mapping_quality: qc_minimum_mapping_quality qc_minimum_base_quality: qc_minimum_base_quality - final_name: - source: normal_name - valueFrom: "$(self).bam" + final_name: normal_name out: - [bam, mark_duplicates_metrics, insert_size_metrics, 
alignment_summary_metrics, hs_metrics, per_target_coverage_metrics, per_target_hs_metrics, per_base_coverage_metrics, per_base_hs_metrics, summary_hs_metrics, flagstats, verify_bam_id_metrics, verify_bam_id_depth] + [cram, mark_duplicates_metrics, insert_size_metrics, alignment_summary_metrics, hs_metrics, per_target_coverage_metrics, per_target_hs_metrics, per_base_coverage_metrics, per_base_hs_metrics, summary_hs_metrics, flagstats, verify_bam_id_metrics, verify_bam_id_depth] concordance: run: ../tools/concordance.cwl in: reference: reference - bam_1: tumor_alignment_and_qc/bam - bam_2: normal_alignment_and_qc/bam + cram_1: tumor_alignment_and_qc/cram + cram_2: normal_alignment_and_qc/cram vcf: somalier_vcf out: [somalier_pairs, somalier_samples] @@ -514,8 +510,8 @@ steps: run: detect_variants.cwl in: reference: reference - tumor_bam: tumor_alignment_and_qc/bam - normal_bam: normal_alignment_and_qc/bam + tumor_cram: tumor_alignment_and_qc/cram + normal_cram: normal_alignment_and_qc/cram roi_intervals: pad_target_intervals/expanded_interval_list strelka_exome_mode: default: true @@ -552,14 +548,14 @@ steps: cnvkit: run: ../tools/cnvkit_batch.cwl in: - tumor_bam: tumor_alignment_and_qc/bam + tumor_cram: tumor_alignment_and_qc/cram reference: - source: [normal_alignment_and_qc/bam, reference] + source: [normal_alignment_and_qc/cram, reference] valueFrom: | ${ var normal = self[0]; var fasta = self[1]; - return {'normal_bam': normal, 'fasta_file': fasta}; + return {'normal_cram': normal, 'fasta_file': fasta}; } bait_intervals: bait_intervals out: @@ -567,37 +563,12 @@ steps: manta: run: ../tools/manta_somatic.cwl in: - normal_bam: normal_alignment_and_qc/bam - tumor_bam: tumor_alignment_and_qc/bam + normal_bam: normal_alignment_and_qc/cram + tumor_bam: tumor_alignment_and_qc/cram reference: reference call_regions: manta_call_regions non_wgs: manta_non_wgs output_contigs: manta_output_contigs out: [diploid_variants, somatic_variants, all_candidates, small_candidates, 
tumor_only_variants] - tumor_bam_to_cram: - run: ../tools/bam_to_cram.cwl - in: - bam: tumor_alignment_and_qc/bam - reference: reference - out: - [cram] - tumor_index_cram: - run: ../tools/index_cram.cwl - in: - cram: tumor_bam_to_cram/cram - out: - [indexed_cram] - normal_bam_to_cram: - run: ../tools/bam_to_cram.cwl - in: - bam: normal_alignment_and_qc/bam - reference: reference - out: - [cram] - normal_index_cram: - run: ../tools/index_cram.cwl - in: - cram: normal_bam_to_cram/cram - out: - [indexed_cram] + diff --git a/definitions/subworkflows/docm_cle.cwl b/definitions/subworkflows/docm_cle.cwl index c54817a63..bd00f209d 100644 --- a/definitions/subworkflows/docm_cle.cwl +++ b/definitions/subworkflows/docm_cle.cwl @@ -11,12 +11,12 @@ inputs: - string - File secondaryFiles: [.fai, ^.dict] - tumor_bam: + tumor_cram: type: File - secondaryFiles: [^.bai] - normal_bam: + secondaryFiles: [^.crai] + normal_cram: type: File - secondaryFiles: [^.bai] + secondaryFiles: [^.crai] docm_vcf: type: File secondaryFiles: [.tbi] @@ -34,8 +34,8 @@ steps: run: ../tools/docm_gatk_haplotype_caller.cwl in: reference: reference - bam: tumor_bam - normal_bam: normal_bam + cram: tumor_cram + normal_cram: normal_cram docm_vcf: docm_vcf interval_list: interval_list out: @@ -62,8 +62,8 @@ steps: run: ../tools/filter_vcf_docm.cwl in: docm_raw_variants: decompose/decomposed_vcf - normal_bam: normal_bam - tumor_bam: tumor_bam + normal_cram: normal_cram + tumor_cram: tumor_cram filter_docm_variants: filter_docm_variants out: [docm_filtered_variants] diff --git a/definitions/subworkflows/filter_vcf.cwl b/definitions/subworkflows/filter_vcf.cwl index 582a5b1db..8a91698ef 100644 --- a/definitions/subworkflows/filter_vcf.cwl +++ b/definitions/subworkflows/filter_vcf.cwl @@ -17,9 +17,9 @@ inputs: type: float gnomad_field_name: type: string - tumor_bam: + tumor_cram: type: File - secondaryFiles: [.bai] + secondaryFiles: [.crai] do_cle_vcf_filter: type: boolean filter_somatic_llr_threshold: @@ -67,7 
+67,7 @@ steps: run: ../tools/filter_vcf_mapq0.cwl in: vcf: filter_vcf_gnomADe_allele_freq/filtered_vcf - tumor_bam: tumor_bam + tumor_cram: tumor_cram threshold: filter_mapq0_threshold reference: reference out: diff --git a/definitions/subworkflows/fp_filter.cwl b/definitions/subworkflows/fp_filter.cwl index 0b9a96bca..128002a3c 100644 --- a/definitions/subworkflows/fp_filter.cwl +++ b/definitions/subworkflows/fp_filter.cwl @@ -7,9 +7,9 @@ requirements: - class: SubworkflowFeatureRequirement - class: StepInputExpressionRequirement inputs: - bam: + cram: type: File - secondaryFiles: [.bai,^.bai] + secondaryFiles: [.crai,^.crai] reference: type: - string @@ -59,11 +59,18 @@ steps: vcf: decompose_variants/decomposed_vcf out: [indexed_vcf] + cram_to_bam: + run: cram_to_bam_and_index.cwl + in: + cram: cram + reference: reference + out: + [bam] fp_filter: run: ../tools/fp_filter.cwl in: reference: reference - bam: bam + bam: cram_to_bam/bam vcf: index/indexed_vcf sample_name: sample_name min_var_freq: min_var_freq diff --git a/definitions/subworkflows/hs_metrics.cwl b/definitions/subworkflows/hs_metrics.cwl index d89348bf3..c5d663ce5 100644 --- a/definitions/subworkflows/hs_metrics.cwl +++ b/definitions/subworkflows/hs_metrics.cwl @@ -11,9 +11,9 @@ requirements: - class: StepInputExpressionRequirement - class: SubworkflowFeatureRequirement inputs: - bam: + cram: type: File - secondaryFiles: [^.bai] + secondaryFiles: [^.crai] minimum_mapping_quality: type: int? 
minimum_base_quality: @@ -51,7 +51,7 @@ steps: scatter: [bait_intervals, target_intervals, output_prefix] scatterMethod: dotproduct in: - bam: bam + cram: cram reference: reference metric_accumulation_level: valueFrom: "ALL_READS" @@ -77,7 +77,7 @@ steps: scatter: [bait_intervals, target_intervals, output_prefix] scatterMethod: dotproduct in: - bam: bam + cram: cram reference: reference metric_accumulation_level: valueFrom: "ALL_READS" @@ -103,7 +103,7 @@ steps: scatter: [bait_intervals, target_intervals, output_prefix] scatterMethod: dotproduct in: - bam: bam + cram: cram reference: reference metric_accumulation_level: valueFrom: "ALL_READS" diff --git a/definitions/subworkflows/mutect.cwl b/definitions/subworkflows/mutect.cwl index c35818127..7c0f9dd5b 100644 --- a/definitions/subworkflows/mutect.cwl +++ b/definitions/subworkflows/mutect.cwl @@ -14,18 +14,20 @@ inputs: - string - File secondaryFiles: [.fai, ^.dict] - tumor_bam: + tumor_cram: type: File - secondaryFiles: [^.bai, .bai] - normal_bam: + secondaryFiles: [^.crai, .crai] + normal_cram: type: File? 
- secondaryFiles: [^.bai] + secondaryFiles: [^.crai] interval_list: type: File scatter_count: type: int tumor_sample_name: type: string + normal_sample_name: + type: string outputs: unfiltered_vcf: type: File @@ -47,9 +49,11 @@ steps: run: ../tools/mutect.cwl in: reference: reference - tumor_bam: tumor_bam - normal_bam: normal_bam + tumor_cram: tumor_cram + normal_cram: normal_cram interval_list: split_interval_list/split_interval_lists + tumor_sample_name: tumor_sample_name + normal_sample_name: normal_sample_name out: [vcf] merge: @@ -68,7 +72,7 @@ steps: run: fp_filter.cwl in: reference: reference - bam: tumor_bam + cram: tumor_cram vcf: index/indexed_vcf variant_caller: valueFrom: "mutect" diff --git a/definitions/subworkflows/pindel.cwl b/definitions/subworkflows/pindel.cwl index a6d761228..cce3a790e 100644 --- a/definitions/subworkflows/pindel.cwl +++ b/definitions/subworkflows/pindel.cwl @@ -14,12 +14,12 @@ inputs: - string - File secondaryFiles: [.fai, ^.dict] - tumor_bam: + tumor_cram: type: File - secondaryFiles: [.bai, ^.bai] - normal_bam: + secondaryFiles: [.crai, ^.crai] + normal_cram: type: File - secondaryFiles: [^.bai] + secondaryFiles: [^.crai] interval_list: type: File insert_size: @@ -54,8 +54,8 @@ steps: run: pindel_cat.cwl in: reference: reference - tumor_bam: tumor_bam - normal_bam: normal_bam + tumor_cram: tumor_cram + normal_cram: normal_cram region_file: split_interval_list_to_bed/split_beds insert_size: insert_size tumor_sample_name: tumor_sample_name @@ -103,7 +103,7 @@ steps: run: fp_filter.cwl in: reference: reference - bam: tumor_bam + cram: tumor_cram vcf: reindex/indexed_vcf variant_caller: valueFrom: "pindel" diff --git a/definitions/subworkflows/pindel_cat.cwl b/definitions/subworkflows/pindel_cat.cwl index a7f19aae6..b53acafe6 100644 --- a/definitions/subworkflows/pindel_cat.cwl +++ b/definitions/subworkflows/pindel_cat.cwl @@ -6,18 +6,19 @@ class: Workflow label: "Per-region pindel" requirements: - class: 
MultipleInputFeatureRequirement + - class: SubworkflowFeatureRequirement inputs: reference: type: - string - File secondaryFiles: [.fai, ^.dict] - tumor_bam: + tumor_cram: type: File - secondaryFiles: ["^.bai"] - normal_bam: + secondaryFiles: ["^.crai"] + normal_cram: type: File - secondaryFiles: ["^.bai"] + secondaryFiles: ["^.crai"] region_file: type: File insert_size: @@ -32,12 +33,26 @@ outputs: type: File outputSource: cat/pindel_out steps: + tumor_cram_to_bam: + run: cram_to_bam_and_index.cwl + in: + reference: reference + cram: tumor_cram + out: + [bam] + normal_cram_to_bam: + run: cram_to_bam_and_index.cwl + in: + reference: reference + cram: normal_cram + out: + [bam] pindel: run: ../tools/pindel.cwl in: reference: reference - tumor_bam: tumor_bam - normal_bam: normal_bam + tumor_bam: tumor_cram_to_bam/bam + normal_bam: normal_cram_to_bam/bam insert_size: insert_size region_file: region_file tumor_sample_name: tumor_sample_name diff --git a/definitions/subworkflows/qc_exome.cwl b/definitions/subworkflows/qc_exome.cwl index 54dc21dcb..dee450275 100644 --- a/definitions/subworkflows/qc_exome.cwl +++ b/definitions/subworkflows/qc_exome.cwl @@ -10,9 +10,9 @@ requirements: - class: StepInputExpressionRequirement - class: SubworkflowFeatureRequirement inputs: - bam: + cram: type: File - secondaryFiles: [^.bai] + secondaryFiles: [^.crai] reference: type: - string @@ -79,7 +79,7 @@ steps: collect_insert_size_metrics: run: ../tools/collect_insert_size_metrics.cwl in: - bam: bam + cram: cram reference: reference metric_accumulation_level: picard_metric_accumulation_level out: @@ -87,7 +87,7 @@ steps: collect_alignment_summary_metrics: run: ../tools/collect_alignment_summary_metrics.cwl in: - bam: bam + cram: cram reference: reference metric_accumulation_level: picard_metric_accumulation_level out: @@ -95,7 +95,7 @@ steps: collect_roi_hs_metrics: run: ../tools/collect_hs_metrics.cwl in: - bam: bam + cram: cram reference: reference metric_accumulation_level: 
valueFrom: "ALL_READS" @@ -114,7 +114,7 @@ steps: collect_detailed_hs_metrics: run: hs_metrics.cwl in: - bam: bam + cram: cram minimum_mapping_quality: minimum_mapping_quality minimum_base_quality: minimum_base_quality per_base_intervals: per_base_intervals @@ -126,7 +126,8 @@ steps: samtools_flagstat: run: ../tools/samtools_flagstat.cwl in: - bam: bam + cram: cram + reference: reference out: [flagstats] select_variants: run: ../tools/select_variants.cwl @@ -136,10 +137,17 @@ steps: interval_list: target_intervals out: [filtered_vcf] + cram_to_bam: + run: cram_to_bam_and_index.cwl + in: + cram: cram + reference: reference + out: + [bam] verify_bam_id: run: ../tools/verify_bam_id.cwl in: - bam: bam + bam: cram_to_bam/bam vcf: select_variants/filtered_vcf out: [verify_bam_id_metrics, verify_bam_id_depth] diff --git a/definitions/subworkflows/sequence_align_and_tag_adapter.cwl b/definitions/subworkflows/sequence_align_and_tag_adapter.cwl index 00d6abd57..9786f134f 100644 --- a/definitions/subworkflows/sequence_align_and_tag_adapter.cwl +++ b/definitions/subworkflows/sequence_align_and_tag_adapter.cwl @@ -27,9 +27,9 @@ inputs: - ../types/trimming_options.yml#trimming_options - "null" outputs: - aligned_bam: + aligned_cram: type: File - outputSource: align_and_tag/aligned_bam + outputSource: align_and_tag/aligned_cram steps: align_and_tag: run: ../tools/sequence_align_and_tag.cwl @@ -49,4 +49,4 @@ steps: valueFrom: $(self.readgroup) trimming: trimming out: - [aligned_bam] + [aligned_cram] diff --git a/definitions/subworkflows/sequence_to_bqsr.cwl b/definitions/subworkflows/sequence_to_bqsr.cwl index 8da03c69d..6934a8dd0 100644 --- a/definitions/subworkflows/sequence_to_bqsr.cwl +++ b/definitions/subworkflows/sequence_to_bqsr.cwl @@ -34,9 +34,9 @@ inputs: type: string default: 'final' outputs: - final_bam: + final_cram: type: File - outputSource: index_bam/indexed_bam + outputSource: index_cram/indexed_cram - secondaryFiles: [.bai, ^.bai] + secondaryFiles: [.crai, ^.crai] mark_duplicates_metrics_file: 
type: File @@ -51,31 +51,34 @@ steps: reference: reference trimming: trimming out: - [aligned_bam] + [aligned_cram] merge: - run: ../tools/merge_bams_samtools.cwl + run: ../tools/merge_crams.cwl in: - bams: align/aligned_bam + crams: align/aligned_cram name: final_name + reference: reference out: - [merged_bam] + [merged_cram] name_sort: - run: ../tools/name_sort.cwl + run: ../tools/name_sort_samtools.cwl in: - bam: merge/merged_bam + cram: merge/merged_cram + reference: reference out: - [name_sorted_bam] + [name_sorted_cram] mark_duplicates_and_sort: run: ../tools/mark_duplicates_and_sort.cwl in: - bam: name_sort/name_sorted_bam + cram: name_sort/name_sorted_cram + reference: reference out: - [sorted_bam, metrics_file] + [sorted_cram, metrics_file] bqsr: run: ../tools/bqsr.cwl in: reference: reference - bam: mark_duplicates_and_sort/sorted_bam + cram: mark_duplicates_and_sort/sorted_cram intervals: bqsr_intervals known_sites: bqsr_known_sites out: @@ -84,14 +87,14 @@ steps: run: ../tools/apply_bqsr.cwl in: reference: reference - bam: mark_duplicates_and_sort/sorted_bam + cram: mark_duplicates_and_sort/sorted_cram bqsr_table: bqsr/bqsr_table output_name: final_name out: - [bqsr_bam] - index_bam: - run: ../tools/index_bam.cwl + [bqsr_cram] + index_cram: + run: ../tools/index_cram.cwl in: - bam: apply_bqsr/bqsr_bam + cram: apply_bqsr/bqsr_cram out: - [indexed_bam] + [indexed_cram] diff --git a/definitions/subworkflows/strelka_and_post_processing.cwl b/definitions/subworkflows/strelka_and_post_processing.cwl index d877dfe0a..65c1d1a7c 100644 --- a/definitions/subworkflows/strelka_and_post_processing.cwl +++ b/definitions/subworkflows/strelka_and_post_processing.cwl @@ -9,12 +9,12 @@ requirements: - class: MultipleInputFeatureRequirement - class: StepInputExpressionRequirement inputs: - tumor_bam: + tumor_cram: type: File - secondaryFiles: [.bai,^.bai] - normal_bam: + secondaryFiles: [.crai,^.crai] + normal_cram: type: File - secondaryFiles: [.bai,^.bai] + 
secondaryFiles: [.crai,^.crai] reference: type: - string @@ -44,8 +44,8 @@ steps: strelka: run: ../tools/strelka.cwl in: - tumor_bam: tumor_bam - normal_bam: normal_bam + tumor_cram: tumor_cram + normal_cram: normal_cram reference: reference exome_mode: exome_mode cpu_reserved: cpu_reserved @@ -100,7 +100,7 @@ steps: run: fp_filter.cwl in: reference: reference - bam: tumor_bam + cram: tumor_cram vcf: region_filter/filtered_vcf variant_caller: valueFrom: "strelka" diff --git a/definitions/subworkflows/varscan.cwl b/definitions/subworkflows/varscan.cwl index 22d72a3a6..8df4ed5e6 100644 --- a/definitions/subworkflows/varscan.cwl +++ b/definitions/subworkflows/varscan.cwl @@ -9,12 +9,12 @@ inputs: - string - File secondaryFiles: [.fai, ^.dict] - tumor_bam: + tumor_cram: type: File - secondaryFiles: [^.bai] - normal_bam: + secondaryFiles: [^.crai] + normal_cram: type: File - secondaryFiles: [^.bai] + secondaryFiles: [^.crai] roi_bed: type: File? strand_filter: @@ -75,8 +75,8 @@ steps: run: ../tools/varscan_somatic.cwl in: reference: reference - normal_bam: normal_bam - tumor_bam: tumor_bam + normal_cram: normal_cram + tumor_cram: tumor_cram roi_bed: roi_bed strand_filter: strand_filter min_coverage: min_coverage diff --git a/definitions/subworkflows/varscan_pre_and_post_processing.cwl b/definitions/subworkflows/varscan_pre_and_post_processing.cwl index 15e2b40d0..b4353b795 100644 --- a/definitions/subworkflows/varscan_pre_and_post_processing.cwl +++ b/definitions/subworkflows/varscan_pre_and_post_processing.cwl @@ -15,12 +15,12 @@ inputs: - string - File secondaryFiles: [.fai, ^.dict] - tumor_bam: + tumor_cram: type: File - secondaryFiles: [^.bai, .bai] - normal_bam: + secondaryFiles: [^.crai, .crai] + normal_cram: type: File - secondaryFiles: [^.bai] + secondaryFiles: [^.crai] interval_list: type: File strand_filter: @@ -72,8 +72,8 @@ steps: run: varscan.cwl in: reference: reference - tumor_bam: tumor_bam - normal_bam: normal_bam + tumor_cram: tumor_cram + normal_cram: 
normal_cram roi_bed: intervals_to_bed/interval_bed strand_filter: strand_filter min_coverage: min_coverage @@ -188,7 +188,7 @@ steps: run: fp_filter.cwl in: reference: reference - bam: tumor_bam + cram: tumor_cram vcf: index/indexed_vcf min_var_freq: min_var_freq variant_caller: diff --git a/definitions/tools/apply_bqsr.cwl b/definitions/tools/apply_bqsr.cwl index f7f8df9d3..fb63b4b55 100644 --- a/definitions/tools/apply_bqsr.cwl +++ b/definitions/tools/apply_bqsr.cwl @@ -5,7 +5,7 @@ class: CommandLineTool label: 'apply BQSR' baseCommand: ["/gatk/gatk", "--java-options", "-Xmx16g", "ApplyBQSR"] arguments: - ["-O", { valueFrom: $(runtime.outdir)/$(inputs.output_name).bam }, + ["-O", { valueFrom: $(runtime.outdir)/$(inputs.output_name).cram }, "--static-quantized-quals", "10", "--static-quantized-quals", "20", "--static-quantized-quals", "30" @@ -24,12 +24,12 @@ inputs: inputBinding: prefix: "-R" position: 1 - bam: + cram: type: File inputBinding: prefix: "-I" position: 2 - secondaryFiles: [.bai] + secondaryFiles: [.crai] bqsr_table: type: File inputBinding: @@ -39,8 +39,7 @@ inputs: type: string? 
default: 'final' outputs: - bqsr_bam: + bqsr_cram: type: File outputBinding: - glob: $(inputs.output_name).bam - secondaryFiles: [^.bai] + glob: $(inputs.output_name).cram diff --git a/definitions/tools/bam_readcount.cwl b/definitions/tools/bam_readcount.cwl index 8e6948fcc..d690db971 100644 --- a/definitions/tools/bam_readcount.cwl +++ b/definitions/tools/bam_readcount.cwl @@ -4,124 +4,14 @@ cwlVersion: v1.0 class: CommandLineTool label: "run bam-readcount" -baseCommand: ["/usr/bin/python", "bam_readcount_helper.py"] +baseCommand: ["python", "/usr/bin/bam_readcount_helper.py"] requirements: - class: ShellCommandRequirement - class: DockerRequirement - dockerPull: "mgibio/bam_readcount_helper-cwl:1.1.1" + dockerPull: "seqfu/bam_readcount_helper-cwl:1.1.1-samtools-1.10" - class: ResourceRequirement ramMin: 16000 - class: InlineJavascriptRequirement - - class: InitialWorkDirRequirement - listing: - - entryname: 'bam_readcount_helper.py' - entry: | - #!/usr/bin/env python - - import sys - import os - from cyvcf2 import VCF - import tempfile - import csv - from subprocess import Popen, PIPE - - def generate_region_list(hash): - fh = tempfile.NamedTemporaryFile('w', delete=False) - writer = csv.writer(fh, delimiter='\t') - for chr, positions in hash.items(): - for pos in sorted(positions.keys()): - writer.writerow([chr, pos, pos]) - fh.close() - return fh.name - - def filter_sites_in_hash(region_list, bam_file, ref_fasta, prefixed_sample, output_dir, insertion_centric, map_qual, base_qual): - bam_readcount_cmd = ['/usr/bin/bam-readcount', '-f', ref_fasta, '-l', region_list, '-w', '0', '-b', str(base_qual), '-q', str(map_qual)] - if insertion_centric: - bam_readcount_cmd.append('-i') - output_file = os.path.join(output_dir, prefixed_sample + '_bam_readcount_indel.tsv') - else: - output_file = os.path.join(output_dir, prefixed_sample + '_bam_readcount_snv.tsv') - bam_readcount_cmd.append(bam_file) - execution = Popen(bam_readcount_cmd, stdout=PIPE, stderr=PIPE) - stdout, 
stderr = execution.communicate() - if execution.returncode == 0: - with open(output_file, 'wb') as output_fh: - output_fh.write(stdout) - else: - sys.exit(stderr) - - #initializing these with default values - min_base_qual = 20 - min_mapping_qual = 0 - - if len(sys.argv) == 7: - (script_name, vcf_filename, sample, ref_fasta, bam_file, prefix, output_dir)= sys.argv - elif len(sys.argv) == 8: - (script_name, vcf_filename, sample, ref_fasta, bam_file, prefix, output_dir, min_base_qual)= sys.argv - elif len(sys.argv) == 9: #elif instead of else for explicit safety - (script_name, vcf_filename, sample, ref_fasta, bam_file, prefix, output_dir, min_base_qual, min_mapping_qual)= sys.argv - - if prefix == 'NOPREFIX': - prefixed_sample = sample - else: - prefixed_sample = '_'.join([prefix, sample]) - - vcf_file = VCF(vcf_filename) - sample_index = vcf_file.samples.index(sample) - - rc_for_indel = {} - rc_for_snp = {} - for variant in vcf_file: - ref = variant.REF - chr = variant.CHROM - start = variant.start - end = variant.end - pos = variant.POS - for var in variant.ALT: - if len(ref) > 1 or len(var) > 1: - #it's an indel or mnp - if len(ref) == len(var) or (len(ref) > 1 and len(var) > 1): - sys.stderr.write("Complex variant or MNP will be skipped: %s\t%s\t%s\t%s\n" % (chr, pos, ref , var)) - continue - elif len(ref) > len(var): - #it's a deletion - pos += 1 - unmodified_ref = ref - ref = unmodified_ref[1] - var = "-%s" % unmodified_ref[1:] - else: - #it's an insertion - var = "+%s" % var[1:] - if chr not in rc_for_indel: - rc_for_indel[chr] = {} - if pos not in rc_for_indel[chr]: - rc_for_indel[chr][pos] = {} - if ref not in rc_for_indel[chr][pos]: - rc_for_indel[chr][pos][ref] = {} - rc_for_indel[chr][pos][ref] = variant - else: - #it's a SNP - if chr not in rc_for_snp: - rc_for_snp[chr] = {} - if pos not in rc_for_snp[chr]: - rc_for_snp[chr][pos] = {} - if ref not in rc_for_snp[chr][pos]: - rc_for_snp[chr][pos][ref] = {} - rc_for_snp[chr][pos][ref] = variant - - if 
len(rc_for_snp.keys()) > 0: - region_file = generate_region_list(rc_for_snp) - filter_sites_in_hash(region_file, bam_file, ref_fasta, prefixed_sample, output_dir, False, min_mapping_qual, min_base_qual) - else: - output_file = os.path.join(output_dir, prefixed_sample + '_bam_readcount_snv.tsv') - open(output_file, 'w').close() - - if len(rc_for_indel.keys()) > 0: - region_file = generate_region_list(rc_for_indel) - filter_sites_in_hash(region_file, bam_file, ref_fasta, prefixed_sample, output_dir, True, min_mapping_qual, min_base_qual) - else: - output_file = os.path.join(output_dir, prefixed_sample + '_bam_readcount_indel.tsv') - open(output_file, 'w').close() arguments: [ { valueFrom: $(runtime.outdir), position: -3 } @@ -143,11 +33,11 @@ inputs: secondaryFiles: [.fai, ^.dict] inputBinding: position: -6 - bam: + cram: type: File inputBinding: position: -5 - secondaryFiles: [.bai] + secondaryFiles: [.crai] prefix: type: string? default: 'NOPREFIX' diff --git a/definitions/tools/bqsr.cwl b/definitions/tools/bqsr.cwl index ec2704e2e..0cfa7a902 100644 --- a/definitions/tools/bqsr.cwl +++ b/definitions/tools/bqsr.cwl @@ -21,12 +21,12 @@ inputs: inputBinding: prefix: "-R" position: 2 - bam: + cram: type: File inputBinding: prefix: "-I" position: 3 - secondaryFiles: [.bai] + secondaryFiles: [.crai] known_sites: type: type: array diff --git a/definitions/tools/cnvkit_batch.cwl b/definitions/tools/cnvkit_batch.cwl index d8118d171..25c930916 100644 --- a/definitions/tools/cnvkit_batch.cwl +++ b/definitions/tools/cnvkit_batch.cwl @@ -2,12 +2,12 @@ cwlVersion: v1.0 class: CommandLineTool -baseCommand: ["/usr/bin/python", "/usr/local/bin/cnvkit.py", "batch"] +baseCommand: ["/usr/bin/python3", "/git/cnvkit/cnvkit.py", "batch"] requirements: - class: ShellCommandRequirement - class: DockerRequirement - dockerPull: "etal/cnvkit:0.9.5" + dockerPull: "johnegarza/cnvkit:0.9.8" - class: ResourceRequirement coresMin: 1 ramMin: 4000 @@ -15,7 +15,7 @@ requirements: - class: 
InlineJavascriptRequirement arguments: [{ valueFrom: "$((inputs.reference.hasOwnProperty('cnn_file'))? null : '--normal')" }] inputs: - tumor_bam: + tumor_cram: type: File inputBinding: position: -1 @@ -45,11 +45,11 @@ inputs: inputBinding: position: 2 prefix: "--fasta" - normal_bam: + normal_cram: type: File? inputBinding: position: 1 - doc: "Normal samples (.bam) used to construct the pooled, paired, or flat reference. If this option is used but no filenames are given, a 'flat' reference will be built. Otherwise, all filenames following this option will be used." + doc: "Normal samples (.cram) used to construct the pooled, paired, or flat reference. If this option is used but no filenames are given, a 'flat' reference will be built. Otherwise, all filenames following this option will be used." access: type: File? inputBinding: @@ -125,8 +125,8 @@ outputs: glob: | ${ var glob_base = ".antitargetcoverage.cnn"; - if (inputs.normal_bam) { - glob_base = inputs.normal_bam.nameroot + glob_base; + if (inputs.normal_cram) { + glob_base = inputs.normal_cram.nameroot + glob_base; } return glob_base; } @@ -136,8 +136,8 @@ outputs: glob: | ${ var glob_base = ".targetcoverage.cnn"; - if (inputs.normal_bam) { - glob_base = inputs.normal_bam.nameroot + glob_base; + if (inputs.normal_cram) { + glob_base = inputs.normal_cram.nameroot + glob_base; } return glob_base; } @@ -148,24 +148,24 @@ outputs: cn_diagram: type: File? outputBinding: - glob: $(inputs.tumor_bam.nameroot)-diagram.pdf + glob: $(inputs.tumor_cram.nameroot)-diagram.pdf cn_scatter_plot: type: File? 
outputBinding: - glob: $(inputs.tumor_bam.nameroot)-scatter.pdf + glob: $(inputs.tumor_cram.nameroot)-scatter.pdf tumor_antitarget_coverage: type: File outputBinding: - glob: $(inputs.tumor_bam.nameroot).antitargetcoverage.cnn + glob: $(inputs.tumor_cram.nameroot).antitargetcoverage.cnn tumor_target_coverage: type: File outputBinding: - glob: $(inputs.tumor_bam.nameroot).targetcoverage.cnn + glob: $(inputs.tumor_cram.nameroot).targetcoverage.cnn tumor_bin_level_ratios: type: File outputBinding: - glob: $(inputs.tumor_bam.nameroot).cnr + glob: $(inputs.tumor_cram.nameroot).cnr tumor_segmented_ratios: type: File outputBinding: - glob: $(inputs.tumor_bam.nameroot).cns + glob: $(inputs.tumor_cram.nameroot).cns diff --git a/definitions/tools/collect_alignment_summary_metrics.cwl b/definitions/tools/collect_alignment_summary_metrics.cwl index d3b2c5d46..064d65b6a 100644 --- a/definitions/tools/collect_alignment_summary_metrics.cwl +++ b/definitions/tools/collect_alignment_summary_metrics.cwl @@ -5,18 +5,18 @@ class: CommandLineTool label: "collect alignment summary metrics" baseCommand: ["/usr/bin/java", "-Xmx16g", "-jar", "/usr/picard/picard.jar", "CollectAlignmentSummaryMetrics"] arguments: - ["OUTPUT=", { valueFrom: $(runtime.outdir)/$(inputs.bam.nameroot).AlignmentSummaryMetrics.txt }] + ["OUTPUT=", { valueFrom: $(runtime.outdir)/$(inputs.cram.nameroot).AlignmentSummaryMetrics.txt }] requirements: - class: ResourceRequirement ramMin: 18000 - class: DockerRequirement dockerPull: "broadinstitute/picard:2.23.6" inputs: - bam: + cram: type: File inputBinding: prefix: "INPUT=" - secondaryFiles: [^.bai] + secondaryFiles: [^.crai] reference: type: - string @@ -32,4 +32,4 @@ outputs: alignment_summary_metrics: type: File outputBinding: - glob: "$(inputs.bam.nameroot).AlignmentSummaryMetrics.txt" + glob: "$(inputs.cram.nameroot).AlignmentSummaryMetrics.txt" diff --git a/definitions/tools/collect_hs_metrics.cwl b/definitions/tools/collect_hs_metrics.cwl index 
73b427798..b2343ef34 100644 --- a/definitions/tools/collect_hs_metrics.cwl +++ b/definitions/tools/collect_hs_metrics.cwl @@ -5,7 +5,7 @@ class: CommandLineTool label: "collect HS metrics" baseCommand: ["/usr/bin/java", "-Xmx48g", "-jar", "/usr/picard/picard.jar", "CollectHsMetrics"] arguments: - ["O=", { valueFrom: $(runtime.outdir)/$(inputs.bam.nameroot).$(inputs.output_prefix)-HsMetrics.txt }] + ["O=", { valueFrom: $(runtime.outdir)/$(inputs.cram.nameroot).$(inputs.output_prefix)-HsMetrics.txt }] requirements: - class: ResourceRequirement ramMin: 52000 @@ -14,11 +14,11 @@ requirements: dockerPull: "broadinstitute/picard:2.23.6" - class: StepInputExpressionRequirement inputs: - bam: + cram: type: File inputBinding: prefix: "I=" - secondaryFiles: [^.bai] + secondaryFiles: [^.crai] reference: type: - string @@ -45,7 +45,7 @@ inputs: valueFrom: | ${ if(self) { - return inputs.bam.nameroot + "." + inputs.output_prefix + "-PerTargetCoverage.txt" + return inputs.cram.nameroot + "." + inputs.output_prefix + "-PerTargetCoverage.txt" } else { return false; } @@ -57,7 +57,7 @@ inputs: valueFrom: | ${ if(self) { - return inputs.bam.nameroot + "." + inputs.output_prefix + "-PerBaseCoverage.txt" + return inputs.cram.nameroot + "." + inputs.output_prefix + "-PerBaseCoverage.txt" } else { return false; } @@ -79,12 +79,12 @@ outputs: hs_metrics: type: File outputBinding: - glob: "$(inputs.bam.nameroot).$(inputs.output_prefix)-HsMetrics.txt" + glob: "$(inputs.cram.nameroot).$(inputs.output_prefix)-HsMetrics.txt" per_target_coverage_metrics: type: File? outputBinding: - glob: "$(inputs.bam.nameroot).$(inputs.output_prefix)-PerTargetCoverage.txt" + glob: "$(inputs.cram.nameroot).$(inputs.output_prefix)-PerTargetCoverage.txt" per_base_coverage_metrics: type: File? 
outputBinding: - glob: "$(inputs.bam.nameroot).$(inputs.output_prefix)-PerBaseCoverage.txt" + glob: "$(inputs.cram.nameroot).$(inputs.output_prefix)-PerBaseCoverage.txt" diff --git a/definitions/tools/collect_insert_size_metrics.cwl b/definitions/tools/collect_insert_size_metrics.cwl index de2da933f..2f0335eda 100644 --- a/definitions/tools/collect_insert_size_metrics.cwl +++ b/definitions/tools/collect_insert_size_metrics.cwl @@ -5,19 +5,19 @@ class: CommandLineTool label: "collect insert size metrics" baseCommand: ["/usr/bin/java", "-Xmx16g", "-jar", "/usr/picard/picard.jar", "CollectInsertSizeMetrics"] arguments: - ["O=", { valueFrom: $(runtime.outdir)/$(inputs.bam.nameroot).InsertSizeMetrics.txt }, - "H=", { valueFrom: $(runtime.outdir)/$(inputs.bam.nameroot).InsertSizeHistogram.pdf }] + ["O=", { valueFrom: $(runtime.outdir)/$(inputs.cram.nameroot).InsertSizeMetrics.txt }, + "H=", { valueFrom: $(runtime.outdir)/$(inputs.cram.nameroot).InsertSizeHistogram.pdf }] requirements: - class: ResourceRequirement ramMin: 18000 - class: DockerRequirement dockerPull: "broadinstitute/picard:2.23.6" inputs: - bam: + cram: type: File inputBinding: prefix: "I=" - secondaryFiles: [^.bai] + secondaryFiles: [^.crai] reference: type: - string @@ -33,8 +33,8 @@ outputs: insert_size_metrics: type: File outputBinding: - glob: "$(inputs.bam.nameroot).InsertSizeMetrics.txt" + glob: "$(inputs.cram.nameroot).InsertSizeMetrics.txt" insert_size_histogram: type: File outputBinding: - glob: "$(inputs.bam.nameroot).InsertSizeHistogram.pdf" + glob: "$(inputs.cram.nameroot).InsertSizeHistogram.pdf" diff --git a/definitions/tools/concordance.cwl b/definitions/tools/concordance.cwl index 7f03cf1bd..591bfab9c 100644 --- a/definitions/tools/concordance.cwl +++ b/definitions/tools/concordance.cwl @@ -26,21 +26,21 @@ inputs: inputBinding: prefix: "-f" position: 2 - bam_1: + cram_1: type: File inputBinding: position: 3 - secondaryFiles: [.bai] - bam_2: + secondaryFiles: [.crai] + cram_2: type: File 
inputBinding: position: 4 - secondaryFiles: [.bai] - bam_3: + secondaryFiles: [.crai] + cram_3: type: File? inputBinding: position: 5 - secondaryFiles: [.bai] + secondaryFiles: [.crai] outputs: somalier_pairs: type: File diff --git a/definitions/tools/docm_gatk_haplotype_caller.cwl b/definitions/tools/docm_gatk_haplotype_caller.cwl index 0eb3cf5e5..599660e52 100644 --- a/definitions/tools/docm_gatk_haplotype_caller.cwl +++ b/definitions/tools/docm_gatk_haplotype_caller.cwl @@ -17,26 +17,26 @@ requirements: set -o errexit # Running haplotype caller using the newly created interval list - if [[ "$#" == 5 ]];then # If normal_bam is passed. + if [[ "$#" == 5 ]];then # If normal_cram is passed. # explicitly capturing variables reference=$1 - normal_bam=$2 - tumor_bam=$3 + normal_cram=$2 + tumor_cram=$3 docm_vcf=$4 interval_list=$5 # Chaning the interval_list to a new docm_interval_list that spans the docm regions by 200bp cat $interval_list | grep '^@' > docm.interval_list # Extracting the header from the interval_list zcat $docm_vcf | grep ^chr | awk '{FS = "\t";OFS = "\t";print $1,$2-100,$2+100,"+",$1"_"$2-100"_"$2+100}' >> docm.interval_list # Extracting the docm regions with a 100bp flanking region on both directions - /gatk/gatk HaplotypeCaller --java-options "-Xmx8g" -R $reference -I $normal_bam -I $tumor_bam --alleles $docm_vcf -L docm.interval_list --genotyping-mode GENOTYPE_GIVEN_ALLELES -O docm_raw_variants.vcf - else # If normal_bam is not passed + /gatk/gatk HaplotypeCaller --java-options "-Xmx8g" -R $reference -I $normal_cram -I $tumor_cram --alleles $docm_vcf -L docm.interval_list --genotyping-mode GENOTYPE_GIVEN_ALLELES -O docm_raw_variants.vcf + else # If normal_cram is not passed reference=$1 - tumor_bam=$2 + tumor_cram=$2 docm_vcf=$3 interval_list=$4 # Chaning the interval_list to a new docm_interval_list that spans the docm regions by 200bp cat $interval_list | grep '^@' > docm.interval_list # Extracting the header from the interval_list zcat 
$docm_vcf | grep ^chr | awk '{FS = "\t";OFS = "\t";print $1,$2-100,$2+100,"+",$1"_"$2-100"_"$2+100}' >> docm.interval_list # Extracting the docm regions with a 100bp flanking region on both directions - /gatk/gatk HaplotypeCaller --java-options "-Xmx8g" -R $reference -I $tumor_bam --alleles $docm_vcf -L docm.interval_list --genotyping-mode GENOTYPE_GIVEN_ALLELES -O docm_raw_variants.vcf + /gatk/gatk HaplotypeCaller --java-options "-Xmx8g" -R $reference -I $tumor_cram --alleles $docm_vcf -L docm.interval_list --genotyping-mode GENOTYPE_GIVEN_ALLELES -O docm_raw_variants.vcf fi inputs: @@ -47,16 +47,16 @@ inputs: secondaryFiles: [.fai, ^.dict] inputBinding: position: 1 - normal_bam: + normal_cram: type: File? inputBinding: position: 2 - secondaryFiles: [^.bai] - bam: + secondaryFiles: [^.crai] + cram: type: File inputBinding: position: 3 - secondaryFiles: [^.bai] + secondaryFiles: [^.crai] docm_vcf: type: File inputBinding: diff --git a/definitions/tools/filter_vcf_docm.cwl b/definitions/tools/filter_vcf_docm.cwl index 84911a700..8ec453687 100644 --- a/definitions/tools/filter_vcf_docm.cwl +++ b/definitions/tools/filter_vcf_docm.cwl @@ -107,11 +107,11 @@ inputs: type: File inputBinding: position: -4 - normal_bam: + normal_cram: type: File inputBinding: position: -3 - tumor_bam: + tumor_cram: type: File inputBinding: position: -2 diff --git a/definitions/tools/filter_vcf_mapq0.cwl b/definitions/tools/filter_vcf_mapq0.cwl index 964b5b83a..de84b5650 100644 --- a/definitions/tools/filter_vcf_mapq0.cwl +++ b/definitions/tools/filter_vcf_mapq0.cwl @@ -5,7 +5,7 @@ class: CommandLineTool label: "filter vcf for variants with high percentage of mapq0 reads" requirements: - class: DockerRequirement - dockerPull: mgibio/mapq0-filter:v0.3.1 + dockerPull: mgibio/mapq0-filter:v0.4.1 - class: ResourceRequirement ramMin: 8000 tmpdirMin: 10000 @@ -17,11 +17,11 @@ inputs: type: File inputBinding: position: 1 - tumor_bam: + tumor_cram: type: File inputBinding: position: 2 - 
secondaryFiles: [.bai] + secondaryFiles: [.crai] reference: type: - string diff --git a/definitions/tools/mark_duplicates_and_sort.cwl b/definitions/tools/mark_duplicates_and_sort.cwl index 79097442c..811d8618a 100644 --- a/definitions/tools/mark_duplicates_and_sort.cwl +++ b/definitions/tools/mark_duplicates_and_sort.cwl @@ -10,7 +10,7 @@ requirements: coresMin: 8 ramMin: 40000 - class: DockerRequirement - dockerPull: "mgibio/mark_duplicates-cwl:1.0.1" + dockerPull: "mgibio/mark_duplicates-cwl:2.0.0" - class: InitialWorkDirRequirement listing: - entryname: 'markduplicates_helper.sh' @@ -18,20 +18,15 @@ requirements: set -o pipefail set -o errexit - declare MD_BARCODE_TAG - if [ ! -z "$6" ]; then - MD_BARCODE_TAG="BARCODE_TAG=$6" - /usr/bin/java -Xmx16g -jar /opt/picard/picard.jar MarkDuplicates I=$1 O=/dev/stdout ASSUME_SORT_ORDER=$5 METRICS_FILE=$4 QUIET=true COMPRESSION_LEVEL=0 VALIDATION_STRINGENCY=LENIENT "$MD_BARCODE_TAG" | /usr/bin/sambamba sort -t $2 -m 18G -o $3 /dev/stdin - else - /usr/bin/java -Xmx16g -jar /opt/picard/picard.jar MarkDuplicates I=$1 O=/dev/stdout ASSUME_SORT_ORDER=$5 METRICS_FILE=$4 QUIET=true COMPRESSION_LEVEL=0 VALIDATION_STRINGENCY=LENIENT | /usr/bin/sambamba sort -t $2 -m 18G -o $3 /dev/stdin - fi + /usr/bin/java -Xmx16g -jar /opt/picard/picard.jar MarkDuplicates I=$1 O=/dev/stdout ASSUME_SORT_ORDER=$5 METRICS_FILE=$4 QUIET=true COMPRESSION_LEVEL=0 VALIDATION_STRINGENCY=LENIENT REFERENCE_SEQUENCE=$6 | /opt/samtools/bin/samtools sort -@ $2 -m 4G --reference "$6" -o "$3" -O cram /dev/stdin && /opt/samtools/bin/samtools index "$3" + arguments: - position: 2 valueFrom: "$(runtime.cores)" - position: 4 - valueFrom: "$(inputs.bam.nameroot).mark_dups_metrics.txt" + valueFrom: "$(inputs.cram.nameroot).mark_dups_metrics.txt" inputs: - bam: + cram: type: File inputBinding: position: 1 @@ -42,17 +37,24 @@ inputs: position: 5 output_name: type: string? 
- default: 'MarkedSorted.bam' + default: 'MarkedSorted.cram' inputBinding: position: 3 + reference: + type: + - string + - File + secondaryFiles: [.fai] + inputBinding: + position: 6 outputs: - sorted_bam: + sorted_cram: type: File outputBinding: glob: $(inputs.output_name) - secondaryFiles: [.bai] + secondaryFiles: [.crai] metrics_file: type: File outputBinding: - glob: "$(inputs.bam.nameroot).mark_dups_metrics.txt" + glob: "$(inputs.cram.nameroot).mark_dups_metrics.txt" diff --git a/definitions/tools/merge_crams.cwl b/definitions/tools/merge_crams.cwl new file mode 100644 index 000000000..6c087e3a5 --- /dev/null +++ b/definitions/tools/merge_crams.cwl @@ -0,0 +1,34 @@ +#!/usr/bin/env cwl-runner + +cwlVersion: v1.0 +class: CommandLineTool +label: "Samtools: merge" +baseCommand: ["/opt/samtools/bin/samtools", "merge"] +requirements: + - class: ResourceRequirement + ramMin: 8000 + coresMin: 4 + - class: DockerRequirement + dockerPull: "mgibio/samtools-cwl:1.0.0" +arguments: ["$(inputs.name).merged.cram", { prefix: "--threads", valueFrom: $(runtime.cores) }, { prefix: '-O', valueFrom: "CRAM"}] +inputs: + crams: + type: File[] + inputBinding: + position: 1 + name: + type: string + reference: + type: + - string + - File + inputBinding: + position: 2 + prefix: '--reference' + +outputs: + merged_cram: + type: File + outputBinding: + glob: "$(inputs.name).merged.cram" + diff --git a/definitions/tools/mutect.cwl b/definitions/tools/mutect.cwl index 999643417..4aeb883c9 100644 --- a/definitions/tools/mutect.cwl +++ b/definitions/tools/mutect.cwl @@ -19,12 +19,7 @@ requirements: set -o pipefail set -o errexit - export tumor_bam="$3" - export normal_bam="$4" - - TUMOR=`perl -e 'my $header_str = qx(samtools view -H $ENV{tumor_bam}); my ($sample_name) = $header_str =~ /SM:([ -~]+)/; print $sample_name'` #Extracting the sample name from the TUMOR bam. 
- NORMAL=`perl -e 'my $header_str = qx(samtools view -H $ENV{normal_bam}); my ($sample_name) = $header_str =~ /SM:([ -~]+)/; print $sample_name'` #Extracting the sample name from the NORMAL bam. - /gatk/gatk Mutect2 --java-options "-Xmx20g" -O $1 -R $2 -I $3 -tumor "$TUMOR" -I $4 -normal "$NORMAL" -L $5 #Running Mutect2. + /gatk/gatk Mutect2 --java-options "-Xmx20g" -O $1 -R $2 -I $3 -tumor "$6" -I $4 -normal "$7" -L $5 #Running Mutect2. /gatk/gatk FilterMutectCalls -R $2 -V mutect.vcf.gz -O mutect.filtered.vcf.gz #Running FilterMutectCalls on the output vcf. arguments: @@ -39,20 +34,28 @@ inputs: secondaryFiles: [.fai, ^.dict] inputBinding: position: 2 - tumor_bam: + tumor_cram: type: File inputBinding: position: 3 - secondaryFiles: [.bai] - normal_bam: + secondaryFiles: [.crai] + normal_cram: type: File? inputBinding: position: 4 - secondaryFiles: [.bai] + secondaryFiles: [.crai] interval_list: type: File inputBinding: position: 5 + tumor_sample_name: + type: string + inputBinding: + position: 6 + normal_sample_name: + type: string + inputBinding: + position: 7 outputs: vcf: diff --git a/definitions/tools/name_sort_samtools.cwl b/definitions/tools/name_sort_samtools.cwl new file mode 100644 index 000000000..c59ed058c --- /dev/null +++ b/definitions/tools/name_sort_samtools.cwl @@ -0,0 +1,38 @@ +#!/usr/bin/env cwl-runner + +cwlVersion: v1.0 +class: CommandLineTool +baseCommand: ["/opt/samtools/bin/samtools", "sort", "-n"] +requirements: + - class: ResourceRequirement + ramMin: 26000 + coresMin: 8 + - class: DockerRequirement + dockerPull: "mgibio/samtools-cwl:1.0.0" + +arguments: + - prefix: -o + valueFrom: $(inputs.cram.nameroot).NameSorted.cram + - prefix: -@ + valueFrom: $(runtime.cores) + +inputs: + reference: + type: + - string + - File + secondaryFiles: [.fai] + inputBinding: + position: 1 + prefix: '--reference' + cram: + type: File + inputBinding: + position: 2 + +outputs: + name_sorted_cram: + type: File + outputBinding: + glob: 
$(inputs.cram.nameroot).NameSorted.cram + diff --git a/definitions/tools/samtools_flagstat.cwl b/definitions/tools/samtools_flagstat.cwl index a70931de9..743de907d 100644 --- a/definitions/tools/samtools_flagstat.cwl +++ b/definitions/tools/samtools_flagstat.cwl @@ -9,13 +9,13 @@ requirements: ramMin: 4000 - class: DockerRequirement dockerPull: "mgibio/samtools-cwl:1.0.0" -stdout: "$(inputs.bam.basename).flagstat" +stdout: "$(inputs.cram.basename).flagstat" inputs: - bam: + cram: type: File inputBinding: position: 1 - secondaryFiles: [^.bai] + secondaryFiles: [^.crai] outputs: flagstats: type: stdout diff --git a/definitions/tools/sequence_align_and_tag.cwl b/definitions/tools/sequence_align_and_tag.cwl index 5a94f3610..eb8e1de93 100644 --- a/definitions/tools/sequence_align_and_tag.cwl +++ b/definitions/tools/sequence_align_and_tag.cwl @@ -62,22 +62,22 @@ requirements: if [[ "$MODE" == 'fastq' ]]; then if [[ "$RUN_TRIMMING" == 'false' ]]; then - /usr/local/bin/bwa mem -K 100000000 -t "$NTHREADS" -Y -R "$READGROUP" "$REFERENCE" "$FASTQ1" "$FASTQ2" | /usr/local/bin/samblaster -a --addMateTags | /opt/samtools/bin/samtools view -b -S /dev/stdin + /usr/local/bin/bwa mem -K 100000000 -t "$NTHREADS" -Y -R "$READGROUP" "$REFERENCE" "$FASTQ1" "$FASTQ2" | /usr/local/bin/samblaster -a --addMateTags | /opt/samtools/bin/samtools view -C -T "$REFERENCE" -S /dev/stdin else /opt/flexbar/flexbar --adapters "$TRIMMING_ADAPTERS" --reads "$FASTQ1" --reads2 "$FASTQ2" --adapter-trim-end LTAIL --adapter-min-overlap "$TRIMMING_ADAPTER_MIN_OVERLAP" --adapter-error-rate 0.1 --max-uncalled 300 --stdout-reads \ - | /usr/local/bin/bwa mem -K 100000000 -t "$NTHREADS" -Y -p -R "$READGROUP" "$REFERENCE" /dev/stdin | /usr/local/bin/samblaster -a --addMateTags | /opt/samtools/bin/samtools view -b -S /dev/stdin + | /usr/local/bin/bwa mem -K 100000000 -t "$NTHREADS" -Y -p -R "$READGROUP" "$REFERENCE" /dev/stdin | /usr/local/bin/samblaster -a --addMateTags | /opt/samtools/bin/samtools view -C -T 
"$REFERENCE" -S /dev/stdin fi fi if [[ "$MODE" == 'bam' ]]; then if [[ "$RUN_TRIMMING" == 'false' ]]; then - /usr/bin/java -Xmx4g -jar /opt/picard/picard.jar SamToFastq I="$BAM" INTERLEAVE=true INCLUDE_NON_PF_READS=true FASTQ=/dev/stdout | /usr/local/bin/bwa mem -K 100000000 -t "$NTHREADS" -Y -p -R "$READGROUP" "$REFERENCE" /dev/stdin | /usr/local/bin/samblaster -a --addMateTags | /opt/samtools/bin/samtools view -b -S /dev/stdin + /usr/bin/java -Xmx4g -jar /opt/picard/picard.jar SamToFastq I="$BAM" INTERLEAVE=true INCLUDE_NON_PF_READS=true FASTQ=/dev/stdout | /usr/local/bin/bwa mem -K 100000000 -t "$NTHREADS" -Y -p -R "$READGROUP" "$REFERENCE" /dev/stdin | /usr/local/bin/samblaster -a --addMateTags | /opt/samtools/bin/samtools view -C -T "$REFERENCE" -S /dev/stdin else /usr/bin/java -Xmx4g -jar /opt/picard/picard.jar SamToFastq I="$BAM" INTERLEAVE=true INCLUDE_NON_PF_READS=true FASTQ=/dev/stdout \ | /opt/flexbar/flexbar --adapters "$TRIMMING_ADAPTERS" --reads - --interleaved --adapter-trim-end LTAIL --adapter-min-overlap "$TRIMMING_ADAPTER_MIN_OVERLAP" --adapter-error-rate 0.1 --max-uncalled 300 --stdout-reads \ - | /usr/local/bin/bwa mem -K 100000000 -t "$NTHREADS" -Y -p -R "$READGROUP" "$REFERENCE" /dev/stdin | /usr/local/bin/samblaster -a --addMateTags | /opt/samtools/bin/samtools view -b -S /dev/stdin + | /usr/local/bin/bwa mem -K 100000000 -t "$NTHREADS" -Y -p -R "$READGROUP" "$REFERENCE" /dev/stdin | /usr/local/bin/samblaster -a --addMateTags | /opt/samtools/bin/samtools view -C -T "$REFERENCE" -S /dev/stdin fi fi -stdout: "refAlign.bam" +stdout: "refAlign.cram" arguments: - valueFrom: $(runtime.cores) position: 5 @@ -115,5 +115,5 @@ inputs: inputBinding: valueFrom: $( ['-t', self.adapters.path, '-o', self.min_overlap] ) outputs: - aligned_bam: + aligned_cram: type: stdout diff --git a/definitions/tools/strelka.cwl b/definitions/tools/strelka.cwl index 51603b1d5..3728d2d70 100644 --- a/definitions/tools/strelka.cwl +++ b/definitions/tools/strelka.cwl @@ 
-14,20 +14,20 @@ arguments: [ { valueFrom: $(inputs.cpu_reserved), position: 1 }, { valueFrom: $(runtime.outdir), position: 2 }] inputs: - tumor_bam: + tumor_cram: type: File inputBinding: prefix: '--tumorBam=' separate: false position: 3 - secondaryFiles: [.bai,^.bai] - normal_bam: + secondaryFiles: [.crai,^.crai] + normal_cram: type: File inputBinding: prefix: '--normalBam=' separate: false position: 4 - secondaryFiles: [.bai,^.bai] + secondaryFiles: [.crai,^.crai] reference: type: - string diff --git a/definitions/tools/varscan_somatic.cwl b/definitions/tools/varscan_somatic.cwl index 6aadf6554..ff98fe7d8 100644 --- a/definitions/tools/varscan_somatic.cwl +++ b/definitions/tools/varscan_somatic.cwl @@ -21,12 +21,12 @@ requirements: if [ $# -lt 7 ] then - echo "Usage: $0 [TUMOR_BAM] [NORMAL_BAM] [REFERENCE] [STRAND_FILTER] [MIN_COVERAGE] [MIN_VAR_FREQ] [P_VALUE] [roi_bed?]" + echo "Usage: $0 [TUMOR_CRAM] [NORMAL_CRAM] [REFERENCE] [STRAND_FILTER] [MIN_COVERAGE] [MIN_VAR_FREQ] [P_VALUE] [roi_bed?]" exit 1 fi - TUMOR_BAM="$1" - NORMAL_BAM="$2" + TUMOR_CRAM="$1" + NORMAL_CRAM="$2" REFERENCE="$3" STRAND_FILTER="$4" MIN_COVERAGE="$5" @@ -38,7 +38,7 @@ requirements: then #run without ROI java -jar /opt/varscan/VarScan.jar somatic \ - <(/opt/samtools/bin/samtools mpileup --no-baq -f "$REFERENCE" "$NORMAL_BAM" "$TUMOR_BAM") \ + <(/opt/samtools/bin/samtools mpileup --no-baq -f "$REFERENCE" "$NORMAL_CRAM" "$TUMOR_CRAM") \ $OUTPUT \ --strand-filter $STRAND_FILTER \ --min-coverage $MIN_COVERAGE \ @@ -49,7 +49,7 @@ requirements: else ROI_BED="$8" java -jar /opt/varscan/VarScan.jar somatic \ - <(/opt/samtools/bin/samtools mpileup --no-baq -l "$ROI_BED" -f "$REFERENCE" "$NORMAL_BAM" "$TUMOR_BAM") \ + <(/opt/samtools/bin/samtools mpileup --no-baq -l "$ROI_BED" -f "$REFERENCE" "$NORMAL_CRAM" "$TUMOR_CRAM") \ $OUTPUT \ --strand-filter $STRAND_FILTER \ --min-coverage $MIN_COVERAGE \ @@ -60,16 +60,16 @@ requirements: fi inputs: - tumor_bam: + tumor_cram: type: File inputBinding: 
position: 1 - secondaryFiles: [^.bai] - normal_bam: + secondaryFiles: [^.crai] + normal_cram: type: File inputBinding: position: 2 - secondaryFiles: [^.bai] + secondaryFiles: [^.crai] reference: type: - string