From 74f50a628b2094826397bfbd160a12ebe1f15681 Mon Sep 17 00:00:00 2001 From: FerriolCalvet Date: Sat, 31 Jan 2026 11:17:26 +0100 Subject: [PATCH 01/11] remove computation of simple mutdensity_adjusted - omega not working --- bin/compute_mutdensity.py | 43 ++++++++++++++------------------------- 1 file changed, 15 insertions(+), 28 deletions(-) diff --git a/bin/compute_mutdensity.py b/bin/compute_mutdensity.py index 880138e7..ab77258e 100755 --- a/bin/compute_mutdensity.py +++ b/bin/compute_mutdensity.py @@ -19,7 +19,7 @@ MUTDENSITY_IMPACT_GROUPS = [False, ["SNV"] , ["INSERTION", "DELETION"], ["SNV", "INSERTION", "DELETION"]] -def mutdensity_sample(maf_df, depths_df, depths_adj_df, sample_name): +def mutdensity_sample(maf_df, depths_df, sample_name): """ Computes a sample's global mutation density. Returns the mutation density per Mb, non-adjusted and adjusted by panel @@ -29,8 +29,7 @@ def mutdensity_sample(maf_df, depths_df, depths_adj_df, sample_name): impact_group_results = list() # mutation density depth information - sample_features_depth = {"DEPTH" : depths_df.drop_duplicates(subset = ["CHROM", "POS"])[f"{sample_name}"].sum(), - "DEPTH_ADJUSTED": depths_adj_df[f"{sample_name}"].sum() + sample_features_depth = {"DEPTH" : depths_df.drop_duplicates(subset = ["CHROM", "POS"])[f"{sample_name}"].sum() } for type_list in MUTDENSITY_IMPACT_GROUPS: @@ -55,9 +54,7 @@ def mutdensity_sample(maf_df, depths_df, depths_adj_df, sample_name): sample_features["N_MUTATED"] = n_mutated_reads sample_features["MUTDENSITY_MB"] = ( sample_features["N_MUTS"] / sample_features["DEPTH"] * 1000000 ).astype(float) - sample_features["MUTDENSITY_MB_ADJUSTED"] = ( sample_features["N_MUTS"] / sample_features["DEPTH_ADJUSTED"] * 1000000 ).astype(float) sample_features["MUTREADSDENSITY_MB"] = ( sample_features["N_MUTATED"] / sample_features["DEPTH"] * 1000000 ).astype(float) - sample_features["MUTREADSDENSITY_MB_ADJUSTED"] = ( sample_features["N_MUTATED"] / sample_features["DEPTH_ADJUSTED"] * 1000000 ).astype(float) sample_features["GENE"] = "ALL_GENES" sample_features["MUTTYPES"] = types_included @@ -70,7 +67,7 @@ def mutdensity_sample(maf_df, depths_df, depths_adj_df, sample_name): return mutdensity_sample -def mutdensity_gene(maf_df, depths_df, depths_adj_df, sample_name): +def mutdensity_gene(maf_df, depths_df, sample_name): """ Computes each gene mutation density. Returns the mutation density both per Mb and Kb sequenced, both non-adjusted and adjusted by panel @@ -101,21 +98,16 @@ def mutdensity_gene(maf_df, depths_df, depths_adj_df, sample_name): depths_gene_df = depths_df.groupby("GENE").agg({f"{sample_name}" : "sum" }) depths_gene_df.columns = ["DEPTH"] - depths_adj_gene_df = depths_adj_df.groupby("GENE").agg({f"{sample_name}" : "sum" }) - depths_adj_gene_df.columns = ["DEPTH_ADJUSTED"] mut_rate_mut_reads_df = n_muts_gene.merge(n_mutated_reads, on = "GENE") - depths_depthsadj_gene_df = depths_gene_df.merge(depths_adj_gene_df, on = "GENE") + ## merge so that mutation density is computed although the number of mutations is NA (meaning, zero) - mut_depths_df = depths_depthsadj_gene_df.merge(mut_rate_mut_reads_df, on = "GENE", how = 'left') - mut_depths_df = mut_depths_df.fillna(0) # I think this is not needed + mut_depths_df = depths_gene_df.merge(mut_rate_mut_reads_df, on = "GENE", how = 'left') + mut_depths_df = mut_depths_df.fillna(0) # mutation density metrics mut_depths_df["MUTDENSITY_MB"] = (mut_depths_df["N_MUTS"] / mut_depths_df["DEPTH"] * 1000000).astype(float) - mut_depths_df["MUTDENSITY_MB_ADJUSTED"] = (mut_depths_df["N_MUTS"] / mut_depths_df["DEPTH_ADJUSTED"] * 1000000).astype(float) - mut_depths_df["MUTREADSDENSITY_MB"] = (mut_depths_df["N_MUTATED"] / mut_depths_df["DEPTH"] * 1000000).astype(float) - mut_depths_df["MUTREADSDENSITY_MB_ADJUSTED"] = (mut_depths_df["N_MUTATED"] / mut_depths_df["DEPTH_ADJUSTED"] * 1000000).astype(float) mut_depths_df["MUTTYPES"] = types_included impact_group_results.append(mut_depths_df.reset_index()) @@ -137,25 +129,21 @@ def load_n_process_inputs(maf_path, depths_path, annot_panel_path, sample_name): ## mode 1: each position counts one (once per gene, be careful that it might be duplicated in different genes) depths_subset_df = depths_df.merge(annot_panel_df[["CHROM", "POS", "GENE"]].drop_duplicates(), on = ["CHROM", "POS"], how = "inner") - ## mode 2 (adjusted): each position counts as many times it contributes to the panel - depths_df[sample_name] = depths_df[sample_name] / 3 # the depth per position can contribute to three different mutations - depths_subset_adj_df = depths_df.merge(annot_panel_df[["CHROM", "POS", "GENE"]], on = ["CHROM", "POS"], how = "inner") - - ## mode 3 (adjusted): each position counts as many times it contributes to the panel, but ONLY ONCE PER SAMPLE - depths_subset_adj_sample_df = depths_df.merge(annot_panel_df.drop_duplicates(subset = ["CHROM", "POS", "REF", "ALT"])[["CHROM", "POS"]], - on = ["CHROM", "POS"], how = "inner") # Add domains and exons to maf_df annot_panel_df['CHROM_POS'] = annot_panel_df['CHROM'].astype(str) + ':' + annot_panel_df['POS'].astype(str) maf_df_raw['CHROM_POS'] = maf_df_raw['MUT_ID'].str.split('_', expand = True)[0] - maf_df = maf_df_raw.merge(annot_panel_df[['CHROM_POS', 'GENE']], on = ['CHROM_POS'], how = 'left', suffixes=['','_subgenic']).reset_index(drop=True) + maf_df = maf_df_raw.merge(annot_panel_df[['CHROM_POS', 'GENE']], + on = ['CHROM_POS'], how = 'left', + suffixes=['','_subgenic']).reset_index(drop=True) + maf_df = maf_df.drop(columns = ['GENE', 'CHROM_POS']) maf_df = maf_df.rename(columns={ 'GENE_subgenic' : 'GENE'}) maf_df = maf_df.drop_duplicates() - return maf_df, depths_subset_df, depths_subset_adj_df, depths_subset_adj_sample_df + return maf_df, depths_subset_df # -- Main function -- # @@ -166,14 +154,14 @@ def compute_mutdensity(maf_path, depths_path, annot_panel_path, sample_name, pan the panel composition. It saves the results to a TSV file. """ - maf_df, depths_subset_df, depths_subset_adj_df, depths_subset_adj_sample_df = load_n_process_inputs(maf_path, depths_path, annot_panel_path, sample_name) + maf_df, depths_subset_df = load_n_process_inputs(maf_path, depths_path, annot_panel_path, sample_name) # Compute mutation densities ## sample mutation density - mutdensity_sample_df = mutdensity_sample(maf_df, depths_subset_df, depths_subset_adj_sample_df, sample_name) + mutdensity_sample_df = mutdensity_sample(maf_df, depths_subset_df, sample_name) ## per gene mutation density - mutdensity_genes_df = mutdensity_gene(maf_df, depths_subset_df, depths_subset_adj_df, sample_name) + mutdensity_genes_df = mutdensity_gene(maf_df, depths_subset_df, sample_name) mutdensity_df = pd.concat([mutdensity_sample_df, mutdensity_genes_df]) @@ -184,8 +172,7 @@ def compute_mutdensity(maf_path, depths_path, annot_panel_path, sample_name, pan mutdensity_df[["SAMPLE_ID", "GENE", "REGIONS", "MUTTYPES", "DEPTH", "N_MUTS", "N_MUTATED", - "MUTDENSITY_MB", "MUTDENSITY_MB_ADJUSTED", - "MUTREADSDENSITY_MB", "MUTREADSDENSITY_MB_ADJUSTED", + "MUTDENSITY_MB", "MUTREADSDENSITY_MB" ]].to_csv(f"{sample_name}.{panel_v}.mutdensities.tsv", sep = "\t", header = True, From e7ff64cf2553466870357fa486d0740f3688bfa2 Mon Sep 17 00:00:00 2001 From: FerriolCalvet Date: Sat, 31 Jan 2026 11:29:05 +0100 Subject: [PATCH 02/11] provide adjusted mutation density to omega - not tested, but should work - NOT CORRECT --- bin/omega_select_mutdensity.py | 23 +++++++++----- workflows/deepcsa.nf | 58 +++++++++++++++++----------------- 2 files changed, 44 insertions(+), 37 deletions(-) diff --git a/bin/omega_select_mutdensity.py b/bin/omega_select_mutdensity.py index 912afbbf..fd299d6a 100755 --- a/bin/omega_select_mutdensity.py +++ b/bin/omega_select_mutdensity.py @@ -8,19 +8,26 @@ def select_syn_mutdensity(mutdensity_file, output_file, mode): """ - INFO + This function selects the synonymous mutation densities for all genes + from the mutation density file of all samples. + + right now the use of mode is not implemented, + since we only compute one type of synonymous mutation densities. """ mutdensity_df = pd.read_csv(mutdensity_file, sep = "\t", header = 0, na_values = custom_na_values) - synonymous_mutdensities_all_samples = mutdensity_df[(mutdensity_df["MUTTYPES"] == "SNV") & - (mutdensity_df["GENE"] != "ALL_GENES") & - ~(mutdensity_df["GENE"].str.contains("--"))].reset_index(drop = True) + synonymous_mutdensities_all_samples = mutdensity_df[(mutdensity_df["SAMPLE"] == 'all_samples') & + ~(mutdensity_df["GENE"].str.contains("--")) + ]["synonymous"].reset_index(drop = True) - if mode == 'mutations': - synonymous_mutdensities_genes = synonymous_mutdensities_all_samples[['GENE', 'MUTDENSITY_MB_ADJUSTED']] - elif mode == 'mutated_reads': - synonymous_mutdensities_genes = synonymous_mutdensities_all_samples[['GENE', 'MUTREADSDENSITY_MB_ADJUSTED']] + synonymous_mutdensities_genes = synonymous_mutdensities_all_samples[['GENE', 'synonymous']] + + # TODO implement these different modes if appropriate + # if mode == 'mutations': + # synonymous_mutdensities_genes = synonymous_mutdensities_all_samples[['GENE', 'synonymous']] + # elif mode == 'mutated_reads': + # synonymous_mutdensities_genes = synonymous_mutdensities_all_samples[['GENE', 'synonymous']] synonymous_mutdensities_genes.columns = ["GENE", "MUTDENSITY"] synonymous_mutdensities_genes.to_csv(f"{output_file}", diff --git a/workflows/deepcsa.nf b/workflows/deepcsa.nf index 0e07c75a..d8aac712 100644 --- a/workflows/deepcsa.nf +++ b/workflows/deepcsa.nf @@ -254,37 +254,10 @@ workflow DEEPCSA{ DEPTHSSYNONYMOUSCONS(annotated_depths, CREATEPANELS.out.synonymous_consensus_bed) } - if (run_mutdensity){ - // Mutation Density - MUTDENSITYALL(somatic_mutations, DEPTHSALLCONS.out.subset, CREATEPANELS.out.all_consensus_bed, ENRICHPANELS.out.all_consensus_expanded_panel.first()) - MUTDENSITYPROT(somatic_mutations, DEPTHSPROTCONS.out.subset, CREATEPANELS.out.prot_consensus_bed, ENRICHPANELS.out.prot_consensus_expanded_panel.first()) - MUTDENSITYNONPROT(somatic_mutations, DEPTHSNONPROTCONS.out.subset, CREATEPANELS.out.nonprot_consensus_bed, ENRICHPANELS.out.nonprot_consensus_expanded_panel.first()) - MUTDENSITYSYNONYMOUS(somatic_mutations, DEPTHSSYNONYMOUSCONS.out.subset, CREATEPANELS.out.synonymous_consensus_bed, ENRICHPANELS.out.synonymous_consensus_expanded_panel.first()) - - channel.of([ [ id: "all_samples" ] ]) - .join( MUTDENSITYSYNONYMOUS.out.mutdensities ) - .set{ all_samples_syn_mutdensity } - - SYNMUTDENSITY(all_samples_syn_mutdensity) - - SYNMUTREADSDENSITY(all_samples_syn_mutdensity) - - - // Concatenate all outputs into a single file - channel.empty() - .concat(MUTDENSITYALL.out.mutdensities.map{ it -> it[1]}.flatten()) - .concat(MUTDENSITYPROT.out.mutdensities.map{ it -> it[1]}.flatten()) - .concat(MUTDENSITYNONPROT.out.mutdensities.map{ it -> it[1]}.flatten()) - .concat(MUTDENSITYSYNONYMOUS.out.mutdensities.map{ it -> it[1]}.flatten()) - .set{ all_mutdensities } - all_mutdensities.collectFile(name: "all_mutdensities.tsv", storeDir:"${params.outdir}/mutdensity", skip: 1, keepHeader: true).set{ all_mutdensities_file } - - } - - // Mutational profile - if ( params.profileall || run_mutabilities || params.omega ){ + if ( params.profileall || run_mutabilities || params.omega || run_mutdensity){ MUTPROFILEALL(somatic_mutations, DEPTHSALLCONS.out.subset, CREATEPANELS.out.all_consensus_bed, wgs_trinucs, TABLE2GROUP.out.json_allgroups) + if (run_mutdensity){ MUTDENSITYADJUSTED(somatic_mutations, DEPTHSALLCONS.out.subset, CREATEPANELS.out.exons_consensus_bed, CREATEPANELS.out.exons_consensus_panel, MUTPROFILEALL.out.profile, wgs_trinucs) @@ -296,6 +269,14 @@ workflow DEEPCSA{ MUTDENSITYADJUSTED.out.mutdensities_flat.map{ it -> it[1]}.flatten() .set{ all_adjusted_mutdensities_flat } all_adjusted_mutdensities_flat.collectFile(name: "all_adjusted_mutdensities_flat.tsv", storeDir:"${params.outdir}/mutdensity_adjusted", skip: 1, keepHeader: true) + + channel.of([ [ id: "all_samples" ] ]) + .join( MUTDENSITYADJUSTED.out.mutdensities ) + .set{ all_samples_adj_mutdensity } + + SYNMUTDENSITY(all_samples_adj_mutdensity) + + SYNMUTREADSDENSITY(all_samples_adj_mutdensity) } } if (params.profilenonprot){ @@ -310,6 +291,25 @@ workflow DEEPCSA{ } + if (run_mutdensity){ + // Mutation Density + MUTDENSITYALL(somatic_mutations, DEPTHSALLCONS.out.subset, CREATEPANELS.out.all_consensus_bed, ENRICHPANELS.out.all_consensus_expanded_panel.first()) + MUTDENSITYPROT(somatic_mutations, DEPTHSPROTCONS.out.subset, CREATEPANELS.out.prot_consensus_bed, ENRICHPANELS.out.prot_consensus_expanded_panel.first()) + MUTDENSITYNONPROT(somatic_mutations, DEPTHSNONPROTCONS.out.subset, CREATEPANELS.out.nonprot_consensus_bed, ENRICHPANELS.out.nonprot_consensus_expanded_panel.first()) + MUTDENSITYSYNONYMOUS(somatic_mutations, DEPTHSSYNONYMOUSCONS.out.subset, CREATEPANELS.out.synonymous_consensus_bed, ENRICHPANELS.out.synonymous_consensus_expanded_panel.first()) + + // Concatenate all outputs into a single file + channel.empty() + .concat(MUTDENSITYALL.out.mutdensities.map{ it -> it[1]}.flatten()) + .concat(MUTDENSITYPROT.out.mutdensities.map{ it -> it[1]}.flatten()) + .concat(MUTDENSITYNONPROT.out.mutdensities.map{ it -> it[1]}.flatten()) + .concat(MUTDENSITYSYNONYMOUS.out.mutdensities.map{ it -> it[1]}.flatten()) + .set{ all_mutdensities } + all_mutdensities.collectFile(name: "all_mutdensities.tsv", storeDir:"${params.outdir}/mutdensity", skip: 1, keepHeader: true).set{ all_mutdensities_file } + + } + + if (run_mutabilities) { if (params.profileall){ MUTABILITYALL(somatic_mutations, From 53c27bed7eb18006d9fe870a349e4afab97ce980 Mon Sep 17 00:00:00 2001 From: FerriolCalvet Date: Tue, 3 Feb 2026 13:29:22 +0100 Subject: [PATCH 03/11] rename and reorganize mutdensity files --- bin/{compute_mutdensity.py => mut_density_simple.py} | 0 modules/local/mut_density/{ => adjusted}/main.nf | 4 ++-- .../local/{computemutdensity => mut_density/simple}/main.nf | 2 +- subworkflows/local/adjmutdensity/main.nf | 2 +- subworkflows/local/mutationdensity/main.nf | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) rename bin/{compute_mutdensity.py => mut_density_simple.py} (100%) rename modules/local/mut_density/{ => adjusted}/main.nf (95%) rename modules/local/{computemutdensity => mut_density/simple}/main.nf (97%) diff --git a/bin/compute_mutdensity.py b/bin/mut_density_simple.py similarity index 100% rename from bin/compute_mutdensity.py rename to bin/mut_density_simple.py diff --git a/modules/local/mut_density/main.nf b/modules/local/mut_density/adjusted/main.nf similarity index 95% rename from modules/local/mut_density/main.nf rename to modules/local/mut_density/adjusted/main.nf index 632415f8..6f6388fd 100644 --- a/modules/local/mut_density/main.nf +++ b/modules/local/mut_density/adjusted/main.nf @@ -8,7 +8,7 @@ process MUTATION_DENSITY { input: tuple val(meta), path(somatic_mutations_file), path(depths_file), path(mutability_file) tuple val(meta2), path(panel_file) - path(trinucleotide_counts_file) + path (trinucleotide_counts_file) output: @@ -20,7 +20,7 @@ process MUTATION_DENSITY { script: def sample_name = "${meta.id}" """ - mut_density.py \\ + mut_density_adjusted.py \\ --sample_name ${sample_name} \\ --depths_file ${depths_file} \\ --somatic_mutations_file ${somatic_mutations_file} \\ diff --git a/modules/local/computemutdensity/main.nf b/modules/local/mut_density/simple/main.nf similarity index 97% rename from modules/local/computemutdensity/main.nf rename to modules/local/mut_density/simple/main.nf index 9883c740..8c50db94 100644 --- a/modules/local/computemutdensity/main.nf +++ b/modules/local/mut_density/simple/main.nf @@ -16,7 +16,7 @@ process MUTATION_DENSITY { def sample_name = "${meta.id}" def panel_version = task.ext.panel_version ?: "${meta2.id}" """ - compute_mutdensity.py \\ + mut_density_simple.py \\ --maf_path ${mutations} \\ --depths_path ${depth} \\ --annot_panel_path ${consensus_panel} \\ diff --git a/subworkflows/local/adjmutdensity/main.nf b/subworkflows/local/adjmutdensity/main.nf index 25cfbe53..687e8a64 100644 --- a/subworkflows/local/adjmutdensity/main.nf +++ b/subworkflows/local/adjmutdensity/main.nf @@ -2,7 +2,7 @@ include { TABIX_BGZIPTABIX_QUERY as QUERYMUTATIONS } from '../../../ include { SUBSET_MAF as SUBSETMUTDENSITYADJUSTED } from '../../../modules/local/subsetmaf/main' -include { MUTATION_DENSITY as MUTDENSITYADJ } from '../../../modules/local/mut_density/main' +include { MUTATION_DENSITY as MUTDENSITYADJ } from '../../../modules/local/mut_density/adjusted/main' workflow MUTATION_DENSITY { diff --git a/subworkflows/local/mutationdensity/main.nf b/subworkflows/local/mutationdensity/main.nf index e9569ad9..87a85a2b 100644 --- a/subworkflows/local/mutationdensity/main.nf +++ b/subworkflows/local/mutationdensity/main.nf @@ -2,7 +2,7 @@ include { TABIX_BGZIPTABIX_QUERY as QUERYMUTATIONS } from '../../../modu include { SUBSET_MAF as SUBSETMUTDENSITY } from '../../../modules/local/subsetmaf/main' -include { MUTATION_DENSITY as MUTDENSITY } from '../../../modules/local/computemutdensity/main' +include { MUTATION_DENSITY as MUTDENSITY } from '../../../modules/local/mut_density/simple/main' workflow MUTATION_DENSITY{ From 5a4aaeaa662d014ffea7217609fbf5243cdf55d1 Mon Sep 17 00:00:00 2001 From: FerriolCalvet Date: Tue, 3 Feb 2026 13:31:24 +0100 Subject: [PATCH 04/11] update adjusted mut density with sample values - add drop duplicates with potential fix --- ...{mut_density.py => mut_density_adjusted.py} | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) rename bin/{mut_density.py => mut_density_adjusted.py} (90%) diff --git a/bin/mut_density.py b/bin/mut_density_adjusted.py similarity index 90% rename from bin/mut_density.py rename to bin/mut_density_adjusted.py index 308b2953..1e90733a 100755 --- a/bin/mut_density.py +++ b/bin/mut_density_adjusted.py @@ -61,14 +61,17 @@ def mutation_density(sample_name, depths_file, somatic_mutations_file, mutabilit for csqn, csqn_set in broadimpact_grouping_dict_with_synonymous.items(): - for gene in panel_df['GENE'].unique(): + for gene in list(panel_df['GENE'].unique()) + ["ALL_GENES"]: # compute vector of sum of depths per trinucleotide context # tailored to the specific gene-impact target - region_df = panel_df[(panel_df['IMPACT'].isin(csqn_set)) & (panel_df['GENE'] == gene)].copy() + if gene == 'ALL_GENES': + region_df = panel_df[(panel_df['IMPACT'].isin(csqn_set))][['CHROM', 'POS']].drop_duplicates() + else: + region_df = panel_df[(panel_df['IMPACT'].isin(csqn_set)) & (panel_df['GENE'] == gene)][['CHROM', 'POS']].drop_duplicates() # counting every position once - dh = pd.merge(region_df[['CHROM', 'POS']], + dh = pd.merge(region_df, depths_df[['CHROM', 'POS', 'CONTEXT', sample_name]], on=['CHROM', 'POS'], how='left') depth_sum_df = dh.groupby(by='CONTEXT').agg({sample_name: 'sum'}).reset_index() @@ -88,10 +91,13 @@ def mutation_density(sample_name, depths_file, somatic_mutations_file, mutabilit except AssertionError: res.loc[gene, csqn] = None continue - - # observed somatic mutations - n = somatic_mutations_df[(somatic_mutations_df['IMPACT'].isin(csqn_set)) & (somatic_mutations_df['GENE'] == gene)].shape[0] + + # observed somatic mutations + if gene == 'ALL_GENES': + n = somatic_mutations_df[(somatic_mutations_df['IMPACT'].isin(csqn_set))].shape[0] + else: + n = somatic_mutations_df[(somatic_mutations_df['IMPACT'].isin(csqn_set)) & (somatic_mutations_df['GENE'] == gene)].shape[0] res.loc[gene, csqn] = n / effective_length From c04b90e9930704e30970e083d246902b31736e95 Mon Sep 17 00:00:00 2001 From: FerriolCalvet Date: Tue, 3 Feb 2026 13:53:24 +0100 Subject: [PATCH 05/11] add adjusted mut density variability plots - not tested --- bin/mut_density_adjusted.py | 8 +++--- bin/plot_explore_variability.py | 28 +++++++++---------- .../plot/interindividual_variability/main.nf | 8 ++++-- subworkflows/local/plottingsummary/main.nf | 3 +- workflows/deepcsa.nf | 4 ++- 5 files changed, 27 insertions(+), 24 deletions(-) diff --git a/bin/mut_density_adjusted.py b/bin/mut_density_adjusted.py index 1e90733a..762a5bd2 100755 --- a/bin/mut_density_adjusted.py +++ b/bin/mut_density_adjusted.py @@ -155,12 +155,12 @@ def main(sample_name, depths_file, somatic_mutations_file, mutability_file, pane logfoldchange_plot(sample_name, res, res_flat) # save results - res["SAMPLE"] = sample_name - res_flat["SAMPLE"] = sample_name + res["SAMPLE_ID"] = sample_name + res_flat["SAMPLE_ID"] = sample_name res.index.name = 'GENE' res_flat.index.name = 'GENE' - res[['SAMPLE'] + [col for col in res.columns if col != 'SAMPLE']].to_csv(f'{sample_name}.mutdensities.tsv', sep='\t') - res_flat[['SAMPLE'] + [col for col in res_flat.columns if col != 'SAMPLE']].to_csv(f'{sample_name}.mutdensities_flat.tsv', sep='\t') + res[['SAMPLE_ID'] + [col for col in res.columns if col != 'SAMPLE_ID']].to_csv(f'{sample_name}.mutdensities.tsv', sep='\t') + res_flat[['SAMPLE_ID'] + [col for col in res_flat.columns if col != 'SAMPLE_ID']].to_csv(f'{sample_name}.mutdensities_flat.tsv', sep='\t') diff --git a/bin/plot_explore_variability.py b/bin/plot_explore_variability.py index e089490c..f55ceb0a 100755 --- a/bin/plot_explore_variability.py +++ b/bin/plot_explore_variability.py @@ -84,15 +84,13 @@ def mut_density_heatmaps(data, genes_list, samples_list, outdir, prefix = '', def adj_mut_density_heatmaps(data, genes_list, samples_list, outdir, prefix = '', config_datasets = { - "all" : ({"MUTTYPES": 'all_types', "REGIONS": 'all'}, 'MUTDENSITY_MB'), - "all protein-affecting" : ({"MUTTYPES": 'all_types', "REGIONS": 'protein_affecting'}, 'MUTDENSITY_MB'), - "all non-protein-affecting" : ({"MUTTYPES": 'all_types', "REGIONS": 'non_protein_affecting'}, 'MUTDENSITY_MB'), - "SNVs" : ({"MUTTYPES": 'SNV', "REGIONS": 'all'}, 'MUTDENSITY_MB'), - "SNVs protein-affecting" : ({"MUTTYPES": 'SNV', "REGIONS": 'protein_affecting'}, 'MUTDENSITY_MB'), - "SNVs non-protein-affecting" : ({"MUTTYPES": 'SNV', "REGIONS": 'non_protein_affecting'}, 'MUTDENSITY_MB'), - "INDELs" : ({"MUTTYPES": 'DELETION-INSERTION', "REGIONS": 'all'}, 'MUTDENSITY_MB'), - "INDELs protein-affecting" : ({"MUTTYPES": 'DELETION-INSERTION', "REGIONS": 'protein_affecting'}, 'MUTDENSITY_MB'), - "INDELs non-protein-affecting" : ({"MUTTYPES": 'DELETION-INSERTION', "REGIONS": 'non_protein_affecting'}, 'MUTDENSITY_MB') + "synonymous" : "synonymous", + "missense" : "missense", + "nonsense" : "nonsense", + "essential_splice" : "essential_splice", + "truncating" : "truncating", + "nonsynonymous_splice" : "nonsynonymous_splice", + "all_impacts" : "all_impacts", } ): """ @@ -105,13 +103,13 @@ def adj_mut_density_heatmaps(data, genes_list, samples_list, outdir, prefix = '' print("No data available for the selected samples/groups") return - pdf_filename = f"{outdir}/{prefix}mut_density_heatmaps.pdf" + pdf_filename = f"{outdir}/{prefix}_adjusted_mut_density_heatmaps.pdf" with PdfPages(pdf_filename) as pdf: - for title, (config, value) in config_datasets.items(): - print("Creating heatmap for:", title, config, value) - filtered_data = filter_data_from_config(data, config) - # print(filtered_data[['GENE', 'SAMPLE_ID', value]].head()) + for title, value in config_datasets.items(): + print("Creating heatmap for:", title) + filtered_data = data[["GENE", "SAMPLE_ID", value]] + # Create a pivot table for the heatmap heatmap_data = filtered_data.pivot_table(index='GENE', columns='SAMPLE_ID', values=value) heatmap_data = heatmap_data.reindex(index=genes_list, columns=samples_list) @@ -208,7 +206,7 @@ def main(outdir, panel_regions, samples_json, all_groups_json, mutdensities, adj plotting_manager(outdir, genes_list, samples_list, "samples.", data_string, data_objects) except Exception as e: print("Error in the process", e) - + try: plotting_manager(outdir, genes_list, groups_names, "groups.", data_string, data_objects) except Exception as e: diff --git a/modules/local/plot/interindividual_variability/main.nf b/modules/local/plot/interindividual_variability/main.nf index 6a3c1a34..daca8584 100644 --- a/modules/local/plot/interindividual_variability/main.nf +++ b/modules/local/plot/interindividual_variability/main.nf @@ -6,10 +6,11 @@ process PLOT_INTERINDIVIDUAL_VARIABILITY { container "docker.io/bbglab/deepcsa-core:0.0.2-alpha" input: - path(samples_json) - path(all_groups_json) + path (samples_json) + path (all_groups_json) tuple val(meta), path(panel_file) - path(mutdensities_file) + path (mutdensities_file) + path (adjusted_mutdensities_file) output: path("**.pdf") , emit: plots @@ -23,6 +24,7 @@ process PLOT_INTERINDIVIDUAL_VARIABILITY { mkdir ${prefix}.variability_plots plot_explore_variability.py \\ --mutdensities ${mutdensities_file} \\ + --adjusted-mutdensities ${adjusted_mutdensities_file} \\ --panel-regions ${panel_file} \\ --outdir ${prefix}.variability_plots \\ --samples-json ${samples_json} \\ diff --git a/subworkflows/local/plottingsummary/main.nf b/subworkflows/local/plottingsummary/main.nf index afb4d9dc..70c3f3be 100644 --- a/subworkflows/local/plottingsummary/main.nf +++ b/subworkflows/local/plottingsummary/main.nf @@ -14,6 +14,7 @@ workflow PLOTTING_SUMMARY { positive_selection_results_ready all_mutations all_mutdensities + all_mutdensities_adjusted site_comparison all_samples_depth samples @@ -80,7 +81,7 @@ workflow PLOTTING_SUMMARY { // ? plot saturation kinetics curves - PLOTINTERINDIVIDUALVARIABILITY(samples, all_groups, panel, all_mutdensities) + PLOTINTERINDIVIDUALVARIABILITY(samples, all_groups, panel, all_mutdensities, all_mutdensities_adjusted) // heatmaps: // mutations per gene/sample (total, SNV only, INDEL only, per consequence type) // driver mutations per gene/sample diff --git a/workflows/deepcsa.nf b/workflows/deepcsa.nf index d8aac712..e45f4f9b 100644 --- a/workflows/deepcsa.nf +++ b/workflows/deepcsa.nf @@ -148,6 +148,7 @@ workflow DEEPCSA{ all_compiled_omegas = channel.empty() all_compiled_omegasgloballoc = channel.empty() all_mutdensities_file = channel.empty() + compiled_adjusted_mutdensities = channel.empty() // if the user wants to use custom gene groups, import the gene groups table @@ -264,7 +265,7 @@ workflow DEEPCSA{ // Concatenate all outputs into a single file MUTDENSITYADJUSTED.out.mutdensities.map{ it -> it[1]}.flatten() .set{ all_adjusted_mutdensities } - all_adjusted_mutdensities.collectFile(name: "all_adjusted_mutdensities.tsv", storeDir:"${params.outdir}/mutdensity_adjusted", skip: 1, keepHeader: true) + all_adjusted_mutdensities.collectFile(name: "all_adjusted_mutdensities.tsv", storeDir:"${params.outdir}/mutdensity_adjusted", skip: 1, keepHeader: true).set{ compiled_adjusted_mutdensities } MUTDENSITYADJUSTED.out.mutdensities_flat.map{ it -> it[1]}.flatten() .set{ all_adjusted_mutdensities_flat } @@ -576,6 +577,7 @@ workflow DEEPCSA{ PLOTTINGSUMMARY(positive_selection_results_ready, somatic_mutations, all_mutdensities_file.first(), + compiled_adjusted_mutdensities.first(), site_comparison_results, ANNOTATEDEPTHS.out.all_samples_depths.first(), From dfab72d59a6f8272d7845c4fca5862c69fa115fc Mon Sep 17 00:00:00 2001 From: FerriolCalvet Date: Fri, 13 Feb 2026 12:41:52 +0100 Subject: [PATCH 06/11] fix bug after upstream change --- bin/omega_select_mutdensity.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/omega_select_mutdensity.py b/bin/omega_select_mutdensity.py index fd299d6a..2178cd3f 100755 --- a/bin/omega_select_mutdensity.py +++ b/bin/omega_select_mutdensity.py @@ -17,7 +17,7 @@ def select_syn_mutdensity(mutdensity_file, output_file, mode): mutdensity_df = pd.read_csv(mutdensity_file, sep = "\t", header = 0, na_values = custom_na_values) - synonymous_mutdensities_all_samples = mutdensity_df[(mutdensity_df["SAMPLE"] == 'all_samples') & + synonymous_mutdensities_all_samples = mutdensity_df[(mutdensity_df["SAMPLE_ID"] == 'all_samples') & ~(mutdensity_df["GENE"].str.contains("--")) ]["synonymous"].reset_index(drop = True) From 61d4532d038bf83f9a74512bdc675a47e935978f Mon Sep 17 00:00:00 2001 From: FerriolCalvet Date: Fri, 13 Feb 2026 15:44:51 +0100 Subject: [PATCH 07/11] fix error in computation of adjusted mutation density --- bin/mut_density_adjusted.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/bin/mut_density_adjusted.py b/bin/mut_density_adjusted.py index 762a5bd2..ebf684f9 100755 --- a/bin/mut_density_adjusted.py +++ b/bin/mut_density_adjusted.py @@ -31,6 +31,8 @@ def get_correction_factor(sample_name, trinucleotide_counts_df, mutability_df, f triplet_counts = np.array(l) # genome length in Mb + # accounting for the fact that each position contributes: + # 3*depth because of the 3 mutations available at each position genome_length = sum(triplet_counts) / (3 * 1e6) # vector of relative mutabilities in 96-channel canonical sorting @@ -66,12 +68,13 @@ def mutation_density(sample_name, depths_file, somatic_mutations_file, mutabilit # compute vector of sum of depths per trinucleotide context # tailored to the specific gene-impact target if gene == 'ALL_GENES': - region_df = panel_df[(panel_df['IMPACT'].isin(csqn_set))][['CHROM', 'POS']].drop_duplicates() + region_df = panel_df[(panel_df['IMPACT'].isin(csqn_set))][['CHROM', 'POS', 'REF', 'ALT']].drop_duplicates() else: - region_df = panel_df[(panel_df['IMPACT'].isin(csqn_set)) & (panel_df['GENE'] == gene)][['CHROM', 'POS']].drop_duplicates() + region_df = panel_df[(panel_df['IMPACT'].isin(csqn_set)) & (panel_df['GENE'] == gene)][['CHROM', 'POS', 'REF', 'ALT']].drop_duplicates() - # counting every position once - dh = pd.merge(region_df, + # counting every position as many times as the number of possible + # mutations of the selected consequences at that position (1,2 or 3) + dh = pd.merge(region_df[['CHROM', 'POS']], depths_df[['CHROM', 'POS', 'CONTEXT', sample_name]], on=['CHROM', 'POS'], how='left') depth_sum_df = dh.groupby(by='CONTEXT').agg({sample_name: 'sum'}).reset_index() From db38bf0437ad4fb6c7f5f89a5af440ccf00510f5 Mon Sep 17 00:00:00 2001 From: FerriolCalvet Date: Tue, 17 Feb 2026 09:43:10 +0000 Subject: [PATCH 08/11] fix bug in synonymous mut. density selection --- bin/omega_select_mutdensity.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/omega_select_mutdensity.py b/bin/omega_select_mutdensity.py index 2178cd3f..0af801dd 100755 --- a/bin/omega_select_mutdensity.py +++ b/bin/omega_select_mutdensity.py @@ -19,9 +19,9 @@ def select_syn_mutdensity(mutdensity_file, output_file, mode): synonymous_mutdensities_all_samples = mutdensity_df[(mutdensity_df["SAMPLE_ID"] == 'all_samples') & ~(mutdensity_df["GENE"].str.contains("--")) - ]["synonymous"].reset_index(drop = True) + ].reset_index(drop = True) - synonymous_mutdensities_genes = synonymous_mutdensities_all_samples[['GENE', 'synonymous']] + synonymous_mutdensities_genes = synonymous_mutdensities_all_samples[['GENE', 'synonymous']].copy() # TODO implement these different modes if appropriate # if mode == 'mutations': From c80749480ae776b8a69127b0457ba8e077eba601 Mon Sep 17 00:00:00 2001 From: FerriolCalvet Date: Thu, 19 Feb 2026 09:15:27 +0000 Subject: [PATCH 09/11] add dndsproxy computation --- bin/mut_density_adjusted_dnds.py | 74 ++++++++++++++++++++++++++++++++ conf/results_outputs.config | 8 ++++ modules/local/dnds_proxy/main.nf | 45 +++++++++++++++++++ workflows/deepcsa.nf | 3 ++ 4 files changed, 130 insertions(+) create mode 100755 bin/mut_density_adjusted_dnds.py create mode 100644 modules/local/dnds_proxy/main.nf diff --git a/bin/mut_density_adjusted_dnds.py b/bin/mut_density_adjusted_dnds.py new file mode 100755 index 00000000..68cf59b9 --- /dev/null +++ b/bin/mut_density_adjusted_dnds.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python + + +import click +import pandas as pd +from read_utils import custom_na_values + + +def compute_dnds_proxy(mutdensity_file, cohort_syn_mutdensities_file, output_file, mode): + """ + TODO: explain what this function does + TODO 2: store a log file that is also outputted and can be used to check some basic statistics + + right now the use of mode is not implemented, + since we only compute one type of synonymous mutation densities. + """ + + mutdensity_df_init = pd.read_csv(mutdensity_file, sep = "\t", header = 0, na_values = custom_na_values) + all_possible_genes = list(mutdensity_df_init["GENE"].unique()) + + cohort_syn_mutdensity_df = pd.read_csv(cohort_syn_mutdensities_file, sep = "\t", header = 0, na_values = custom_na_values) + cohort_syn_mutdensity_df.columns = ['GENE', 'cohort_synonymous'] + cohort_syn_mutdensity_df = cohort_syn_mutdensity_df.set_index("GENE") + + init_cohort_syn_df = pd.DataFrame(index = all_possible_genes) + cohort_syn_df = pd.concat((init_cohort_syn_df, cohort_syn_mutdensity_df), axis = 0) + + # filling the null mutation densities with the value of the 1st decile + cohort_syn_df = cohort_syn_df.fillna(cohort_syn_df[~(cohort_syn_df.isna())].quantile(.1)) + + mutdensity_df = mutdensity_df_init.merge(cohort_syn_df, on = "GENE") + for impact in ["missense", "truncating", "nonsynonymous_splice"]: + mutdensity_df[f"d_{impact}/d_synonymous"] = mutdensity_df[impact] / mutdensity_df["synonymous"] + mutdensity_df[f"d_{impact}/d_cohort_synonymous"] = mutdensity_df[impact] / mutdensity_df["cohort_synonymous"] + + # summary at all_samples level + subset_mutdensities = mutdensity_df[(mutdensity_df["SAMPLE"] == 'all_samples')] + for impact in ["missense", "truncating"]: + print(subset_mutdensities.sort_values(by=f"d_{impact}/d_synonymous", ascending=False)[ + ["GENE", "SAMPLE", impact, "synonymous", f"d_{impact}/d_synonymous"] + ].head(10)) + + + # # summary at sample-level + # subset_mutdensities = mutdensity_df[(mutdensity_df["SAMPLE"] != 'all_samples')] + # for impact in ["missense", "truncating"]: + # print(subset_mutdensities.sort_values(by=f"d_{impact}/d_synonymous", ascending=False)[ + # ["GENE", "SAMPLE", impact, "synonymous", f"d_{impact}/d_synonymous"] + # ].head(10)) + + # TODO implement these different modes if appropriate + # if mode == 'mutations': + # synonymous_mutdensities_genes = synonymous_mutdensities_all_samples[['GENE', 'synonymous']] + # elif mode == 'mutated_reads': + # synonymous_mutdensities_genes = synonymous_mutdensities_all_samples[['GENE', 'synonymous']] + + mutdensity_df.to_csv(f"{output_file}", + header=True, + index=False, + sep="\t") + + +@click.command() +@click.option('--mutdensities', type=click.Path(exists=True), help='Input mutation density file') +@click.option('--cohort-syn-mutdensities', type=click.Path(exists=True), help='Input cohort synonymous mutation densities') +@click.option('--output', type=click.Path(), help='Output file') +@click.option('--mode', type=click.Choice(['mutations', 'mutated_reads']), default='mutations') +def main(mutdensities, cohort_syn_mutdensities, output, mode): + click.echo("Selecting the gene synonymous mutation densities...") + compute_dnds_proxy(mutdensities, cohort_syn_mutdensities, output, mode) + +if __name__ == '__main__': + main() + diff --git a/conf/results_outputs.config b/conf/results_outputs.config index 5dc2d67a..8980385a 100644 --- a/conf/results_outputs.config +++ b/conf/results_outputs.config @@ -205,6 +205,14 @@ process { pattern: '**{tsv,per_sample,sigprofiler}', ] } + + withName: DNDSPROXY { + publishDir = [ + path: { "${params.outdir}/selection/dndsproxy" }, + mode: params.publish_dir_mode, + pattern: '**{tsv,log}', + ] + } withName: COMPUTETRINUC { publishDir = [ diff --git a/modules/local/dnds_proxy/main.nf b/modules/local/dnds_proxy/main.nf new file mode 100644 index 00000000..af55fbb0 --- /dev/null +++ b/modules/local/dnds_proxy/main.nf @@ -0,0 +1,45 @@ +process DNDS_PROXY { + tag "$meta.id" + label 'process_single' + + label 'deepcsa_core' + + input: + path(all_mutation_densities) + tuple val(meta), path(cohort_synonymous_mutdensities) + + output: + tuple val(meta), path("*.gene_mutdensities.tsv") , emit: mutdensity + path "versions.yml" , topic: versions + + + + script: + def prefix = task.ext.prefix ?: "" + prefix = "${meta.id}${prefix}" + def mode = task.ext.mode ?: "mutations" + """ + mut_density_adjusted_dnds.py \\ + --mutdensities ${all_mutation_densities} \\ + --cohort-syn-mutdensities ${cohort_synonymous_mutdensities} \\ + --output ${prefix}.gene_mutdensities_n_dnds.tsv \\ + --mode ${mode}; + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "all_samples" + """ + touch ${prefix}.gene_mutdensities.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ + +} diff --git a/workflows/deepcsa.nf b/workflows/deepcsa.nf index a32d8907..b1bb0f4c 100644 --- a/workflows/deepcsa.nf +++ b/workflows/deepcsa.nf @@ -103,6 +103,7 @@ include { DOWNSAMPLE_DEPTHS as DOWNSAMPLEDEPTHS } from '../m include { SELECT_MUTDENSITIES as SYNMUTDENSITY } from '../modules/local/select_mutdensity/main' include { SELECT_MUTDENSITIES as SYNMUTREADSDENSITY } from '../modules/local/select_mutdensity/main' +include { DNDS_PROXY as DNDSPROXY } from '../modules/local/dnds_proxy/main' include { DNA_2_PROTEIN_MAPPING as DNA2PROTEINMAPPING } from '../modules/local/dna2protein/main' @@ -280,6 +281,8 @@ workflow DEEPCSA{ SYNMUTDENSITY(all_samples_adj_mutdensity) SYNMUTREADSDENSITY(all_samples_adj_mutdensity) + + DNDSPROXY(compiled_adjusted_mutdensities, SYNMUTDENSITY.out.mutdensity.first()) } } if (params.profilenonprot){ From abee52b4e8481bf928817992f0e5f5cfe2e481f0 Mon Sep 17 00:00:00 2001 From: FerriolCalvet Date: Thu, 19 Feb 2026 12:01:17 +0100 Subject: [PATCH 10/11] fix dnds proxy computation --- bin/mut_density_adjusted_dnds.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/bin/mut_density_adjusted_dnds.py b/bin/mut_density_adjusted_dnds.py index 68cf59b9..e2633e8d 100755 --- a/bin/mut_density_adjusted_dnds.py +++ b/bin/mut_density_adjusted_dnds.py @@ -23,10 +23,11 @@ def compute_dnds_proxy(mutdensity_file, cohort_syn_mutdensities_file, output_fil cohort_syn_mutdensity_df = cohort_syn_mutdensity_df.set_index("GENE") init_cohort_syn_df = pd.DataFrame(index = all_possible_genes) - cohort_syn_df = pd.concat((init_cohort_syn_df, cohort_syn_mutdensity_df), axis = 0) + cohort_syn_df = pd.concat((init_cohort_syn_df, cohort_syn_mutdensity_df), axis = 1) # filling the null mutation densities with the value of the 1st decile - cohort_syn_df = cohort_syn_df.fillna(cohort_syn_df[~(cohort_syn_df.isna())].quantile(.1)) + cohort_syn_df = cohort_syn_df.fillna(cohort_syn_df[~(cohort_syn_df.isna())].quantile(.1)).reset_index() + cohort_syn_df.columns = ['GENE', 'cohort_synonymous'] mutdensity_df = mutdensity_df_init.merge(cohort_syn_df, on = "GENE") for impact in ["missense", "truncating", "nonsynonymous_splice"]: @@ -34,18 +35,18 @@ def compute_dnds_proxy(mutdensity_file, cohort_syn_mutdensities_file, output_fil mutdensity_df[f"d_{impact}/d_cohort_synonymous"] = mutdensity_df[impact] / mutdensity_df["cohort_synonymous"] # summary at all_samples level - subset_mutdensities = mutdensity_df[(mutdensity_df["SAMPLE"] == 'all_samples')] + subset_mutdensities = mutdensity_df[(mutdensity_df["SAMPLE_ID"] == 'all_samples')] for impact in ["missense", "truncating"]: print(subset_mutdensities.sort_values(by=f"d_{impact}/d_synonymous", ascending=False)[ - ["GENE", "SAMPLE", impact, "synonymous", f"d_{impact}/d_synonymous"] + ["GENE", "SAMPLE_ID", impact, "synonymous", f"d_{impact}/d_synonymous"] ].head(10)) # # summary at sample-level - # subset_mutdensities = mutdensity_df[(mutdensity_df["SAMPLE"] != 'all_samples')] + # subset_mutdensities = mutdensity_df[(mutdensity_df["SAMPLE_ID"] != 'all_samples')] # for impact in ["missense", "truncating"]: # print(subset_mutdensities.sort_values(by=f"d_{impact}/d_synonymous", ascending=False)[ - # ["GENE", "SAMPLE", impact, "synonymous", f"d_{impact}/d_synonymous"] + # ["GENE", "SAMPLE_ID", impact, "synonymous", f"d_{impact}/d_synonymous"] # ].head(10)) # TODO implement these different modes if appropriate From 6bf72217b6964e6e046eb6f3a72a307be068b444 Mon Sep 17 00:00:00 2001 From: FerriolCalvet Date: Thu, 19 Feb 2026 12:51:50 +0100 Subject: [PATCH 11/11] update 0 handling and output --- bin/mut_density_adjusted_dnds.py | 2 ++ modules/local/dnds_proxy/main.nf | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/bin/mut_density_adjusted_dnds.py b/bin/mut_density_adjusted_dnds.py index e2633e8d..604e0997 100755 --- a/bin/mut_density_adjusted_dnds.py +++ b/bin/mut_density_adjusted_dnds.py @@ -3,6 +3,7 @@ import click import pandas as pd +import numpy as np from read_utils import custom_na_values @@ -16,6 +17,7 @@ def compute_dnds_proxy(mutdensity_file, cohort_syn_mutdensities_file, output_fil """ mutdensity_df_init = pd.read_csv(mutdensity_file, sep = "\t", header = 0, na_values = custom_na_values) + mutdensity_df_init["synonymous"] = mutdensity_df_init["synonymous"].replace(0, np.nan) all_possible_genes = list(mutdensity_df_init["GENE"].unique()) cohort_syn_mutdensity_df = pd.read_csv(cohort_syn_mutdensities_file, sep = "\t", header = 0, na_values = custom_na_values) diff --git a/modules/local/dnds_proxy/main.nf b/modules/local/dnds_proxy/main.nf index af55fbb0..f0120a70 100644 --- a/modules/local/dnds_proxy/main.nf +++ b/modules/local/dnds_proxy/main.nf @@ -9,8 +9,8 @@ process DNDS_PROXY { tuple val(meta), path(cohort_synonymous_mutdensities) output: - tuple val(meta), path("*.gene_mutdensities.tsv") , emit: mutdensity - path "versions.yml" , topic: versions + tuple val(meta), path("*.gene_mutdensities_n_dnds.tsv") , emit: mutdensity_with_dnds + path "versions.yml" , topic: versions