From 74f50a628b2094826397bfbd160a12ebe1f15681 Mon Sep 17 00:00:00 2001
From: FerriolCalvet <ferriolcalvet@gmail.com>
Date: Sat, 31 Jan 2026 11:17:26 +0100
Subject: [PATCH 01/11] remove computation of simple mutdensity_adjusted

- omega not working
---
 bin/compute_mutdensity.py | 43 ++++++++++++++-------------------------
 1 file changed, 15 insertions(+), 28 deletions(-)

diff --git a/bin/compute_mutdensity.py b/bin/compute_mutdensity.py
index 880138e7..ab77258e 100755
--- a/bin/compute_mutdensity.py
+++ b/bin/compute_mutdensity.py
@@ -19,7 +19,7 @@
 
 MUTDENSITY_IMPACT_GROUPS = [False, ["SNV"] , ["INSERTION", "DELETION"], ["SNV", "INSERTION", "DELETION"]]
 
-def mutdensity_sample(maf_df, depths_df, depths_adj_df, sample_name):
+def mutdensity_sample(maf_df, depths_df, sample_name):
     """
     Computes a sample's global mutation density. Returns the mutation density
     per Mb, non-adjusted and adjusted by panel
@@ -29,8 +29,7 @@ def mutdensity_sample(maf_df, depths_df, depths_adj_df, sample_name):
     impact_group_results = list()
 
     # mutation density depth information
-    sample_features_depth = {"DEPTH" : depths_df.drop_duplicates(subset = ["CHROM", "POS"])[f"{sample_name}"].sum(),
-                                "DEPTH_ADJUSTED": depths_adj_df[f"{sample_name}"].sum()
+    sample_features_depth = {"DEPTH" : depths_df.drop_duplicates(subset = ["CHROM", "POS"])[f"{sample_name}"].sum()
                                 }
 
     for type_list in MUTDENSITY_IMPACT_GROUPS:
@@ -55,9 +54,7 @@ def mutdensity_sample(maf_df, depths_df, depths_adj_df, sample_name):
         sample_features["N_MUTATED"] = n_mutated_reads
 
         sample_features["MUTDENSITY_MB"] = ( sample_features["N_MUTS"] / sample_features["DEPTH"] * 1000000 ).astype(float)
-        sample_features["MUTDENSITY_MB_ADJUSTED"] = ( sample_features["N_MUTS"] / sample_features["DEPTH_ADJUSTED"] * 1000000 ).astype(float)
         sample_features["MUTREADSDENSITY_MB"] = ( sample_features["N_MUTATED"] / sample_features["DEPTH"] * 1000000 ).astype(float)
-        sample_features["MUTREADSDENSITY_MB_ADJUSTED"] = ( sample_features["N_MUTATED"] / sample_features["DEPTH_ADJUSTED"] * 1000000 ).astype(float)
 
         sample_features["GENE"] = "ALL_GENES"
         sample_features["MUTTYPES"] = types_included
@@ -70,7 +67,7 @@ def mutdensity_sample(maf_df, depths_df, depths_adj_df, sample_name):
     return mutdensity_sample
 
 
-def mutdensity_gene(maf_df, depths_df, depths_adj_df, sample_name):
+def mutdensity_gene(maf_df, depths_df, sample_name):
     """
     Computes each gene mutation density. Returns the mutation density
     both per Mb and Kb sequenced, both non-adjusted and adjusted by panel
@@ -101,21 +98,16 @@ def mutdensity_gene(maf_df, depths_df, depths_adj_df, sample_name):
 
         depths_gene_df = depths_df.groupby("GENE").agg({f"{sample_name}" : "sum" })
         depths_gene_df.columns = ["DEPTH"]
-        depths_adj_gene_df = depths_adj_df.groupby("GENE").agg({f"{sample_name}" : "sum" })
-        depths_adj_gene_df.columns = ["DEPTH_ADJUSTED"]
 
         mut_rate_mut_reads_df = n_muts_gene.merge(n_mutated_reads, on = "GENE")
-        depths_depthsadj_gene_df = depths_gene_df.merge(depths_adj_gene_df, on = "GENE")
+    
         ## merge so that mutation density is computed although the number of mutations is NA (meaning, zero)
-        mut_depths_df = depths_depthsadj_gene_df.merge(mut_rate_mut_reads_df, on = "GENE", how = 'left')
-        mut_depths_df = mut_depths_df.fillna(0) # I think this is not needed
+        mut_depths_df = depths_gene_df.merge(mut_rate_mut_reads_df, on = "GENE", how = 'left')
+        mut_depths_df = mut_depths_df.fillna(0)
 
         # mutation density metrics
         mut_depths_df["MUTDENSITY_MB"] = (mut_depths_df["N_MUTS"] / mut_depths_df["DEPTH"] * 1000000).astype(float)
-        mut_depths_df["MUTDENSITY_MB_ADJUSTED"] = (mut_depths_df["N_MUTS"] / mut_depths_df["DEPTH_ADJUSTED"] * 1000000).astype(float)
-
         mut_depths_df["MUTREADSDENSITY_MB"] = (mut_depths_df["N_MUTATED"] / mut_depths_df["DEPTH"] * 1000000).astype(float)
-        mut_depths_df["MUTREADSDENSITY_MB_ADJUSTED"] = (mut_depths_df["N_MUTATED"] / mut_depths_df["DEPTH_ADJUSTED"] * 1000000).astype(float)
 
         mut_depths_df["MUTTYPES"] = types_included
         impact_group_results.append(mut_depths_df.reset_index())
@@ -137,25 +129,21 @@ def load_n_process_inputs(maf_path, depths_path, annot_panel_path, sample_name):
     ## mode 1: each position counts one (once per gene, be careful that it might be duplicated in different genes)
     depths_subset_df = depths_df.merge(annot_panel_df[["CHROM", "POS", "GENE"]].drop_duplicates(),
                                         on = ["CHROM", "POS"], how = "inner")
-    ## mode 2 (adjusted): each position counts as many times it contributes to the panel
-    depths_df[sample_name] = depths_df[sample_name] / 3   # the depth per position can contribute to three different mutations
-    depths_subset_adj_df = depths_df.merge(annot_panel_df[["CHROM", "POS", "GENE"]], on = ["CHROM", "POS"], how = "inner")
-
-    ## mode 3 (adjusted): each position counts as many times it contributes to the panel, but ONLY ONCE PER SAMPLE
-    depths_subset_adj_sample_df = depths_df.merge(annot_panel_df.drop_duplicates(subset = ["CHROM", "POS", "REF", "ALT"])[["CHROM", "POS"]],
-                                                    on = ["CHROM", "POS"], how = "inner")
 
     # Add domains and exons to maf_df
     annot_panel_df['CHROM_POS'] = annot_panel_df['CHROM'].astype(str) + ':' + annot_panel_df['POS'].astype(str)
     maf_df_raw['CHROM_POS'] = maf_df_raw['MUT_ID'].str.split('_', expand = True)[0]
 
-    maf_df = maf_df_raw.merge(annot_panel_df[['CHROM_POS', 'GENE']], on = ['CHROM_POS'], how = 'left', suffixes=['','_subgenic']).reset_index(drop=True)
+    maf_df = maf_df_raw.merge(annot_panel_df[['CHROM_POS', 'GENE']],
+                              on = ['CHROM_POS'], how = 'left',
+                              suffixes=['','_subgenic']).reset_index(drop=True)
+    
     maf_df = maf_df.drop(columns = ['GENE', 'CHROM_POS'])
     maf_df = maf_df.rename(columns={ 'GENE_subgenic' : 'GENE'})
 
     maf_df = maf_df.drop_duplicates()
 
-    return maf_df, depths_subset_df, depths_subset_adj_df, depths_subset_adj_sample_df
+    return maf_df, depths_subset_df
 
 
 # -- Main function -- #
@@ -166,14 +154,14 @@ def compute_mutdensity(maf_path, depths_path, annot_panel_path, sample_name, pan
     the panel composition. It saves the results to a TSV file.
     """
 
-    maf_df, depths_subset_df, depths_subset_adj_df, depths_subset_adj_sample_df = load_n_process_inputs(maf_path, depths_path, annot_panel_path, sample_name)
+    maf_df, depths_subset_df = load_n_process_inputs(maf_path, depths_path, annot_panel_path, sample_name)
 
     # Compute mutation densities
     ## sample mutation density
-    mutdensity_sample_df = mutdensity_sample(maf_df, depths_subset_df, depths_subset_adj_sample_df, sample_name)
+    mutdensity_sample_df = mutdensity_sample(maf_df, depths_subset_df, sample_name)
 
     ## per gene mutation density
-    mutdensity_genes_df = mutdensity_gene(maf_df, depths_subset_df, depths_subset_adj_df, sample_name)
+    mutdensity_genes_df = mutdensity_gene(maf_df, depths_subset_df, sample_name)
 
     mutdensity_df = pd.concat([mutdensity_sample_df, mutdensity_genes_df])
 
@@ -184,8 +172,7 @@ def compute_mutdensity(maf_path, depths_path, annot_panel_path, sample_name, pan
     mutdensity_df[["SAMPLE_ID", "GENE", "REGIONS", "MUTTYPES",
                 "DEPTH",
                 "N_MUTS", "N_MUTATED",
-                "MUTDENSITY_MB", "MUTDENSITY_MB_ADJUSTED",
-                "MUTREADSDENSITY_MB", "MUTREADSDENSITY_MB_ADJUSTED",
+                "MUTDENSITY_MB", "MUTREADSDENSITY_MB"
                 ]].to_csv(f"{sample_name}.{panel_v}.mutdensities.tsv",
                                                             sep = "\t",
                                                             header = True,

From e7ff64cf2553466870357fa486d0740f3688bfa2 Mon Sep 17 00:00:00 2001
From: FerriolCalvet <ferriolcalvet@gmail.com>
Date: Sat, 31 Jan 2026 11:29:05 +0100
Subject: [PATCH 02/11] provide adjusted mutation density to omega

- not tested, but should work
- NOT CORRECT
---
 bin/omega_select_mutdensity.py | 23 +++++++++-----
 workflows/deepcsa.nf           | 58 +++++++++++++++++-----------------
 2 files changed, 44 insertions(+), 37 deletions(-)

diff --git a/bin/omega_select_mutdensity.py b/bin/omega_select_mutdensity.py
index 912afbbf..fd299d6a 100755
--- a/bin/omega_select_mutdensity.py
+++ b/bin/omega_select_mutdensity.py
@@ -8,19 +8,26 @@
 
 def select_syn_mutdensity(mutdensity_file, output_file, mode):
     """
-    INFO
+    This function selects the synonymous mutation densities for all genes
+    from the mutation density file of all samples.
+    
+    Right now the `mode` argument is accepted but ignored,
+    since only one type of synonymous mutation density is computed.
     """
 
     mutdensity_df = pd.read_csv(mutdensity_file, sep = "\t", header = 0, na_values = custom_na_values)
 
-    synonymous_mutdensities_all_samples = mutdensity_df[(mutdensity_df["MUTTYPES"] == "SNV") &
-                                                        (mutdensity_df["GENE"] != "ALL_GENES") &
-                                                        ~(mutdensity_df["GENE"].str.contains("--"))].reset_index(drop = True)
+    synonymous_mutdensities_all_samples = mutdensity_df[(mutdensity_df["SAMPLE"] == 'all_samples') &
+                                                        ~(mutdensity_df["GENE"].str.contains("--"))
+                                                        ]["synonymous"].reset_index(drop = True)
 
-    if mode == 'mutations':
-        synonymous_mutdensities_genes = synonymous_mutdensities_all_samples[['GENE', 'MUTDENSITY_MB_ADJUSTED']]
-    elif mode == 'mutated_reads':
-        synonymous_mutdensities_genes = synonymous_mutdensities_all_samples[['GENE', 'MUTREADSDENSITY_MB_ADJUSTED']]
+    synonymous_mutdensities_genes = synonymous_mutdensities_all_samples[['GENE', 'synonymous']]
+
+    # TODO implement these different modes if appropriate
+    # if mode == 'mutations':
+    #     synonymous_mutdensities_genes = synonymous_mutdensities_all_samples[['GENE', 'synonymous']]
+    # elif mode == 'mutated_reads':
+    #     synonymous_mutdensities_genes = synonymous_mutdensities_all_samples[['GENE', 'synonymous']]
 
     synonymous_mutdensities_genes.columns = ["GENE", "MUTDENSITY"]
     synonymous_mutdensities_genes.to_csv(f"{output_file}",
diff --git a/workflows/deepcsa.nf b/workflows/deepcsa.nf
index 0e07c75a..d8aac712 100644
--- a/workflows/deepcsa.nf
+++ b/workflows/deepcsa.nf
@@ -254,37 +254,10 @@ workflow DEEPCSA{
         DEPTHSSYNONYMOUSCONS(annotated_depths, CREATEPANELS.out.synonymous_consensus_bed)
     }
 
-    if (run_mutdensity){
-        // Mutation Density
-        MUTDENSITYALL(somatic_mutations, DEPTHSALLCONS.out.subset, CREATEPANELS.out.all_consensus_bed, ENRICHPANELS.out.all_consensus_expanded_panel.first())
-        MUTDENSITYPROT(somatic_mutations, DEPTHSPROTCONS.out.subset, CREATEPANELS.out.prot_consensus_bed, ENRICHPANELS.out.prot_consensus_expanded_panel.first())
-        MUTDENSITYNONPROT(somatic_mutations, DEPTHSNONPROTCONS.out.subset, CREATEPANELS.out.nonprot_consensus_bed, ENRICHPANELS.out.nonprot_consensus_expanded_panel.first())
-        MUTDENSITYSYNONYMOUS(somatic_mutations, DEPTHSSYNONYMOUSCONS.out.subset, CREATEPANELS.out.synonymous_consensus_bed, ENRICHPANELS.out.synonymous_consensus_expanded_panel.first())
-
-        channel.of([ [ id: "all_samples" ] ])
-        .join( MUTDENSITYSYNONYMOUS.out.mutdensities )
-        .set{ all_samples_syn_mutdensity }
-
-        SYNMUTDENSITY(all_samples_syn_mutdensity)
-
-        SYNMUTREADSDENSITY(all_samples_syn_mutdensity)
-
-
-        // Concatenate all outputs into a single file
-        channel.empty()
-        .concat(MUTDENSITYALL.out.mutdensities.map{ it -> it[1]}.flatten())
-        .concat(MUTDENSITYPROT.out.mutdensities.map{ it -> it[1]}.flatten())
-        .concat(MUTDENSITYNONPROT.out.mutdensities.map{ it -> it[1]}.flatten())
-        .concat(MUTDENSITYSYNONYMOUS.out.mutdensities.map{ it -> it[1]}.flatten())
-        .set{ all_mutdensities }
-        all_mutdensities.collectFile(name: "all_mutdensities.tsv", storeDir:"${params.outdir}/mutdensity", skip: 1, keepHeader: true).set{ all_mutdensities_file }
-
-    }
-
-
     // Mutational profile
-    if ( params.profileall || run_mutabilities || params.omega ){
+    if ( params.profileall || run_mutabilities || params.omega || run_mutdensity){
         MUTPROFILEALL(somatic_mutations, DEPTHSALLCONS.out.subset, CREATEPANELS.out.all_consensus_bed, wgs_trinucs, TABLE2GROUP.out.json_allgroups)
+
         if (run_mutdensity){
             MUTDENSITYADJUSTED(somatic_mutations, DEPTHSALLCONS.out.subset, CREATEPANELS.out.exons_consensus_bed, CREATEPANELS.out.exons_consensus_panel, MUTPROFILEALL.out.profile, wgs_trinucs)
 
@@ -296,6 +269,14 @@ workflow DEEPCSA{
             MUTDENSITYADJUSTED.out.mutdensities_flat.map{ it -> it[1]}.flatten()
             .set{ all_adjusted_mutdensities_flat }
             all_adjusted_mutdensities_flat.collectFile(name: "all_adjusted_mutdensities_flat.tsv", storeDir:"${params.outdir}/mutdensity_adjusted", skip: 1, keepHeader: true)
+
+            channel.of([ [ id: "all_samples" ] ])
+            .join( MUTDENSITYADJUSTED.out.mutdensities )
+            .set{ all_samples_adj_mutdensity }
+
+            SYNMUTDENSITY(all_samples_adj_mutdensity)
+
+            SYNMUTREADSDENSITY(all_samples_adj_mutdensity)
         }
     }
     if (params.profilenonprot){
@@ -310,6 +291,25 @@ workflow DEEPCSA{
     }
 
 
+    if (run_mutdensity){
+        // Mutation Density
+        MUTDENSITYALL(somatic_mutations, DEPTHSALLCONS.out.subset, CREATEPANELS.out.all_consensus_bed, ENRICHPANELS.out.all_consensus_expanded_panel.first())
+        MUTDENSITYPROT(somatic_mutations, DEPTHSPROTCONS.out.subset, CREATEPANELS.out.prot_consensus_bed, ENRICHPANELS.out.prot_consensus_expanded_panel.first())
+        MUTDENSITYNONPROT(somatic_mutations, DEPTHSNONPROTCONS.out.subset, CREATEPANELS.out.nonprot_consensus_bed, ENRICHPANELS.out.nonprot_consensus_expanded_panel.first())
+        MUTDENSITYSYNONYMOUS(somatic_mutations, DEPTHSSYNONYMOUSCONS.out.subset, CREATEPANELS.out.synonymous_consensus_bed, ENRICHPANELS.out.synonymous_consensus_expanded_panel.first())
+
+        // Concatenate all outputs into a single file
+        channel.empty()
+        .concat(MUTDENSITYALL.out.mutdensities.map{ it -> it[1]}.flatten())
+        .concat(MUTDENSITYPROT.out.mutdensities.map{ it -> it[1]}.flatten())
+        .concat(MUTDENSITYNONPROT.out.mutdensities.map{ it -> it[1]}.flatten())
+        .concat(MUTDENSITYSYNONYMOUS.out.mutdensities.map{ it -> it[1]}.flatten())
+        .set{ all_mutdensities }
+        all_mutdensities.collectFile(name: "all_mutdensities.tsv", storeDir:"${params.outdir}/mutdensity", skip: 1, keepHeader: true).set{ all_mutdensities_file }
+
+    }
+
+
     if (run_mutabilities) {
         if (params.profileall){
             MUTABILITYALL(somatic_mutations,

From 53c27bed7eb18006d9fe870a349e4afab97ce980 Mon Sep 17 00:00:00 2001
From: FerriolCalvet <ferriolcalvet@gmail.com>
Date: Tue, 3 Feb 2026 13:29:22 +0100
Subject: [PATCH 03/11] rename and reorganize mutdensity files

---
 bin/{compute_mutdensity.py => mut_density_simple.py}          | 0
 modules/local/mut_density/{ => adjusted}/main.nf              | 4 ++--
 .../local/{computemutdensity => mut_density/simple}/main.nf   | 2 +-
 subworkflows/local/adjmutdensity/main.nf                      | 2 +-
 subworkflows/local/mutationdensity/main.nf                    | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)
 rename bin/{compute_mutdensity.py => mut_density_simple.py} (100%)
 rename modules/local/mut_density/{ => adjusted}/main.nf (95%)
 rename modules/local/{computemutdensity => mut_density/simple}/main.nf (97%)

diff --git a/bin/compute_mutdensity.py b/bin/mut_density_simple.py
similarity index 100%
rename from bin/compute_mutdensity.py
rename to bin/mut_density_simple.py
diff --git a/modules/local/mut_density/main.nf b/modules/local/mut_density/adjusted/main.nf
similarity index 95%
rename from modules/local/mut_density/main.nf
rename to modules/local/mut_density/adjusted/main.nf
index 632415f8..6f6388fd 100644
--- a/modules/local/mut_density/main.nf
+++ b/modules/local/mut_density/adjusted/main.nf
@@ -8,7 +8,7 @@ process MUTATION_DENSITY {
     input:
     tuple val(meta), path(somatic_mutations_file), path(depths_file), path(mutability_file)
     tuple val(meta2), path(panel_file)
-    path(trinucleotide_counts_file)
+    path (trinucleotide_counts_file)
 
 
     output:
@@ -20,7 +20,7 @@ process MUTATION_DENSITY {
     script:
     def sample_name = "${meta.id}"
     """
-    mut_density.py \\
+    mut_density_adjusted.py \\
                         --sample_name ${sample_name} \\
                         --depths_file ${depths_file} \\
                         --somatic_mutations_file ${somatic_mutations_file} \\
diff --git a/modules/local/computemutdensity/main.nf b/modules/local/mut_density/simple/main.nf
similarity index 97%
rename from modules/local/computemutdensity/main.nf
rename to modules/local/mut_density/simple/main.nf
index 9883c740..8c50db94 100644
--- a/modules/local/computemutdensity/main.nf
+++ b/modules/local/mut_density/simple/main.nf
@@ -16,7 +16,7 @@ process MUTATION_DENSITY {
     def sample_name = "${meta.id}"
     def panel_version = task.ext.panel_version ?: "${meta2.id}"
     """
-    compute_mutdensity.py \\
+    mut_density_simple.py \\
                 --maf_path ${mutations} \\
                 --depths_path ${depth} \\
                 --annot_panel_path ${consensus_panel} \\
diff --git a/subworkflows/local/adjmutdensity/main.nf b/subworkflows/local/adjmutdensity/main.nf
index 25cfbe53..687e8a64 100644
--- a/subworkflows/local/adjmutdensity/main.nf
+++ b/subworkflows/local/adjmutdensity/main.nf
@@ -2,7 +2,7 @@ include { TABIX_BGZIPTABIX_QUERY    as QUERYMUTATIONS          } from '../../../
 
 include { SUBSET_MAF                as SUBSETMUTDENSITYADJUSTED } from '../../../modules/local/subsetmaf/main'
 
-include { MUTATION_DENSITY          as MUTDENSITYADJ            } from '../../../modules/local/mut_density/main'
+include { MUTATION_DENSITY          as MUTDENSITYADJ            } from '../../../modules/local/mut_density/adjusted/main'
 
 
 workflow MUTATION_DENSITY {
diff --git a/subworkflows/local/mutationdensity/main.nf b/subworkflows/local/mutationdensity/main.nf
index e9569ad9..87a85a2b 100644
--- a/subworkflows/local/mutationdensity/main.nf
+++ b/subworkflows/local/mutationdensity/main.nf
@@ -2,7 +2,7 @@ include { TABIX_BGZIPTABIX_QUERY    as QUERYMUTATIONS      } from '../../../modu
 
 include { SUBSET_MAF                as SUBSETMUTDENSITY     } from '../../../modules/local/subsetmaf/main'
 
-include { MUTATION_DENSITY          as MUTDENSITY           } from '../../../modules/local/computemutdensity/main'
+include { MUTATION_DENSITY          as MUTDENSITY           } from '../../../modules/local/mut_density/simple/main'
 
 
 workflow MUTATION_DENSITY{

From 5a4aaeaa662d014ffea7217609fbf5243cdf55d1 Mon Sep 17 00:00:00 2001
From: FerriolCalvet <ferriolcalvet@gmail.com>
Date: Tue, 3 Feb 2026 13:31:24 +0100
Subject: [PATCH 04/11] update adjusted mut density with sample values

- add drop duplicates with potential fix
---
 ...{mut_density.py => mut_density_adjusted.py} | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)
 rename bin/{mut_density.py => mut_density_adjusted.py} (90%)

diff --git a/bin/mut_density.py b/bin/mut_density_adjusted.py
similarity index 90%
rename from bin/mut_density.py
rename to bin/mut_density_adjusted.py
index 308b2953..1e90733a 100755
--- a/bin/mut_density.py
+++ b/bin/mut_density_adjusted.py
@@ -61,14 +61,17 @@ def mutation_density(sample_name, depths_file, somatic_mutations_file, mutabilit
 
     for csqn, csqn_set in broadimpact_grouping_dict_with_synonymous.items():
         
-        for gene in panel_df['GENE'].unique():
+        for gene in list(panel_df['GENE'].unique()) + ["ALL_GENES"]:
             
             # compute vector of sum of depths per trinucleotide context
             # tailored to the specific gene-impact target
-            region_df = panel_df[(panel_df['IMPACT'].isin(csqn_set)) & (panel_df['GENE'] == gene)].copy()
+            if gene == 'ALL_GENES':
+                region_df = panel_df[(panel_df['IMPACT'].isin(csqn_set))][['CHROM', 'POS']].drop_duplicates()
+            else:
+                region_df = panel_df[(panel_df['IMPACT'].isin(csqn_set)) & (panel_df['GENE'] == gene)][['CHROM', 'POS']].drop_duplicates()
 
             # counting every position once
-            dh = pd.merge(region_df[['CHROM', 'POS']],
+            dh = pd.merge(region_df,
                           depths_df[['CHROM', 'POS', 'CONTEXT', sample_name]],
                           on=['CHROM', 'POS'], how='left')
             depth_sum_df = dh.groupby(by='CONTEXT').agg({sample_name: 'sum'}).reset_index()
@@ -88,10 +91,13 @@ def mutation_density(sample_name, depths_file, somatic_mutations_file, mutabilit
             except AssertionError:
                 res.loc[gene, csqn] = None
                 continue
-            
-            # observed somatic mutations
 
-            n = somatic_mutations_df[(somatic_mutations_df['IMPACT'].isin(csqn_set)) & (somatic_mutations_df['GENE'] == gene)].shape[0]
+
+            # observed somatic mutations
+            if gene == 'ALL_GENES':
+                n = somatic_mutations_df[(somatic_mutations_df['IMPACT'].isin(csqn_set))].shape[0]
+            else:
+                n = somatic_mutations_df[(somatic_mutations_df['IMPACT'].isin(csqn_set)) & (somatic_mutations_df['GENE'] == gene)].shape[0]
 
             res.loc[gene, csqn] = n / effective_length
     

From c04b90e9930704e30970e083d246902b31736e95 Mon Sep 17 00:00:00 2001
From: FerriolCalvet <ferriolcalvet@gmail.com>
Date: Tue, 3 Feb 2026 13:53:24 +0100
Subject: [PATCH 05/11] add adjusted mut density variability plots

- not tested
---
 bin/mut_density_adjusted.py                   |  8 +++---
 bin/plot_explore_variability.py               | 28 +++++++++----------
 .../plot/interindividual_variability/main.nf  |  8 ++++--
 subworkflows/local/plottingsummary/main.nf    |  3 +-
 workflows/deepcsa.nf                          |  4 ++-
 5 files changed, 27 insertions(+), 24 deletions(-)

diff --git a/bin/mut_density_adjusted.py b/bin/mut_density_adjusted.py
index 1e90733a..762a5bd2 100755
--- a/bin/mut_density_adjusted.py
+++ b/bin/mut_density_adjusted.py
@@ -155,12 +155,12 @@ def main(sample_name, depths_file, somatic_mutations_file, mutability_file, pane
     logfoldchange_plot(sample_name, res, res_flat)
 
     # save results
-    res["SAMPLE"] = sample_name
-    res_flat["SAMPLE"] = sample_name
+    res["SAMPLE_ID"] = sample_name
+    res_flat["SAMPLE_ID"] = sample_name
     res.index.name = 'GENE'
     res_flat.index.name = 'GENE'
-    res[['SAMPLE'] + [col for col in res.columns if col != 'SAMPLE']].to_csv(f'{sample_name}.mutdensities.tsv', sep='\t')
-    res_flat[['SAMPLE'] + [col for col in res_flat.columns if col != 'SAMPLE']].to_csv(f'{sample_name}.mutdensities_flat.tsv', sep='\t')
+    res[['SAMPLE_ID'] + [col for col in res.columns if col != 'SAMPLE_ID']].to_csv(f'{sample_name}.mutdensities.tsv', sep='\t')
+    res_flat[['SAMPLE_ID'] + [col for col in res_flat.columns if col != 'SAMPLE_ID']].to_csv(f'{sample_name}.mutdensities_flat.tsv', sep='\t')
 
 
 
diff --git a/bin/plot_explore_variability.py b/bin/plot_explore_variability.py
index e089490c..f55ceb0a 100755
--- a/bin/plot_explore_variability.py
+++ b/bin/plot_explore_variability.py
@@ -84,15 +84,13 @@ def mut_density_heatmaps(data, genes_list, samples_list, outdir, prefix = '',
 
 def adj_mut_density_heatmaps(data, genes_list, samples_list, outdir, prefix = '',
                                 config_datasets = {
-                                    "all" : ({"MUTTYPES": 'all_types', "REGIONS": 'all'}, 'MUTDENSITY_MB'),
-                                    "all protein-affecting" : ({"MUTTYPES": 'all_types', "REGIONS": 'protein_affecting'}, 'MUTDENSITY_MB'),
-                                    "all non-protein-affecting" : ({"MUTTYPES": 'all_types', "REGIONS": 'non_protein_affecting'}, 'MUTDENSITY_MB'),
-                                    "SNVs" : ({"MUTTYPES": 'SNV', "REGIONS": 'all'}, 'MUTDENSITY_MB'),
-                                    "SNVs protein-affecting" : ({"MUTTYPES": 'SNV', "REGIONS": 'protein_affecting'}, 'MUTDENSITY_MB'),
-                                    "SNVs non-protein-affecting" : ({"MUTTYPES": 'SNV', "REGIONS": 'non_protein_affecting'}, 'MUTDENSITY_MB'),
-                                    "INDELs" : ({"MUTTYPES": 'DELETION-INSERTION', "REGIONS": 'all'}, 'MUTDENSITY_MB'),
-                                    "INDELs protein-affecting" : ({"MUTTYPES": 'DELETION-INSERTION', "REGIONS": 'protein_affecting'}, 'MUTDENSITY_MB'),
-                                    "INDELs non-protein-affecting" : ({"MUTTYPES": 'DELETION-INSERTION', "REGIONS": 'non_protein_affecting'}, 'MUTDENSITY_MB')
+                                    "synonymous" : "synonymous",
+                                    "missense" : "missense",
+                                    "nonsense" : "nonsense",
+                                    "essential_splice" : "essential_splice",
+                                    "truncating" : "truncating",
+                                    "nonsynonymous_splice" : "nonsynonymous_splice",
+                                    "all_impacts" : "all_impacts",
                                 }
                             ):
     """
@@ -105,13 +103,13 @@ def adj_mut_density_heatmaps(data, genes_list, samples_list, outdir, prefix = ''
         print("No data available for the selected samples/groups")
         return
     
-    pdf_filename = f"{outdir}/{prefix}mut_density_heatmaps.pdf"
+    pdf_filename = f"{outdir}/{prefix}_adjusted_mut_density_heatmaps.pdf"
     with PdfPages(pdf_filename) as pdf:
 
-        for title, (config, value) in config_datasets.items():
-            print("Creating heatmap for:", title, config, value)
-            filtered_data = filter_data_from_config(data, config)
-            # print(filtered_data[['GENE', 'SAMPLE_ID', value]].head())
+        for title, value in config_datasets.items():
+            print("Creating heatmap for:", title)
+            filtered_data = data[["GENE", "SAMPLE_ID", value]]
+
             # Create a pivot table for the heatmap
             heatmap_data = filtered_data.pivot_table(index='GENE', columns='SAMPLE_ID', values=value)
             heatmap_data = heatmap_data.reindex(index=genes_list, columns=samples_list)
@@ -208,7 +206,7 @@ def main(outdir, panel_regions, samples_json, all_groups_json, mutdensities, adj
         plotting_manager(outdir, genes_list, samples_list, "samples.", data_string, data_objects)
     except Exception as e:
         print("Error in the process", e)
-    
+
     try:
         plotting_manager(outdir, genes_list, groups_names, "groups.", data_string, data_objects)
     except Exception as e:
diff --git a/modules/local/plot/interindividual_variability/main.nf b/modules/local/plot/interindividual_variability/main.nf
index 6a3c1a34..daca8584 100644
--- a/modules/local/plot/interindividual_variability/main.nf
+++ b/modules/local/plot/interindividual_variability/main.nf
@@ -6,10 +6,11 @@ process PLOT_INTERINDIVIDUAL_VARIABILITY {
     container "docker.io/bbglab/deepcsa-core:0.0.2-alpha"
 
     input:
-    path(samples_json)
-    path(all_groups_json)
+    path (samples_json)
+    path (all_groups_json)
     tuple val(meta), path(panel_file)
-    path(mutdensities_file)
+    path (mutdensities_file)
+    path (adjusted_mutdensities_file)
 
     output:
     path("**.pdf")      , emit: plots
@@ -23,6 +24,7 @@ process PLOT_INTERINDIVIDUAL_VARIABILITY {
     mkdir ${prefix}.variability_plots
     plot_explore_variability.py \\
                     --mutdensities ${mutdensities_file} \\
+                    --adjusted-mutdensities ${adjusted_mutdensities_file} \\
                     --panel-regions ${panel_file} \\
                     --outdir ${prefix}.variability_plots \\
                     --samples-json ${samples_json} \\
diff --git a/subworkflows/local/plottingsummary/main.nf b/subworkflows/local/plottingsummary/main.nf
index afb4d9dc..70c3f3be 100644
--- a/subworkflows/local/plottingsummary/main.nf
+++ b/subworkflows/local/plottingsummary/main.nf
@@ -14,6 +14,7 @@ workflow PLOTTING_SUMMARY {
     positive_selection_results_ready
     all_mutations
     all_mutdensities
+    all_mutdensities_adjusted
     site_comparison
     all_samples_depth
     samples
@@ -80,7 +81,7 @@ workflow PLOTTING_SUMMARY {
     // ? plot saturation kinetics curves
 
 
-    PLOTINTERINDIVIDUALVARIABILITY(samples, all_groups, panel,  all_mutdensities)
+    PLOTINTERINDIVIDUALVARIABILITY(samples, all_groups, panel,  all_mutdensities, all_mutdensities_adjusted)
     // heatmaps:
     //     mutations per gene/sample (total, SNV only, INDEL only, per consequence type)
     //     driver mutations per gene/sample
diff --git a/workflows/deepcsa.nf b/workflows/deepcsa.nf
index d8aac712..e45f4f9b 100644
--- a/workflows/deepcsa.nf
+++ b/workflows/deepcsa.nf
@@ -148,6 +148,7 @@ workflow DEEPCSA{
     all_compiled_omegas             = channel.empty()
     all_compiled_omegasgloballoc    = channel.empty()
     all_mutdensities_file           = channel.empty()
+    compiled_adjusted_mutdensities  = channel.empty()
 
 
     // if the user wants to use custom gene groups, import the gene groups table
@@ -264,7 +265,7 @@ workflow DEEPCSA{
             // Concatenate all outputs into a single file
             MUTDENSITYADJUSTED.out.mutdensities.map{ it -> it[1]}.flatten()
             .set{ all_adjusted_mutdensities }
-            all_adjusted_mutdensities.collectFile(name: "all_adjusted_mutdensities.tsv", storeDir:"${params.outdir}/mutdensity_adjusted", skip: 1, keepHeader: true)
+            all_adjusted_mutdensities.collectFile(name: "all_adjusted_mutdensities.tsv", storeDir:"${params.outdir}/mutdensity_adjusted", skip: 1, keepHeader: true).set{ compiled_adjusted_mutdensities }
 
             MUTDENSITYADJUSTED.out.mutdensities_flat.map{ it -> it[1]}.flatten()
             .set{ all_adjusted_mutdensities_flat }
@@ -576,6 +577,7 @@ workflow DEEPCSA{
         PLOTTINGSUMMARY(positive_selection_results_ready,
                         somatic_mutations,
                         all_mutdensities_file.first(),
+                        compiled_adjusted_mutdensities.first(),
                         
                         site_comparison_results,
                         ANNOTATEDEPTHS.out.all_samples_depths.first(),

From dfab72d59a6f8272d7845c4fca5862c69fa115fc Mon Sep 17 00:00:00 2001
From: FerriolCalvet <ferriolcalvet@gmail.com>
Date: Fri, 13 Feb 2026 12:41:52 +0100
Subject: [PATCH 06/11] fix bug after upstream change

---
 bin/omega_select_mutdensity.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bin/omega_select_mutdensity.py b/bin/omega_select_mutdensity.py
index fd299d6a..2178cd3f 100755
--- a/bin/omega_select_mutdensity.py
+++ b/bin/omega_select_mutdensity.py
@@ -17,7 +17,7 @@ def select_syn_mutdensity(mutdensity_file, output_file, mode):
 
     mutdensity_df = pd.read_csv(mutdensity_file, sep = "\t", header = 0, na_values = custom_na_values)
 
-    synonymous_mutdensities_all_samples = mutdensity_df[(mutdensity_df["SAMPLE"] == 'all_samples') &
+    synonymous_mutdensities_all_samples = mutdensity_df[(mutdensity_df["SAMPLE_ID"] == 'all_samples') &
                                                         ~(mutdensity_df["GENE"].str.contains("--"))
                                                         ]["synonymous"].reset_index(drop = True)
 

From 61d4532d038bf83f9a74512bdc675a47e935978f Mon Sep 17 00:00:00 2001
From: FerriolCalvet <ferriolcalvet@gmail.com>
Date: Fri, 13 Feb 2026 15:44:51 +0100
Subject: [PATCH 07/11] fix error in computation of adjusted mutation density

---
 bin/mut_density_adjusted.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/bin/mut_density_adjusted.py b/bin/mut_density_adjusted.py
index 762a5bd2..ebf684f9 100755
--- a/bin/mut_density_adjusted.py
+++ b/bin/mut_density_adjusted.py
@@ -31,6 +31,8 @@ def get_correction_factor(sample_name, trinucleotide_counts_df, mutability_df, f
     triplet_counts = np.array(l)
 
     # genome length in Mb
+    #   triplet_counts sums to 3 * depth, because each position contributes
+    #   once per possible mutation (3 per position), hence the division by 3
     genome_length = sum(triplet_counts) / (3 * 1e6)
 
     # vector of relative mutabilities in 96-channel canonical sorting
@@ -66,12 +68,13 @@ def mutation_density(sample_name, depths_file, somatic_mutations_file, mutabilit
             # compute vector of sum of depths per trinucleotide context
             # tailored to the specific gene-impact target
             if gene == 'ALL_GENES':
-                region_df = panel_df[(panel_df['IMPACT'].isin(csqn_set))][['CHROM', 'POS']].drop_duplicates()
+                region_df = panel_df[(panel_df['IMPACT'].isin(csqn_set))][['CHROM', 'POS', 'REF', 'ALT']].drop_duplicates()
             else:
-                region_df = panel_df[(panel_df['IMPACT'].isin(csqn_set)) & (panel_df['GENE'] == gene)][['CHROM', 'POS']].drop_duplicates()
+                region_df = panel_df[(panel_df['IMPACT'].isin(csqn_set)) & (panel_df['GENE'] == gene)][['CHROM', 'POS', 'REF', 'ALT']].drop_duplicates()
 
-            # counting every position once
-            dh = pd.merge(region_df,
+            # counting every position as many times as the number of possible
+            # mutations of the selected consequences at that position (1,2 or 3)
+            dh = pd.merge(region_df[['CHROM', 'POS']],
                           depths_df[['CHROM', 'POS', 'CONTEXT', sample_name]],
                           on=['CHROM', 'POS'], how='left')
             depth_sum_df = dh.groupby(by='CONTEXT').agg({sample_name: 'sum'}).reset_index()

From db38bf0437ad4fb6c7f5f89a5af440ccf00510f5 Mon Sep 17 00:00:00 2001
From: FerriolCalvet <ferriolcalvet@gmail.com>
Date: Tue, 17 Feb 2026 09:43:10 +0000
Subject: [PATCH 08/11] fix bug in synonymous mut. density selection

---
 bin/omega_select_mutdensity.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/bin/omega_select_mutdensity.py b/bin/omega_select_mutdensity.py
index 2178cd3f..0af801dd 100755
--- a/bin/omega_select_mutdensity.py
+++ b/bin/omega_select_mutdensity.py
@@ -19,9 +19,9 @@ def select_syn_mutdensity(mutdensity_file, output_file, mode):
 
     synonymous_mutdensities_all_samples = mutdensity_df[(mutdensity_df["SAMPLE_ID"] == 'all_samples') &
                                                         ~(mutdensity_df["GENE"].str.contains("--"))
-                                                        ]["synonymous"].reset_index(drop = True)
+                                                        ].reset_index(drop = True)
 
-    synonymous_mutdensities_genes = synonymous_mutdensities_all_samples[['GENE', 'synonymous']]
+    synonymous_mutdensities_genes = synonymous_mutdensities_all_samples[['GENE', 'synonymous']].copy()
 
     # TODO implement these different modes if appropriate
     # if mode == 'mutations':

From c80749480ae776b8a69127b0457ba8e077eba601 Mon Sep 17 00:00:00 2001
From: FerriolCalvet <ferriolcalvet@gmail.com>
Date: Thu, 19 Feb 2026 09:15:27 +0000
Subject: [PATCH 09/11] add dndsproxy computation

---
 bin/mut_density_adjusted_dnds.py | 74 ++++++++++++++++++++++++++++++++
 conf/results_outputs.config      |  8 ++++
 modules/local/dnds_proxy/main.nf | 45 +++++++++++++++++++
 workflows/deepcsa.nf             |  3 ++
 4 files changed, 130 insertions(+)
 create mode 100755 bin/mut_density_adjusted_dnds.py
 create mode 100644 modules/local/dnds_proxy/main.nf

diff --git a/bin/mut_density_adjusted_dnds.py b/bin/mut_density_adjusted_dnds.py
new file mode 100755
index 00000000..68cf59b9
--- /dev/null
+++ b/bin/mut_density_adjusted_dnds.py
@@ -0,0 +1,74 @@
+#!/usr/bin/env python
+
+
+import click
+import pandas as pd
+from read_utils import custom_na_values
+
+
+def compute_dnds_proxy(mutdensity_file, cohort_syn_mutdensities_file, output_file, mode):
+    """
+    Compute dN/dS proxy ratios from a table of mutation densities: missense,
+    truncating and nonsynonymous_splice are each divided by the row's synonymous
+    density and by the gene's cohort synonymous density (nulls -> 1st decile).
+    The table with the added ratio columns is written to output_file as TSV.
+    TODO: also output a log with basic statistics. `mode` is not used yet.
+    """
+
+    mutdensity_df_init = pd.read_csv(mutdensity_file, sep = "\t", header = 0, na_values = custom_na_values)
+    all_possible_genes = list(mutdensity_df_init["GENE"].unique())
+
+    cohort_syn_mutdensity_df = pd.read_csv(cohort_syn_mutdensities_file, sep = "\t", header = 0, na_values = custom_na_values)
+    cohort_syn_mutdensity_df.columns = ['GENE', 'cohort_synonymous']
+    cohort_syn_mutdensity_df = cohort_syn_mutdensity_df.set_index("GENE")
+
+    init_cohort_syn_df = pd.DataFrame(index = all_possible_genes)
+    cohort_syn_df = pd.concat((init_cohort_syn_df, cohort_syn_mutdensity_df), axis = 0)
+    
+    # filling the null mutation densities with the value of the 1st decile
+    cohort_syn_df = cohort_syn_df.fillna(cohort_syn_df[~(cohort_syn_df.isna())].quantile(.1))
+
+    mutdensity_df = mutdensity_df_init.merge(cohort_syn_df, on = "GENE")
+    for impact in ["missense", "truncating", "nonsynonymous_splice"]:
+        mutdensity_df[f"d_{impact}/d_synonymous"] = mutdensity_df[impact] / mutdensity_df["synonymous"]
+        mutdensity_df[f"d_{impact}/d_cohort_synonymous"] = mutdensity_df[impact] / mutdensity_df["cohort_synonymous"]
+
+    # summary at all_samples level
+    subset_mutdensities = mutdensity_df[(mutdensity_df["SAMPLE"] == 'all_samples')]
+    for impact in ["missense", "truncating"]:
+        print(subset_mutdensities.sort_values(by=f"d_{impact}/d_synonymous", ascending=False)[
+            ["GENE", "SAMPLE", impact, "synonymous", f"d_{impact}/d_synonymous"]
+            ].head(10))
+
+
+    # # summary at sample-level
+    # subset_mutdensities = mutdensity_df[(mutdensity_df["SAMPLE"] != 'all_samples')]
+    # for impact in ["missense", "truncating"]:
+    #     print(subset_mutdensities.sort_values(by=f"d_{impact}/d_synonymous", ascending=False)[
+    #         ["GENE", "SAMPLE", impact, "synonymous", f"d_{impact}/d_synonymous"]
+    #         ].head(10))
+
+    # TODO implement these different modes if appropriate
+    # `mode` ('mutations' / 'mutated_reads') does not change the result yet,
+    # since only one type of synonymous mutation density is computed.
+    # A mode-specific selection of the synonymous densities would go here,
+    # before the table is written out.
+
+    mutdensity_df.to_csv(f"{output_file}",
+                            header=True,
+                            index=False,
+                            sep="\t")
+
+
+@click.command()
+@click.option('--mutdensities', type=click.Path(exists=True), required=True, help='Input mutation density file')
+@click.option('--cohort-syn-mutdensities', type=click.Path(exists=True), required=True, help='Input cohort synonymous mutation densities')
+@click.option('--output', type=click.Path(), required=True, help='Output file')
+@click.option('--mode', type=click.Choice(['mutations', 'mutated_reads']), default='mutations')
+def main(mutdensities, cohort_syn_mutdensities, output, mode):
+    """Compute dN/dS proxy ratios from mutation density tables and write them as TSV."""
+    click.echo("Computing dN/dS proxy ratios from the mutation densities...")
+    compute_dnds_proxy(mutdensities, cohort_syn_mutdensities, output, mode)
+
+if __name__ == '__main__':
+    main()
diff --git a/conf/results_outputs.config b/conf/results_outputs.config
index 5dc2d67a..8980385a 100644
--- a/conf/results_outputs.config
+++ b/conf/results_outputs.config
@@ -205,6 +205,14 @@ process {
             pattern: '**{tsv,per_sample,sigprofiler}',
         ]
     }
+    
+    withName: DNDSPROXY {
+        publishDir = [
+            path: { "${params.outdir}/selection/dndsproxy" },
+            mode: params.publish_dir_mode,
+            pattern: '**{tsv,log}',
+        ]
+    }
 
     withName: COMPUTETRINUC {
         publishDir = [
diff --git a/modules/local/dnds_proxy/main.nf b/modules/local/dnds_proxy/main.nf
new file mode 100644
index 00000000..af55fbb0
--- /dev/null
+++ b/modules/local/dnds_proxy/main.nf
@@ -0,0 +1,45 @@
+process DNDS_PROXY {
+    tag "$meta.id"
+    label 'process_single'
+
+    label 'deepcsa_core'
+
+    input:
+    path(all_mutation_densities)
+    tuple val(meta), path(cohort_synonymous_mutdensities)
+
+    output:
+    tuple val(meta), path("*.gene_mutdensities.tsv") , emit: mutdensity
+    path  "versions.yml"                             , topic: versions
+
+
+
+    script:
+    def prefix = task.ext.prefix ?: ""
+    prefix = "${meta.id}${prefix}"
+    def mode = task.ext.mode ?: "mutations"
+    """
+    mut_density_adjusted_dnds.py \\
+                --mutdensities ${all_mutation_densities} \\
+                --cohort-syn-mutdensities ${cohort_synonymous_mutdensities} \\
+                --output ${prefix}.gene_mutdensities_n_dnds.tsv \\
+                --mode ${mode};
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        python: \$(python --version | sed 's/Python //g')
+    END_VERSIONS
+    """
+    // The stub must create the same file name that the script block writes.
+    stub:
+    def prefix = task.ext.prefix ?: "all_samples"
+    """
+    touch ${prefix}.gene_mutdensities_n_dnds.tsv
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        python: \$(python --version | sed 's/Python //g')
+    END_VERSIONS
+    """
+
+}
diff --git a/workflows/deepcsa.nf b/workflows/deepcsa.nf
index a32d8907..b1bb0f4c 100644
--- a/workflows/deepcsa.nf
+++ b/workflows/deepcsa.nf
@@ -103,6 +103,7 @@ include { DOWNSAMPLE_DEPTHS             as DOWNSAMPLEDEPTHS         } from '../m
 
 include { SELECT_MUTDENSITIES           as SYNMUTDENSITY            } from '../modules/local/select_mutdensity/main'
 include { SELECT_MUTDENSITIES           as SYNMUTREADSDENSITY       } from '../modules/local/select_mutdensity/main'
+include { DNDS_PROXY                    as DNDSPROXY                } from '../modules/local/dnds_proxy/main'
 
 include { DNA_2_PROTEIN_MAPPING         as DNA2PROTEINMAPPING       } from '../modules/local/dna2protein/main'
 
@@ -280,6 +281,8 @@ workflow DEEPCSA{
             SYNMUTDENSITY(all_samples_adj_mutdensity)
 
             SYNMUTREADSDENSITY(all_samples_adj_mutdensity)
+
+            DNDSPROXY(compiled_adjusted_mutdensities, SYNMUTDENSITY.out.mutdensity.first())
         }
     }
     if (params.profilenonprot){

From abee52b4e8481bf928817992f0e5f5cfe2e481f0 Mon Sep 17 00:00:00 2001
From: FerriolCalvet <ferriolcalvet@gmail.com>
Date: Thu, 19 Feb 2026 12:01:17 +0100
Subject: [PATCH 10/11] fix dnds proxy computation

---
 bin/mut_density_adjusted_dnds.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/bin/mut_density_adjusted_dnds.py b/bin/mut_density_adjusted_dnds.py
index 68cf59b9..e2633e8d 100755
--- a/bin/mut_density_adjusted_dnds.py
+++ b/bin/mut_density_adjusted_dnds.py
@@ -23,10 +23,11 @@ def compute_dnds_proxy(mutdensity_file, cohort_syn_mutdensities_file, output_fil
     cohort_syn_mutdensity_df = cohort_syn_mutdensity_df.set_index("GENE")
 
     init_cohort_syn_df = pd.DataFrame(index = all_possible_genes)
-    cohort_syn_df = pd.concat((init_cohort_syn_df, cohort_syn_mutdensity_df), axis = 0)
+    cohort_syn_df = pd.concat((init_cohort_syn_df, cohort_syn_mutdensity_df), axis = 1)
     
     # filling the null mutation densities with the value of the 1st decile
-    cohort_syn_df = cohort_syn_df.fillna(cohort_syn_df[~(cohort_syn_df.isna())].quantile(.1))
+    cohort_syn_df = cohort_syn_df.fillna(cohort_syn_df[~(cohort_syn_df.isna())].quantile(.1)).reset_index()
+    cohort_syn_df.columns = ['GENE', 'cohort_synonymous']
 
     mutdensity_df = mutdensity_df_init.merge(cohort_syn_df, on = "GENE")
     for impact in ["missense", "truncating", "nonsynonymous_splice"]:
@@ -34,18 +35,18 @@ def compute_dnds_proxy(mutdensity_file, cohort_syn_mutdensities_file, output_fil
         mutdensity_df[f"d_{impact}/d_cohort_synonymous"] = mutdensity_df[impact] / mutdensity_df["cohort_synonymous"]
 
     # summary at all_samples level
-    subset_mutdensities = mutdensity_df[(mutdensity_df["SAMPLE"] == 'all_samples')]
+    subset_mutdensities = mutdensity_df[(mutdensity_df["SAMPLE_ID"] == 'all_samples')]
     for impact in ["missense", "truncating"]:
         print(subset_mutdensities.sort_values(by=f"d_{impact}/d_synonymous", ascending=False)[
-            ["GENE", "SAMPLE", impact, "synonymous", f"d_{impact}/d_synonymous"]
+            ["GENE", "SAMPLE_ID", impact, "synonymous", f"d_{impact}/d_synonymous"]
             ].head(10))
 
 
     # # summary at sample-level
-    # subset_mutdensities = mutdensity_df[(mutdensity_df["SAMPLE"] != 'all_samples')]
+    # subset_mutdensities = mutdensity_df[(mutdensity_df["SAMPLE_ID"] != 'all_samples')]
     # for impact in ["missense", "truncating"]:
     #     print(subset_mutdensities.sort_values(by=f"d_{impact}/d_synonymous", ascending=False)[
-    #         ["GENE", "SAMPLE", impact, "synonymous", f"d_{impact}/d_synonymous"]
+    #         ["GENE", "SAMPLE_ID", impact, "synonymous", f"d_{impact}/d_synonymous"]
     #         ].head(10))
 
     # TODO implement these different modes if appropriate

From 6bf72217b6964e6e046eb6f3a72a307be068b444 Mon Sep 17 00:00:00 2001
From: FerriolCalvet <ferriolcalvet@gmail.com>
Date: Thu, 19 Feb 2026 12:51:50 +0100
Subject: [PATCH 11/11] update 0 handling and output

---
 bin/mut_density_adjusted_dnds.py | 2 ++
 modules/local/dnds_proxy/main.nf | 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/bin/mut_density_adjusted_dnds.py b/bin/mut_density_adjusted_dnds.py
index e2633e8d..604e0997 100755
--- a/bin/mut_density_adjusted_dnds.py
+++ b/bin/mut_density_adjusted_dnds.py
@@ -3,6 +3,7 @@
 
 import click
 import pandas as pd
+import numpy as np
 from read_utils import custom_na_values
 
 
@@ -16,6 +17,7 @@ def compute_dnds_proxy(mutdensity_file, cohort_syn_mutdensities_file, output_fil
     """
 
     mutdensity_df_init = pd.read_csv(mutdensity_file, sep = "\t", header = 0, na_values = custom_na_values)
+    mutdensity_df_init["synonymous"] = mutdensity_df_init["synonymous"].replace(0, np.nan)
     all_possible_genes = list(mutdensity_df_init["GENE"].unique())
 
     cohort_syn_mutdensity_df = pd.read_csv(cohort_syn_mutdensities_file, sep = "\t", header = 0, na_values = custom_na_values)
diff --git a/modules/local/dnds_proxy/main.nf b/modules/local/dnds_proxy/main.nf
index af55fbb0..f0120a70 100644
--- a/modules/local/dnds_proxy/main.nf
+++ b/modules/local/dnds_proxy/main.nf
@@ -9,8 +9,8 @@ process DNDS_PROXY {
     tuple val(meta), path(cohort_synonymous_mutdensities)
 
     output:
-    tuple val(meta), path("*.gene_mutdensities.tsv") , emit: mutdensity
-    path  "versions.yml"                             , topic: versions
+    tuple val(meta), path("*.gene_mutdensities_n_dnds.tsv") , emit: mutdensity_with_dnds
+    path  "versions.yml"                                    , topic: versions