Skip to content

Add ID signatures in sigprofilerAssignment #369

@efigb

Description

@efigb

Add code that uses the ID83 matrix generated by the sigprofilermatrixgenerator process to also plot ID signatures in sigprofilerassignment. Be aware that version 3.4 of COSMIC is in GRCh37.

The script used to obtain ID signatures:

#!/usr/bin/env python3

"""
Process the deepCSA all-samples signatures output file as a matrix and run SigProfiler Assignment with the specified signatures. The user can pass specific signatures for refitting and can calculate SBS, ID or other types of signatures.

NOTE: The Conda environment 'sigproext' must be activated *before* running this script.
"""

import pandas as pd
import os
import subprocess
import click
from SigProfilerAssignment import Analyzer as Analyze
import SigProfilerAssignment as spa

@click.command()
@click.option('--input_dir', '-i',
              help='Specify input data path',
              type=str,
              required=True)
@click.option('--input_type', '--type',
              help='Specify input data type',
              type=str,
              default='matrix',
              show_default=True)
@click.option('--output_dir', '-o',
              help='Specify output results path',
              type=str,
              required=True)
# NOTE(review): the default build is GRCh38, while the accompanying issue text
# says COSMIC v3.4 is in GRCh37 -- confirm which build ID runs should use.
@click.option('--genome_build', '-g',
              help='Specify genome to use',
              type=str,
              default='GRCh38',
              show_default=True)
@click.option('--context_type', '-ctx',
              help='Specify context type (ID for indels or 96 for sbs)',
              type=str,
              default='96',
              show_default=True)
@click.option('--plots/--no-plots',
              help='Generate plots for the results',
              default=True,
              show_default=True)
@click.option('--signature_database', '-sigs',
              help='Use signature database with COSMIC probabilities for refitting purposes',
              type=click.Path(exists=False),
              required=False,
              default=None,
              show_default=True)
def main(input_dir, input_type, output_dir, genome_build, context_type, plots, signature_database):
    """
    Process mutations file in vcf or matrix format and run SigProfiler Assignment with specified signatures.
    NOTE: The Conda environment 'sigproext' must be activated *before* running this script.
    \f

    Everything after the form-feed marker above is developer documentation
    that click leaves out of the ``--help`` output.

    Args:
        input_dir: Input data path, forwarded as ``samples`` to ``cosmic_fit``.
        input_type: Input data type, forwarded unchanged (CLI default 'matrix').
        output_dir: Results path; created if it does not exist yet.
        genome_build: Genome build name, forwarded unchanged.
        context_type: Context type, forwarded unchanged. Only the exact
            string '96' enables ``collapse_to_SBS96``.
        plots: Whether plots are generated (forwarded as ``make_plots``).
        signature_database: Optional signature database path for refitting;
            ``None`` is forwarded as-is when the option is omitted.
    """

    print(f"Input directory: {input_dir}")
    print(f"Output directory: {output_dir}")

    # Ensure output directory exists
    os.makedirs(output_dir, exist_ok=True)

    # collapse_to_SBS96 is only requested for the SBS-96 context. The context
    # type itself is never rewritten here: whatever the user passed (ID or any
    # other value) is forwarded as-is, so the message must not claim otherwise.
    collapse_to_sbs96 = context_type == '96'
    if not collapse_to_sbs96:
        print(f"Context type is {context_type!r}, not '96'; running with collapse_to_SBS96=False.")

    # Run the assignment
    Analyze.cosmic_fit(
        samples=input_dir,
        output=output_dir,
        input_type=input_type,  # as given by --input_type
        context_type=context_type,  # 'ID' here selects indel signatures
        genome_build=genome_build,  # as given by --genome_build
        make_plots=plots,
        collapse_to_SBS96=collapse_to_sbs96,
        signature_database=signature_database,
    )
    
# Script entry point: only invoke the click command when this file is executed
# directly. main() is called without arguments because the click decorators
# supply its parameters from the command line.
if __name__ == "__main__":
    main()

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions