-
Notifications
You must be signed in to change notification settings - Fork 0
Open
Milestone
Description
Add code that uses the ID83 matrix generated by the SigProfilerMatrixGenerator process so that ID signatures are also fitted and plotted by SigProfilerAssignment. Be aware that version 3.4 of COSMIC is in GRCh37.
The script used to obtain ID signatures:
#!/usr/bin/env python3
"""
Process the deepCSA all-samples signatures output file as a matrix and run SigProfilerAssignment with the specified signatures. The user can pass specific signatures for refitting and choose whether to calculate SBS, ID or other types of signatures.
NOTE: The Conda environment 'sigproext' must be activated *before* running this script.
"""
import pandas as pd
import os
import subprocess
import click
from SigProfilerAssignment import Analyzer as Analyze
import SigProfilerAssignment as spa
# Command-line entry point. Each option below maps one-to-one onto a keyword
# argument of SigProfilerAssignment's ``Analyzer.cosmic_fit``:
#   --input_dir          -> samples
#   --input_type         -> input_type
#   --output_dir         -> output
#   --genome_build       -> genome_build
#   --context_type       -> context_type (also drives collapse_to_SBS96)
#   --plots/--no-plots   -> make_plots
#   --signature_database -> signature_database
@click.command()
@click.option(
    '--input_dir', '-i',
    help='Specify input data path',
    type=str,
    required=True,
)
@click.option(
    '--input_type', '--type',
    help='Specify input data type',
    type=str,
    default='matrix',
    show_default=True,
)
@click.option(
    '--output_dir', '-o',
    help='Specify output results path',
    type=str,
    required=True,
)
@click.option(
    '--genome_build', '-g',
    help='Specify genome to use',
    type=str,
    default='GRCh38',
    show_default=True,
)
@click.option(
    '--context_type', '-ctx',
    help='Specify context type (ID for indels or 96 for sbs)',
    type=str,
    default='96',
    show_default=True,
)
@click.option(
    '--plots/--no-plots',
    help='Generate plots for the results',
    default=True,
    show_default=True,
)
@click.option(
    '--signature_database', '-sigs',
    help='Use signature database with COSMIC probabilities for refitting purposes',
    # exists=False: the path is not validated here; it is passed through to
    # cosmic_fit unchanged (None when the option is omitted).
    type=click.Path(exists=False),
    required=False,
    default=None,
    show_default=True,
)
def main(input_dir, input_type, output_dir, genome_build, context_type, plots, signature_database):
    # NOTE: click prints this docstring as the command's --help text, so it is
    # kept short and user-facing; developer notes live in comments instead.
    """
    Process mutations file in vcf or matrix format and run SigProfiler Assignment with specified signatures.
    NOTE: The Conda environment 'sigproext' must be activated *before* running this script.
    """
    print(f"Input directory: {input_dir}")
    print(f"Output directory: {output_dir}")

    # Create the output directory up front (no error if it already exists).
    os.makedirs(output_dir, exist_ok=True)

    # Only the SBS96 context is collapsed to SBS96; every other context type
    # (e.g. ID) is passed to cosmic_fit as given, with collapsing disabled.
    collapse_to_sbs96 = context_type == '96'
    if not collapse_to_sbs96:
        # The previous message claimed the context was being "set to ID",
        # but context_type is never modified; report what actually happens.
        print(f"Context type is '{context_type}' (not 96): collapse_to_SBS96 is disabled.")

    # Run the assignment against the reference signatures.
    Analyze.cosmic_fit(
        samples=input_dir,
        output=output_dir,
        input_type=input_type,
        context_type=context_type,
        genome_build=genome_build,
        make_plots=plots,
        collapse_to_SBS96=collapse_to_sbs96,
        signature_database=signature_database,
    )
# Invoke the click command only when this file is executed as a script.
if __name__ == "__main__":
    main()
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels