401 changes: 261 additions & 140 deletions NeoPredPipe.py

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion StandardPredsClass.py
@@ -374,7 +374,7 @@ def PerformBlastPAlignments(self, blastp, outputDir):

if os.path.isfile(out)==False:
print("INFO: Running blastp on %s"%(fasta))
cmd = ' '.join([blastp, '-evalue 100000000 -gapextend 1 -gapopen 11 -outfmt 5 -out', out, '-query', fasta, '-subject', iedb])
cmd = ' '.join([blastp, '-num_threads 8 -evalue 100000000 -gapextend 1 -gapopen 11 -outfmt 5 -out', out, '-query', fasta, '-subject', iedb])
os.system(cmd)
blastpOut.append(out)
else:
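For reference, the only functional change above is the hard-coded -num_threads 8, which lets each blastp alignment use eight threads. Below is a minimal sketch of the shell command the updated line assembles; it is illustrative only, and the paths are placeholders for values resolved elsewhere in StandardPredsClass.py.

# Sketch only: reproduces the command string built by the updated line.
blastp = "/usr/local/bin/blastp"   # placeholder for the configured blastp path
out = "sample.blastp.xml"          # placeholder output file
fasta = "sample.epitopes.fasta"    # placeholder query fasta
iedb = "iedb_db.fasta"             # placeholder IEDB subject fasta
cmd = ' '.join([blastp, '-num_threads 8 -evalue 100000000 -gapextend 1 -gapopen 11 -outfmt 5 -out', out, '-query', fasta, '-subject', iedb])
print(cmd)
# /usr/local/bin/blastp -num_threads 8 -evalue 100000000 -gapextend 1 -gapopen 11 -outfmt 5 -out sample.blastp.xml -query sample.epitopes.fasta -subject iedb_db.fasta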
155 changes: 155 additions & 0 deletions plugins.py
@@ -0,0 +1,155 @@
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''
@File : plugins.py
@Time : 2023/12/14 14:02:56
@Author : biolxy
@Version : 1.0
@Contact : biolxy@aliyun.com
@Desc : None
'''

# imports
import sys
import os
import pandas as pd
from typing import List
# from icecream import ic
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio import SeqIO

# Generate the wild-type (WILDTYPE) fasta from the mutant fasta


def ConstructWildtypeFa(infa, outfa):
# /home/report/lixy/neo/tc1/fastaFiles/test1.tmp.8.fasta
"""
fasta format:
:>line7;NM_001099852;c.G247A;p.D83N;protein-altering;;(position;83;changed;from;D;to;N)
:ETFQAVLNGLDALLT
Args:
infa (str): mutant epitope fasta written by vcf_manipulate.py (see line 191 there); headers follow the format shown above.
outfa (str): output path for the reconstructed wild-type fasta.
"""
records = []
for seq_record in SeqIO.parse(infa, "fasta"):
seqid = str(seq_record.id)
tag = seqid.split(";")
aa_wildType = tag[-3]
aa_mutant = tag[-1]
half_len = int(len(seq_record.seq) / 2)
# ic(seq_record.id)
# ic(seq_record.seq)
# ic(aa_wildType, aa_mutant, seq_record.seq[half_len])
# relabel the record as wildType, then restore the wild-type residue at the mutated (centre) position
tag[4] = "wildType"
nid = ";".join(tag)
seq = str(seq_record.seq)
seq = seq[:half_len] + aa_wildType + seq[half_len+1:]
tmp_rec = SeqRecord(Seq(seq), id=nid, description="")
records.append(tmp_rec)
# ic(seq_record.seq)
# ic(tmp_rec.seq)
SeqIO.write(records, outfa, "fasta")


def getdf(infile):
"""_summary_

Args:
infile (_type_): 读取class1 的 df 表格 TestRun.MT.neoantigens.txt

Returns:
_type_: _description_
"""
columns = ["Sample", "R1", "R2", "Line", "chr", "allelepos", "ref", "alt", "GeneName:RefSeqID", "pos", "hla", "peptide", "Core", "Of",
"Gp", "Gl", "Ip", "Il", "Icore", "Identity", "Score_EL", "%Rank_EL", "Score_BA", "%Rank_BA", "Aff(nM)", "Candidate", "BindLevel"]
df = pd.read_csv(infile, sep="\t", names=columns)
df["peplen"] = len(df["peptide"].astype(str))
df = df.drop(["R1", "R2", "Core", "Of", "Gp",
"Gl", "Ip", "Il", "Icore"], axis=1)
return df


def getdf_class2(infile):
"""_summary_

Args:
infile (_type_): 读取class2 的 df 表格 TestRun.MT.neoantigens.txt

../netMHCIIpan -f example.pep -inptype 1 -a DRB1_0101 -BA

INFO: Running Epitope Predictions for test2 on epitopes of length 13

Returns:
_type_: _description_
"""
columns = ["Sample", "R1", "R2", "Line", "chr", "allelepos", "ref", "alt", "GeneName:RefSeqID", "pos", "hla", "peptide", "Of",
"Core", "Core_Rel", "Identity", "Score_EL", "%Rank_EL", "Exp_Bind", "Score_BA", "Aff(nM)", "%Rank_BA", "BindLevel"]
df = pd.read_csv(infile, sep="\t", names=columns)
df["peplen"] = len(df["peptide"].astype(str))
df = df.drop(["R1", "R2", "Core", "Of", "Core_Rel",
"Exp_Bind"], axis=1)
return df


def mergeMTWT(infile1, infile2, outfile):
"""
infile1: MT
infile2: WT
"""
df1 = getdf(infile1)
df2 = getdf(infile2)
on_columns = ["Sample", "Line", "chr", "allelepos",
"ref", "alt", "GeneName:RefSeqID", "pos", "hla", "Identity"] # , "peplen"
df = pd.merge(df1, df2, on=on_columns,
how='inner', suffixes=('_MT', '_WT'))
df["Aff(nM)_FC"] = df["Aff(nM)_WT"] / df["Aff(nM)_MT"]
# df.to_excel(outfile, index = True)
df.to_csv(outfile, encoding='utf-8', sep='\t', index=False)
return df


def mergeMTWT_class2(infile1, infile2, outfile):
"""
infile1: MT
infile2: WT
"""
df1 = getdf_class2(infile1)
df2 = getdf_class2(infile2)
on_columns = ["Sample", "Line", "chr", "allelepos",
"ref", "alt", "GeneName:RefSeqID", "pos", "hla", "Identity"] # , "peplen"
df = pd.merge(df1, df2, on=on_columns,
how='inner', suffixes=('_MT', '_WT'))
df["Aff(nM)_FC"] = df["Aff(nM)_WT"] / df["Aff(nM)_MT"]
# df.to_excel(outfile, index = True)
df.to_csv(outfile, encoding='utf-8', sep='\t', index=False)
return df

def main():
# infile = "/home/report/lixy/neo/tc1/fastaFiles/test1.tmp.9.fasta"
# outfile = "xx.fasta"
# ConstructWildtypeFa(infile, outfile)

# infile1 = "/home/report/lixy/neo/tc1/TestRun.MT.neoantigens.txt"
# infile2 = "/home/report/lixy/neo/tc1/TestRun.WT.neoantigens.txt"
# outfile = 'aa.txt'
# df = mergeMTWT(infile1, infile2, outfile)
# ic(df)


infile1 = "/home/report/lixy/neo/tc2/TestRun.MT.neoantigens.txt"
infile2 = "/home/report/lixy/neo/tc2/TestRun.WT.neoantigens.txt"
outfile = 'aa2.txt'

df = mergeMTWT_class2(infile1, infile2, outfile)
# ic(df)

pass


if __name__ == "__main__":
main()
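Taken together, the new helpers support a mutant versus wild-type comparison step. A rough usage sketch follows; the file names are hypothetical, and the real wiring lives in NeoPredPipe.py, whose diff is not rendered above.

# Hypothetical driver for the plugins.py helpers; all paths are illustrative.
from plugins import ConstructWildtypeFa, mergeMTWT, mergeMTWT_class2

# 1. Build a wild-type fasta alongside the mutant fasta written by vcf_manipulate.py.
ConstructWildtypeFa("sample.tmp.9.fasta", "sample.tmp.9.wildtype.fasta")

# 2. After netMHCpan has been run on both fastas, pair the MT and WT tables
#    and compute the affinity fold change Aff(nM)_WT / Aff(nM)_MT per epitope.
merged_class1 = mergeMTWT("TestRun.MT.neoantigens.txt",
                          "TestRun.WT.neoantigens.txt",
                          "TestRun.MT_WT.merged.txt")

# Class II tables have a different column layout, hence the separate helper.
merged_class2 = mergeMTWT_class2("TestRun2.MT.neoantigens.txt",
                                 "TestRun2.WT.neoantigens.txt",
                                 "TestRun2.MT_WT.merged.txt")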
5 changes: 2 additions & 3 deletions postprocessing.py
@@ -22,7 +22,6 @@ def DigestIndSample(toDigest, patName, Options, pepmatchPaths, indels=False):
'''
# temp_files = None
# output_file = "%s%s.digested.txt" % (FilePath, toDigest[0].split('/')[len(toDigest[0].split('/')) - 1].split('.epitopes.')[0])

lines = []
if indels:
pmInputFile = Options.OutputDir+'tmp/'+patName+'.epitopes.Indels.peptidematch.input'
@@ -170,12 +169,11 @@ def AppendDigestedEps(FilePath,digestedEps, patName, exonicVars, avReady, Option
genoTypesPresent = []
for ep in digestedEps:
if Options.typeII:
epID = int(ep.split('\t')[3].split(';')[0].replace('line',''))
epID = int(ep.split('\t')[6].split(';')[0].replace('line',''))
else:
epID = int(ep.split('\t')[10].split('_')[0].replace('line',''))
exonicLine = exonicInfo[epID]
avReadyLine = varInfo[epID]
chrom = avReadyLine.split('\t')[0]
pos = avReadyLine.split('\t')[1]
ref = avReadyLine.split('\t')[3]
alt = avReadyLine.split('\t')[4]
@@ -188,6 +186,7 @@ def AppendDigestedEps(FilePath,digestedEps, patName, exonicVars, avReady, Option
geneList = [':'.join(item.split(':')[0:2]) for item in exonicLine.split('\t')[2].split(',') if item != '']
nmList = [item.split(':')[1] for item in geneList]
genes = ','.join(geneList)
chrom = avReadyLine.split('\t')[0]

if Options.expression is not None:
geneExp = 'NA'
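The substantive change in this hunk is that, for class II runs (Options.typeII), the record index is now read from the seventh tab-separated field of each digested epitope line instead of the fourth, and chrom is assigned after the gene list is built. Below is a toy illustration of the new parsing; only the lineN;... identity format documented in plugins.py is taken from the codebase, and the other columns are invented placeholders rather than the real netMHCIIpan layout.

# Toy example of the class II epID extraction; columns other than the
# identity field are placeholders.
ep = "\t".join(["test2", "0", "0", "DRB1_0101", "QAVLNGLDALLTK", "1.23",
                "line7;NM_001099852;c.G247A;p.D83N;protein-altering"])
epID = int(ep.split('\t')[6].split(';')[0].replace('line', ''))
print(epID)  # 7 -> used to index exonicInfo / varInfo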
112 changes: 95 additions & 17 deletions predict_binding.py
@@ -9,6 +9,7 @@
import os
import subprocess


def predict_neoantigens(FilePath, patName, inFile, hlasnormed, epitopeLens, netMHCpan, Options):
'''
Strips out all WILDTYPE and IMMEDIATE-STOPGAIN from fasta file.
@@ -31,29 +32,41 @@ def predict_neoantigens(FilePath, patName, inFile, hlasnormed, epitopeLens, netM
cmd = "wc -l %s" % (inFile[n])
pipe = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE).stdout
k = int(pipe.read().decode("utf-8").lstrip(" ").split(" ")[0])
checks[n]=k
checks[n] = k

epcalls = []
for n in inFile:
if checks[n] > 0:
output_file = FilePath+'tmp/%s.epitopes.%s.txt' % (patName, n)
epcalls.append(output_file)
with open(output_file, 'a') as epitope_pred:
print("INFO: Running Epitope Predictions for %s on epitopes of length %s"%(patName,n))
print("INFO: Running Epitope Predictions for %s on epitopes of length %s" % (
patName, n))
if Options.ELpred:
cmd = [netMHCpan['netmhcpan'], '-l', str(n).split('.')[0], '-a', ','.join(hlasnormed), '-f', inFile[n]]
# cmd = [netMHCpan['netmhcpan'], '-l',
# str(n).split('.')[0], '-a', ','.join(hlasnormed), '-f', inFile[n]]
cmd = [netMHCpan['netmhcpan'], '-BA', '-l',
str(n).split('.')[0], '-a', ','.join(hlasnormed), '-f', inFile[n]]
elif Options.typeII:
cmd = [netMHCpan['netmhcpan'], '-length', str(n).split('.')[0], '-a', ','.join(hlasnormed), '-f', inFile[n]]
# cmd = [netMHCpan['netmhcpan'], '-length',
# str(n).split('.')[0], '-a', ','.join(hlasnormed), '-f', inFile[n]]
# add the -BA flag for class II as well (netMHCIIpan-4.2)
cmd = [netMHCpan['netmhcpan'], '-BA', '-length',
str(n).split('.')[0], '-a', ','.join(hlasnormed), '-f', inFile[n]]
else:
cmd = [netMHCpan['netmhcpan'], '-BA', '-l', str(n).split('.')[0], '-a', ','.join(hlasnormed), '-f', inFile[n]]
netMHC_run = subprocess.Popen(cmd, stdout=epitope_pred, stderr=epitope_pred)
cmd = [netMHCpan['netmhcpan'], '-BA', '-l',
str(n).split('.')[0], '-a', ','.join(hlasnormed), '-f', inFile[n]]
netMHC_run = subprocess.Popen(
cmd, stdout=epitope_pred, stderr=epitope_pred)
netMHC_run.wait()
else:
print("INFO: Skipping Sample! No peptides to predict for %s" % (patName))

print("INFO: Predictions complete for %s on epitopes of length %s" % (patName, n))
print("INFO: Predictions complete for %s on epitopes of length %s" %
(patName, n))

return (epcalls)

return(epcalls)

def predict_neoantigensWT(FilePath, patName, inFile, hlasnormed, epitopeLens, netMHCpan):
'''
@@ -74,26 +87,91 @@ def predict_neoantigensWT(FilePath, patName, inFile, hlasnormed, epitopeLens, ne
cmd = "wc -l %s" % (inFile[n])
pipe = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE).stdout
k = int(pipe.read().decode("utf-8").lstrip(" ").split(" ")[0])
checks[n]=k
checks[n] = k

epcalls = []
for n in epitopeLens:
if checks[n] > 0:
output_file = '%s%s.wildtype.epitopes.%s.txt' % (FilePath, patName, n)
output_file = '%s%s.wildtype.epitopes.%s.txt' % (
FilePath, patName, n)
epcalls.append(output_file)

if os.path.isfile(output_file)==False:
if os.path.isfile(output_file) == False:
print("INFO: Predicting neoantigens for %s" % (patName))

with open(output_file, 'a') as epitope_pred:
print("INFO: Running Epitope Predictions for %s on epitopes of length %s"%(patName,n))
cmd = [netMHCpan['netmhcpan'], '-BA', '-l', str(n), '-a', ','.join(hlasnormed), '-f', inFile[n]]
netMHC_run = subprocess.Popen(cmd, stdout=epitope_pred, stderr=epitope_pred)
print("INFO: Running Epitope Predictions for %s on epitopes of length %s" % (
patName, n))
cmd = [netMHCpan['netmhcpan'], '-BA', '-l',
str(n), '-a', ','.join(hlasnormed), '-f', inFile[n]]
netMHC_run = subprocess.Popen(
cmd, stdout=epitope_pred, stderr=epitope_pred)
netMHC_run.wait()
print("INFO: Predictions complete for %s on epitopes of length %s" % (patName, n))
print(
"INFO: Predictions complete for %s on epitopes of length %s" % (patName, n))
else:
print("INFO: Neoantigen predictions already complete for %s epitopes of length %s" % (patName, n))
print("INFO: Neoantigen predictions already complete for %s epitopes of length %s" % (
patName, n))
else:
print("INFO: Skipping Sample! No peptides to predict for %s" % (patName))

return (epcalls)


def predict_neoantigens2(FilePath, patName, inFile, hlasnormed, epitopeLens, netMHCpan, Options, inType):
'''
Runs netMHCpan (class I) or netMHCIIpan (class II) binding predictions on the MT or WT fasta files for a patient.

:param FilePath: Primary path of working directory
:param patName: ID associated with a patient
:param inFile: Fasta file with reformatted coding changes.
:param hlas: HLA types for the patient.
:param epitopeLens: List of epitope lengths to predict
:param netMHCpan: Dictionary housing netMHCpan specific script locations and data. See README.md.
:param Options: Run options; Options.ELpred and Options.typeII select the prediction mode.
:param inType: 'MT' or 'WT' (string), used in the output file name.
:return: netMHCpan predictions for each file.
'''
assert inType in ['MT', 'WT']
print("INFO: Predicting neoantigens for %s" % (patName))

# Verify that the fasta file has information in it to avoid any errors thrown from netMHCpan
checks = dict.fromkeys(inFile.keys())
for n in inFile:
cmd = "wc -l %s" % (inFile[n])
pipe = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE).stdout
k = int(pipe.read().decode("utf-8").lstrip(" ").split(" ")[0])
checks[n] = k

epcalls = []
for n in inFile:
if checks[n] > 0:
output_file = FilePath + \
'tmp/%s.epitopes.%s.%s.txt' % (patName, str(inType), n)
epcalls.append(output_file)
with open(output_file, 'a') as epitope_pred:
print("INFO: Running Epitope Predictions for %s on epitopes of length %s" % (
patName, n))
if Options.ELpred:
# cmd = [netMHCpan['netmhcpan'], '-l',
# str(n).split('.')[0], '-a', ','.join(hlasnormed), '-f', inFile[n]]
cmd = [netMHCpan['netmhcpan'], '-BA', '-l',
str(n).split('.')[0], '-a', ','.join(hlasnormed), '-f', inFile[n]]
elif Options.typeII:
# cmd = [netMHCpan['netmhcpan'], '-length',
# str(n).split('.')[0], '-a', ','.join(hlasnormed), '-f', inFile[n]]
cmd = [netMHCpan['netmhcpan'], '-BA', '-length',
str(n).split('.')[0], '-a', ','.join(hlasnormed), '-f', inFile[n]]
else:
cmd = [netMHCpan['netmhcpan'], '-BA', '-l',
str(n).split('.')[0], '-a', ','.join(hlasnormed), '-f', inFile[n]]
netMHC_run = subprocess.Popen(
cmd, stdout=epitope_pred, stderr=epitope_pred)
netMHC_run.wait()
else:
print("INFO: Skipping Sample! No peptides to predict for %s" % (patName))

return(epcalls)
print("INFO: Predictions complete for %s on epitopes of length %s" %
(patName, n))

return (epcalls)
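With this refactor every branch now passes -BA, so binding-affinity columns are always produced. Below is a minimal sketch of the class I command list that predict_neoantigens2 assembles; the allele names and paths are hypothetical.

# Sketch of the class I command construction; values are illustrative only.
netmhcpan = "/usr/local/bin/netMHCpan"      # placeholder for netMHCpan['netmhcpan']
hlasnormed = ["HLA-A02:01", "HLA-B07:02"]   # placeholder normalized HLA list
n = "9"                                     # epitope-length key from inFile
fasta = "tmp/test1.tmp.9.fasta"             # placeholder epitope fasta
cmd = [netmhcpan, '-BA', '-l', str(n).split('.')[0],
       '-a', ','.join(hlasnormed), '-f', fasta]
# -> netMHCpan -BA -l 9 -a HLA-A02:01,HLA-B07:02 -f tmp/test1.tmp.9.fasta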
27 changes: 27 additions & 0 deletions singularity_images.def
@@ -0,0 +1,27 @@
Bootstrap: docker
From: python:3.9.18
Stage: build


%files
annovar /opt
NeoPredPipe_class1 /opt
NeoPredPipe_class2 /opt
/home/report/lixy/sif/neo/opt/netMHCpan-4.1 /opt
/home/report/lixy/sif/neo/opt/netMHCIIpan-4.2 /opt
/home/report/lixy/neo/ncbi-blast-2.13.0+/bin/blastp /opt
/home/report/lixy/neo/PeptideMatchCMD_1.1.jar /opt
%post
ln -s /opt/netMHCIIpan-4.2/netMHCIIpan /usr/local/bin/
ln -s /opt/netMHCpan-4.1/netMHCpan /usr/local/bin/
ln -s /opt/blastp /usr/local/bin/
apt-get update && apt-get install -y tcsh perl ccrypt
pip install -i https://pypi.tuna.tsinghua.edu.cn/simple biopython pandas numpy

%environment
export PATH=/opt:$PATH

%labels
Author biolxy@aliyun.com

# singularity build --fakeroot NeoPredPipe.sif singularity_images.def
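One possible way to build and smoke-test the resulting image from Python is sketched below; the in-container path to NeoPredPipe.py is an assumption based on the %files section above, and --help is only a sanity check, not the full pipeline invocation.

# Sketch only: build the image and confirm the bundled class I pipeline starts.
import subprocess

subprocess.run(["singularity", "build", "--fakeroot",
                "NeoPredPipe.sif", "singularity_images.def"], check=True)
subprocess.run(["singularity", "exec", "NeoPredPipe.sif", "python",
                "/opt/NeoPredPipe_class1/NeoPredPipe.py", "--help"], check=True)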