401 changes: 261 additions & 140 deletions NeoPredPipe.py

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion StandardPredsClass.py
@@ -374,7 +374,7 @@ def PerformBlastPAlignments(self, blastp, outputDir):

if os.path.isfile(out)==False:
print("INFO: Running blastp on %s"%(fasta))
cmd = ' '.join([blastp, '-evalue 100000000 -gapextend 1 -gapopen 11 -outfmt 5 -out', out, '-query', fasta, '-subject', iedb])
cmd = ' '.join([blastp, '-num_threads 8 -evalue 100000000 -gapextend 1 -gapopen 11 -outfmt 5 -out', out, '-query', fasta, '-subject', iedb])
os.system(cmd)
blastpOut.append(out)
else:
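For reference, the only functional change above is the hard-coded -num_threads 8, which lets each blastp alignment use eight threads. Below is a minimal sketch of the shell command the updated line assembles; it is illustrative only, and the paths are placeholders for values resolved elsewhere in StandardPredsClass.py.

# Sketch only: reproduces the command string built by the updated line.
blastp = "/usr/local/bin/blastp"   # placeholder for the configured blastp path
out = "sample.blastp.xml"          # placeholder output file
fasta = "sample.epitopes.fasta"    # placeholder query fasta
iedb = "iedb_db.fasta"             # placeholder IEDB subject fasta
cmd = ' '.join([blastp, '-num_threads 8 -evalue 100000000 -gapextend 1 -gapopen 11 -outfmt 5 -out', out, '-query', fasta, '-subject', iedb])
print(cmd)
# /usr/local/bin/blastp -num_threads 8 -evalue 100000000 -gapextend 1 -gapopen 11 -outfmt 5 -out sample.blastp.xml -query sample.epitopes.fasta -subject iedb_db.fasta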
155 changes: 155 additions & 0 deletions plugins.py
@@ -0,0 +1,155 @@
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''
@File : plugins.py
@Time : 2023/12/14 14:02:56
@Author : biolxy
@Version : 1.0
@Contact : biolxy@aliyun.com
@Desc : None
'''

# imports
import sys
import os
import pandas as pd
from typing import List
# from icecream import ic
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio import SeqIO

# Generate the wild-type (WILDTYPE) fasta from the mutant fasta


def ConstructWildtypeFa(infa, outfa):
# /home/report/lixy/neo/tc1/fastaFiles/test1.tmp.8.fasta
"""
fasta format:
:>line7;NM_001099852;c.G247A;p.D83N;protein-altering;;(position;83;changed;from;D;to;N)
:ETFQAVLNGLDALLT
Args:
infa (str): mutant epitope fasta written by vcf_manipulate.py (see line 191 there); headers follow the format shown above.
outfa (str): output path for the reconstructed wild-type fasta.
"""
records = []
for seq_record in SeqIO.parse(infa, "fasta"):
seqid = str(seq_record.id)
tag = seqid.split(";")
aa_wildType = tag[-3]
aa_mutant = tag[-1]
half_len = int(len(seq_record.seq) / 2)
# ic(seq_record.id)
# ic(seq_record.seq)
# ic(aa_wildType, aa_mutant, seq_record.seq[half_len])
# relabel the record as wildType, then restore the wild-type residue at the mutated (centre) position
tag[4] = "wildType"
nid = ";".join(tag)
seq = str(seq_record.seq)
seq = seq[:half_len] + aa_wildType + seq[half_len+1:]
tmp_rec = SeqRecord(Seq(seq), id=nid, description="")
records.append(tmp_rec)
# ic(seq_record.seq)
# ic(tmp_rec.seq)
SeqIO.write(records, outfa, "fasta")


def getdf(infile):
"""_summary_

Args:
infile (_type_): 读取class1 的 df 表格 TestRun.MT.neoantigens.txt

Returns:
_type_: _description_
"""
columns = ["Sample", "R1", "R2", "Line", "chr", "allelepos", "ref", "alt", "GeneName:RefSeqID", "pos", "hla", "peptide", "Core", "Of",
"Gp", "Gl", "Ip", "Il", "Icore", "Identity", "Score_EL", "%Rank_EL", "Score_BA", "%Rank_BA", "Aff(nM)", "Candidate", "BindLevel"]
df = pd.read_csv(infile, sep="\t", names=columns)
df["peplen"] = len(df["peptide"].astype(str))
df = df.drop(["R1", "R2", "Core", "Of", "Gp",
"Gl", "Ip", "Il", "Icore"], axis=1)
return df


def getdf_class2(infile):
"""_summary_

Args:
infile (_type_): 读取class2 的 df 表格 TestRun.MT.neoantigens.txt

../netMHCIIpan -f example.pep -inptype 1 -a DRB1_0101 -BA

INFO: Running Epitope Predictions for test2 on epitopes of length 13

Returns:
_type_: _description_
"""
columns = ["Sample", "R1", "R2", "Line", "chr", "allelepos", "ref", "alt", "GeneName:RefSeqID", "pos", "hla", "peptide", "Of",
"Core", "Core_Rel", "Identity", "Score_EL", "%Rank_EL", "Exp_Bind", "Score_BA", "Aff(nM)", "%Rank_BA", "BindLevel"]
df = pd.read_csv(infile, sep="\t", names=columns)
df["peplen"] = len(df["peptide"].astype(str))
df = df.drop(["R1", "R2", "Core", "Of", "Core_Rel",
"Exp_Bind"], axis=1)
return df


def mergeMTWT(infile1, infile2, outfile):
"""
infile1: MT
infile2: WT
"""
df1 = getdf(infile1)
df2 = getdf(infile2)
on_columns = ["Sample", "Line", "chr", "allelepos",
"ref", "alt", "GeneName:RefSeqID", "pos", "hla", "Identity"] # , "peplen"
df = pd.merge(df1, df2, on=on_columns,
how='inner', suffixes=('_MT', '_WT'))
df["Aff(nM)_FC"] = df["Aff(nM)_WT"] / df["Aff(nM)_MT"]
# df.to_excel(outfile, index = True)
df.to_csv(outfile, encoding='utf-8', sep='\t', index=False)
return df


def mergeMTWT_class2(infile1, infile2, outfile):
"""
infile1: MT
infile2: WT
"""
df1 = getdf_class2(infile1)
df2 = getdf_class2(infile2)
on_columns = ["Sample", "Line", "chr", "allelepos",
"ref", "alt", "GeneName:RefSeqID", "pos", "hla", "Identity"] # , "peplen"
df = pd.merge(df1, df2, on=on_columns,
how='inner', suffixes=('_MT', '_WT'))
df["Aff(nM)_FC"] = df["Aff(nM)_WT"] / df["Aff(nM)_MT"]
# df.to_excel(outfile, index = True)
df.to_csv(outfile, encoding='utf-8', sep='\t', index=False)
return df

def main():
# infile = "/home/report/lixy/neo/tc1/fastaFiles/test1.tmp.9.fasta"
# outfile = "xx.fasta"
# ConstructWildtypeFa(infile, outfile)

# infile1 = "/home/report/lixy/neo/tc1/TestRun.MT.neoantigens.txt"
# infile2 = "/home/report/lixy/neo/tc1/TestRun.WT.neoantigens.txt"
# outfile = 'aa.txt'
# df = mergeMTWT(infile1, infile2, outfile)
# ic(df)


infile1 = "/home/report/lixy/neo/tc2/TestRun.MT.neoantigens.txt"
infile2 = "/home/report/lixy/neo/tc2/TestRun.WT.neoantigens.txt"
outfile = 'aa2.txt'

df = mergeMTWT_class2(infile1, infile2, outfile)
# ic(df)

pass


if __name__ == "__main__":
main()
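Taken together, the new helpers support a mutant versus wild-type comparison step. A rough usage sketch follows; the file names are hypothetical, and the real wiring lives in NeoPredPipe.py, whose diff is not rendered above.

# Hypothetical driver for the plugins.py helpers; all paths are illustrative.
from plugins import ConstructWildtypeFa, mergeMTWT, mergeMTWT_class2

# 1. Build a wild-type fasta alongside the mutant fasta written by vcf_manipulate.py.
ConstructWildtypeFa("sample.tmp.9.fasta", "sample.tmp.9.wildtype.fasta")

# 2. After netMHCpan has been run on both fastas, pair the MT and WT tables
#    and compute the affinity fold change Aff(nM)_WT / Aff(nM)_MT per epitope.
merged_class1 = mergeMTWT("TestRun.MT.neoantigens.txt",
                          "TestRun.WT.neoantigens.txt",
                          "TestRun.MT_WT.merged.txt")

# Class II tables have a different column layout, hence the separate helper.
merged_class2 = mergeMTWT_class2("TestRun2.MT.neoantigens.txt",
                                 "TestRun2.WT.neoantigens.txt",
                                 "TestRun2.MT_WT.merged.txt")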
5 changes: 2 additions & 3 deletions postprocessing.py
@@ -22,7 +22,6 @@ def DigestIndSample(toDigest, patName, Options, pepmatchPaths, indels=False):
'''
# temp_files = None
# output_file = "%s%s.digested.txt" % (FilePath, toDigest[0].split('/')[len(toDigest[0].split('/')) - 1].split('.epitopes.')[0])

lines = []
if indels:
pmInputFile = Options.OutputDir+'tmp/'+patName+'.epitopes.Indels.peptidematch.input'
@@ -170,12 +169,11 @@ def AppendDigestedEps(FilePath,digestedEps, patName, exonicVars, avReady, Option
genoTypesPresent = []
for ep in digestedEps:
if Options.typeII:
epID = int(ep.split('\t')[3].split(';')[0].replace('line',''))
epID = int(ep.split('\t')[6].split(';')[0].replace('line',''))
else:
epID = int(ep.split('\t')[10].split('_')[0].replace('line',''))
exonicLine = exonicInfo[epID]
avReadyLine = varInfo[epID]
chrom = avReadyLine.split('\t')[0]
pos = avReadyLine.split('\t')[1]
ref = avReadyLine.split('\t')[3]
alt = avReadyLine.split('\t')[4]
@@ -188,6 +186,7 @@ def AppendDigestedEps(FilePath,digestedEps, patName, exonicVars, avReady, Option
geneList = [':'.join(item.split(':')[0:2]) for item in exonicLine.split('\t')[2].split(',') if item != '']
nmList = [item.split(':')[1] for item in geneList]
genes = ','.join(geneList)
chrom = avReadyLine.split('\t')[0]

if Options.expression is not None:
geneExp = 'NA'
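The substantive change in this hunk is that, for class II runs (Options.typeII), the record index is now read from the seventh tab-separated field of each digested epitope line instead of the fourth, and chrom is assigned after the gene list is built. Below is a toy illustration of the new parsing; only the lineN;... identity format documented in plugins.py is taken from the codebase, and the other columns are invented placeholders rather than the real netMHCIIpan layout.

# Toy example of the class II epID extraction; columns other than the
# identity field are placeholders.
ep = "\t".join(["test2", "0", "0", "DRB1_0101", "QAVLNGLDALLTK", "1.23",
                "line7;NM_001099852;c.G247A;p.D83N;protein-altering"])
epID = int(ep.split('\t')[6].split(';')[0].replace('line', ''))
print(epID)  # 7 -> used to index exonicInfo / varInfo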
112 changes: 95 additions & 17 deletions predict_binding.py
@@ -9,6 +9,7 @@
import os
import subprocess


def predict_neoantigens(FilePath, patName, inFile, hlasnormed, epitopeLens, netMHCpan, Options):
'''
Strips out all WILDTYPE and IMMEDIATE-STOPGAIN from fasta file.
@@ -31,29 +32,41 @@ def predict_neoantigens(FilePath, patName, inFile, hlasnormed, epitopeLens, netM
cmd = "wc -l %s" % (inFile[n])
pipe = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE).stdout
k = int(pipe.read().decode("utf-8").lstrip(" ").split(" ")[0])
checks[n]=k
checks[n] = k

epcalls = []
for n in inFile:
if checks[n] > 0:
output_file = FilePath+'tmp/%s.epitopes.%s.txt' % (patName, n)
epcalls.append(output_file)
with open(output_file, 'a') as epitope_pred:
print("INFO: Running Epitope Predictions for %s on epitopes of length %s"%(patName,n))
print("INFO: Running Epitope Predictions for %s on epitopes of length %s" % (
patName, n))
if Options.ELpred:
cmd = [netMHCpan['netmhcpan'], '-l', str(n).split('.')[0], '-a', ','.join(hlasnormed), '-f', inFile[n]]
# cmd = [netMHCpan['netmhcpan'], '-l',
# str(n).split('.')[0], '-a', ','.join(hlasnormed), '-f', inFile[n]]
cmd = [netMHCpan['netmhcpan'], '-BA', '-l',
str(n).split('.')[0], '-a', ','.join(hlasnormed), '-f', inFile[n]]
elif Options.typeII:
cmd = [netMHCpan['netmhcpan'], '-length', str(n).split('.')[0], '-a', ','.join(hlasnormed), '-f', inFile[n]]
# cmd = [netMHCpan['netmhcpan'], '-length',
# str(n).split('.')[0], '-a', ','.join(hlasnormed), '-f', inFile[n]]
# add the -BA flag for class II as well (netMHCIIpan-4.2)
cmd = [netMHCpan['netmhcpan'], '-BA', '-length',
str(n).split('.')[0], '-a', ','.join(hlasnormed), '-f', inFile[n]]
else:
cmd = [netMHCpan['netmhcpan'], '-BA', '-l', str(n).split('.')[0], '-a', ','.join(hlasnormed), '-f', inFile[n]]
netMHC_run = subprocess.Popen(cmd, stdout=epitope_pred, stderr=epitope_pred)
cmd = [netMHCpan['netmhcpan'], '-BA', '-l',
str(n).split('.')[0], '-a', ','.join(hlasnormed), '-f', inFile[n]]
netMHC_run = subprocess.Popen(
cmd, stdout=epitope_pred, stderr=epitope_pred)
netMHC_run.wait()
else:
print("INFO: Skipping Sample! No peptides to predict for %s" % (patName))

print("INFO: Predictions complete for %s on epitopes of length %s" % (patName, n))
print("INFO: Predictions complete for %s on epitopes of length %s" %
(patName, n))

return (epcalls)

return(epcalls)

def predict_neoantigensWT(FilePath, patName, inFile, hlasnormed, epitopeLens, netMHCpan):
'''
@@ -74,26 +87,91 @@ def predict_neoantigensWT(FilePath, patName, inFile, hlasnormed, epitopeLens, ne
cmd = "wc -l %s" % (inFile[n])
pipe = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE).stdout
k = int(pipe.read().decode("utf-8").lstrip(" ").split(" ")[0])
checks[n]=k
checks[n] = k

epcalls = []
for n in epitopeLens:
if checks[n] > 0:
output_file = '%s%s.wildtype.epitopes.%s.txt' % (FilePath, patName, n)
output_file = '%s%s.wildtype.epitopes.%s.txt' % (
FilePath, patName, n)
epcalls.append(output_file)

if os.path.isfile(output_file)==False:
if os.path.isfile(output_file) == False:
print("INFO: Predicting neoantigens for %s" % (patName))

with open(output_file, 'a') as epitope_pred:
print("INFO: Running Epitope Predictions for %s on epitopes of length %s"%(patName,n))
cmd = [netMHCpan['netmhcpan'], '-BA', '-l', str(n), '-a', ','.join(hlasnormed), '-f', inFile[n]]
netMHC_run = subprocess.Popen(cmd, stdout=epitope_pred, stderr=epitope_pred)
print("INFO: Running Epitope Predictions for %s on epitopes of length %s" % (
patName, n))
cmd = [netMHCpan['netmhcpan'], '-BA', '-l',
str(n), '-a', ','.join(hlasnormed), '-f', inFile[n]]
netMHC_run = subprocess.Popen(
cmd, stdout=epitope_pred, stderr=epitope_pred)
netMHC_run.wait()
print("INFO: Predictions complete for %s on epitopes of length %s" % (patName, n))
print(
"INFO: Predictions complete for %s on epitopes of length %s" % (patName, n))
else:
print("INFO: Neoantigen predictions already complete for %s epitopes of length %s" % (patName, n))
print("INFO: Neoantigen predictions already complete for %s epitopes of length %s" % (
patName, n))
else:
print("INFO: Skipping Sample! No peptides to predict for %s" % (patName))

return (epcalls)


def predict_neoantigens2(FilePath, patName, inFile, hlasnormed, epitopeLens, netMHCpan, Options, inType):
'''
Runs netMHCpan (class I) or netMHCIIpan (class II) binding predictions on the MT or WT fasta files for a patient.

:param FilePath: Primary path of working directory
:param patName: ID associated with a patient
:param inFile: Fasta file with reformatted coding changes.
:param hlas: HLA types for the patient.
:param epitopeLens: List of epitope lengths to predict
:param netMHCpan: Dictionary housing netMHCpan specific script locations and data. See README.md.
:param Options: Run options; Options.ELpred and Options.typeII select the prediction mode.
:param inType: 'MT' or 'WT' (string), used in the output file name.
:return: netMHCpan predictions for each file.
'''
assert inType in ['MT', 'WT']
print("INFO: Predicting neoantigens for %s" % (patName))

# Verify that the fasta file has information in it to avoid any errors thrown from netMHCpan
checks = dict.fromkeys(inFile.keys())
for n in inFile:
cmd = "wc -l %s" % (inFile[n])
pipe = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE).stdout
k = int(pipe.read().decode("utf-8").lstrip(" ").split(" ")[0])
checks[n] = k

epcalls = []
for n in inFile:
if checks[n] > 0:
output_file = FilePath + \
'tmp/%s.epitopes.%s.%s.txt' % (patName, str(inType), n)
epcalls.append(output_file)
with open(output_file, 'a') as epitope_pred:
print("INFO: Running Epitope Predictions for %s on epitopes of length %s" % (
patName, n))
if Options.ELpred:
# cmd = [netMHCpan['netmhcpan'], '-l',
# str(n).split('.')[0], '-a', ','.join(hlasnormed), '-f', inFile[n]]
cmd = [netMHCpan['netmhcpan'], '-BA', '-l',
str(n).split('.')[0], '-a', ','.join(hlasnormed), '-f', inFile[n]]
elif Options.typeII:
# cmd = [netMHCpan['netmhcpan'], '-length',
# str(n).split('.')[0], '-a', ','.join(hlasnormed), '-f', inFile[n]]
cmd = [netMHCpan['netmhcpan'], '-BA', '-length',
str(n).split('.')[0], '-a', ','.join(hlasnormed), '-f', inFile[n]]
else:
cmd = [netMHCpan['netmhcpan'], '-BA', '-l',
str(n).split('.')[0], '-a', ','.join(hlasnormed), '-f', inFile[n]]
netMHC_run = subprocess.Popen(
cmd, stdout=epitope_pred, stderr=epitope_pred)
netMHC_run.wait()
else:
print("INFO: Skipping Sample! No peptides to predict for %s" % (patName))

return(epcalls)
print("INFO: Predictions complete for %s on epitopes of length %s" %
(patName, n))

return (epcalls)
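With this refactor every branch now passes -BA, so binding-affinity columns are always produced. Below is a minimal sketch of the class I command list that predict_neoantigens2 assembles; the allele names and paths are hypothetical.

# Sketch of the class I command construction; values are illustrative only.
netmhcpan = "/usr/local/bin/netMHCpan"      # placeholder for netMHCpan['netmhcpan']
hlasnormed = ["HLA-A02:01", "HLA-B07:02"]   # placeholder normalized HLA list
n = "9"                                     # epitope-length key from inFile
fasta = "tmp/test1.tmp.9.fasta"             # placeholder epitope fasta
cmd = [netmhcpan, '-BA', '-l', str(n).split('.')[0],
       '-a', ','.join(hlasnormed), '-f', fasta]
# -> netMHCpan -BA -l 9 -a HLA-A02:01,HLA-B07:02 -f tmp/test1.tmp.9.fasta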
27 changes: 27 additions & 0 deletions singularity_images.def
@@ -0,0 +1,27 @@
Bootstrap: docker
From: python:3.9.18
Stage: build


%files
annovar /opt
NeoPredPipe_class1 /opt
NeoPredPipe_class2 /opt
/home/report/lixy/sif/neo/opt/netMHCpan-4.1 /opt
/home/report/lixy/sif/neo/opt/netMHCIIpan-4.2 /opt
/home/report/lixy/neo/ncbi-blast-2.13.0+/bin/blastp /opt
/home/report/lixy/neo/PeptideMatchCMD_1.1.jar /opt
%post
ln -s /opt/netMHCIIpan-4.2/netMHCIIpan /usr/local/bin/
ln -s /opt/netMHCpan-4.1/netMHCpan /usr/local/bin/
ln -s /opt/blastp /usr/local/bin/
apt-get update && apt-get install -y tcsh perl ccrypt
pip install -i https://pypi.tuna.tsinghua.edu.cn/simple biopython pandas numpy

%environment
export PATH=/opt:$PATH

%labels
Author biolxy@aliyun.com

# singularity build --fakeroot NeoPredPipe.sif singularity_images.def
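One possible way to build and smoke-test the resulting image from Python is sketched below; the in-container path to NeoPredPipe.py is an assumption based on the %files section above, and --help is only a sanity check, not the full pipeline invocation.

# Sketch only: build the image and confirm the bundled class I pipeline starts.
import subprocess

subprocess.run(["singularity", "build", "--fakeroot",
                "NeoPredPipe.sif", "singularity_images.def"], check=True)
subprocess.run(["singularity", "exec", "NeoPredPipe.sif", "python",
                "/opt/NeoPredPipe_class1/NeoPredPipe.py", "--help"], check=True)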