diff --git a/.gitignore b/.gitignore index 2a561e75..ec3692e8 100644 --- a/.gitignore +++ b/.gitignore @@ -16,3 +16,4 @@ build dist *.egg-info *.ipynb_checkpoints +.eggs/* \ No newline at end of file diff --git a/src/pmx/ligand_alchemy.py b/src/pmx/ligand_alchemy.py index eccdd44a..69f45ee4 100644 --- a/src/pmx/ligand_alchemy.py +++ b/src/pmx/ligand_alchemy.py @@ -22,7 +22,7 @@ # ================ # Helper functions # ================ -def reformatPDB(fname,num,randint=42,bStrict=False): +def reformatPDB(fname,num,randint=42,bStrict=False,bCONECT=False): """Higher level command to read, format and call a pdb writer. Params @@ -35,6 +35,8 @@ def reformatPDB(fname,num,randint=42,bStrict=False): random number to mark the output pdb bStrict : bool limit atom names to 3 char (default False) + bCONECT : bool + read CONECT entries from pdb (default False) Returns ------- newname : str @@ -46,7 +48,7 @@ def reformatPDB(fname,num,randint=42,bStrict=False): """ newname = "tempFormat_"+str(randint)+'_'+str(num)+".pdb" - m = Model().read(fname) + m = Model().read(fname,bCONECT=bCONECT) # adjust atom names and remember the changes atomNameID = {} @@ -61,10 +63,11 @@ def reformatPDB(fname,num,randint=42,bStrict=False): atomNameID[a.id] = a.name a.name = newAtomName - writeFormatPDB(newname,m,bStrict=bStrict) + writeFormatPDB(newname,m,bStrict=bStrict, + _conect_entries=m._pdb_conect if bCONECT else None) return(newname,atomNameID,sigmaHoleID) -def writeFormatPDB(fname,m,title="",nr=1,bStrict=False): +def writeFormatPDB(fname,m,title="",nr=1,bStrict=False,_conect_entries=None): """Writes formatted pdb of a ligand. Params @@ -76,9 +79,11 @@ def writeFormatPDB(fname,m,title="",nr=1,bStrict=False): title : str currently not used nr : int - currently not used + currently not used bStrict : bool limit atom names to 3 char (default False) + _conect_entries : list + List of conect entries Returns ------- None @@ -105,6 +110,9 @@ def writeFormatPDB(fname,m,title="",nr=1,bStrict=False): print(foo,file=fp) else: print(atom,file=fp) + if _conect_entries is not None: + for line in _conect_entries: + fp.write(f"{line}\n") fp.write('ENDMDL\n') fp.close() @@ -205,7 +213,7 @@ def assignFF(model, itp): ------ model : pmx Model model - itp : itp TopolBase + itp : itp TopolBase itp Returns @@ -229,7 +237,7 @@ def readPairsFile(fn): Params ------ fn : str - filename + filename Returns ------- @@ -302,7 +310,7 @@ def superimposeStructures( fname1, fname2, m3, m2, plst, logfile ): m3 : pmx Model model m1 that will get the coordinates of m2 m2 : pmx Model - model m2 that will be fit on m1 + model m2 that will be fit on m1 plst : array pairs list Returns @@ -476,7 +484,7 @@ def _count_ringNonRing( self, n1,n2, mol1,mol2 ): counter+=1 return(counter) - + #*************************************************# # MCS # #*************************************************# @@ -590,7 +598,7 @@ def mcs(self): n1_foo = [] n2_foo = [] for n1,n2 in list(zip(n1_list,n2_list)): - n1,n2 = self._checkTop(n1,n2) + n1,n2 = self._checkTop(n1,n2) n1_foo.append(n1) n2_foo.append(n2) n1_list = cp.copy(n1_foo) @@ -646,7 +654,7 @@ def mcs(self): n1,n2 = self._mapH( n1, n2 ) # one more final check for possible issues with the 1-2, 1-3 and 1-4 interactions - n1,n2 = self._checkTop(n1,n2) + n1,n2 = self._checkTop(n1,n2) # remove sigma hole virtual particles n1,n2 = self._removeSigmaHoles( n1,n2,self.sigmaHoleID1,self.sigmaHoleID2) @@ -710,7 +718,7 @@ def _mapH( self, nfoo, nbar ): nb1 = a1.GetNeighbors() nb2 = a2.GetNeighbors() - # only allow mapping hydrogens in those cases, + # only allow mapping hydrogens in those cases, # when n1 and n2 have an equal number of them bound nH1 = self._countHydrogens(nb1) nH2 = self._countHydrogens(nb2) @@ -813,18 +821,18 @@ def _checkTop(self, n1, n2 ): # 2) identify problematic mappings # 3) fix the problems: discard the atom with fewer mapped neighbours - ####### 1-2 ######### + ####### 1-2 ######### # 1a) 1-2 lists dict12_mol1 = self._getList12(self.mol1,n1) dict12_mol2 = self._getList12(self.mol2,n2) - # 2a) identify problems 1-2; and + # 2a) identify problems 1-2; and # 3a) fix 1-2 rem12_mol2_start,rem12_mol2_end = self._findProblemsExclusions(n1,n2,dict12_mol1,dict12_mol2,case='1-2') # output: indeces of mol2 n2,n1 = self._fixProblemsExclusions(self.mol2,self.mol1,n2,n1,rem12_mol2_start,rem12_mol2_end) rem12_mol1_start,rem12_mol1_end = self._findProblemsExclusions(n2,n1,dict12_mol2,dict12_mol1,case='1-2') # output: indeces of mol1 n1,n2 = self._fixProblemsExclusions(self.mol1,self.mol2,n1,n2,rem12_mol1_start,rem12_mol1_end) - ####### 1-3 ######### + ####### 1-3 ######### # 1b) 1-3 lists dict13_mol1 = self._getList13(self.mol1,n1) dict13_mol2 = self._getList13(self.mol2,n2) @@ -835,11 +843,11 @@ def _checkTop(self, n1, n2 ): rem13_mol1_start,rem13_mol1_end = self._findProblemsExclusions(n2,n1,dict13_mol2,dict13_mol1,case='1-3') # output: indeces of mol1 n1,n2 = self._fixProblemsExclusions(self.mol1,self.mol2,n1,n2,rem13_mol1_start,rem13_mol1_end) - ####### 1-4 ######### + ####### 1-4 ######### # 1b) 1-4 lists dict14_mol1 = self._getList14(self.mol1,n1) dict14_mol2 = self._getList14(self.mol2,n2) - # 2b) identify problems 1-4 and + # 2b) identify problems 1-4 and # 3b) fix 1-4 rem14_mol2_start,rem14_mol2_end = self._findProblemsExclusions(n1,n2,dict14_mol1,dict14_mol2,case='1-4') # output: indeces of mol2 n2,n1 = self._fixProblemsExclusions(self.mol2,self.mol1,n2,n1,rem14_mol2_start,rem14_mol2_end) @@ -985,7 +993,7 @@ def _fixProblemsExclusions(self,mol1,mol2,n1,n2,startList,endList): a,b = self._getAttr(n1,n2,iEnd,bar) if( (a!=None) and (b!=None) ): endNeighb = endNeighb+1 - elif( iEnd==bar ): # atom of interest + elif( iEnd==bar ): # atom of interest a,b = self._getAttr(n1,n2,iEnd,foo) if( (a!=None) and (b!=None) ): endNeighb = endNeighb+1 @@ -1048,7 +1056,7 @@ def _disconnectedRecursive(self,mol1,mol2,ind1,ind2): matchDict1 = {} matchDict2 = {} for id1,id2 in list(zip(ind1,ind2)): - # find id1 + # find id1 key1 = '' for i in range(0,len(n1_fragments)): if id1 in n1_fragments[i]: @@ -1710,7 +1718,7 @@ def _distance_based(self, mol1, mol2, id1=None, id2=None, calcOnly=False): pairs1.append(keep1) pairs2.append(keep2) return(pairs1,pairs2) - + # mcs for ind1 in id1: c1 = mol1.GetConformer() @@ -1834,7 +1842,7 @@ def _selectOneMCS(self,n1_list,n2_list,mol1,mol2): if bOK==True: n1_largestB.append(nfoo) n2_largestB.append(nbar) - + # of the largest ones select the minRMSD rmsdMin = 9999.999 for nfoo,nbar in list(zip(n1_largestB,n2_largestB)): @@ -1984,8 +1992,8 @@ def _removeH(self, mol1,mol2,nfoo,nbar): # when heavyAtom1 and heavyAtom2 have an equal number of them bound heavyAtom1 = a1.GetNeighbors()[0] heavyAtom2 = a2.GetNeighbors()[0] - nb1 = heavyAtom1.GetNeighbors() - nb2 = heavyAtom2.GetNeighbors() + nb1 = heavyAtom1.GetNeighbors() + nb2 = heavyAtom2.GetNeighbors() nH1 = self._countHydrogens(nb1) nH2 = self._countHydrogens(nb2) if nH1!=nH2: @@ -2061,7 +2069,7 @@ def __init__(self, **kwargs): self.d = 0.05 self.logfile = None self.bDist = True # this variable is always set to True - + for key, val in kwargs.items(): setattr(self,key,val) @@ -2069,7 +2077,7 @@ def __init__(self, **kwargs): def _makeHybridTop( self ): """The main function of this class. Generated hybrid topology - + Parameters ---------- None @@ -2144,7 +2152,7 @@ def _makeHybridTop( self ): ##################################### # make exclusions doLog(self.logfile,"Construct exclusions....") - self.newexclusions = [] + self.newexclusions = [] self.bHasExclusions = False self._make_exclusions() @@ -2818,7 +2826,7 @@ def _sum_charge_of_states( self, itp ): def _atoms_morphe( self, atoms ): for atom in atoms: - if atom.atomtypeB is not None and (atom.q!=atom.qB or atom.m != atom.mB): + if atom.atomtypeB is not None and (atom.q!=atom.qB or atom.m != atom.mB): return(True) return(False) @@ -3021,9 +3029,7 @@ def _write_FF_file(atoms,file): for atype in atoms.keys(): at = atoms[atype] fp.write(' %s %s %s %s %s %s\n' % (at.type,at.sigmaA,at.epsA,at.A,at.sigmaB,at.epsB) ) - + def _merge_FF_files( fnameOut, ffsIn=[] ): atoms = _get_FF_atoms( ffsIn ) _write_FF_file( atoms, fnameOut ) - - diff --git a/src/pmx/model.py b/src/pmx/model.py index dde068ab..16d97df9 100644 --- a/src/pmx/model.py +++ b/src/pmx/model.py @@ -362,8 +362,10 @@ def make_residues(self): r.chain = ch r.chain_id = ch.id - def __readPDB(self, fname=None, pdbline=None): + def __readPDB(self, fname=None, pdbline=None, bCONECT=False): """Reads a PDB file""" + if bCONECT: + self._pdb_conect = [] if pdbline: lines = pdbline.split('\n') else: @@ -374,6 +376,8 @@ def __readPDB(self, fname=None, pdbline=None): self.atoms.append(a) if line[:6] == 'CRYST1': self.box = _p.box_from_cryst1(line) + if bCONECT and line[:6] == "CONECT": + self._pdb_conect.append(line.strip()) self.make_chains() self.make_residues() self.unity = 'A' @@ -385,14 +389,14 @@ def __check_if_gap( self, atC, atN ): if atN.name != 'N': return(False) d = atC - atN - if d > 1.7: # bond + if d > 1.7: # bond return(True) return(False) - def __compareWithoutLastChar(self, str1, str2): + def __compareWithoutLastChar(self, str1, str2): if isinstance(str1,int) and isinstance(str2,int): # e.g. 52, 53 return(False) - + if isinstance(str1,int): # e.g. 52, 52A if str1==int(str2[0:-1]): # 52, 52A return(True) @@ -419,7 +423,7 @@ def _getChainIDs( self, lines ): if( a.chain_id not in usedChainIDs ): usedChainIDs.append( a.chain_id ) return( usedChainIDs ) - + # TODO: make readPDB and readPDBTER a single function. It seems like # readPDBTER is more general PDB reader? def __readPDBTER(self, fname=None, pdbline=None, bNoNewID=True, bPDBGAP=False, bPDBMASS=False): @@ -467,7 +471,7 @@ def __readPDBTER(self, fname=None, pdbline=None, bNoNewID=True, bPDBGAP=False, b bNewChain = True if (prevAtomName == 'HH33') and ((prevResName=='NME') or (prevResName=='NAC') or (prevResName=='CT3')): # NME cap bNewChain = True - # do not assign new chain IDs for waters, ions + # do not assign new chain IDs for waters, ions if (a.resname=='WAT') or (a.resname=='SOL') or (a.resname=='TIP3') or (a.resname=='HOH') \ or (a.resname=='NA') or (a.resname=='CL') \ or (a.resname=='NaJ') or (a.resname=='Na') or (a.resname=='Cl') or (a.resname=='K') or (a.resname=='KJ') \ @@ -632,7 +636,7 @@ def assign_moltype(self): else: self.moltype = 'unknown' - def read(self, filename, bPDBTER=False, bNoNewID=True, bPDBGAP=False, bPDBMASS=False): + def read(self, filename, bPDBTER=False, bNoNewID=True, bPDBGAP=False, bPDBMASS=False, bCONECT=False): """PDB/GRO file reader. Parameters @@ -647,6 +651,8 @@ def read(self, filename, bPDBTER=False, bNoNewID=True, bPDBGAP=False, bPDBMASS=F True. Default is True. bPDBGAP : bool whether search for gaps in the chain to assign new chain IDs. + bCONECT : bool + whether to read CONECT entries from PDB """ ext = filename.split('.')[-1] if ext == 'pdb': @@ -655,7 +661,7 @@ def read(self, filename, bPDBTER=False, bNoNewID=True, bPDBGAP=False, bPDBMASS=F pdbline=None, bNoNewID=bNoNewID, bPDBGAP=bPDBGAP, bPDBMASS=bPDBMASS) else: - return self.__readPDB(fname=filename) + return self.__readPDB(fname=filename, bCONECT=bCONECT) elif ext == 'gro': return self.__readGRO(filename) else: @@ -808,7 +814,7 @@ def fetch_residue(self, idx, chain=None): else: valid_resids.append(r.id) - if chain is not None: + if chain is not None: valid_chresids = [] for r in self.chdic[chain].residues: if isinstance(r.id,str): @@ -854,7 +860,7 @@ def fetch_residue(self, idx, chain=None): else: if r.id.replace(" ","")==idx: return r - + def fetch_residues(self, key, inv=False): """Gets residues using a list of residue names. @@ -989,7 +995,7 @@ def assign_masses_to_model(model, topology=None): topology : Topology Topology object of the same molecule. When no topology provided, standard library masses are used. ''' - + if topology!=None: for ma, ta in zip(model.atoms, topology.atoms): if ma.name != ta.name: diff --git a/src/pmx/scripts/atomMapping.py b/src/pmx/scripts/atomMapping.py index 7a9e567b..8b9f748f 100755 --- a/src/pmx/scripts/atomMapping.py +++ b/src/pmx/scripts/atomMapping.py @@ -182,9 +182,13 @@ def parse_options(): type=int, help='Maximum time (s) for an MCS search (default 10 s).', default=10) + parser.add_argument("--conect", + dest="conect", + help="Read CONECT entries from pdb files (default False)", + action="store_true") parser.set_defaults(alignment=True,mcs=True,H2H=True,H2Hpolar=False,H2Heavy=False, - RingsOnly=False,dMCS=False,swap=False,chirality=True) + RingsOnly=False,dMCS=False,swap=False,chirality=True, conect=False) args, unknown = parser.parse_known_args() check_unknown_cmd(unknown) @@ -219,14 +223,15 @@ def main(args): bRingsOnly = args.RingsOnly d = args.d timeout = args.timeout - bdMCS = args.dMCS + bdMCS = args.dMCS + bCONECT = args.conect ##################################### # log file logfile = open(args.log,'w') ##################################### - # identify the methods to use + # identify the methods to use bAlignment = args.alignment bMCS = args.mcs if bMCS==False and bAlignment==False: @@ -256,10 +261,10 @@ def main(args): doLog(logfile,'Input pdb2 not found. Exiting...',commandName='atomMapping') sys.exit(0) pid = os.getpid() - pdbName1,atomNameID1,sigmaHoleID1 = reformatPDB(args.i1,1,pid) - pdbName2,atomNameID2,sigmaHoleID2 = reformatPDB(args.i2,2,pid) - mol1 = Chem.MolFromPDBFile(pdbName1,removeHs=False,sanitize=False) - mol2 = Chem.MolFromPDBFile(pdbName2,removeHs=False,sanitize=False) + pdbName1,atomNameID1,sigmaHoleID1 = reformatPDB(args.i1,1,pid,bCONECT=bCONECT) + pdbName2,atomNameID2,sigmaHoleID2 = reformatPDB(args.i2,2,pid,bCONECT=bCONECT) + mol1 = Chem.MolFromPDBFile(pdbName1,removeHs=False,sanitize=False,proximityBonding=(not bCONECT)) + mol2 = Chem.MolFromPDBFile(pdbName2,removeHs=False,sanitize=False,proximityBonding=(not bCONECT)) try: Chem.SanitizeMol(mol1) except: @@ -293,7 +298,7 @@ def main(args): bRingsOnly=True else: doLog(logfile,"-RingsOnly flag is unset, because one (or both) molecule has no rings",commandName='atomMapping') - + n1 = [] n2 = [] @@ -358,7 +363,7 @@ def main(args): ligMap.bCarbonize = False ligMap.bRingsOnly = True n1C,n2C = ligMap.mcs() - n1mcs,n2mcs = ligMap._compare_mappings_by_size( mol1,mol2, n1mcs, n2mcs, n1C, n2C ) + n1mcs,n2mcs = ligMap._compare_mappings_by_size( mol1,mol2, n1mcs, n2mcs, n1C, n2C ) doLog(logfile,"MCS: using variant with the mapping size of {0}".format(len(n1mcs)),commandName='atomMapping') @@ -423,7 +428,7 @@ def main(args): # swap TODO - + ######################### # check if( len(n1) != len(n2) ): @@ -481,4 +486,3 @@ def main(args): if __name__ == '__main__': entry_point() -