-
Notifications
You must be signed in to change notification settings - Fork 0
/
PDB.py
158 lines (139 loc) · 4.98 KB
/
PDB.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
import os
import amino_dict
import copy
from Bio import PDB
from Bio.PDB import PDBList, PDBParser
from collections import Counter
from itertools import groupby
from Bio.PDB import PDBIO, PDBParser
from Bio.PDB.PDBIO import Select
class PDB(object):
    """Helper for downloading, parsing and slicing a single PDB entry.

    Every method works on a file named ``pdb<pdbID>.ent``.

    NOTE(review): defining this class rebinds the module-level name ``PDB``
    that ``from Bio import PDB`` introduced, so the Biopython module is no
    longer reachable under that name after this point in the file.
    """

    def __init__(self, pdbID, isobsolete, pdir, file_format, is_overwrite, ion_abbr):
        """Store the settings used by the other methods.

        Args:
            pdbID: Identifier of the entry; used to build ``pdb<pdbID>.ent``.
            isobsolete: Forwarded to ``PDBList.retrieve_pdb_file`` by GetPDB.
            pdir: Directory holding the entry file; also forwarded to
                ``PDBList.retrieve_pdb_file`` by GetPDB.
            file_format: Forwarded to ``PDBList.retrieve_pdb_file`` by GetPDB.
            is_overwrite: Forwarded to ``PDBList.retrieve_pdb_file`` by GetPDB.
            ion_abbr: Stored on the instance. The methods of this class take
                their own ``ion_abbr`` argument and do not read this value.
        """
        self.pdbID = pdbID
        self.isobsolete = isobsolete
        self.pdir = pdir
        self.file_format = file_format
        self.is_overwrite = is_overwrite
        self.ion_abbr = ion_abbr

    def _entry_name(self):
        """Return the file name ``pdb<pdbID>.ent`` for this entry."""
        return 'pdb' + self.pdbID + '.ent'

    def GetPDB(self):
        """ Download the pdb file from the server.
        Format will be pdb1crn.ent"""
        pdbl = PDBList()
        pdbl.retrieve_pdb_file(self.pdbID, self.isobsolete, self.pdir,
                               self.file_format, self.is_overwrite)

    def PDBParser(self):
        """ Returns a structure object for a given pdb id.

        Inside this method the name ``PDBParser`` resolves to the module-level
        Biopython class, not to this method.

        NOTE(review): the file is looked up relative to the current working
        directory, not in ``self.pdir`` as the other methods do - confirm
        whether that is intended.
        """
        parser = PDBParser()
        structure = parser.get_structure(self.pdbID, self._entry_name())
        return structure

    def GetChainNames(self, ion_abbr):
        """ Returns names of chains that contain the required HETATM as a list.

        Args:
            ion_abbr: Text searched for in characters 12-14 (slice ``12:15``)
                of every line that starts with ``HETATM``.

        Returns:
            Sorted list of the distinct characters found at index 21 of the
            matching lines.
        """
        # os.path.join works whether or not pdir ends with a path separator.
        path = os.path.join(self.pdir, self._entry_name())
        chains = set()
        # The context manager guarantees the handle is closed.
        with open(path, 'r') as handle:
            for raw in handle:
                item = raw.strip()
                if item.startswith('HETATM') and ion_abbr in item[12:15]:
                    chains.add(item[21])
        return sorted(chains)

    def GetUniqueChains(self, pdir, pdbID, chains_to_check):
        """ Returns a List Unique Chains based on the C-alpha atom information.
        Structure based, not sequence based

        Args:
            pdir: Directory containing ``pdb<self.pdbID>.ent``.
            pdbID: Unused; ``self.pdbID`` is used instead.
            chains_to_check: Iterable of chain ids looked up in model 0.

        Returns:
            A 2-tuple ``(representatives, groups)``. ``groups`` is the sorted
            list of label lists whose chains produced the same residue string;
            ``representatives`` is the sorted list of the first label of each
            group. Labels have the form ``<entry_name[3:7]>_<chain id>``.
        """
        entry_name = self._entry_name()
        parser = PDBParser(PERMISSIVE=True, QUIET=True)
        structure = parser.get_structure(self.pdbID, os.path.join(pdir, entry_name))
        model = structure[0]

        # label -> string built from residues that own a 'CA' atom.
        chain_seqs = {}
        for chain_id in chains_to_check:
            label = entry_name[3:7] + '_' + chain_id
            # presumably maps residue names to one-letter codes; verify in amino_dict
            residues = [
                amino_dict.replace_all(residue.resname, amino_dict.one_letter)
                for residue in model[chain_id]
                for atom in residue
                if atom.name == 'CA'
            ]
            sequence = "".join(x for x in residues if x in amino_dict.amino)
            # setdefault keeps the first entry if a chain id is listed twice.
            chain_seqs.setdefault(label, sequence)

        # Invert the mapping: identical strings collect their labels together.
        groups = {}
        for label, sequence in chain_seqs.items():
            groups.setdefault(sequence, []).append(label)

        list_of_matches = list(groups.values())
        req_matches = [members[0] for members in list_of_matches]
        return sorted(req_matches), sorted(list_of_matches)

    def DescribePDB(self, pdbID):
        """ Presents a detailed description of a given PDB file.

        Prints every model, chain, residue and atom (with coordinates) of the
        entry to standard output.

        Args:
            pdbID: Unused; ``self.pdbID`` is used instead.

        NOTE(review): the file is looked up relative to the current working
        directory, not in ``self.pdir`` - confirm whether that is intended.
        """
        entry = self._entry_name()
        parser = PDBParser(PERMISSIVE=True, QUIET=True)
        structure = parser.get_structure(entry[3:7], entry)
        for model in structure.get_models():
            print("model", model, "has {} chains".format(len(model)))
            for chain in model:
                print(" - chain ", chain, "has {} residues".format(len(chain)))
                for residue in chain:
                    print("- residue", residue.get_resname(), "has {} atoms".format(len(residue)))
                    for atom in residue:
                        x, y, z = atom.get_coord()
                        print("- atom:", atom.get_name(), "x: {} y:{} z:{}".format(x, y, z))

    def ChainExtractor(self, pdbID, pdir, reqChain_id, outLoc):
        """ Extracts a specific chain from a given PDB file.

        Args:
            pdbID: Identifier used to build the input name ``pdb<pdbID>.ent``.
            pdir: Unused; the file is read from ``self.pdir``.
            reqChain_id: Id of the chain to write out.
            outLoc: Destination passed to ``PDBIO.save``.

        Only model 0 is searched. Nothing is written when no chain matches.
        """
        io = PDBIO()
        parser = PDBParser(PERMISSIVE=True, QUIET=True)
        # os.path.join works whether or not self.pdir ends with a separator.
        structure = parser.get_structure(
            pdbID, os.path.join(self.pdir, 'pdb' + pdbID + '.ent'))
        model = structure[0]
        for chain in model.get_chains():
            if chain.id == reqChain_id:
                io.set_structure(chain)
                io.save(outLoc)
def MultiHETATM(pdbID, chainID, pdir, outLoc, abbr):
    """ Creates unique PDB files for the specified HETATM based on the
    number of HETATM entries in the specific chain.
    Specific to the Ion Environments project.

    One output file is written per matching HETATM line. Each file holds all
    ATOM lines (with the chain character at index 21 forced to 'A'), a 'TER'
    line, the HETATM rewritten as an ATOM line on chain 'B', another 'TER'
    line and a final 'END' line.

    Args:
        pdbID: Its first six characters prefix each output file name
            (``<pdbID[0:6]>_<n>.pdb`` with n starting at 1).
        chainID: Unused.
        pdir: Path of the input file itself; it is opened directly.
        outLoc: String prepended verbatim to each output file name, so a
            directory must include its trailing separator.
        abbr: Text searched for in characters 12-14 (slice ``12:15``) of
            every line that starts with ``HETATM``.
    """
    req_chain_id = 'A'  # Converts all non A chains to A for AAAX.

    # The context manager guarantees the input handle is closed.
    with open(pdir, 'r') as datafile:
        records = [line.strip() for line in datafile]

    atom_lines = []
    het_lines = []
    for record in records:
        if record.startswith('ATOM'):
            atom_lines.append(record)
        elif record.startswith('HETATM') and abbr in record[12:15]:
            het_lines.append(record)

    # Overwrite the chain character (index 21) where it is not already 'A'.
    relabelled = [
        rec if rec[21] == req_chain_id else rec[0:21] + req_chain_id + rec[22:]
        for rec in atom_lines
    ]

    # NOTE(review): the spacing inside these literals fixes the column layout
    # of the generated line; verify it against the PDB fixed-column ATOM
    # record format expected by downstream tools.
    pseudo_atoms = [
        'ATOM '+ het[7:11] + ' CA GLY ' + 'B' + ' 1 ' + het[32:]
        for het in het_lines
    ]

    for index, pseudo in enumerate(pseudo_atoms, start=1):
        newfname = pdbID[0:6] + '_' + str(index) + '.pdb'
        # Concatenation builds a fresh list, so relabelled is never mutated.
        current = relabelled + ['TER', pseudo, 'TER', 'END']
        with open(outLoc + newfname, 'w') as FH:
            FH.writelines(line + '\n' for line in current)