Professional Documents
Culture Documents
Paul Pro
Paul Pro
Submitted by:
Mohammad Bataineh
Benjamin Alikali
Nadia Bourini
1. Find the distribution of phi and psi angles for amino acids in helices and
beta sheets (similar to a Ramachandran plot).
import math
import requests
import os
class PDB:
    """Reader for one chain of a PDB-format structure file.

    Only ``get_seq_numbers`` is visible in this part of the file; the
    constructor that sets ``self.file`` (path of the PDB file) and
    ``self.chain`` (one-letter chain identifier) is not shown here.
    """

    def get_seq_numbers(self):
        """Return the distinct residue numbers of ``self.chain`` in file order.

        Every ``ATOM`` record whose chain column (index 21) equals
        ``self.chain`` contributes the stripped text of ``line[23:26]``;
        each value is reported once, at its first occurrence.

        Returns:
            list[str]: residue numbers as strings, e.g. ``['1', '2', ...]``.

        Raises:
            OSError: if ``self.file`` cannot be opened or read.
        """
        # The context manager closes the handle even if reading fails.
        # self.pdb is still assigned (and left closed afterwards) because the
        # original code exposed it as an attribute.
        with open(self.file, 'r') as handle:
            self.pdb = handle
            lines = handle.readlines()

        # NOTE(review): the PDB format stores resSeq in columns 23-26, i.e.
        # line[22:26]; the 3-character slice below drops the thousands digit
        # for residues >= 1000. It is kept because other methods of this
        # class (not visible here) may look residues up by the same text.
        seen = set()  # O(1) membership test instead of scanning the list
        seqNums = []
        for line in lines:
            if line[:4] == 'ATOM' and line[21:22] == self.chain:
                number = line[23:26].strip()
                if number not in seen:
                    seen.add(number)
                    seqNums.append(number)
        return seqNums
# NOTE(review): this is a fragment of the dihedral-angle method (presumably the
# Q1 method called by the script further down). Its `def` line, the phi branch
# and the opening `try:` are not visible here, and the indentation was lost.
#
# psi branch: angle between the plane through N, CA, C and the plane through
# CA, C, Npsi. N, CA, C and Npsi are indexed as [x, y, z]. Presumably Npsi is
# the backbone N of the following residue -- confirm against the missing part.
if angletype == 'psi':
# Normal vector (A1, B1, C1) of the first plane, points 1..3 = N, CA, C.
# A1 = y1 ( z2 - z3 ) + y2 ( z3 - z1 ) + y3 ( z1 - z2 )
A1 = N[1]*(CA[2]-C[2]) + CA[1]*( C[2]-N[2] ) + C[1]*(N[2]- CA[2])
# B1 = z1 ( x2 - x3 ) + z2 ( x3 - x1 ) + z3 ( x1 - x2 )
B1 = N[2]*(CA[0]-C[0]) + CA[2]*( C[0]-N[0] ) + C[2]*(N[0]- CA[0])
# C1= x1 ( y2 - y3 ) + x2 ( y3 - y1 ) + x3 ( y1 - y2 )
C1 = N[0]*(CA[1]-C[1]) + CA[0]*( C[1]-N[1] ) + C[0]*(N[1]- CA[1])
# Normal vector (A2, B2, C2) of the second plane, points 2..4 = CA, C, Npsi.
# A2 = y2 ( z3 - z4 ) + y3 ( z4 - z2 ) + y4 ( z2 - z3 )
A2 = (CA[1]*(C[2]-Npsi[2])) + (C[1]*( Npsi[2]-CA[2] )) + (Npsi[1]*(CA[2]- C[2]))
# B2 = z2 ( x3 - x4 ) + z3 ( x4 - x2 ) + z4 ( x2 - x3 )
B2 = (CA[2]*(C[0]-Npsi[0])) + (C[2]*( Npsi[0]-CA[0] )) + (Npsi[2]*(CA[0]- C[0]))
# C2 = x2 ( y3 - y4 ) + x3 ( y4 - y2 ) + x4 ( y2 - y3 )
C2 = (CA[0]*(C[1]-Npsi[1])) + (C[0]*( Npsi[1]-CA[1] )) + (Npsi[0]*(CA[1]- C[1]))
# Despite its name, `angle` holds the COSINE of the angle between the two plane
# normals (their dot product divided by the product of their lengths).
# NOTE(review): the division raises ZeroDivisionError if either normal has zero
# length (three collinear atoms).
angle = (A1*A2 + B1*B2 + C1*C2)/ (math.sqrt(A1*A1 + B1*B1 + C1*C1) *
math.sqrt(A2*A2 + B2*B2 + C2*C2))
# Bond vectors along the backbone: V1 = N->CA, V2 = CA->C, V3 = C->Npsi.
# vNormal = the cross product of V1 and V2.
V1 = [CA[0]-N[0], CA[1]-N[1], CA[2]-N[2]]
V2 = [C[0]-CA[0], C[1]-CA[1], C[2]-CA[2]]
V3 = [Npsi[0]-C[0], Npsi[1]-C[1], Npsi[2]-C[2]]
vNormal = []
vNormal.append( (V1[1]*V2[2] - V1[2]*V2[1]) )
vNormal.append( -(V1[0]*V2[2] - V1[2]*V2[0]) )
vNormal.append( (V1[0]*V2[1] - V1[1]*V2[0]) )
# The sign of V3 . (V1 x V2) decides the sign of the returned angle below.
dotProduct = (V3[0]*vNormal[0] + V3[1]*vNormal[1] + V3[2]*vNormal[2])
# Final angle in degrees, rounded to 4 decimals: (acos(angle) * 180) / PI.
# NOTE(review): floating-point rounding can push the cosine slightly outside
# [-1, 1], in which case math.acos raises ValueError.
finalangle = round(((math.acos(angle)*180) / math.pi),4)
# Negative angle when the dot product is negative, positive otherwise.
if(dotProduct < 0):
return -1*finalangle
else:
return finalangle
# NOTE(review): a bare `except` catches every exception, including programming
# errors; after printing, no value is returned on this path as far as visible
# here (so the caller would receive None).
except:
print("Give amino acid or chain doesn't exist")
# Collect the phi/psi angle of every residue of the chain and draw them as a
# Ramachandran-style scatter plot (phi on the x axis, psi on the y axis).
PDBf = PDB('3UTSA')
seqNums = PDBf.get_seq_numbers()  # read the file once and reuse the result
print(seqNums)
phis = []
psis = []
# The per-residue loop was missing: `num` was never bound (NameError) and the
# two lists were never filled, so nothing could be plotted.
for num in seqNums:
    phiAngle = PDBf.Q1(num+".phi")
    psiAngle = PDBf.Q1(num+".psi")
    # NOTE(review): Q1 appears to print a message and return None when an
    # angle cannot be computed; such residues are skipped as a pair so that
    # phis and psis stay aligned. Confirm against the full Q1 method.
    if phiAngle is None or psiAngle is None:
        continue
    phis.append(phiAngle)
    psis.append(psiAngle)
print (phis)
print (psis)
# NOTE(review): `plt` is presumably matplotlib.pyplot, imported in a part of
# the file that is not visible here.
plt.scatter(phis, psis)
plt.show()
2. Find the distribution of phi and psi for each individual amino acid.
Highlight the distribution when the amino acid is within a helix or a sheet.
This code downloads the required files from the PDB (Protein Data Bank) and keeps them
on our local machine because of the large size of that database (1.7 GB); it should be
run only once.
import urllib.request
import requests
Results sample
4. Find the distribution of the distance between any two consecutive Ca atoms.
import numpy as np
import pandas as pd
# Read the whole structure file into memory once; `pdb` (a list of text lines)
# is the module-level input used by cord(). The context manager guarantees the
# handle is closed even if reading fails.
with open('3UTS.pdb', 'r') as file:
    pdb = file.readlines()
def cord(chain_id=None, lines=None):
    """Print and return the distances between consecutive CA atoms of a chain.

    The CA atoms are taken in file order from the ``ATOM`` records of the
    selected chain; the distance is the Euclidean distance between each CA and
    the next one in that list.

    Args:
        chain_id: one-letter chain identifier. Defaults to the module-level
            ``chain`` variable, as in the original implementation.
        lines: iterable of PDB-format text lines. Defaults to the
            module-level ``pdb`` list.

    Returns:
        pandas.DataFrame with a single ``distance`` column holding one row per
        consecutive CA pair (empty when fewer than two CA atoms are found).
        The same table, preceded by the number of CA atoms, is printed.

    Raises:
        ValueError: if a coordinate field of a selected record is not numeric.
    """
    if chain_id is None:
        chain_id = chain
    if lines is None:
        lines = pdb

    ca_coords = []
    for line in lines:
        if line[:4] != "ATOM" or line[13:15] != "CA" or line[21:22] != chain_id:
            continue
        # PDB coordinates are fixed-width fields: x, y, z occupy columns
        # 31-38, 39-46 and 47-54. Slicing by column (instead of splitting
        # line[32:54] on whitespace) keeps the leading characters of x and
        # still works when adjacent fields touch, e.g. "-100.000-100.000".
        ca_coords.append(
            [float(line[30:38]), float(line[38:46]), float(line[46:54])]
        )
    print(len(ca_coords))

    # NOTE(review): neighbours are consecutive in file order; chain breaks or
    # alternate-location duplicates are not detected.
    points = np.array(ca_coords, dtype=float).reshape(-1, 3)
    deltas = points[1:] - points[:-1]
    final_dist = np.sqrt(np.sum(deltas ** 2, axis=1)).tolist()

    df = pd.DataFrame(final_dist, columns=['distance'])
    print(df)
    return df
# Run the consecutive-CA distance analysis: prints the number of CA atoms found
# and the table of distances.
cord()