Paul Pro

Comp 6100 project Report
Submitted by:
Mohammad Bataineh
Benjamin Alikali
Nadia Bourini
1. Find the distribution of phi and psi angles for amino acids in helices and
beta sheets (something similar to Ramachandran plot).
import math
import requests
import os
class PDB:
    """Read one chain of a PDB entry and compute backbone phi/psi angles.

    The constructor argument is the four-character PDB ID immediately
    followed by the chain ID (for example ``'3UTSA'``).  Coordinates are read
    from a local ``<ID>.txt`` file, which is downloaded from RCSB the first
    time it is needed.
    """

    def __init__(self, pdb):
        """Record the entry/chain and make sure the local coordinate file exists.

        pdb -- PDB ID (first four characters) followed by the chain ID.

        Raises whatever ``requests`` raises when the download fails, so that
        an error page is never cached as if it were the protein file.
        """
        self.protein = pdb[:4]
        self.file = self.protein + '.txt'
        # Everything after the four-character ID is the chain identifier.
        self.chain = pdb[4:].strip()
        # Only go online when there is no local copy yet.
        if not os.path.isfile(self.file):
            url = 'https://files.rcsb.org/view/' + self.protein + '.pdb'
            response = requests.get(url.rstrip())
            response.raise_for_status()
            # Write the decoded text; str(bytes) would store a b'...' repr.
            with open(self.file, 'w') as handle:
                handle.write(response.text)

    def _chain_atom_lines(self):
        """Yield the ATOM records of ``self.file`` that belong to ``self.chain``."""
        with open(self.file, 'r') as handle:
            for line in handle:
                # Column 22 (index 21) of an ATOM record is the chain ID.
                if line[:4] == 'ATOM' and line[21:22] == self.chain:
                    yield line

    def get_seq_numbers(self):
        """Return the residue sequence numbers of the chain, as strings.

        Numbers appear once each, in the order they first occur in the file.
        """
        seqNums = []
        seen = set()
        for line in self._chain_atom_lines():
            # Residue sequence number occupies columns 23-26 (index 22:26).
            number = line[22:26].strip()
            if number not in seen:
                seen.add(number)
                seqNums.append(number)
        return seqNums

    def _backbone_coords(self):
        """Map (residue number, atom name) to (x, y, z) for N, CA and C atoms."""
        coords = {}
        for line in self._chain_atom_lines():
            name = line[12:16].strip()
            if name not in ('N', 'CA', 'C'):
                continue
            try:
                key = (int(line[22:26]), name)
                xyz = (float(line[30:38]), float(line[38:46]), float(line[46:54]))
            except ValueError:
                # Malformed record: ignore it rather than abort the whole scan.
                continue
            # Keep the first record seen for an atom (the original code also
            # used only the first coordinates it collected).
            coords.setdefault(key, xyz)
        return coords

    @staticmethod
    def _cross(u, v):
        """Return the cross product of two 3-vectors."""
        return (u[1] * v[2] - u[2] * v[1],
                u[2] * v[0] - u[0] * v[2],
                u[0] * v[1] - u[1] * v[0])

    @staticmethod
    def _dot(u, v):
        """Return the dot product of two 3-vectors."""
        return u[0] * v[0] + u[1] * v[1] + u[2] * v[2]

    @staticmethod
    def _dihedral(p0, p1, p2, p3):
        """Return the signed dihedral angle p0-p1-p2-p3 in degrees.

        Raises ValueError when three consecutive points are collinear, because
        the angle between the two planes is then undefined.
        """
        b1 = tuple(q - p for p, q in zip(p0, p1))
        b2 = tuple(q - p for p, q in zip(p1, p2))
        b3 = tuple(q - p for p, q in zip(p2, p3))
        n1 = PDB._cross(b1, b2)  # normal of the plane p0, p1, p2
        n2 = PDB._cross(b2, b3)  # normal of the plane p1, p2, p3
        if not any(n1) or not any(n2):
            raise ValueError('dihedral is undefined for collinear atoms')
        # atan2 yields the sign directly and cannot leave its domain the way
        # acos of a rounded cosine can.
        x = PDB._dot(n1, n2)
        y = math.sqrt(PDB._dot(b2, b2)) * PDB._dot(b1, n2)
        return math.degrees(math.atan2(y, x))

    def Q1(self, aa):
        """Return the phi or psi angle of one residue, in degrees.

        aa -- ``'<residue number>.<angle type>'`` where the angle type is
              ``'phi'`` (C of previous residue, N, CA, C) or ``'psi'``
              (N, CA, C, N of next residue).

        Returns the angle rounded to four decimals.  Returns None after
        printing a message when a required atom is missing, and returns None
        silently for any other angle type.  A malformed ``aa`` raises
        ValueError.
        """
        aaNumber, angletype = aa.split('.')
        residue = int(aaNumber)
        if angletype == 'phi':
            keys = [(residue - 1, 'C'), (residue, 'N'),
                    (residue, 'CA'), (residue, 'C')]
        elif angletype == 'psi':
            keys = [(residue, 'N'), (residue, 'CA'),
                    (residue, 'C'), (residue + 1, 'N')]
        else:
            return None
        coords = self._backbone_coords()
        try:
            points = [coords[key] for key in keys]
            return round(self._dihedral(*points), 4)
        except (KeyError, ValueError):
            # Missing neighbour (chain ends, gaps) or degenerate geometry.
            print("Give amino acid or chain doesn't exist")
            return None
# Driver: compute phi/psi for every residue of chain A of entry 3UTS and
# show them as a scatter plot.
from pandas import DataFrame

import matplotlib.pyplot as plt

PDBf = PDB('3UTSA')
# Read the residue numbers once and reuse them for printing and iteration.
seqNums = PDBf.get_seq_numbers()
print(seqNums)
phis = []
psis = []
for num in seqNums:
    phiAngle = PDBf.Q1(num + ".phi")
    psiAngle = PDBf.Q1(num + ".psi")
    # Q1 returns None when a neighbouring atom is missing (for example at the
    # chain ends).  Keep a residue only when both angles exist so the two
    # lists stay the same length and contain plottable numbers only.
    if phiAngle is not None and psiAngle is not None:
        print ("phiAngle = ", phiAngle)
        print ("psiAngle = ", psiAngle)
        phis.append(phiAngle)
        psis.append(psiAngle)
print (phis)
print (psis)
plt.scatter(phis, psis)
plt.show()
2. Find the distribution of phi and psi for each individual amino acid.
Highlight the distribution when the amino acid is within a helix or a sheet.
Paul did not do anything.

3. Analyze the length of helices (in terms of number of AAs). Is there any
relation between the type of a helix (alpha, 3-10,…,etc.) and its length?
This code downloads the desired files from the PDB and keeps them on our local
machine. Because of the large size of that database (1.7 GB), this code should be run
only once.
import urllib.request
import requests
# Download the .pdb file of every entry named in the chain list.
# NOTE(review): the list is opened as plain text although its name ends in
# ".gz" - confirm the file on disk is really uncompressed.
import os

with open("cullpdb_pc20_res1.6_R0.25_d200326_chains3655.gz", 'r') as file:
    next(file)  # skip the first line of the list (presumably a header)
    protein = file.readlines()

for i, line in enumerate(protein, 1):
    entry = line[0:4]
    # Blank or truncated lines carry no four-character ID to download.
    if len(entry.strip()) < 4:
        continue
    target = entry + '.pdb'
    # Skip files fetched by an earlier run so an interrupted download can be
    # resumed without starting over.
    if not os.path.exists(target):
        File = 'https://files.rcsb.org/view/' + entry.upper() + '.pdb'
        urllib.request.urlretrieve(File, target)
    print(i)
This code is used to analyze and calculate different helix types
# Count the HELIX records of every listed chain per helix class and report
# the average helix length of each class that occurs.

# Helix class number -> description printed in the report.
HELIX_TYPES = {
    1: 'Right-handed alpha (default)',
    2: 'Right-handed omega',
    3: 'Right-handed pi',
    4: 'Right-handed gamma',
    5: 'Right-handed 3 - 10',
    6: 'Left-handed alpha',
    7: 'Left-handed omega',
    8: 'Left-handed gamma',
    9: '2 - 7 ribbon/helix',
    10: 'Polyproline',
}

with open("cullpdb_pc20_res1.6_R0.25_d200326_chains3655.gz", 'r') as file:
    next(file)  # skip the first line of the list (presumably a header)
    protein = file.readlines()

bank = []   # local file name of every listed entry
chain = []  # chain ID that belongs to the entry at the same index
for line in protein:
    bank.append(line[0:4] + '.pdb')
    chain.append(line[4:7].strip())

# Index = helix class (1..10); index 0 is unused.
count = [0] * 11
total_length = [0] * 11  # named so the builtin sum() is not shadowed

for path, chain_id in zip(bank, chain):
    with open(path, 'r') as file:
        for line in file:
            if line[0:5] != "HELIX":
                continue
            # Both the start chain (column 20) and the end chain (column 32)
            # must be the requested chain.
            if line[19:20] != chain_id or line[31:32] != chain_id:
                continue
            try:
                # Helix class is the two-character field in columns 39-40,
                # helix length the field in columns 72-76.
                helix_class = int(line[38:40])
                length = int(line[71:76])
            except ValueError:
                # Blank or malformed field: the record cannot be tallied.
                continue
            if helix_class in HELIX_TYPES:
                count[helix_class] += 1
                total_length[helix_class] += length

for i in range(1, len(count)):
    if count[i] != 0:
        Type = HELIX_TYPES[i]
        print("We found", count[i], "helix of",Type,'that has class number',i,
              '\nand the average length for this type was:', total_length[i]/count[i],'\n')
print('\n\nNo other helix types have been found')
Results sample
4. Find the distribution of the distance between any two consecutive Ca atoms.
import numpy as np
import pandas as pd
# Read every line of the local 3UTS.pdb file into the module-level list `pdb`;
# the handle is closed when the with-block ends.
with open('3UTS.pdb', 'r') as file:
    pdb = file.readlines()
def cord(chain=None, lines=None):
    """Print and return the distances between consecutive CA atoms of a chain.

    chain -- chain ID to analyse, used exactly as given.  When omitted the
             user is prompted and the answer is upper-cased.
    lines -- PDB file lines to scan.  When omitted the module-level ``pdb``
             list is used.

    Prints the number of CA atoms found and the resulting table, then returns
    the table: a DataFrame with one ``distance`` column holding the distance
    between each CA atom and the next one in file order.
    """
    if chain is None:
        chain = input('please input your chain:').upper()
    if lines is None:
        lines = pdb

    # Slice the fixed-width coordinate columns (31-38, 39-46, 47-54) instead
    # of splitting on whitespace: wide values such as -12.345 fill the field
    # from its first columns, and adjacent fields may touch with no space.
    xyz_list = [
        (float(line[30:38]), float(line[38:46]), float(line[46:54]))
        for line in lines
        if line[:4] == "ATOM" and line[12:16] == " CA " and line[21:22] == chain
    ]
    print(len(xyz_list))

    # reshape keeps a (n, 3) layout even when no CA atom was found.
    points = np.array(xyz_list, dtype=float).reshape(-1, 3)
    steps = points[1:] - points[:-1]
    final_dist = np.sqrt((steps ** 2).sum(axis=1))

    df = pd.DataFrame(final_dist, columns=['distance'])
    print(df)
    return df
cord()

Paul Pro

Uploaded by

Document Information

Original Description:

Original Title

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

Paul Pro

Uploaded by

Copyright:

Available Formats

Comp 6100 project Report

def init(self, pdb):

def Q1(self, aa):

for num in seqNums:

if phiAngle is not None:

from pandas import DataFrame

Paul did not do anything.

file = open("cullpdb_pc20_res1.6_R0.25_d200326_chains3655.gz", 'r')

chain = input('please input your chain:').upper()

You might also like