Professional Documents
Culture Documents
PDF 5
PDF 5
import pandas as pd
import numpy as np
#import cairosvg
import pickle
import os.path as op
import math
import editdistance
def readNPModel(filename='publicnp.model.gz'):
    """Load the pickled fragment-score model stored in a gzip file.

    Args:
        filename: Path to a gzip-compressed pickle file.

    Returns:
        The unpickled object, exactly as it was stored in the file.

    Raises:
        OSError: If the file cannot be opened or is not valid gzip data.

    Note:
        ``pickle.load`` can execute arbitrary code while deserialising, so
        only point this at model files that come from a trusted source.
    """
    # Imported locally so the function does not depend on module-level
    # imports of gzip/sys being present.
    import gzip
    import sys

    # The context manager closes the handle even if unpickling raises.
    with gzip.open(filename) as handle:
        fscore = pickle.load(handle)
    sys.stderr.write("model in\n")
    return fscore
def tanimoto_1d(fps):
    """Return the pairwise Tanimoto distances between fingerprints, flattened.

    Each fingerprint ``fps[i]`` (for ``i >= 1``) is compared against all the
    fingerprints before it (``fps[:i]``), so every unordered pair is visited
    once and the results are concatenated into a single list.

    Args:
        fps: Sequence of fingerprints accepted by
            ``DataStructs.BulkTanimotoSimilarity``.

    Returns:
        A flat list with one entry per pair; empty when ``fps`` has fewer
        than two elements.
    """
    ds = []
    # NOTE(review): the loop header was absent from the file (``i`` was never
    # bound). Starting at 1 is the only range for which ``fps[:i]`` is
    # non-empty -- confirm against the upstream source.
    for i in range(1, len(fps)):
        ds.extend(DataStructs.BulkTanimotoSimilarity(
            fps[i], fps[:i], returnDistance=True))
    return ds
afile.write(svg)
a_str = svg.encode('utf-8')
return
df = pd.DataFrame({'smiles': smiles})
if labels is None:
svg = Draw.MolsToGridImage(
return
def mol_diversity(smiles):
# Returns the mean (np.mean) of the values produced by tanimoto_1d() over the
# fingerprints built from `smiles`.
# NOTE(review): indentation is missing and the fingerprint expression below is
# cut off after its opening bracket, so the fingerprint parameters and the
# source of the molecules cannot be determined from this text -- restore from
# the original file before use.
df = pd.DataFrame({'smiles': smiles})
fps = [Chem.GetMorganFingerprintAsBitVect(
# flat list of pairwise values, one per fingerprint pair
dist_1d = tanimoto_1d(fps)
mean_dist = np.mean(dist_1d)
return mean_dist
return norm_dist
def SA_scores(smiles):
# Batch entry point: loads the fragment scores, wraps `smiles` in a
# one-column DataFrame and returns `scores`.
# NOTE(review): `scores` is never assigned in the visible lines; the step
# that computes it from `df` and `fscores` appears to be missing (and the
# indentation has been lost) -- restore from the original file.
fscores = readFragmentScores(name='fpscores')
df = pd.DataFrame({'smiles': smiles})
return scores
def readFragmentScores(name='fpscores'):
# Rebuilds the module-level `_fscores` as a dict whose values are
# float(record[0]) for each record previously held in `_fscores`.
# NOTE(review): indentation is missing and lines appear to have been dropped:
# `_fscores` is iterated before any visible assignment, `j` is never bound
# (an inner loop over the record's remaining elements is presumably missing),
# and nothing is returned even though SA_scores() assigns the result of this
# call. `gzip` is imported but unused in the visible lines, which suggests the
# file-loading step was lost too -- confirm against the original file.
import gzip
global _fscores
if name == "fpscores":
outDict = {}
for i in _fscores:
# the record's first element is the score stored under key i[j]
outDict[i[j]] = float(i[0])
_fscores = outDict
nSpiro = rdMolDescriptors.CalcNumSpiroAtoms(mol)
nBridgehead = rdMolDescriptors.CalcNumBridgeheadAtoms(mol)
def SA_score(m,fscores=None):
# Computes a score for molecule `m` from a fragment term (score1) and a set
# of structural penalties, and returns `sascore`.
# NOTE(review): presumably a synthetic-accessibility score (from the name) --
# confirm. Indentation is missing and many lines appear to have been dropped;
# the notes below mark every gap that is visible in this text.
if fscores is None:
# NOTE(review): the return value is discarded, so `fscores` stays None here.
readFragmentScores()
# fragment score
# NOTE(review): the call below is cut off after its first argument.
fp = rdMolDescriptors.GetMorganFingerprint(m,
fps = fp.GetNonzeroElements()
score1 = 0.
nf = 0
# NOTE(review): `bitId` is not bound in the visible lines; a loop over `fps`
# that accumulates score1 and nf is presumably missing.
sfp = bitId
# NOTE(review): as written nf is still 0 here, so this raises
# ZeroDivisionError.
score1 /= nf
# features score
nAtoms = m.GetNumAtoms()
ri = m.GetRingInfo()
# count the rings that contain more than 8 atoms
nMacrocycles = 0
for x in ri.AtomRings():
if len(x) > 8:
nMacrocycles += 1
# NOTE(review): nChiralCenters, nSpiro and nBridgeheads are not assigned in
# the visible lines of this function.
stereoPenalty = math.log10(nChiralCenters + 1)
spiroPenalty = math.log10(nSpiro + 1)
bridgePenalty = math.log10(nBridgeheads + 1)
macrocyclePenalty = 0.
# ---------------------------------------
# macrocyclePenalty = math.log10(nMacrocycles+1)
# This form generates better results when 2 or more macrocycles are present
# A fixed penalty of log10(2) is applied when at least one macrocycle exists.
if nMacrocycles > 0:
macrocyclePenalty = math.log10(2)
score3 = 0.
# NOTE(review): these two names shadow the builtins min() and max() for the
# rest of the function.
min = -4.0
max = 2.5
# NOTE(review): both assignments are unconditional as shown, so the function
# would always return 1.0; the guarding conditions and the combination of the
# partial scores are presumably missing.
sascore = 10.0
sascore = 1.0
return sascore
def NP_scores(smiles):
# Batch entry point: loads the model via readNPModel(), wraps `smiles` in a
# one-column DataFrame and returns `scores`.
# NOTE(review): `scores` is never assigned in the visible lines; the step
# that computes it from `df` and `fscore` appears to be missing (and the
# indentation has been lost) -- restore from the original file.
fscore =readNPModel()
df = pd.DataFrame({'smiles': smiles})
return scores
if fscore is None:
fscore =readNPModel()
if mol is None:
fp = rdMolDescriptors.GetMorganFingerprint(mol, 2)
bits = fp.GetNonzeroElements()
score = 0.
score += fscore.get(bit, 0)
score /= float(mol.GetNumAtoms())
return score
assert(len(s1) == len(s2))
if not data:
return 0
entropy = 0
for x in characters:
p_x = float(data.count(x))/len(data)
if p_x > 0:
entropy += - p_x*math.log(p_x, 2)
return entropy
strings = []
for line in sys.stdin:
strings.append(line)
return s