In [1]:
import numpy as np
import copy

# Отображение структур
import IPython.display
import ipywidgets
from IPython.display import display,display_svg,SVG,Image

# Open Drug Discovery Toolkit
import oddt
import oddt.docking
import oddt.interactions

# Органика
from rdkit.Chem import Draw
from rdkit.Chem.Draw import IPythonConsole

import pmx # Модуль для манипулирования pdb 

Подготовка белка

In [3]:
pdb=pmx.Model('../hw6/model.B99990001.pdb')
In [8]:
for r in pdb.residues[-15:]:
    print r #посмотрим остатки
<Molecule: id = 130 name = CYS chain_id =    natoms = 6>
<Molecule: id = 131 name = GLN chain_id =    natoms = 9>
<Molecule: id = 132 name = ASN chain_id =    natoms = 8>
<Molecule: id = 133 name = GLN chain_id =    natoms = 9>
<Molecule: id = 134 name = ASP chain_id =    natoms = 8>
<Molecule: id = 135 name = LEU chain_id =    natoms = 8>
<Molecule: id = 136 name = ARG chain_id =    natoms = 11>
<Molecule: id = 137 name = SER chain_id =    natoms = 6>
<Molecule: id = 138 name = TYR chain_id =    natoms = 12>
<Molecule: id = 139 name = VAL chain_id =    natoms = 7>
<Molecule: id = 140 name = ALA chain_id =    natoms = 5>
<Molecule: id = 141 name = GLY chain_id =    natoms = 4>
<Molecule: id = 142 name = CYS chain_id =    natoms = 6>
<Molecule: id = 143 name = GLY chain_id =    natoms = 4>
<Molecule: id = 144 name = VAL chain_id =    natoms = 8>
In [26]:
# создание объектов белок и лиганда
newpdb = pdb.copy()
for r in newpdb.residues[-3:]:
    newpdb.remove_residue(r)

newpdb.writePDB('myprot.pdb')
    
lig = pdb.copy()
del lig.residues[:-3]
Gap between residues  <Molecule: id = 141 name = GLY chain_id =    natoms = 4> < - > <Molecule: id = 142 name = GLY chain_id =    natoms = 4> dist =  3.24546653041
Gap between residues  <Molecule: id = 141 name = GLY chain_id =    natoms = 4> < - > <Molecule: id = 142 name = VAL chain_id =    natoms = 8> dist =  5.70041761628

Геометрический центр лиганда

In [27]:
lig_center = sum(map(lambda a: np.array(a.x), lig.atoms)) / len(lig.atoms)
lig_center
Out[27]:
array([ 38.02104632,  35.54515713,  21.11711262])

Подготовка белка для докинга

In [28]:
prot = oddt.toolkit.readfile('pdb','myprot.pdb').next()

prot.OBMol.AddPolarHydrogens()
prot.OBMol.AutomaticPartialCharge()

print 'is it the first mol in 1lmp is protein?',prot.protein,':) and MW of this mol is:', prot.molwt 
is it the first mol in 1lmp is protein? False :) and MW of this mol is: 15455.44908

NAG содержит в себе СH3C(=O)NH группу. Создайте лиганды где метильный радикал этой группы будет заменён на :

OH

NH3+

H

Ph

COO-

Для каждого из этих лигандов проведется докинг, будут представлены результаты в виде таблицы от лучшего заместителя к худшему

In [32]:
NAG_smiles = 'CC(=O)NC1C(C(C(OC1O)CO)O)O'
In [33]:
smiles = ['OC(=O)NC1C(C(C(OC1O)CO)O)O',               #OH
          '[NH3+]C(=O)NC1C(C(C(OC1O)CO)O)O',          #NH3+
          'C(=O)NC1C(C(C(OC1O)CO)O)O',                #H
          'C1=CC=[C]C=C1C(=O)NC1C(C(C(OC1O)CO)O)O',   #PH
          '[O-]C(=O)C(=O)NC1C(C(C(OC1O)CO)O)O']       #COO  
mols= []
images =[]

for s in smiles:
    m = oddt.toolkit.readstring('smi', s)
    if not m.OBMol.Has3D(): 
        m.make3D(forcefield='mmff94', steps=150)
        m.removeh()
        m.OBMol.AddPolarHydrogens()

    mols.append(m)
    ###with print m.OBMol.Has3D() was found that:
    ### deep copy needed to keep 3D , write svg make mols flat
    images.append((SVG(copy.deepcopy(m).write('svg'))))

display_svg(*images)
***** - Open Babel Depiction O H H H H H H O O O O O N O *****
***** - Open Babel Depiction H H H H H H H H O O O N O O N O *****
***** - Open Babel Depiction O H H H H H O O O O N O *****
***** - Open Babel Depiction H H H H H O O O O O N O *****
***** - Open Babel Depiction H H H H H O O O O O O N O HO *****
In [35]:
dock_obj= oddt.docking.AutodockVina.autodock_vina(
    protein=prot,size=(20,20,20),center=lig_center.tolist(),
    executable='/usr/bin/vina',autocleanup=True, num_modes=9)

print dock_obj.tmp_dir
print " ".join(dock_obj.params) # Опишите выдачу
/tmp/autodock_vina_76XG3H
--center_x 38.0210463215 --center_y 35.5451571299 --center_z 21.1171126249 --size_x 20 --size_y 20 --size_z 20 --cpu 1 --exhaustiveness 8 --num_modes 9 --energy_range 3
In [37]:
?dock_obj

protein: oddt.toolkit.Molecule object (default=None) Protein object to be used while generating descriptors.

auto_ligand: oddt.toolkit.Molecule object or string (default=None)
    Ligand use to center the docking box. Either ODDT molecule or
    a file (opened based on extesion and read to ODDT molecule).
    Box is centered on geometric center of molecule.

size: tuple, shape=[3] (default=(20, 20, 20))
    Dimentions of docking box (in Angstroms)

center: tuple, shape=[3] (default=(0,0,0))
    The center of docking box in cartesian space.

exhaustiveness: int (default=8)
    Exhaustiveness parameter of Autodock Vina

num_modes: int (default=9)
    Number of conformations generated by Autodock Vina. The maximum
    number of docked poses is 9 (due to Autodock Vina limitation).

energy_range: int (default=3)
    Energy range cutoff for Autodock Vina

seed: int or None (default=None)
    Random seed for Autodock Vina

prefix_dir: string (default=/tmp)
    Temporary directory for Autodock Vina files

executable: string or None (default=None)
    Autodock Vina executable location in the system.
    It's realy necessary if autodetection fails.

autocleanup: bool (default=True)
    Should the docking engine clean up after execution?

skip_bad_mols: bool (default=True)
    Should molecules that crash Autodock Vina be skipped.

Результаты

In [38]:
res = dock_obj.dock(mols,prot)
In [40]:
for i,r in enumerate(res):
    print "%4d%10s%8s%8s%8s" %(i,r.formula, r.data['vina_affinity'],  r.data['vina_rmsd_ub'], r.residues[0].name)
   0  C7H13NO7    -6.0   0.000     UNL
   1  C7H13NO7    -5.3   3.082     UNL
   2  C7H13NO7    -5.3   5.960     UNL
   3  C7H13NO7    -5.3   5.060     UNL
   4  C7H13NO7    -5.2   5.044     UNL
   5  C7H13NO7    -5.1   4.951     UNL
   6  C7H13NO7    -4.9   2.979     UNL
   7  C7H13NO7    -4.8   5.507     UNL
   8  C7H13NO7    -4.7   2.065     UNL
   9 C7H16N2O6    -6.0   0.000     UNL
  10 C7H16N2O6    -5.8   4.647     UNL
  11 C7H16N2O6    -5.7   3.163     UNL
  12 C7H16N2O6    -5.5   4.332     UNL
  13 C7H16N2O6    -5.4   5.186     UNL
  14 C7H16N2O6    -5.4   3.846     UNL
  15 C7H16N2O6    -5.3   3.403     UNL
  16 C7H16N2O6    -5.1   4.756     UNL
  17 C7H16N2O6    -5.1   3.791     UNL
  18  C7H13NO6    -6.0   0.000     UNL
  19  C7H13NO6    -5.6   3.310     UNL
  20  C7H13NO6    -5.4   4.413     UNL
  21  C7H13NO6    -5.1   3.514     UNL
  22  C7H13NO6    -5.0   4.222     UNL
  23  C7H13NO6    -4.9   4.134     UNL
  24  C7H13NO6    -4.9   4.916     UNL
  25  C7H13NO6    -4.8   4.536     UNL
  26  C7H13NO6    -4.8   2.549     UNL
  27 C13H17NO6    -6.7   0.000     UNL
  28 C13H17NO6    -6.7   2.651     UNL
  29 C13H17NO6    -6.4   6.928     UNL
  30 C13H17NO6    -6.1   7.190     UNL
  31 C13H17NO6    -5.9   2.906     UNL
  32 C13H17NO6    -5.7   6.935     UNL
  33 C13H17NO6    -4.9   6.847     UNL
  34 C13H17NO6    -4.5   8.234     UNL
  35 C13H17NO6    -4.4  15.212     UNL
  36  C8H13NO8    -6.5   0.000     UNL
  37  C8H13NO8    -5.9   1.587     UNL
  38  C8H13NO8    -5.5   5.941     UNL
  39  C8H13NO8    -5.3   6.287     UNL
  40  C8H13NO8    -5.2   5.790     UNL
  41  C8H13NO8    -5.0   4.119     UNL
  42  C8H13NO8    -5.0   1.738     UNL
  43  C8H13NO8    -5.0   2.802     UNL
  44  C8H13NO8    -4.9   5.437     UNL
In [42]:
import pandas as pd
In [55]:
data = pd.DataFrame(np.array([
                      [i.formula for i in res],\
                      [i.data['vina_affinity'] for i in res],\
                      [i.data['vina_rmsd_ub'] for i in res],\
                      [i.residues[0].name for i in res],\
                    ['OH']*9+['NH3+']*9+['H']*9+['Ph']*9+['COO']*9
                      ]).T,columns = [['Formula','Affinity','RSMD','Name','Radical']])
In [56]:
data.sort_values(by='Affinity')
Out[56]:
Formula Affinity RSMD Name Radical
35 C13H17NO6 -4.4 15.212 UNL Ph
34 C13H17NO6 -4.5 8.234 UNL Ph
8 C7H13NO7 -4.7 2.065 UNL OH
7 C7H13NO7 -4.8 5.507 UNL OH
26 C7H13NO6 -4.8 2.549 UNL H
25 C7H13NO6 -4.8 4.536 UNL H
44 C8H13NO8 -4.9 5.437 UNL COO
33 C13H17NO6 -4.9 6.847 UNL Ph
6 C7H13NO7 -4.9 2.979 UNL OH
24 C7H13NO6 -4.9 4.916 UNL H
23 C7H13NO6 -4.9 4.134 UNL H
42 C8H13NO8 -5.0 1.738 UNL COO
41 C8H13NO8 -5.0 4.119 UNL COO
43 C8H13NO8 -5.0 2.802 UNL COO
22 C7H13NO6 -5.0 4.222 UNL H
5 C7H13NO7 -5.1 4.951 UNL OH
16 C7H16N2O6 -5.1 4.756 UNL NH3+
17 C7H16N2O6 -5.1 3.791 UNL NH3+
21 C7H13NO6 -5.1 3.514 UNL H
4 C7H13NO7 -5.2 5.044 UNL OH
40 C8H13NO8 -5.2 5.790 UNL COO
39 C8H13NO8 -5.3 6.287 UNL COO
2 C7H13NO7 -5.3 5.960 UNL OH
3 C7H13NO7 -5.3 5.060 UNL OH
15 C7H16N2O6 -5.3 3.403 UNL NH3+
1 C7H13NO7 -5.3 3.082 UNL OH
13 C7H16N2O6 -5.4 5.186 UNL NH3+
20 C7H13NO6 -5.4 4.413 UNL H
14 C7H16N2O6 -5.4 3.846 UNL NH3+
12 C7H16N2O6 -5.5 4.332 UNL NH3+
38 C8H13NO8 -5.5 5.941 UNL COO
19 C7H13NO6 -5.6 3.310 UNL H
32 C13H17NO6 -5.7 6.935 UNL Ph
11 C7H16N2O6 -5.7 3.163 UNL NH3+
10 C7H16N2O6 -5.8 4.647 UNL NH3+
31 C13H17NO6 -5.9 2.906 UNL Ph
37 C8H13NO8 -5.9 1.587 UNL COO
0 C7H13NO7 -6.0 0.000 UNL OH
9 C7H16N2O6 -6.0 0.000 UNL NH3+
18 C7H13NO6 -6.0 0.000 UNL H
30 C13H17NO6 -6.1 7.190 UNL Ph
29 C13H17NO6 -6.4 6.928 UNL Ph
36 C8H13NO8 -6.5 0.000 UNL COO
28 C13H17NO6 -6.7 2.651 UNL Ph
27 C13H17NO6 -6.7 0.000 UNL Ph

Анализ докинга

In [57]:
for i,r in enumerate(res):
    hbs = oddt.interactions.hbonds(prot,r)
    stack= oddt.interactions.pi_stacking(prot,r)
    phob = oddt.interactions.hydrophobic_contacts(prot,r)

Визуализация

In [61]:
for i,r in enumerate(res):
    r.write(filename='r%s.pdb' % i, format='pdb')
In [58]:
import time
import __main__
import os
import pymol
__main__.pymol_argv = [ 'pymol', '-cp' ]
pymol.finish_launching()
from pymol import cmd
from IPython.display import Image, display, HTML
In [59]:
FILE_NAME = 'img.png'
In [66]:
def prepareImage(width=550, height=550, sleep=1, filename=FILE_NAME):
    cmd.ray(width, height)
    cmd.png(filename)
    time.sleep(sleep)

Ph

In [69]:
cmd.reinitialize()
cmd.load('myprot.pdb')
cmd.load('r35.pdb')
cmd.show('sticks','r35')
cmd.show('cartoon','myprot')
cmd.hide('lines','myprot')
cmd.orient()
prepareImage()
Image(FILE_NAME)
Out[69]:

OH

In [68]:
cmd.reinitialize()
cmd.load('myprot.pdb')
cmd.load('r8.pdb')
cmd.show('sticks','r8')
cmd.show('cartoon','myprot')
cmd.hide('lines','myprot')
cmd.orient()
prepareImage()
Image(FILE_NAME)
Out[68]:

H

In [70]:
cmd.reinitialize()
cmd.load('myprot.pdb')
cmd.load('r26.pdb')
cmd.show('sticks','r26')
cmd.show('cartoon','myprot')
cmd.hide('lines','myprot')
cmd.orient()
prepareImage()
Image(FILE_NAME)
Out[70]:

COO

In [71]:
cmd.reinitialize()
cmd.load('myprot.pdb')
cmd.load('r44.pdb')
cmd.show('sticks','r44')
cmd.show('cartoon','myprot')
cmd.hide('lines','myprot')
cmd.orient()
prepareImage()
Image(FILE_NAME)
Out[71]:

NH3+

In [72]:
cmd.reinitialize()
cmd.load('myprot.pdb')
cmd.load('r16.pdb')
cmd.show('sticks','r16')
cmd.show('cartoon','myprot')
cmd.hide('lines','myprot')
cmd.orient()
prepareImage()
Image(FILE_NAME)
Out[72]:
In [73]:
!jupyter nbconvert --to html rudakov_kirill_hw7.ipynb