• на сайте PubChem найти все радикалы c азидом для Click Chemistry и скачать их SMILES нотации
  • Найти формулу ибупрофена и предложить способ изменения его SMILES для эмуляции реагента Click Chemistry (заменить изопропил на этин он же ацителен)
  • Заменить в найденых радикалах азидную группу на модифцированный ибупрофен
  • Превратить новые SMILES в объекты-молекулы
  • Отобрать те молекулы, которые удовлетворяют правилу пяти Lipinski
In [18]:
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit import RDConfig
from rdkit.Chem.Draw import IPythonConsole 
from rdkit.Chem import Draw
import numpy as np
from IPython.display import display,Image
import rdkit.Chem.Lipinski as Lipinski
import pubchempy as pcp
import pandas as pd
import nglview as nv
import random

Ibuprofen

In [2]:
ibuprofen = Chem.MolFromSmiles("CC(C)CC1=CC=C(C=C1)C(C)C(=O)O")
display(ibuprofen)

Lipinski's rule

  • No more than 5 hydrogen bond donors (the total number of nitrogen–hydrogen and oxygen–hydrogen bonds)
  • No more than 10 hydrogen bond acceptors (all nitrogen or oxygen atoms)
  • A molecular mass less than 500 daltons
  • An octanol-water partition coefficient[5] log P not greater than 5
In [3]:
def lipinski_ok(x, print_all=False):
    l1 = Lipinski.NumHDonors(x)
    l2 = Lipinski.NumHAcceptors(x)
    l3 = Lipinski.rdMolDescriptors.CalcExactMolWt(x)
    l4 = Lipinski.rdMolDescriptors.CalcCrippenDescriptors(x)[0]
    if print_all: print(l1, l2, l3, l4)
    if (l1 <= 5) and (l2 <= 10) and (l3 < 500) and (l4 <= 5):
        return True
    return False

lipinski_ok(ibuprofen, False)
Out[3]:
True

Isopropyl -> Ethin

In [4]:
isopropyl = Chem.MolFromSmiles("CC(C)")
ibuprofen_ = Chem.MolFromSmiles("CC1=CC=C(C=C1)C(C)C(=O)O")
ethin = Chem.MolFromSmiles("C#C")
ibuprofen_ethin = Chem.MolFromSmiles("C#CCC1=CC=C(C=C1)C(C)C(=O)O")

display(ibuprofen)
display(ibuprofen_ethin)

Azide-alkyne Huisgen cycloaddition

$R_1-N_3 \ + \ C_2-R_2 \ \longrightarrow \ R_1-C_2H_3N_3-R_2$

In [5]:
C2H3N3 = Chem.MolFromSmiles("N2C=C(N=N2)")
display(C2H3N3)
ibuprofen_C2H3N3 = Chem.MolFromSmiles("N2C=C(CC1=CC=C(C=C1)C(C)C(=O)O)N=N2")
ibuprofen_C2H3N3_str = "N2C=C(CC1=CC=C(C=C1)C(C)C(=O)O)N=N2"
display(ibuprofen_C2H3N3)
In [6]:
smiles_data = pcp.get_properties(properties="CanonicalSMILES", identifier="N=N=N", namespace="smiles", 
                                 searchtype="substructure", RingsNotEmbedded=True,  as_dataframe=False)
In [7]:
smiles_df = pd.DataFrame(smiles_data)
smiles_df.set_index('CID', inplace = True)
len(smiles_df)
Out[7]:
139590

Filtration

In [80]:
smiles_df = smiles_df[(smiles_df["CanonicalSMILES"].str.len() < 30) \
                      & (~smiles_df["CanonicalSMILES"].str.contains('\.')) \
                      & (smiles_df["CanonicalSMILES"].str.contains("N=[N+]=[N-]", regex = False))]
len(smiles_df)
Out[80]:
12504
In [81]:
smiles_ibu = pd.DataFrame(smiles_df["CanonicalSMILES"].str.replace("N\=\[N\+\]\=\[N\-\]", ibuprofen_C2H3N3_str),
                          index = smiles_df.index)
In [82]:
def check_lipinski_smiles(smile):
    try:
        if lipinski_ok(Chem.MolFromSmiles(smile)):
            return True
    except:
        pass
    return False
In [83]:
smiles_fin = smiles_ibu[smiles_ibu.applymap(check_lipinski_smiles)['CanonicalSMILES']]
len(smiles_fin)
RDKit ERROR: [02:32:19] Explicit valence for atom # 4 Cl, 3, is greater than permitted
RDKit ERROR: [02:32:28] Explicit valence for atom # 23 Cl, 3, is greater than permitted
RDKit ERROR: [02:32:28] Explicit valence for atom # 1 Cl, 2, is greater than permitted
RDKit ERROR: [02:32:36] Explicit valence for atom # 4 Cl, 3, is greater than permitted
RDKit ERROR: [02:32:46] Explicit valence for atom # 23 Cl, 3, is greater than permitted
RDKit ERROR: [02:32:46] Explicit valence for atom # 1 Cl, 2, is greater than permitted
Out[83]:
12294

Visualisation

In [90]:
mols_10 = random.sample(list(smiles_fin["CanonicalSMILES"].values), 10)
mols_10 = [x.encode('ascii','ignore') for x in mols_10]
mols_10 = [Chem.MolFromSmiles(mols_10[x]) for x in range(10)]
In [91]:
Chem.Draw.MolsToGridImage(mols_10,
                          molsPerRow=2,
                          subImgSize=(500,300))
Out[91]:

3D-version

In [92]:
m3d=Chem.AddHs(mols_10[0])
Chem.AllChem.EmbedMolecule(m3d)
AllChem.MMFFOptimizeMolecule(m3d,maxIters=500,nonBondedThresh=200)
nv.show_rdkit(m3d)

(!!) shows nothing because of global versions conflict