Molecular Fragmentation for MMPA

Recently I have wanted to develop a new MMP (matched molecular pair) service. As part of that development, I want to control the number of cuts made when fragmenting molecules.
Fortunately, RDKit has a good function for this, so I tried it out.
The following is a memorandum for myself.

import os
from rdkit import rdBase
from rdkit import Chem
from rdkit import RDConfig
from rdkit.Chem import Draw
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem import rdMMPA
# Render molecule depictions as SVG in the IPython/Jupyter notebook.
IPythonConsole.ipython_useSVG = True
# Print the RDKit version in use.
print( rdBase.rdkitVersion )
>>>2016.03.4

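Read cdk2.sdf from the RDKit documentation data directory (note that the variable `datadir` below actually holds the full path to the file).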

# Full path to the CDK2 example SD file under the RDKit docs directory.
datadir = os.path.join(RDConfig.RDDocsDir, "Book/data/cdk2.sdf")
# Keep only the entries the supplier returned as molecules; None entries
# are dropped (identity test, not equality, is the correct check for None).
mols = [mol for mol in Chem.SDMolSupplier(datadir) if mol is not None]

Check molecules.

# Draw the first three molecules as a grid image.
Draw.MolsToGridImage( mols[:3] )

fig1
OK!
Go next.
rdMMPA.FragmentMol is the function for fragmenting molecules, and its maxCuts argument controls the maximum number of cuts. I wrote a molFragmenter function around it, with the maximum number of cuts (the mcuts argument) defaulting to 1.

def molFragmenter(mol, mcuts=1, idfield="id"):
    """Print every fragmentation of ``mol`` as one comma-separated line.

    Each printed line has four fields::

        <parent SMILES>,<molecule id>,<core SMILES>,<chains SMILES>

    The core field is empty when ``rdMMPA.FragmentMol`` returns ``None`` as
    the first element of a result.

    Args:
        mol: RDKit molecule to fragment.
        mcuts: Passed to ``rdMMPA.FragmentMol`` as ``maxCuts``.
        idfield: Name of the property on ``mol`` that holds its identifier.

    Returns:
        None. The results are written to standard output with ``print``.

    Whatever ``mol.GetProp`` raises when ``idfield`` is not set on ``mol``
    propagates to the caller.
    """
    molid = mol.GetProp(idfield)
    # The parent SMILES is identical for every row, so compute it once
    # instead of once per fragmentation.
    parent_smiles = Chem.MolToSmiles(mol)
    for core, chains in rdMMPA.FragmentMol(mol, maxCuts=mcuts):
        # Use an identity test for None and emit an empty field when there
        # is no core.
        core_smiles = Chem.MolToSmiles(core) if core is not None else ""
        chains_smiles = Chem.MolToSmiles(chains)
        print(",".join([parent_smiles, molid, core_smiles, chains_smiles]))

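Let’s test!
First, single cuts only. For a single cut there is no core, so the third field of each line is empty and both fragments appear in the last field.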

# Fragment the first molecule with maxCuts=1.
molFragmenter(mols[0], mcuts=1)
>>>
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,,CC(C(=O)COc1nc(N)nc2[nH]cnc12)[*:1].C[*:1]
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,,CC(C)[*:1].Nc1nc(OCC(=O)[*:1])c2nc[nH]c2n1
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,,CC(C)C(=O)[*:1].Nc1nc(OC[*:1])c2nc[nH]c2n1
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,,CC(C)C(=O)C[*:1].Nc1nc(O[*:1])c2nc[nH]c2n1
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,,CC(C)C(=O)CO[*:1].Nc1nc(c2nc[nH]c2n1)[*:1]
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,,CC(C)C(=O)COc1nc(nc2[nH]cnc12)[*:1].N[*:1]

Next, allow up to three cuts. The output contains the single-, double- and triple-cut fragmentations.

# Fragment the first molecule with maxCuts=3.
molFragmenter(mols[0], mcuts=3)
>>>
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,,CC(C(=O)COc1nc(N)nc2[nH]cnc12)[*:1].C[*:1]
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,Nc1nc(OCC(=O)C([*:1])[*:2])c2nc[nH]c2n1,C[*:1].C[*:2]
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,C([*:1])([*:2])[*:3],C[*:1].C[*:2].Nc1nc(OCC(=O)[*:3])c2nc[nH]c2n1
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,O=C(C([*:1])[*:2])[*:3],C[*:1].C[*:2].Nc1nc(OC[*:3])c2nc[nH]c2n1
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,O=C(C[*:3])C([*:1])[*:2],C[*:1].C[*:2].Nc1nc(O[*:3])c2nc[nH]c2n1
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,O=C(CO[*:3])C([*:1])[*:2],C[*:1].C[*:2].Nc1nc(c2nc[nH]c2n1)[*:3]
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,O=C(COc1nc(nc2[nH]cnc12)[*:3])C([*:1])[*:2],C[*:1].C[*:2].N[*:3]
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,CC([*:1])[*:2],C[*:1].Nc1nc(OCC(=O)[*:2])c2nc[nH]c2n1
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,CC(C(=O)[*:2])[*:1],C[*:1].Nc1nc(OC[*:2])c2nc[nH]c2n1
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,CC(C(=O)C[*:2])[*:1],C[*:1].Nc1nc(O[*:2])c2nc[nH]c2n1
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,CC(C(=O)CO[*:2])[*:1],C[*:1].Nc1nc(c2nc[nH]c2n1)[*:2]
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,CC(C(=O)COc1nc(nc2[nH]cnc12)[*:2])[*:1],C[*:1].N[*:2]
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,,CC(C)[*:1].Nc1nc(OCC(=O)[*:1])c2nc[nH]c2n1
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,O=C([*:1])[*:2],CC(C)[*:1].Nc1nc(OC[*:2])c2nc[nH]c2n1
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,O=C(C[*:2])[*:1],CC(C)[*:1].Nc1nc(O[*:2])c2nc[nH]c2n1
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,O=C(CO[*:2])[*:1],CC(C)[*:1].Nc1nc(c2nc[nH]c2n1)[*:2]
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,O=C(COc1nc(nc2[nH]cnc12)[*:2])[*:1],CC(C)[*:1].N[*:2]
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,,CC(C)C(=O)[*:1].Nc1nc(OC[*:1])c2nc[nH]c2n1
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,C([*:1])[*:2],CC(C)C(=O)[*:1].Nc1nc(O[*:2])c2nc[nH]c2n1
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,C(O[*:2])[*:1],CC(C)C(=O)[*:1].Nc1nc(c2nc[nH]c2n1)[*:2]
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,c1nc2c(nc(nc2[nH]1)[*:2])OC[*:1],CC(C)C(=O)[*:1].N[*:2]
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,,CC(C)C(=O)C[*:1].Nc1nc(O[*:1])c2nc[nH]c2n1
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,O([*:1])[*:2],CC(C)C(=O)C[*:1].Nc1nc(c2nc[nH]c2n1)[*:2]
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,c1nc2c(nc(nc2[nH]1)[*:2])O[*:1],CC(C)C(=O)C[*:1].N[*:2]
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,,CC(C)C(=O)CO[*:1].Nc1nc(c2nc[nH]c2n1)[*:1]
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,c1nc2c(nc(nc2[nH]1)[*:2])[*:1],CC(C)C(=O)CO[*:1].N[*:2]
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,,CC(C)C(=O)COc1nc(nc2[nH]cnc12)[*:1].N[*:1]

Nice! FragmentMol also accepts a pattern argument that defines which bonds are cut, so it is very flexible. 😉
http://www.rdkit.org/docs/api/index.html

Advertisements

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s