Molecular Fragmentation for MMPA

Recently I have wanted to develop a new MMP (matched molecular pair) service. As part of that development, I want to control the number of cuts made when fragmenting molecules.
Fortunately, RDKit has a good function for this, so I tried it out.
The following is a memorandum for myself.

import os
from rdkit import rdBase
from rdkit import Chem
from rdkit import RDConfig
from rdkit.Chem import Draw
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem import rdMMPA
# Turn on the IPython console's SVG flag (presumably makes molecule depictions render as SVG in the notebook — confirm).
IPythonConsole.ipython_useSVG = True
# Print the RDKit version string so the results below can be tied to a release.
print( rdBase.rdkitVersion )
>>>2016.03.4

Read cdk2.sdf from datadir.

# Path to the cdk2.sdf example file under RDKit's documentation directory.
datadir = os.path.join( RDConfig.RDDocsDir, "Book/data/cdk2.sdf" )
# Keep only the entries that were read successfully; entries equal to None are
# dropped, using an identity check ("is not None") rather than "!=".
mols = [ mol for mol in Chem.SDMolSupplier( datadir ) if mol is not None ]

Check molecules.

# Draw the first three molecules in a grid as a quick sanity check of the input.
Draw.MolsToGridImage( mols[:3] )

fig1
OK!
Go next.
rdMMPA.FragmentMol is the function that fragments a molecule, and its maxCuts argument sets the maximum number of cuts. I wrote a molFragmenter function around it, with a default of 1 for the number of cuts.

def molFragmenter( mol, mcuts=1, idfield="id" ):
    """Print every fragmentation of ``mol`` as one comma-separated line.

    Each printed line has the form
    ``<parent SMILES>,<molecule id>,<core SMILES>,<chains SMILES>``.
    The core field is left empty when the first element of a result
    returned by ``rdMMPA.FragmentMol`` is ``None``.

    Args:
        mol: RDKit molecule to fragment.
        mcuts: Value passed to ``rdMMPA.FragmentMol`` as ``maxCuts``.
        idfield: Name of the molecule property read with ``mol.GetProp``
            to obtain the molecule identifier.

    Returns:
        None. The result lines are written to standard output.
    """
    molid = mol.GetProp( idfield )
    frags = rdMMPA.FragmentMol( mol, maxCuts=mcuts )
    # The parent SMILES does not change between results, so compute it once
    # instead of once per printed line.
    parent_smiles = Chem.MolToSmiles( mol )
    for core, chains in frags:
        # Identity check against None; an absent core is written as "".
        core_smiles = Chem.MolToSmiles( core ) if core is not None else ""
        chains_smiles = Chem.MolToSmiles( chains )
        print( ",".join( ( parent_smiles, molid, core_smiles, chains_smiles ) ) )

Let’s test!
First, single cut only.

# Fragment the first molecule, passing mcuts=1 (forwarded as maxCuts).
molFragmenter(mols[0], mcuts=1)
>>>
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,,CC(C(=O)COc1nc(N)nc2[nH]cnc12)[*:1].C[*:1]
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,,CC(C)[*:1].Nc1nc(OCC(=O)[*:1])c2nc[nH]c2n1
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,,CC(C)C(=O)[*:1].Nc1nc(OC[*:1])c2nc[nH]c2n1
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,,CC(C)C(=O)C[*:1].Nc1nc(O[*:1])c2nc[nH]c2n1
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,,CC(C)C(=O)CO[*:1].Nc1nc(c2nc[nH]c2n1)[*:1]
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,,CC(C)C(=O)COc1nc(nc2[nH]cnc12)[*:1].N[*:1]

Next, allow up to three cuts (the output also includes the single- and double-cut results).

# Fragment the same molecule again, this time passing mcuts=3 (forwarded as maxCuts).
molFragmenter(mols[0], mcuts=3)
>>>
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,,CC(C(=O)COc1nc(N)nc2[nH]cnc12)[*:1].C[*:1]
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,Nc1nc(OCC(=O)C([*:1])[*:2])c2nc[nH]c2n1,C[*:1].C[*:2]
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,C([*:1])([*:2])[*:3],C[*:1].C[*:2].Nc1nc(OCC(=O)[*:3])c2nc[nH]c2n1
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,O=C(C([*:1])[*:2])[*:3],C[*:1].C[*:2].Nc1nc(OC[*:3])c2nc[nH]c2n1
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,O=C(C[*:3])C([*:1])[*:2],C[*:1].C[*:2].Nc1nc(O[*:3])c2nc[nH]c2n1
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,O=C(CO[*:3])C([*:1])[*:2],C[*:1].C[*:2].Nc1nc(c2nc[nH]c2n1)[*:3]
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,O=C(COc1nc(nc2[nH]cnc12)[*:3])C([*:1])[*:2],C[*:1].C[*:2].N[*:3]
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,CC([*:1])[*:2],C[*:1].Nc1nc(OCC(=O)[*:2])c2nc[nH]c2n1
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,CC(C(=O)[*:2])[*:1],C[*:1].Nc1nc(OC[*:2])c2nc[nH]c2n1
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,CC(C(=O)C[*:2])[*:1],C[*:1].Nc1nc(O[*:2])c2nc[nH]c2n1
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,CC(C(=O)CO[*:2])[*:1],C[*:1].Nc1nc(c2nc[nH]c2n1)[*:2]
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,CC(C(=O)COc1nc(nc2[nH]cnc12)[*:2])[*:1],C[*:1].N[*:2]
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,,CC(C)[*:1].Nc1nc(OCC(=O)[*:1])c2nc[nH]c2n1
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,O=C([*:1])[*:2],CC(C)[*:1].Nc1nc(OC[*:2])c2nc[nH]c2n1
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,O=C(C[*:2])[*:1],CC(C)[*:1].Nc1nc(O[*:2])c2nc[nH]c2n1
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,O=C(CO[*:2])[*:1],CC(C)[*:1].Nc1nc(c2nc[nH]c2n1)[*:2]
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,O=C(COc1nc(nc2[nH]cnc12)[*:2])[*:1],CC(C)[*:1].N[*:2]
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,,CC(C)C(=O)[*:1].Nc1nc(OC[*:1])c2nc[nH]c2n1
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,C([*:1])[*:2],CC(C)C(=O)[*:1].Nc1nc(O[*:2])c2nc[nH]c2n1
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,C(O[*:2])[*:1],CC(C)C(=O)[*:1].Nc1nc(c2nc[nH]c2n1)[*:2]
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,c1nc2c(nc(nc2[nH]1)[*:2])OC[*:1],CC(C)C(=O)[*:1].N[*:2]
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,,CC(C)C(=O)C[*:1].Nc1nc(O[*:1])c2nc[nH]c2n1
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,O([*:1])[*:2],CC(C)C(=O)C[*:1].Nc1nc(c2nc[nH]c2n1)[*:2]
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,c1nc2c(nc(nc2[nH]1)[*:2])O[*:1],CC(C)C(=O)C[*:1].N[*:2]
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,,CC(C)C(=O)CO[*:1].Nc1nc(c2nc[nH]c2n1)[*:1]
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,c1nc2c(nc(nc2[nH]1)[*:2])[*:1],CC(C)C(=O)CO[*:1].N[*:2]
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,,CC(C)C(=O)COc1nc(nc2[nH]cnc12)[*:1].N[*:1]

Nice! FragmentMol also accepts a pattern argument that specifies which bonds may be cut, so it is very flexible. ;-)
http://www.rdkit.org/docs/api/index.html