Recently I have been wanting to develop a new MMP (matched molecular pair) service. As part of that development, I want to control the number of cuts made when fragmenting molecules.
Fortunately, RDKit has a good function for doing this, so I tried it out.
The following is a memorandum for myself.
import os from rdkit import rdBase from rdkit import Chem from rdkit import RDConfig from rdkit.Chem import Draw from rdkit.Chem.Draw import IPythonConsole from rdkit.Chem import rdMMPA IPythonConsole.ipython_useSVG = True print( rdBase.rdkitVersion ) >>>2016.03.4
Read cdk2.sdf from the RDKit documentation data directory.
# Full path to the cdk2.sdf example file under the RDKit docs directory.
datadir = os.path.join(RDConfig.RDDocsDir, "Book/data/cdk2.sdf")
# Keep only real molecules: entries that come back as None (RDKit's
# documented result for records it fails to parse) are dropped.
# `is not None` is an identity test and never invokes a custom __ne__.
mols = [mol for mol in Chem.SDMolSupplier(datadir) if mol is not None]
Check molecules.
# Draw the first three loaded molecules as a grid image as a quick visual
# check that the SD file was read.
Draw.MolsToGridImage( mols[:3] )
OK!
Go next.
rdMMPA.FragmentMol is the function for fragmenting molecules, and it accepts a maxCuts argument. I wrote a molFragmenter function that passes its mcuts parameter to FragmentMol as maxCuts, with a default value of 1.
def molFragmenter(mol, mcuts=1, idfield="id"):
    """Print one comma-separated line per fragmentation of ``mol``.

    Each printed line has the form::

        <molecule SMILES>,<molecule id>,<first fragment SMILES>,<second fragment SMILES>

    where the two fragment fields come from elements 0 and 1 of each entry
    returned by ``rdMMPA.FragmentMol``. The first fragment field is left
    empty when element 0 is ``None``.

    Args:
        mol: Molecule to fragment. It must carry the property named by
            ``idfield``.
        mcuts: Forwarded to ``rdMMPA.FragmentMol`` as ``maxCuts``.
        idfield: Name of the molecule property holding its identifier.

    Returns:
        None. The results are written to standard output.
    """
    molid = mol.GetProp(idfield)
    # The parent SMILES is the same for every fragmentation, so compute it
    # once instead of on every loop iteration.
    parent_smiles = Chem.MolToSmiles(mol)
    for frag in rdMMPA.FragmentMol(mol, maxCuts=mcuts):
        first, second = frag[0], frag[1]
        # Identity test: ``None`` marks a missing first fragment.
        first_smiles = Chem.MolToSmiles(first) if first is not None else ""
        second_smiles = Chem.MolToSmiles(second)
        print(",".join((parent_smiles, molid, first_smiles, second_smiles)))
Let’s test!
First, single cut only.
molFragmenter(mols[0], mcuts=1) >>> CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,,CC(C(=O)COc1nc(N)nc2[nH]cnc12)[*:1].C[*:1] CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,,CC(C)[*:1].Nc1nc(OCC(=O)[*:1])c2nc[nH]c2n1 CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,,CC(C)C(=O)[*:1].Nc1nc(OC[*:1])c2nc[nH]c2n1 CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,,CC(C)C(=O)C[*:1].Nc1nc(O[*:1])c2nc[nH]c2n1 CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,,CC(C)C(=O)CO[*:1].Nc1nc(c2nc[nH]c2n1)[*:1] CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,,CC(C)C(=O)COc1nc(nc2[nH]cnc12)[*:1].N[*:1]
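Next, allow up to three cuts. Note that maxCuts=3 returns the single- and double-cut results as well as the triple-cut ones.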
molFragmenter(mols[0], mcuts=3) >>> CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,,CC(C(=O)COc1nc(N)nc2[nH]cnc12)[*:1].C[*:1] CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,Nc1nc(OCC(=O)C([*:1])[*:2])c2nc[nH]c2n1,C[*:1].C[*:2] CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,C([*:1])([*:2])[*:3],C[*:1].C[*:2].Nc1nc(OCC(=O)[*:3])c2nc[nH]c2n1 CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,O=C(C([*:1])[*:2])[*:3],C[*:1].C[*:2].Nc1nc(OC[*:3])c2nc[nH]c2n1 CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,O=C(C[*:3])C([*:1])[*:2],C[*:1].C[*:2].Nc1nc(O[*:3])c2nc[nH]c2n1 CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,O=C(CO[*:3])C([*:1])[*:2],C[*:1].C[*:2].Nc1nc(c2nc[nH]c2n1)[*:3] CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,O=C(COc1nc(nc2[nH]cnc12)[*:3])C([*:1])[*:2],C[*:1].C[*:2].N[*:3] CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,CC([*:1])[*:2],C[*:1].Nc1nc(OCC(=O)[*:2])c2nc[nH]c2n1 CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,CC(C(=O)[*:2])[*:1],C[*:1].Nc1nc(OC[*:2])c2nc[nH]c2n1 CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,CC(C(=O)C[*:2])[*:1],C[*:1].Nc1nc(O[*:2])c2nc[nH]c2n1 CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,CC(C(=O)CO[*:2])[*:1],C[*:1].Nc1nc(c2nc[nH]c2n1)[*:2] CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,CC(C(=O)COc1nc(nc2[nH]cnc12)[*:2])[*:1],C[*:1].N[*:2] CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,,CC(C)[*:1].Nc1nc(OCC(=O)[*:1])c2nc[nH]c2n1 CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,O=C([*:1])[*:2],CC(C)[*:1].Nc1nc(OC[*:2])c2nc[nH]c2n1 CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,O=C(C[*:2])[*:1],CC(C)[*:1].Nc1nc(O[*:2])c2nc[nH]c2n1 CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,O=C(CO[*:2])[*:1],CC(C)[*:1].Nc1nc(c2nc[nH]c2n1)[*:2] CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,O=C(COc1nc(nc2[nH]cnc12)[*:2])[*:1],CC(C)[*:1].N[*:2] CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,,CC(C)C(=O)[*:1].Nc1nc(OC[*:1])c2nc[nH]c2n1 CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,C([*:1])[*:2],CC(C)C(=O)[*:1].Nc1nc(O[*:2])c2nc[nH]c2n1 
CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,C(O[*:2])[*:1],CC(C)C(=O)[*:1].Nc1nc(c2nc[nH]c2n1)[*:2] CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,c1nc2c(nc(nc2[nH]1)[*:2])OC[*:1],CC(C)C(=O)[*:1].N[*:2] CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,,CC(C)C(=O)C[*:1].Nc1nc(O[*:1])c2nc[nH]c2n1 CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,O([*:1])[*:2],CC(C)C(=O)C[*:1].Nc1nc(c2nc[nH]c2n1)[*:2] CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,c1nc2c(nc(nc2[nH]1)[*:2])O[*:1],CC(C)C(=O)C[*:1].N[*:2] CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,,CC(C)C(=O)CO[*:1].Nc1nc(c2nc[nH]c2n1)[*:1] CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,c1nc2c(nc(nc2[nH]1)[*:2])[*:1],CC(C)C(=O)CO[*:1].N[*:2] CC(C)C(=O)COc1nc(N)nc2[nH]cnc12,ZINC03814457,,CC(C)C(=O)COc1nc(nc2[nH]cnc12)[*:1].N[*:1]
Nice! FragmentMol also lets you specify the cutting pattern. Very flexible. ;-)
http://www.rdkit.org/docs/api/index.html