In an earlier post, I described a simple procedure to generate a hierarchy of substructures, and depicted the hierarchy with GraphViz. Pat Walters at Vertex realised that it is possible to add images as node labels in GraphViz and updated the script so that the image includes the actual chemical depictions (see below). He has also adapted the script to use OpenEye’s OEChem.
Update (8/11/2012): Wolf-D. Ihlenfeldt has implemented this with the Cactvs Toolkit and in addition has shown how to integrate the graph display with Knime.
#!/usr/bin/env python import copy import pickle import os from openeye.oechem import * from openeye.oedepict import * def mol2image(mol,imageFileName,width=200,height=200): clearcoords = True suppressH = False opts = OE2DMolDisplayOptions(width, height, OEScale_AutoScale) itf = OEInterface() OESetup2DMolDisplayOptions(opts, itf) OEPrepareDepiction(mol, clearcoords, suppressH) disp = OE2DMolDisplay(mol, opts) ofs = oeofstream(imageFileName) name,ext = os.path.splitext(imageFileName) OERenderMolecule(ofs,ext[1:],disp) def create_tree(structures): tree = {"Root":{}} stack = [(tree["Root"], structures.keys())] while len(stack) > 0: leaf, subset = stack.pop() max_matches = ("", []) for name in subset: smiles = structures[name] smarts = OESubSearch(smiles) matches = [] for nameb in subset: if nameb != name: molb = OEGraphMol() OEParseSmiles(molb,structures[nameb]) if smarts.SingleMatch(molb): matches.append(nameb) if len(matches) >= len(max_matches[1]): max_matches = (name, matches) if False: # Debug statement print max_matches for name in [max_matches[0]] + max_matches[1]: subset.remove(name) leaf[max_matches[0]] = {} if len(subset) > 0: stack.append( (leaf, subset) ) if len(max_matches[1]) > 0: stack.append( (leaf[max_matches[0]], copy.deepcopy(max_matches[1]))) with open("tmp.pickle", "w") as f: pickle.dump(tree, f) def fix(name): return name.replace("-", "_").replace("1", "one").replace("2", "two").replace("3", "three").replace("5", "five").replace(",", "_") def visit(name, leafdict): for x, y in leafdict.iteritems(): if name != "Root": print ' %s -> %s;' % (name,x) print ' %s [label=<<TABLE><TR><TD><IMG SRC="%s/%s.png"/></TD></TR></TABLE>>];' % (x,os.getcwd(),x) visit(x, y) if __name__ == "__main__": structureDict = dict([('001', 'C(O)[C@@H](O1)[C@@H](O)[C@H](O)[C@@H](O)[CH]1(O)'), ('002', 'C(O)[C@@H](O1)[C@@H](O)[C@H](O)[C@H](O)[CH]1(O)'), ('003', 'C(O)[C@@H](O1)[C@@H](O)[C@H](O)[C@@H](O)[C@H]1(O)'), ('004', 'C(O)[C@@H](O1)[C@@H](O)[C@H](O)[C@@H](O)[C@@H]1(O)'), ('005', 'C(O)[CH](O1)[CH](O)[CH](O)[CH](O)[CH]1(O)'), ('006', 'c1ccccc1'), ('007', 'c1ccccc1Br'), ('008', 'c1ccc(Br)cc1Br'), ('009', 'c1cccc(Br)c1Br'), ('010', 'c1c(Br)cc(Br)cc1Br')]) for k in sorted(structureDict.keys()): imageFileName = "%s.png" % (k) smi = structureDict[k] mol = OEGraphMol() OEParseSmiles(mol,smi) mol2image(mol,imageFileName) create_tree(structureDict) with open("tmp.pickle", "r") as f: tree = pickle.load(f) print "digraph graphname {" print "node [shape=plaintext];" visit("Root", tree["Root"]) print "}" """ Redirect the output to a file and then convert to a graph as follows: "C:\Program Files (x86)\Graphviz 2.28\bin\dot.exe" -Tpng -ooutput.png dot_input.txt """