Visualising a hierarchy of substructures Part II

In an earlier post, I described a simple procedure to generate a hierarchy of substructures, and depicted the hierarchy with GraphViz. Pat Walters at Vertex realised that it is possible to add images as node labels in GraphViz and updated the script so that the image includes the actual chemical depictions (see below). He has also adapted the script to use OpenEye’s OEChem.

Update (8/11/2012): Wolf-D. Ihlenfeldt has implemented this with the Cactvs Toolkit and in addition has shown how to integrate the graph display with Knime.

#!/usr/bin/env python

import copy
import pickle
import os

from openeye.oechem import *
from openeye.oedepict import *

def mol2image(mol,imageFileName,width=200,height=200):
    clearcoords = True
    suppressH   = False
    opts = OE2DMolDisplayOptions(width, height, OEScale_AutoScale)
    itf = OEInterface()
    OESetup2DMolDisplayOptions(opts, itf)
    OEPrepareDepiction(mol, clearcoords, suppressH)
    disp = OE2DMolDisplay(mol, opts)
    ofs = oeofstream(imageFileName)
    name,ext = os.path.splitext(imageFileName)
    OERenderMolecule(ofs,ext[1:],disp)

def create_tree(structures):
    tree = {"Root":{}}
    stack = [(tree["Root"], structures.keys())]

    while len(stack) > 0:
        leaf, subset = stack.pop()

        max_matches = ("", [])
        for name in subset:
            smiles = structures[name]
            smarts = OESubSearch(smiles)
            matches = []
            for nameb in subset:
                if nameb != name:
                    molb = OEGraphMol()
                    OEParseSmiles(molb,structures[nameb])
                    if smarts.SingleMatch(molb):
                        matches.append(nameb)
            if len(matches) >= len(max_matches[1]):
                max_matches = (name, matches)
        if False: # Debug statement
            print max_matches

        for name in [max_matches[0]] + max_matches[1]:
            subset.remove(name)
        leaf[max_matches[0]] = {}
        if len(subset) > 0:
            stack.append( (leaf, subset) )
        if len(max_matches[1]) > 0:
            stack.append( (leaf[max_matches[0]], copy.deepcopy(max_matches[1])))

    with open("tmp.pickle", "w") as f:
        pickle.dump(tree, f)

def fix(name):
    return name.replace("-", "_").replace("1", "one").replace("2", "two").replace("3", "three").replace("5", "five").replace(",", "_")

def visit(name, leafdict):
    for x, y in leafdict.iteritems():
        if name != "Root":
            print ' %s -> %s;' % (name,x)
        print ' %s [label=<<TABLE><TR><TD><IMG SRC="%s/%s.png"/></TD></TR></TABLE>>];' % (x,os.getcwd(),x)
        visit(x, y)

if __name__ == "__main__":
    structureDict = dict([('001', 'C(O)[C@@H](O1)[C@@H](O)[C@H](O)[C@@H](O)[CH]1(O)'),
                       ('002', 'C(O)[C@@H](O1)[C@@H](O)[C@H](O)[C@H](O)[CH]1(O)'),
                       ('003', 'C(O)[C@@H](O1)[C@@H](O)[C@H](O)[C@@H](O)[C@H]1(O)'),
                       ('004', 'C(O)[C@@H](O1)[C@@H](O)[C@H](O)[C@@H](O)[C@@H]1(O)'),
                       ('005', 'C(O)[CH](O1)[CH](O)[CH](O)[CH](O)[CH]1(O)'),
                       ('006', 'c1ccccc1'),
                       ('007', 'c1ccccc1Br'),
                       ('008', 'c1ccc(Br)cc1Br'),
                       ('009', 'c1cccc(Br)c1Br'),
                       ('010', 'c1c(Br)cc(Br)cc1Br')])

    for k in sorted(structureDict.keys()):
        imageFileName = "%s.png" % (k)
        smi = structureDict[k]
        mol = OEGraphMol()
        OEParseSmiles(mol,smi)
        mol2image(mol,imageFileName)

    create_tree(structureDict)
    with open("tmp.pickle", "r") as f:
        tree = pickle.load(f)
    print "digraph graphname {"
    print "node [shape=plaintext];"
    visit("Root", tree["Root"])
    print "}"

"""
Redirect the output to a file and then convert to a graph as follows:
"C:\Program Files (x86)\Graphviz 2.28\bin\dot.exe" -Tpng -ooutput.png dot_input.txt
"""