| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401 | # This code is part of Grandalf# Copyright (C) 2008 Axel Tillequin (bdcht3@gmail.com) and others# published under GPLv2 license or EPLv1 license# Contributor(s): Axel Tillequintry:    import ply.lex as lex    import ply.yacc as yacc    _has_ply = Trueexcept ImportError:    _has_ply = False__all__ = ["_has_ply", "Dot"]# ------------------------------------------------------------------------------# LALR(1) parser for Graphviz dot file format.class Dot:    _reserved = (        "strict",        "graph",        "digraph",        "subgraph",        "node",        "edge",    )    _tokens = ("regulars", "string", "html", "comment",) + _reserved    _literals = [",", ";", "-", ">", "=", ":", "[", "]", "{", "}"]    class Lexer(object):        def __init__(self):            self.whitespace = "\0\t\n\f\r "            self.reserved = Dot._reserved            self.tokens = Dot._tokens            self.literals = Dot._literals            self.t_ignore = self.whitespace        def t_regulars(self, t):            r"[-]?[\w.]+"            v = t.value.lower()            if v in self.reserved:                t.type = v                return t            # check numeric string            if v[0].isdigit() or v[0] in ["-", "."]:                try:                    float(v)                except ValueError:                    print("invalid numeral token: %s" % v)                    raise SyntaxError            elif "." in v:  # forbidden in non-numeric                raise SyntaxError            return t        def t_comment_online(self, t):            r"(//(.*)\n)|\\\n"            pass        def t_comment_macro(self, t):            r"(\#(.*)\n)"            pass        def t_comment_multline(self, t):            r"(/\*)"            start = t.lexer.lexpos            t.lexer.lexpos = t.lexer.lexdata.index("*/", start) + 2        def t_string(self, t):            r'"'            start = t.lexer.lexpos - 1            i = t.lexer.lexdata.index('"', start + 1)            while t.lexer.lexdata[i - 1] == "\\":                i = t.lexer.lexdata.index('"', i + 1)            t.value = t.lexer.lexdata[start : i + 1]            t.lexer.lexpos = i + 1            return t        def t_html(self, t):            r"<"            start = t.lexer.lexpos - 1            level = 1            i = start + 1            while level > 0:                c = t.lexer.lexdata[i]                if c == "<":                    level += 1                if c == ">":                    level -= 1                i += 1            t.value = t.lexer.lexdata[start:i]            t.lexer.lexpos = i            return t        def t_ANY_error(self, t):            print("Illegal character '%s'" % t.value[0])            t.lexer.skip(1)        def build(self, **kargs):            if _has_ply:                self._lexer = lex.lex(module=self, **kargs)        def test(self, data):            self._lexer.input(data)            while 1:                tok = self._lexer.token()                if not tok:                    break                print(tok)    # Classes for the AST returned by Parser:    class graph(object):        def __init__(self, name, data, strict=None, direct=None):            self.name = name            self.strict = strict            self.direct = direct            self.nodes = {}            self.edges = []            self.subgraphs = []            self.attr = {}            eattr = {}            nattr = {}            for x in data:  # data is a statements (list of stmt)                # x is a stmt, ie one of:                # a graph object (subgraph)                # a attr object (graph/node/edge attributes)                # a dict object (ID=ID)                # a node object                # a list of edges                if isinstance(x, Dot.graph):                    self.subgraphs.append(x)                elif isinstance(x, Dot.attr):                    if x.type == "graph":                        self.attr.update(x.D)                    elif x.type == "node":                        nattr.update(x.D)                    elif x.type == "edge":                        eattr.update(x.D)                    else:                        raise TypeError("invalid attribute type")                elif isinstance(x, dict):                    self.attr.update(x)                elif isinstance(x, Dot.node):                    x.attr.update(nattr)                    self.nodes[x.name] = x                else:                    for e in x:                        e.attr.update(eattr)                        self.edges.append(e)                        for n in [e.n1, e.n2]:                            if isinstance(n, Dot.graph):                                continue                            if n.name not in self.nodes:                                n.attr.update(nattr)                                self.nodes[n.name] = n        def __repr__(self):            u = "<%s instance at %x, name: %s, %d nodes>" % (                self.__class__,                id(self),                self.name,                len(self.nodes),            )            return u    class attr(object):        def __init__(self, type, D):            self.type = type            self.D = D    class edge(object):        def __init__(self, n1, n2):            self.n1 = n1            self.n2 = n2            self.attr = {}    class node(object):        def __init__(self, name, port=None):            self.name = name            self.port = port            self.attr = {}    class Parser(object):        def __init__(self):            self.tokens = Dot._tokens        def __makelist(self, p):            N = len(p)            if N > 2:                L = p[1]                L.append(p[N - 1])            else:                L = []                if N > 1:                    L.append(p[N - 1])            p[0] = L        def p_Data(self, p):            """Data : Data Graph                    | Graph"""            self.__makelist(p)        def p_Graph_strict(self, p):            """Graph : strict graph name Block"""            p[0] = Dot.graph(name=p[3], data=p[4], strict=1, direct=0)            # print 'Dot.Parser: graph object %s created'%p[0].name        def p_Graph_graph(self, p):            """Graph : graph name Block"""            p[0] = Dot.graph(name=p[2], data=p[3], strict=0, direct=0)        def p_Graph_strict_digraph(self, p):            """Graph : strict digraph name Block"""            p[0] = Dot.graph(name=p[3], data=p[4], strict=1, direct=1)        def p_Graph_digraph(self, p):            """Graph : digraph name Block"""            p[0] = Dot.graph(name=p[2], data=p[3], strict=0, direct=1)        def p_ID(self, p):            """ID : regulars                  | string                  | html """            p[0] = p[1]        def p_name(self, p):            """name : ID                    | """            if len(p) == 1:                p[0] = ""            else:                p[0] = p[1]        def p_Block(self, p):            """Block : '{' statements '}' """            p[0] = p[2]        def p_statements(self, p):            """statements : statements stmt                          | stmt                          | """            self.__makelist(p)        def p_stmt(self, p):            """stmt : stmt ';' """            p[0] = p[1]        def p_comment(self, p):            """stmt : comment"""            pass  # comment tokens are not outputed by lexer anyway        def p_stmt_sub(self, p):            """stmt : sub"""            p[0] = p[1]        def p_subgraph(self, p):            """sub : subgraph name Block                   | Block """            N = len(p)            if N > 2:                ID = p[2]            else:                ID = ""            p[0] = Dot.graph(name=ID, data=p[N - 1], strict=0, direct=0)        def p_stmt_assign(self, p):            """stmt : affect """            p[0] = p[1]        def p_affect(self, p):            """affect : ID '=' ID """            p[0] = dict([(p[1], p[3])])        def p_stmt_lists(self, p):            """stmt : graph attrs                    | node  attrs                    | edge  attrs """            p[0] = Dot.attr(p[1], p[2])        def p_attrs(self, p):            """attrs : attrs attrl                     | attrl """            if len(p) == 3:                p[1].update(p[2])            p[0] = p[1]        def p_attrl(self, p):            """attrl : '[' alist ']' """            L = {}            for a in p[2]:                if isinstance(a, dict):                    L.update(a)                else:                    L[a] = "true"            p[0] = L        def p_alist_comma(self, p):            """alist : alist ',' alist """            p[1].extend(p[3])            p[0] = p[1]        def p_alist_affect(self, p):            """alist : alist affect                     | alist ID                     | affect                     | ID                     | """            self.__makelist(p)        def p_stmt_E_attrs(self, p):            """stmt : E attrs """            for e in p[1]:                e.attr = p[2]            p[0] = p[1]        def p_stmt_N_attrs(self, p):            """stmt : N attrs """            p[1].attr = p[2]            p[0] = p[1]        def p_stmt_EN(self, p):            """stmt : E                    | N """            p[0] = p[1]        def p_E(self, p):            """E : E   link                 | elt link """            try:                L = p[1]                L.append(Dot.edge(L[-1].n2, p[2]))            except Exception:                L = []                L.append(Dot.edge(p[1], p[2]))            p[0] = L        def p_elt(self, p):            """elt : N                   | sub """            p[0] = p[1]        def p_link(self, p):            """link : '-' '>' elt                    | '-' '-' elt """            p[0] = p[3]        def p_N_port(self, p):            """N : ID port """            p[0] = Dot.node(p[1], port=p[2])        def p_N(self, p):            """N : ID """            p[0] = Dot.node(p[1])        def p_port(self, p):            """port : ':' ID """            p[0] = p[2]        def p_port2(self, p):            """port : port port"""            assert p[2] in ["n", "ne", "e", "se", "s", "sw", "w", "nw", "c", "_"]            p[0] = "%s:%s" % (p[1], p[2])        def p_error(self, p):            print("Syntax Error: %s" % (p,))            self._parser.restart()        def build(self, **kargs):            opt = dict(debug=0, write_tables=0)            opt.update(**kargs)            if _has_ply:                self._parser = yacc.yacc(module=self, **opt)    def __init__(self, **kargs):        self.lexer = Dot.Lexer()        self.parser = Dot.Parser()        if not _has_ply:            print("warning: Dot parser not supported (install python-ply)")    def parse(self, data):        try:            self.parser._parser.restart()        except AttributeError:            self.lexer.build(reflags=lex.re.UNICODE)            self.parser.build()        except Exception:            print("unexpected error")            return None        try:            s = data.decode("utf-8")        except UnicodeDecodeError:            s = data        L = self.parser._parser.parse(s, lexer=self.lexer._lexer)        return L    def read(self, filename):        f = open(            filename, "rb"        )  # As it'll try to decode later on with utf-8, read it binary at this point.        return self.parse(f.read())
 |