# dot.py 12 KB


# This code is part of Grandalf
# Copyright (C) 2008 Axel Tillequin (bdcht3@gmail.com) and others
# published under GPLv2 license or EPLv1 license
# Contributor(s): Axel Tillequin
  5. try:
  6. import ply.lex as lex
  7. import ply.yacc as yacc
  8. _has_ply = True
  9. except ImportError:
  10. _has_ply = False
  11. __all__ = ["_has_ply", "Dot"]
# ------------------------------------------------------------------------------
# LALR(1) parser for Graphviz dot file format.
  14. class Dot:
  15. _reserved = (
  16. "strict",
  17. "graph",
  18. "digraph",
  19. "subgraph",
  20. "node",
  21. "edge",
  22. )
  23. _tokens = ("regulars", "string", "html", "comment",) + _reserved
  24. _literals = [",", ";", "-", ">", "=", ":", "[", "]", "{", "}"]
  25. class Lexer(object):
  26. def __init__(self):
  27. self.whitespace = "\0\t\n\f\r "
  28. self.reserved = Dot._reserved
  29. self.tokens = Dot._tokens
  30. self.literals = Dot._literals
  31. self.t_ignore = self.whitespace
  32. def t_regulars(self, t):
  33. r"[-]?[\w.]+"
  34. v = t.value.lower()
  35. if v in self.reserved:
  36. t.type = v
  37. return t
  38. # check numeric string
  39. if v[0].isdigit() or v[0] in ["-", "."]:
  40. try:
  41. float(v)
  42. except ValueError:
  43. print("invalid numeral token: %s" % v)
  44. raise SyntaxError
  45. elif "." in v: # forbidden in non-numeric
  46. raise SyntaxError
  47. return t
  48. def t_comment_online(self, t):
  49. r"(//(.*)\n)|\\\n"
  50. pass
  51. def t_comment_macro(self, t):
  52. r"(\#(.*)\n)"
  53. pass
  54. def t_comment_multline(self, t):
  55. r"(/\*)"
  56. start = t.lexer.lexpos
  57. t.lexer.lexpos = t.lexer.lexdata.index("*/", start) + 2
  58. def t_string(self, t):
  59. r'"'
  60. start = t.lexer.lexpos - 1
  61. i = t.lexer.lexdata.index('"', start + 1)
  62. while t.lexer.lexdata[i - 1] == "\\":
  63. i = t.lexer.lexdata.index('"', i + 1)
  64. t.value = t.lexer.lexdata[start : i + 1]
  65. t.lexer.lexpos = i + 1
  66. return t
  67. def t_html(self, t):
  68. r"<"
  69. start = t.lexer.lexpos - 1
  70. level = 1
  71. i = start + 1
  72. while level > 0:
  73. c = t.lexer.lexdata[i]
  74. if c == "<":
  75. level += 1
  76. if c == ">":
  77. level -= 1
  78. i += 1
  79. t.value = t.lexer.lexdata[start:i]
  80. t.lexer.lexpos = i
  81. return t
  82. def t_ANY_error(self, t):
  83. print("Illegal character '%s'" % t.value[0])
  84. t.lexer.skip(1)
  85. def build(self, **kargs):
  86. if _has_ply:
  87. self._lexer = lex.lex(module=self, **kargs)
  88. def test(self, data):
  89. self._lexer.input(data)
  90. while 1:
  91. tok = self._lexer.token()
  92. if not tok:
  93. break
  94. print(tok)
  95. # Classes for the AST returned by Parser:
  96. class graph(object):
  97. def __init__(self, name, data, strict=None, direct=None):
  98. self.name = name
  99. self.strict = strict
  100. self.direct = direct
  101. self.nodes = {}
  102. self.edges = []
  103. self.subgraphs = []
  104. self.attr = {}
  105. eattr = {}
  106. nattr = {}
  107. for x in data: # data is a statements (list of stmt)
  108. # x is a stmt, ie one of:
  109. # a graph object (subgraph)
  110. # a attr object (graph/node/edge attributes)
  111. # a dict object (ID=ID)
  112. # a node object
  113. # a list of edges
  114. if isinstance(x, Dot.graph):
  115. self.subgraphs.append(x)
  116. elif isinstance(x, Dot.attr):
  117. if x.type == "graph":
  118. self.attr.update(x.D)
  119. elif x.type == "node":
  120. nattr.update(x.D)
  121. elif x.type == "edge":
  122. eattr.update(x.D)
  123. else:
  124. raise TypeError("invalid attribute type")
  125. elif isinstance(x, dict):
  126. self.attr.update(x)
  127. elif isinstance(x, Dot.node):
  128. x.attr.update(nattr)
  129. self.nodes[x.name] = x
  130. else:
  131. for e in x:
  132. e.attr.update(eattr)
  133. self.edges.append(e)
  134. for n in [e.n1, e.n2]:
  135. if isinstance(n, Dot.graph):
  136. continue
  137. if n.name not in self.nodes:
  138. n.attr.update(nattr)
  139. self.nodes[n.name] = n
  140. def __repr__(self):
  141. u = "<%s instance at %x, name: %s, %d nodes>" % (
  142. self.__class__,
  143. id(self),
  144. self.name,
  145. len(self.nodes),
  146. )
  147. return u
  148. class attr(object):
  149. def __init__(self, type, D):
  150. self.type = type
  151. self.D = D
  152. class edge(object):
  153. def __init__(self, n1, n2):
  154. self.n1 = n1
  155. self.n2 = n2
  156. self.attr = {}
  157. class node(object):
  158. def __init__(self, name, port=None):
  159. self.name = name
  160. self.port = port
  161. self.attr = {}
  162. class Parser(object):
  163. def __init__(self):
  164. self.tokens = Dot._tokens
  165. def __makelist(self, p):
  166. N = len(p)
  167. if N > 2:
  168. L = p[1]
  169. L.append(p[N - 1])
  170. else:
  171. L = []
  172. if N > 1:
  173. L.append(p[N - 1])
  174. p[0] = L
  175. def p_Data(self, p):
  176. """Data : Data Graph
  177. | Graph"""
  178. self.__makelist(p)
  179. def p_Graph_strict(self, p):
  180. """Graph : strict graph name Block"""
  181. p[0] = Dot.graph(name=p[3], data=p[4], strict=1, direct=0)
  182. # print 'Dot.Parser: graph object %s created'%p[0].name
  183. def p_Graph_graph(self, p):
  184. """Graph : graph name Block"""
  185. p[0] = Dot.graph(name=p[2], data=p[3], strict=0, direct=0)
  186. def p_Graph_strict_digraph(self, p):
  187. """Graph : strict digraph name Block"""
  188. p[0] = Dot.graph(name=p[3], data=p[4], strict=1, direct=1)
  189. def p_Graph_digraph(self, p):
  190. """Graph : digraph name Block"""
  191. p[0] = Dot.graph(name=p[2], data=p[3], strict=0, direct=1)
  192. def p_ID(self, p):
  193. """ID : regulars
  194. | string
  195. | html """
  196. p[0] = p[1]
  197. def p_name(self, p):
  198. """name : ID
  199. | """
  200. if len(p) == 1:
  201. p[0] = ""
  202. else:
  203. p[0] = p[1]
  204. def p_Block(self, p):
  205. """Block : '{' statements '}' """
  206. p[0] = p[2]
  207. def p_statements(self, p):
  208. """statements : statements stmt
  209. | stmt
  210. | """
  211. self.__makelist(p)
  212. def p_stmt(self, p):
  213. """stmt : stmt ';' """
  214. p[0] = p[1]
  215. def p_comment(self, p):
  216. """stmt : comment"""
  217. pass # comment tokens are not outputed by lexer anyway
  218. def p_stmt_sub(self, p):
  219. """stmt : sub"""
  220. p[0] = p[1]
  221. def p_subgraph(self, p):
  222. """sub : subgraph name Block
  223. | Block """
  224. N = len(p)
  225. if N > 2:
  226. ID = p[2]
  227. else:
  228. ID = ""
  229. p[0] = Dot.graph(name=ID, data=p[N - 1], strict=0, direct=0)
  230. def p_stmt_assign(self, p):
  231. """stmt : affect """
  232. p[0] = p[1]
  233. def p_affect(self, p):
  234. """affect : ID '=' ID """
  235. p[0] = dict([(p[1], p[3])])
  236. def p_stmt_lists(self, p):
  237. """stmt : graph attrs
  238. | node attrs
  239. | edge attrs """
  240. p[0] = Dot.attr(p[1], p[2])
  241. def p_attrs(self, p):
  242. """attrs : attrs attrl
  243. | attrl """
  244. if len(p) == 3:
  245. p[1].update(p[2])
  246. p[0] = p[1]
  247. def p_attrl(self, p):
  248. """attrl : '[' alist ']' """
  249. L = {}
  250. for a in p[2]:
  251. if isinstance(a, dict):
  252. L.update(a)
  253. else:
  254. L[a] = "true"
  255. p[0] = L
  256. def p_alist_comma(self, p):
  257. """alist : alist ',' alist """
  258. p[1].extend(p[3])
  259. p[0] = p[1]
  260. def p_alist_affect(self, p):
  261. """alist : alist affect
  262. | alist ID
  263. | affect
  264. | ID
  265. | """
  266. self.__makelist(p)
  267. def p_stmt_E_attrs(self, p):
  268. """stmt : E attrs """
  269. for e in p[1]:
  270. e.attr = p[2]
  271. p[0] = p[1]
  272. def p_stmt_N_attrs(self, p):
  273. """stmt : N attrs """
  274. p[1].attr = p[2]
  275. p[0] = p[1]
  276. def p_stmt_EN(self, p):
  277. """stmt : E
  278. | N """
  279. p[0] = p[1]
  280. def p_E(self, p):
  281. """E : E link
  282. | elt link """
  283. try:
  284. L = p[1]
  285. L.append(Dot.edge(L[-1].n2, p[2]))
  286. except Exception:
  287. L = []
  288. L.append(Dot.edge(p[1], p[2]))
  289. p[0] = L
  290. def p_elt(self, p):
  291. """elt : N
  292. | sub """
  293. p[0] = p[1]
  294. def p_link(self, p):
  295. """link : '-' '>' elt
  296. | '-' '-' elt """
  297. p[0] = p[3]
  298. def p_N_port(self, p):
  299. """N : ID port """
  300. p[0] = Dot.node(p[1], port=p[2])
  301. def p_N(self, p):
  302. """N : ID """
  303. p[0] = Dot.node(p[1])
  304. def p_port(self, p):
  305. """port : ':' ID """
  306. p[0] = p[2]
  307. def p_port2(self, p):
  308. """port : port port"""
  309. assert p[2] in ["n", "ne", "e", "se", "s", "sw", "w", "nw", "c", "_"]
  310. p[0] = "%s:%s" % (p[1], p[2])
  311. def p_error(self, p):
  312. print("Syntax Error: %s" % (p,))
  313. self._parser.restart()
  314. def build(self, **kargs):
  315. opt = dict(debug=0, write_tables=0)
  316. opt.update(**kargs)
  317. if _has_ply:
  318. self._parser = yacc.yacc(module=self, **opt)
  319. def __init__(self, **kargs):
  320. self.lexer = Dot.Lexer()
  321. self.parser = Dot.Parser()
  322. if not _has_ply:
  323. print("warning: Dot parser not supported (install python-ply)")
  324. def parse(self, data):
  325. try:
  326. self.parser._parser.restart()
  327. except AttributeError:
  328. self.lexer.build(reflags=lex.re.UNICODE)
  329. self.parser.build()
  330. except Exception:
  331. print("unexpected error")
  332. return None
  333. try:
  334. s = data.decode("utf-8")
  335. except UnicodeDecodeError:
  336. s = data
  337. L = self.parser._parser.parse(s, lexer=self.lexer._lexer)
  338. return L
  339. def read(self, filename):
  340. f = open(
  341. filename, "rb"
  342. ) # As it'll try to decode later on with utf-8, read it binary at this point.
  343. return self.parse(f.read())