| 1 | # Author: David Goodger |
|---|
| 2 | # Contact: goodger@users.sourceforge.net |
|---|
| 3 | # Revision: $Revision: 4242 $ |
|---|
| 4 | # Date: $Date: 2006-01-06 00:28:53 +0100 (Fri, 06 Jan 2006) $ |
|---|
| 5 | # Copyright: This module has been placed in the public domain. |
|---|
| 6 | |
|---|
| 7 | """ |
|---|
| 8 | Parser for Python modules. Requires Python 2.2 or higher. |
|---|
| 9 | |
|---|
| 10 | The `parse_module()` function takes a module's text and file name, |
|---|
| 11 | runs it through the module parser (using compiler.py and tokenize.py) |
|---|
| 12 | and produces a parse tree of the source code, using the nodes as found |
|---|
| 13 | in pynodes.py. For example, given this module (x.py):: |
|---|
| 14 | |
|---|
| 15 | # comment |
|---|
| 16 | |
|---|
| 17 | '''Docstring''' |
|---|
| 18 | |
|---|
| 19 | '''Additional docstring''' |
|---|
| 20 | |
|---|
| 21 | __docformat__ = 'reStructuredText' |
|---|
| 22 | |
|---|
| 23 | a = 1 |
|---|
| 24 | '''Attribute docstring''' |
|---|
| 25 | |
|---|
| 26 | class C(Super): |
|---|
| 27 | |
|---|
| 28 | '''C's docstring''' |
|---|
| 29 | |
|---|
| 30 | class_attribute = 1 |
|---|
| 31 | '''class_attribute's docstring''' |
|---|
| 32 | |
|---|
| 33 | def __init__(self, text=None): |
|---|
| 34 | '''__init__'s docstring''' |
|---|
| 35 | |
|---|
| 36 | self.instance_attribute = (text * 7 |
|---|
| 37 | + ' whaddyaknow') |
|---|
| 38 | '''instance_attribute's docstring''' |
|---|
| 39 | |
|---|
| 40 | |
|---|
| 41 | def f(x, # parameter x |
|---|
| 42 | y=a*5, # parameter y |
|---|
| 43 | *args): # parameter args |
|---|
| 44 | '''f's docstring''' |
|---|
| 45 | return [x + item for item in args] |
|---|
| 46 | |
|---|
| 47 | f.function_attribute = 1 |
|---|
| 48 | '''f.function_attribute's docstring''' |
|---|
| 49 | |
|---|
| 50 | The module parser will produce this module documentation tree:: |
|---|
| 51 | |
|---|
| 52 | <module_section filename="test data"> |
|---|
| 53 | <docstring> |
|---|
| 54 | Docstring |
|---|
| 55 | <docstring lineno="5"> |
|---|
| 56 | Additional docstring |
|---|
| 57 | <attribute lineno="7"> |
|---|
| 58 | <object_name> |
|---|
| 59 | __docformat__ |
|---|
| 60 | <expression_value lineno="7"> |
|---|
| 61 | 'reStructuredText' |
|---|
| 62 | <attribute lineno="9"> |
|---|
| 63 | <object_name> |
|---|
| 64 | a |
|---|
| 65 | <expression_value lineno="9"> |
|---|
| 66 | 1 |
|---|
| 67 | <docstring lineno="10"> |
|---|
| 68 | Attribute docstring |
|---|
| 69 | <class_section lineno="12"> |
|---|
| 70 | <object_name> |
|---|
| 71 | C |
|---|
| 72 | <class_base> |
|---|
| 73 | Super |
|---|
| 74 | <docstring lineno="12"> |
|---|
| 75 | C's docstring |
|---|
| 76 | <attribute lineno="16"> |
|---|
| 77 | <object_name> |
|---|
| 78 | class_attribute |
|---|
| 79 | <expression_value lineno="16"> |
|---|
| 80 | 1 |
|---|
| 81 | <docstring lineno="17"> |
|---|
| 82 | class_attribute's docstring |
|---|
| 83 | <method_section lineno="19"> |
|---|
| 84 | <object_name> |
|---|
| 85 | __init__ |
|---|
| 86 | <docstring lineno="19"> |
|---|
| 87 | __init__'s docstring |
|---|
| 88 | <parameter_list lineno="19"> |
|---|
| 89 | <parameter lineno="19"> |
|---|
| 90 | <object_name> |
|---|
| 91 | self |
|---|
| 92 | <parameter lineno="19"> |
|---|
| 93 | <object_name> |
|---|
| 94 | text |
|---|
| 95 | <parameter_default lineno="19"> |
|---|
| 96 | None |
|---|
| 97 | <attribute lineno="22"> |
|---|
| 98 | <object_name> |
|---|
| 99 | self.instance_attribute |
|---|
| 100 | <expression_value lineno="22"> |
|---|
| 101 | (text * 7 + ' whaddyaknow') |
|---|
| 102 | <docstring lineno="24"> |
|---|
| 103 | instance_attribute's docstring |
|---|
| 104 | <function_section lineno="27"> |
|---|
| 105 | <object_name> |
|---|
| 106 | f |
|---|
| 107 | <docstring lineno="27"> |
|---|
| 108 | f's docstring |
|---|
| 109 | <parameter_list lineno="27"> |
|---|
| 110 | <parameter lineno="27"> |
|---|
| 111 | <object_name> |
|---|
| 112 | x |
|---|
| 113 | <comment> |
|---|
| 114 | # parameter x |
|---|
| 115 | <parameter lineno="27"> |
|---|
| 116 | <object_name> |
|---|
| 117 | y |
|---|
| 118 | <parameter_default lineno="27"> |
|---|
| 119 | a * 5 |
|---|
| 120 | <comment> |
|---|
| 121 | # parameter y |
|---|
| 122 | <parameter excess_positional="1" lineno="27"> |
|---|
| 123 | <object_name> |
|---|
| 124 | args |
|---|
| 125 | <comment> |
|---|
| 126 | # parameter args |
|---|
| 127 | <attribute lineno="33"> |
|---|
| 128 | <object_name> |
|---|
| 129 | f.function_attribute |
|---|
| 130 | <expression_value lineno="33"> |
|---|
| 131 | 1 |
|---|
| 132 | <docstring lineno="34"> |
|---|
| 133 | f.function_attribute's docstring |
|---|
| 134 | |
|---|
| 135 | (Comments are not implemented yet.) |
|---|
| 136 | |
|---|
| 137 | compiler.parse() provides most of what's needed for this doctree, and |
|---|
| 138 | "tokenize" can be used to get the rest. We can determine the line |
|---|
| 139 | number from the compiler.parse() AST, and the TokenParser.rhs(lineno) |
|---|
| 140 | method provides the rest. |
|---|
| 141 | |
|---|
| 142 | The Docutils Python reader component will transform this module doctree into a |
|---|
| 143 | Python-specific Docutils doctree, and then a "stylist transform" will |
|---|
| 144 | further transform it into a generic doctree. Namespaces will have to be |
|---|
| 145 | compiled for each of the scopes, but I'm not certain at what stage of |
|---|
| 146 | processing. |
|---|
| 147 | |
|---|
| 148 | It's very important to keep all docstring processing out of this, so that it's |
|---|
| 149 | completely generic and not tool-specific. |
|---|
| 150 | |
|---|
| 151 | :: |
|---|
| 152 | |
|---|
| 153 | > Why perform all of those transformations? Why not go from the AST to a |
|---|
| 154 | > generic doctree? Or, even from the AST to the final output? |
|---|
| 155 | |
|---|
| 156 | I want the docutils.readers.python.moduleparser.parse_module() function to |
|---|
| 157 | produce a standard documentation-oriented tree that can be used by any tool. |
|---|
| 158 | We can develop it together without having to compromise on the rest of our |
|---|
| 159 | design (i.e., HappyDoc doesn't have to be made to work like Docutils, and |
|---|
| 160 | vice-versa). It would be a higher-level version of what compiler.py provides. |
|---|
| 161 | |
|---|
| 162 | The Python reader component transforms this generic AST into a Python-specific |
|---|
| 163 | doctree (it knows about modules, classes, functions, etc.), but this is |
|---|
| 164 | specific to Docutils and cannot be used by HappyDoc or others. The stylist |
|---|
| 165 | transform does the final layout, converting Python-specific structures |
|---|
| 166 | ("class" sections, etc.) into a generic doctree using primitives (tables, |
|---|
| 167 | sections, lists, etc.). This generic doctree does *not* know about Python |
|---|
| 168 | structures any more. The advantage is that this doctree can be handed off to |
|---|
| 169 | any of the output writers to create any output format we like. |
|---|
| 170 | |
|---|
| 171 | The latter two transforms are separate because I want to be able to have |
|---|
| 172 | multiple independent layout styles (multiple runtime-selectable "stylist |
|---|
| 173 | transforms"). Each of the existing tools (HappyDoc, pydoc, epydoc, Crystal, |
|---|
| 174 | etc.) has its own fixed format. I personally don't like the tables-based |
|---|
| 175 | format produced by these tools, and I'd like to be able to customize the |
|---|
| 176 | format easily. That's the goal of stylist transforms, which are independent |
|---|
| 177 | from the Reader component itself. One stylist transform could produce |
|---|
| 178 | HappyDoc-like output, another could produce output similar to module docs in |
|---|
| 179 | the Python library reference manual, and so on. |
|---|
| 180 | |
|---|
| 181 | It's for exactly this reason:: |
|---|
| 182 | |
|---|
| 183 | >> It's very important to keep all docstring processing out of this, so that |
|---|
| 184 | >> it's a completely generic and not tool-specific. |
|---|
| 185 | |
|---|
| 186 | ... but it goes past docstring processing. It's also important to keep style |
|---|
| 187 | decisions and tool-specific data transforms out of this module parser. |
|---|
| 188 | |
|---|
| 189 | |
|---|
| 190 | Issues |
|---|
| 191 | ====== |
|---|
| 192 | |
|---|
| 193 | * At what point should namespaces be computed? Should they be part of the |
|---|
| 194 | basic AST produced by the ASTVisitor walk, or generated by another tree |
|---|
| 195 | traversal? |
|---|
| 196 | |
|---|
| 197 | * At what point should a distinction be made between local variables & |
|---|
| 198 | instance attributes in __init__ methods? |
|---|
| 199 | |
|---|
| 200 | * Docstrings are getting their lineno from their parents. Should the |
|---|
| 201 | TokenParser find the real line numbers? |
|---|
| 202 | |
|---|
| 203 | * Comments: include them? How and when? Only full-line comments, or |
|---|
| 204 | parameter comments too? (See function "f" above for an example.) |
|---|
| 205 | |
|---|
| 206 | * Module could use more docstrings & refactoring in places. |
|---|
| 207 | |
|---|
| 208 | """ |
|---|
| 209 | |
|---|
| 210 | __docformat__ = 'reStructuredText' |
|---|
| 211 | |
|---|
| 212 | import sys |
|---|
| 213 | import compiler |
|---|
| 214 | import compiler.ast |
|---|
| 215 | import tokenize |
|---|
| 216 | import token |
|---|
| 217 | from compiler.consts import OP_ASSIGN |
|---|
| 218 | from compiler.visitor import ASTVisitor |
|---|
| 219 | from types import StringType, UnicodeType, TupleType |
|---|
| 220 | from docutils.readers.python import pynodes |
|---|
| 221 | from docutils.nodes import Text |
|---|
| 222 | |
|---|
| 223 | |
|---|
def parse_module(module_text, filename):
    """
    Build and return the module documentation tree for `module_text`.

    The text is parsed into an AST with ``compiler.parse()``; a
    `ModuleVisitor`, backed by a `TokenParser` over the same text, then
    walks that AST.  `filename` is handed to the visitor, which records it
    on the resulting tree.
    """
    tree = compiler.parse(module_text)
    module_visitor = ModuleVisitor(filename, TokenParser(module_text))
    compiler.walk(tree, module_visitor, walker=module_visitor)
    return module_visitor.module
|---|
| 231 | |
|---|
class BaseVisitor(ASTVisitor):

    """
    Shared state and fallback handlers for the visitors in this module.

    Instance attributes set by the constructor:

    - `token_parser`: the token parser object passed in.
    - `context`: a list, initially empty, used by subclasses as a stack.
    - `documentable`: the node that may receive a following docstring,
      or None when there is no such node.
    """

    def __init__(self, token_parser):
        ASTVisitor.__init__(self)
        self.documentable = None
        self.context = []
        self.token_parser = token_parser

    def default(self, node, *args):
        """
        Override of `ASTVisitor.default`: ignore `node` and clear
        `documentable`.

        NOTE(review): presumably this is what keeps unhandled node types
        from being descended into -- confirm against `compiler.visitor`.
        """
        self.documentable = None

    def default_visit(self, node, *args):
        """Run the stock `ASTVisitor.default` on `node`, forwarding `args`."""
        ASTVisitor.default(self, node, *args)
|---|
| 248 | |
|---|
| 249 | |
|---|
class DocstringVisitor(BaseVisitor):

    """
    Visitor that turns string constants into docstring nodes and appends
    them to the current `documentable` node.
    """

    def visitDiscard(self, node):
        """Visit the discarded expression, but only while a node is documentable."""
        if not self.documentable:
            return
        self.visit(node.expr)

    def visitConst(self, node):
        """Append a string constant as a docstring; any other constant clears `documentable`."""
        target = self.documentable
        if not target:
            return
        if type(node.value) in (StringType, UnicodeType):
            target.append(make_docstring(node.value, node.lineno))
        else:
            # A non-string constant cannot be a docstring.
            self.documentable = None

    def visitStmt(self, node):
        """Visit the statement list via `default_visit`."""
        self.default_visit(node)
|---|
| 265 | |
|---|
| 266 | |
|---|
class AssignmentVisitor(DocstringVisitor):

    """Visitor that records assignment targets in the current context node."""

    def visitAssign(self, node):
        """
        Collect the targets of `node` with an `AttributeVisitor` and add
        them to ``self.context[-1]``.

        When exactly one attribute was collected it becomes `documentable`;
        otherwise `documentable` is cleared.
        """
        attribute_visitor = AttributeVisitor(self.token_parser)
        compiler.walk(node, attribute_visitor, walker=attribute_visitor)
        found = attribute_visitor.attributes
        if found:
            self.context[-1].extend(found)
        self.documentable = None
        if len(found) == 1:
            self.documentable = found[0]
|---|
| 278 | |
|---|
| 279 | |
|---|
class ModuleVisitor(AssignmentVisitor):

    """
    Top-level visitor: builds a `pynodes.module_section` tree in `module`.

    `module` is None until `visitModule` has run.
    """

    def __init__(self, filename, token_parser):
        AssignmentVisitor.__init__(self, token_parser)
        self.filename = filename
        self.module = None

    def visitModule(self, node):
        """Create the module section, attach its docstring, and visit the body."""
        self.module = module = pynodes.module_section()
        module['filename'] = self.filename
        append_docstring(module, node.doc, node.lineno)
        self.context.append(module)
        # Additional string constants right after the module docstring are
        # attached to the module itself.
        self.documentable = module
        self.visit(node.node)
        self.context.pop()

    def visitImport(self, node):
        """Append an import group for an ``import ...`` statement."""
        # Uses .append() like visitFrom does; for a single node this is
        # what the former ``+=`` did as well.
        self.context[-1].append(
            make_import_group(names=node.names, lineno=node.lineno))
        self.documentable = None

    def visitFrom(self, node):
        """Append an import group for a ``from ... import ...`` statement."""
        self.context[-1].append(
            make_import_group(names=node.names, from_name=node.modname,
                              lineno=node.lineno))
        self.documentable = None

    def visitFunction(self, node):
        """Walk the function with a `FunctionVisitor` and append its section."""
        visitor = FunctionVisitor(self.token_parser,
                                  function_class=pynodes.function_section)
        compiler.walk(node, visitor, walker=visitor)
        self.context[-1].append(visitor.function)

    def visitClass(self, node):
        """Walk the class with a `ClassVisitor` and append its section."""
        visitor = ClassVisitor(self.token_parser)
        compiler.walk(node, visitor, walker=visitor)
        self.context[-1].append(visitor.klass)
|---|
| 317 | |
|---|
| 318 | |
|---|
class AttributeVisitor(BaseVisitor):

    """
    Collects the targets of one assignment as attribute nodes in
    `attributes`.
    """

    def __init__(self, token_parser):
        BaseVisitor.__init__(self, token_parser)
        self.attributes = pynodes.class_attribute_section()

    def visitAssign(self, node):
        """Dispatch on the assignment targets, then attach the right-hand side text."""
        # Only the targets are dispatched; the expression itself is taken
        # as text from the token parser instead of from the AST.
        for target in node.nodes:
            self.dispatch(target)
        rhs_text = self.token_parser.rhs(node.lineno)
        value = pynodes.expression_value()
        value.append(Text(rhs_text))
        # The same expression node is appended to every collected attribute.
        for attribute in self.attributes:
            attribute.append(value)

    def visitAssName(self, node):
        """Record a plain name target."""
        self.attributes.append(
            make_attribute(node.name, lineno=node.lineno))

    def visitAssTuple(self, node):
        """Record a tuple target as one `attribute_tuple` node."""
        outer = self.attributes
        # Collect the tuple's members into a scratch list.
        self.attributes = []
        self.default_visit(node)
        group = pynodes.attribute_tuple()
        group.extend(self.attributes)
        # The tuple takes the line number of its first member.
        group['lineno'] = self.attributes[0]['lineno']
        outer.append(group)
        self.attributes = outer

    def visitAssAttr(self, node):
        """Start a dotted name with the assigned attribute's own name."""
        self.default_visit(node, node.attrname)

    def visitGetattr(self, node, suffix):
        """Prefix `suffix` with this attribute name and keep descending."""
        self.default_visit(node, '.'.join([node.attrname, suffix]))

    def visitName(self, node, suffix):
        """Finish a dotted name and record it as an attribute."""
        dotted_name = '.'.join([node.name, suffix])
        self.attributes.append(
            make_attribute(dotted_name, lineno=node.lineno))
|---|
| 359 | |
|---|
| 360 | |
|---|
class FunctionVisitor(DocstringVisitor):

    """
    Builds the section node for one function or method in `function`.

    `function_class` is the node class called to create that section.
    """

    # Becomes 1 once the outermost function definition has been entered.
    in_function = 0

    def __init__(self, token_parser, function_class):
        DocstringVisitor.__init__(self, token_parser)
        self.function_class = function_class

    def visitFunction(self, node):
        """Create the section for the outermost function; skip nested ones."""
        if self.in_function:
            # Don't bother with nested function definitions.
            self.documentable = None
            return
        self.in_function = 1
        section = make_function_like_section(
            node.name, node.lineno, node.doc, self.function_class)
        self.function = section
        self.context.append(section)
        self.documentable = section
        self.parse_parameter_list(node)
        self.visit(node.code)
        self.context.pop()

    def parse_parameter_list(self, node):
        """
        Append a `parameter_list` node describing `node`'s parameters to
        `self.function`; nothing is appended when there are no parameters.

        Default values are taken as text from
        ``self.token_parser.function_parameters()``.
        """
        names = list(node.argnames)
        # The excess-keyword and excess-positional names, when present, are
        # taken off the end of the argument names, keyword one first.
        extras = []
        if node.kwargs:
            extras.append(make_parameter(names.pop(), excess_keyword=1))
        if node.varargs:
            extras.append(make_parameter(names.pop(), excess_positional=1))
        defaults = list(node.defaults)
        # Left-pad with None so that names and defaults line up.
        padding = [None] * (len(names) - len(defaults))
        default_texts = self.token_parser.function_parameters(node.lineno)
        regular = []
        for argname, default in zip(names, padding + defaults):
            if type(argname) is TupleType:
                parameter = pynodes.parameter_tuple()
                for member in argname:
                    parameter.append(make_parameter(member))
                # Tuple parameters are looked up by their normalized text.
                argname = normalize_parameter_name(argname)
            else:
                parameter = make_parameter(argname)
            if default:
                default_node = pynodes.parameter_default()
                default_node.append(Text(default_texts[argname]))
                parameter.append(default_node)
            regular.append(parameter)
        if not (regular or extras):
            return
        # Restore source order: excess positional before excess keyword.
        extras.reverse()
        parameter_list = pynodes.parameter_list()
        parameter_list.extend(regular + extras)
        self.function.append(parameter_list)
|---|
| 421 | |
|---|
| 422 | |
|---|
class ClassVisitor(AssignmentVisitor):

    """
    Builds the section node for one class in `klass`, collecting the names
    of its base classes in `bases`.
    """

    # Becomes 1 once the outermost class definition has been entered.
    in_class = 0

    def __init__(self, token_parser):
        AssignmentVisitor.__init__(self, token_parser)
        self.bases = []

    def visitClass(self, node):
        """Create the section for the outermost class; skip nested ones."""
        if self.in_class:
            # Don't bother with nested class definitions.
            self.documentable = None
            return
        self.in_class = 1
        # Visiting the base expressions fills `self.bases`.
        for base in node.bases:
            self.visit(base)
        section = make_class_section(node.name, self.bases,
                                     doc=node.doc, lineno=node.lineno)
        self.klass = section
        self.context.append(section)
        self.documentable = section
        self.visit(node.code)
        self.context.pop()

    def visitGetattr(self, node, suffix=None):
        """Accumulate a dotted base name from right to left."""
        dotted = node.attrname
        if suffix:
            dotted = dotted + '.' + suffix
        self.default_visit(node, dotted)

    def visitName(self, node, suffix=None):
        """Record a (possibly dotted) base class name."""
        dotted = node.name
        if suffix:
            dotted = dotted + '.' + suffix
        self.bases.append(dotted)

    def visitFunction(self, node):
        """Walk a method with the appropriate visitor and append its section."""
        # ``__init__`` gets the visitor that also handles assignments.
        if node.name == '__init__':
            visitor_class = InitMethodVisitor
        else:
            visitor_class = FunctionVisitor
        method_visitor = visitor_class(self.token_parser,
                                       function_class=pynodes.method_section)
        compiler.walk(node, method_visitor, walker=method_visitor)
        self.context[-1].append(method_visitor.function)
|---|
| 473 | |
|---|
| 474 | |
|---|
class InitMethodVisitor(FunctionVisitor, AssignmentVisitor):

    """
    Visitor for ``__init__`` methods: a `FunctionVisitor` that also
    inherits the assignment handling of `AssignmentVisitor`.
    """
|---|
| 476 | |
|---|
| 477 | |
|---|
class TokenParser:

    """
    Forward-only scanner over the tokens of a module's source text.

    The current token is kept in `token` and unpacked into `type`,
    `string`, `start`, `end` and `line`.  The stream is only ever advanced;
    `goto_line()` never rewinds.
    """

    def __init__(self, text):
        # Two newlines are appended to the text before it is tokenized.
        self.text = text + '\n\n'
        self.lines = self.text.splitlines(1)
        line_iter = iter(self.lines)

        def readline():
            # Hand out one line per call, then '' to signal end of input.
            for line in line_iter:
                return line
            return ''

        self.generator = tokenize.generate_tokens(readline)
        # A generator's advance method is spelled `next` or `__next__`
        # depending on the Python version; look it up once.
        try:
            self._advance = self.generator.next
        except AttributeError:
            self._advance = self.generator.__next__
        self.next()

    def __iter__(self):
        return self

    def next(self):
        """Advance to the next token, unpack it into the instance, return it."""
        self.token = self._advance()
        self.type, self.string, self.start, self.end, self.line = self.token
        return self.token

    __next__ = next

    def goto_line(self, lineno):
        """
        Advance until the current token starts on line `lineno` or later,
        and return the current token.
        """
        while self.start[0] < lineno:
            self.next()
        # Previously this returned the `token` *module* by mistake.
        return self.token

    def rhs(self, lineno):
        """
        Return a whitespace-normalized expression string from the right-hand
        side of an assignment at line `lineno`.
        """
        self.goto_line(lineno)
        while self.string != '=':
            self.next()
        self.stack = None
        while self.type != token.NEWLINE and self.string != ';':
            if self.string == '=' and not self.stack:
                # An "=" outside any brackets starts the expression over,
                # so only the text after the last such "=" is kept.
                self.tokens = []
                self.stack = []
                self._type = None
                self._string = None
                self._backquote = 0
            else:
                self.note_token()
            self.next()
        # Step past the NEWLINE or ";" that ended the statement.
        self.next()
        text = ''.join(self.tokens)
        return text.strip()

    # Closing bracket -> matching opening bracket.
    closers = {')': '(', ']': '[', '}': '{'}
    # Opening brackets (used as a set).
    openers = {'(': 1, '[': 1, '{': 1}
    # Tokens that remove a space emitted just before them.
    del_ws_prefix = {'.': 1, '=': 1, ')': 1, ']': 1, '}': 1, ':': 1, ',': 1}
    # Tokens that are not followed by a space.
    no_ws_suffix = {'.': 1, '=': 1, '(': 1, '[': 1, '{': 1}

    def note_token(self):
        """
        Append the current token to `self.tokens` with normalized spacing,
        tracking open brackets and backquotes on `self.stack`.
        """
        if self.type == tokenize.NL:
            return
        del_ws = self.string in self.del_ws_prefix
        append_ws = self.string not in self.no_ws_suffix
        if self.string in self.openers:
            self.stack.append(self.string)
            # No space between a name or closing bracket and the bracket
            # that follows it, e.g. "f(" or ")[".
            if (self._type == token.NAME
                or self._string in self.closers):
                del_ws = 1
        elif self.string in self.closers:
            assert self.stack[-1] == self.closers[self.string]
            self.stack.pop()
        elif self.string == '`':
            if self._backquote:
                del_ws = 1
                assert self.stack[-1] == '`'
                self.stack.pop()
            else:
                append_ws = 0
                self.stack.append('`')
            self._backquote = not self._backquote
        if del_ws and self.tokens and self.tokens[-1] == ' ':
            del self.tokens[-1]
        self.tokens.append(self.string)
        # Remember this token for the spacing decision on the next one.
        self._type = self.type
        self._string = self.string
        if append_ws:
            self.tokens.append(' ')

    def function_parameters(self, lineno):
        """
        Return a dictionary mapping parameters to defaults
        (whitespace-normalized strings).

        Parameters without a default map to None.  Scanning starts at the
        first ``def`` at or after line `lineno`.
        """
        self.goto_line(lineno)
        while self.string != 'def':
            self.next()
        while self.string != '(':
            self.next()
        name = None
        default = None
        parameter_tuple = None
        self.tokens = []
        parameters = {}
        # The stack starts with the "(" that opens the parameter list, so
        # a stack depth of 1 means "directly inside the parameter list".
        self.stack = [self.string]
        self.next()
        while 1:
            if len(self.stack) == 1:
                if parameter_tuple:
                    # Just encountered ")": the collected tokens are the
                    # text of the tuple parameter, which serves as its name.
                    name = ''.join(self.tokens).strip()
                    self.tokens = []
                    parameter_tuple = None
                if self.string in (')', ','):
                    if name:
                        if self.tokens:
                            default_text = ''.join(self.tokens).strip()
                        else:
                            default_text = None
                        parameters[name] = default_text
                        self.tokens = []
                        name = None
                        default = None
                    if self.string == ')':
                        break
                elif self.type == token.NAME:
                    if name and default:
                        # A name inside a default expression.
                        self.note_token()
                    else:
                        assert name is None, (
                            'token=%r name=%r parameters=%r stack=%r'
                            % (self.token, name, parameters, self.stack))
                        name = self.string
                elif self.string == '=':
                    assert name is not None, 'token=%r' % (self.token,)
                    assert default is None, 'token=%r' % (self.token,)
                    assert self.tokens == [], 'token=%r' % (self.token,)
                    default = 1
                    self._type = None
                    self._string = None
                    self._backquote = 0
                elif name:
                    self.note_token()
                elif self.string == '(':
                    # Start of a tuple parameter.
                    parameter_tuple = 1
                    self._type = None
                    self._string = None
                    self._backquote = 0
                    self.note_token()
                else:                   # ignore these tokens:
                    assert (self.string in ('*', '**', '\n')
                            or self.type == tokenize.COMMENT), (
                        'token=%r' % (self.token,))
            else:
                self.note_token()
            self.next()
        return parameters
|---|
| 627 | |
|---|
| 628 | |
|---|
def make_docstring(doc, lineno):
    """
    Return a `pynodes.docstring` node containing `doc` as text.

    The ``lineno`` attribute is set only when `lineno` is true.
    """
    docstring = pynodes.docstring()
    if lineno:
        # Really, only module docstrings don't have a line
        # (@@: but maybe they should)
        docstring['lineno'] = lineno
    docstring.append(Text(doc))
    return docstring
|---|
| 637 | |
|---|
def append_docstring(node, doc, lineno):
    """Append a docstring node for `doc` to `node`; do nothing if `doc` is false."""
    if not doc:
        return
    node.append(make_docstring(doc, lineno))
|---|
| 641 | |
|---|
def make_class_section(name, bases, lineno, doc):
    """
    Return a `pynodes.class_section` node for class `name`.

    Its children are, in order: the object name, one `class_base` node per
    entry in `bases`, and (if `doc` is true) the docstring.
    """
    section = pynodes.class_section()
    section['lineno'] = lineno
    section.append(make_object_name(name))
    for base_name in bases:
        base_node = pynodes.class_base()
        base_node.append(make_object_name(base_name))
        section.append(base_node)
    append_docstring(section, doc, lineno)
    return section
|---|
| 652 | |
|---|
def make_object_name(name):
    """Return a `pynodes.object_name` node containing `name` as text."""
    name_node = pynodes.object_name()
    name_node.append(Text(name))
    return name_node
|---|
| 657 | |
|---|
def make_function_like_section(name, lineno, doc, function_class):
    """
    Return a new `function_class` node for the function or method `name`,
    holding its object name and (if `doc` is true) its docstring.
    """
    section = function_class()
    section['lineno'] = lineno
    section.append(make_object_name(name))
    append_docstring(section, doc, lineno)
    return section
|---|
| 664 | |
|---|
def make_import_group(names, lineno, from_name=None):
    """
    Return a `pynodes.import_group` node for one import statement.

    `names` is a sequence of ``(name, alias)`` pairs; a false alias means
    there is none.  When `from_name` is true, an `import_from` child holding
    it comes first.
    """
    group = pynodes.import_group()
    group['lineno'] = lineno
    if from_name:
        from_node = pynodes.import_from()
        from_node.append(Text(from_name))
        group.append(from_node)
    for imported, alias in names:
        name_node = pynodes.import_name()
        name_node.append(Text(imported))
        if alias:
            # The alias is nested inside the name it renames.
            alias_node = pynodes.import_alias()
            alias_node.append(Text(alias))
            name_node.append(alias_node)
        group.append(name_node)
    return group
|---|
| 681 | |
|---|
def make_class_attribute(name, lineno):
    """Return a `pynodes.class_attribute` node at `lineno` containing `name` as text."""
    attribute = pynodes.class_attribute()
    attribute['lineno'] = lineno
    attribute.append(Text(name))
    return attribute
|---|
| 687 | |
|---|
def make_attribute(name, lineno):
    """Return a `pynodes.attribute` node at `lineno` with an object-name child for `name`."""
    attribute = pynodes.attribute()
    attribute['lineno'] = lineno
    attribute.append(make_object_name(name))
    return attribute
|---|
| 693 | |
|---|
def make_parameter(name, excess_keyword=0, excess_positional=0):
    """
    Return a `pynodes.parameter` node for the parameter `name`.

    excess_keyword and excess_positional must be either 1 or 0, and
    not both of them can be 1.  Whichever flag is set is recorded as an
    attribute of the same name on the node.
    """
    parameter = pynodes.parameter()
    parameter.append(make_object_name(name))
    assert not (excess_keyword and excess_positional)
    if excess_keyword:
        parameter['excess_keyword'] = 1
    if excess_positional:
        parameter['excess_positional'] = 1
    return parameter
|---|
| 707 | |
|---|
def trim_docstring(text):
    """
    Trim indentation and blank lines from docstring text & return it.

    Tabs are expanded first.  The first line is stripped on both sides;
    the indentation common to all following non-blank lines is removed
    from every following line, along with trailing whitespace.  Leading
    and trailing blank lines are dropped.  A false `text` (empty string
    or None) is returned unchanged.

    See PEP 257.
    """
    if not text:
        return text
    # Convert tabs to spaces (following the normal Python rules)
    # and split into a list of lines:
    lines = text.expandtabs().splitlines()
    # Indentation of each non-blank line (first line doesn't count):
    indents = [len(line) - len(line.lstrip())
               for line in lines[1:] if line.strip()]
    # Remove indentation (first line is special):
    trimmed = [lines[0].strip()]
    if indents:
        indent = min(indents)
        trimmed.extend([line[indent:].rstrip() for line in lines[1:]])
    # Strip off trailing and leading blank lines:
    while trimmed and not trimmed[-1]:
        trimmed.pop()
    while trimmed and not trimmed[0]:
        trimmed.pop(0)
    # Return a single string:
    return '\n'.join(trimmed)
|---|
| 737 | |
|---|
def normalize_parameter_name(name):
    """
    Converts a tuple like ``('a', ('b', 'c'), 'd')`` into ``'(a, (b, c), d)'``

    Anything that is not a tuple is returned unchanged.
    """
    if isinstance(name, tuple):
        return '(%s)' % ', '.join([normalize_parameter_name(n) for n in name])
    return name
|---|
| 746 | |
|---|
if __name__ == '__main__':
    # Command line: [-v] <filename>
    # With -v, walk the module's AST verbosely with the compiler package's
    # example visitor; otherwise print the module documentation tree.
    args = sys.argv[1:]
    verbose = args[:1] == ['-v']
    if verbose:
        del args[0]
    if not args:
        # Previously a missing filename surfaced as an IndexError.
        sys.stderr.write('Usage: %s [-v] <python-source-file>\n'
                         % sys.argv[0])
        sys.exit(2)
    filename = args[0]
    source_file = open(filename)
    try:
        module_text = source_file.read()
    finally:
        source_file.close()
    if verbose:
        ast = compiler.parse(module_text)
        visitor = compiler.visitor.ExampleASTVisitor()
        compiler.walk(ast, visitor, walker=visitor, verbose=1)
    else:
        print(parse_module(module_text, filename).pformat())
|---|
| 760 | |
|---|