[3] | 1 | # Author: David Goodger |
---|
| 2 | # Contact: goodger@users.sourceforge.net |
---|
| 3 | # Revision: $Revision: 4242 $ |
---|
| 4 | # Date: $Date: 2006-01-06 00:28:53 +0100 (Fri, 06 Jan 2006) $ |
---|
| 5 | # Copyright: This module has been placed in the public domain. |
---|
| 6 | |
---|
| 7 | """ |
---|
| 8 | Parser for Python modules. Requires Python 2.2 or higher. |
---|
| 9 | |
---|
| 10 | The `parse_module()` function takes a module's text and file name, |
---|
| 11 | runs it through the module parser (using compiler.py and tokenize.py) |
---|
| 12 | and produces a parse tree of the source code, using the nodes as found |
---|
| 13 | in pynodes.py. For example, given this module (x.py):: |
---|
| 14 | |
---|
| 15 | # comment |
---|
| 16 | |
---|
| 17 | '''Docstring''' |
---|
| 18 | |
---|
| 19 | '''Additional docstring''' |
---|
| 20 | |
---|
| 21 | __docformat__ = 'reStructuredText' |
---|
| 22 | |
---|
| 23 | a = 1 |
---|
| 24 | '''Attribute docstring''' |
---|
| 25 | |
---|
| 26 | class C(Super): |
---|
| 27 | |
---|
| 28 | '''C's docstring''' |
---|
| 29 | |
---|
| 30 | class_attribute = 1 |
---|
| 31 | '''class_attribute's docstring''' |
---|
| 32 | |
---|
| 33 | def __init__(self, text=None): |
---|
| 34 | '''__init__'s docstring''' |
---|
| 35 | |
---|
| 36 | self.instance_attribute = (text * 7 |
---|
| 37 | + ' whaddyaknow') |
---|
| 38 | '''instance_attribute's docstring''' |
---|
| 39 | |
---|
| 40 | |
---|
| 41 | def f(x, # parameter x |
---|
| 42 | y=a*5, # parameter y |
---|
| 43 | *args): # parameter args |
---|
| 44 | '''f's docstring''' |
---|
| 45 | return [x + item for item in args] |
---|
| 46 | |
---|
| 47 | f.function_attribute = 1 |
---|
| 48 | '''f.function_attribute's docstring''' |
---|
| 49 | |
---|
| 50 | The module parser will produce this module documentation tree:: |
---|
| 51 | |
---|
| 52 | <module_section filename="test data"> |
---|
| 53 | <docstring> |
---|
| 54 | Docstring |
---|
| 55 | <docstring lineno="5"> |
---|
| 56 | Additional docstring |
---|
| 57 | <attribute lineno="7"> |
---|
| 58 | <object_name> |
---|
| 59 | __docformat__ |
---|
| 60 | <expression_value lineno="7"> |
---|
| 61 | 'reStructuredText' |
---|
| 62 | <attribute lineno="9"> |
---|
| 63 | <object_name> |
---|
| 64 | a |
---|
| 65 | <expression_value lineno="9"> |
---|
| 66 | 1 |
---|
| 67 | <docstring lineno="10"> |
---|
| 68 | Attribute docstring |
---|
| 69 | <class_section lineno="12"> |
---|
| 70 | <object_name> |
---|
| 71 | C |
---|
| 72 | <class_base> |
---|
| 73 | Super |
---|
| 74 | <docstring lineno="12"> |
---|
| 75 | C's docstring |
---|
| 76 | <attribute lineno="16"> |
---|
| 77 | <object_name> |
---|
| 78 | class_attribute |
---|
| 79 | <expression_value lineno="16"> |
---|
| 80 | 1 |
---|
| 81 | <docstring lineno="17"> |
---|
| 82 | class_attribute's docstring |
---|
| 83 | <method_section lineno="19"> |
---|
| 84 | <object_name> |
---|
| 85 | __init__ |
---|
| 86 | <docstring lineno="19"> |
---|
| 87 | __init__'s docstring |
---|
| 88 | <parameter_list lineno="19"> |
---|
| 89 | <parameter lineno="19"> |
---|
| 90 | <object_name> |
---|
| 91 | self |
---|
| 92 | <parameter lineno="19"> |
---|
| 93 | <object_name> |
---|
| 94 | text |
---|
| 95 | <parameter_default lineno="19"> |
---|
| 96 | None |
---|
| 97 | <attribute lineno="22"> |
---|
| 98 | <object_name> |
---|
| 99 | self.instance_attribute |
---|
| 100 | <expression_value lineno="22"> |
---|
| 101 | (text * 7 + ' whaddyaknow') |
---|
| 102 | <docstring lineno="24"> |
---|
| 103 | instance_attribute's docstring |
---|
| 104 | <function_section lineno="27"> |
---|
| 105 | <object_name> |
---|
| 106 | f |
---|
| 107 | <docstring lineno="27"> |
---|
| 108 | f's docstring |
---|
| 109 | <parameter_list lineno="27"> |
---|
| 110 | <parameter lineno="27"> |
---|
| 111 | <object_name> |
---|
| 112 | x |
---|
| 113 | <comment> |
---|
| 114 | # parameter x |
---|
| 115 | <parameter lineno="27"> |
---|
| 116 | <object_name> |
---|
| 117 | y |
---|
| 118 | <parameter_default lineno="27"> |
---|
| 119 | a * 5 |
---|
| 120 | <comment> |
---|
| 121 | # parameter y |
---|
| 122 | <parameter excess_positional="1" lineno="27"> |
---|
| 123 | <object_name> |
---|
| 124 | args |
---|
| 125 | <comment> |
---|
| 126 | # parameter args |
---|
| 127 | <attribute lineno="33"> |
---|
| 128 | <object_name> |
---|
| 129 | f.function_attribute |
---|
| 130 | <expression_value lineno="33"> |
---|
| 131 | 1 |
---|
| 132 | <docstring lineno="34"> |
---|
| 133 | f.function_attribute's docstring |
---|
| 134 | |
---|
| 135 | (Comments are not implemented yet.) |
---|
| 136 | |
---|
| 137 | compiler.parse() provides most of what's needed for this doctree, and |
---|
| 138 | "tokenize" can be used to get the rest. We can determine the line |
---|
| 139 | number from the compiler.parse() AST, and the TokenParser.rhs(lineno) |
---|
| 140 | method provides the rest. |
---|
| 141 | |
---|
| 142 | The Docutils Python reader component will transform this module doctree into a |
---|
| 143 | Python-specific Docutils doctree, and then a "stylist transform" will |
---|
| 144 | further transform it into a generic doctree. Namespaces will have to be |
---|
| 145 | compiled for each of the scopes, but I'm not certain at what stage of |
---|
| 146 | processing. |
---|
| 147 | |
---|
| 148 | It's very important to keep all docstring processing out of this, so that it's |
---|
| 149 | completely generic and not tool-specific. |
---|
| 150 | |
---|
| 151 | :: |
---|
| 152 | |
---|
| 153 | > Why perform all of those transformations? Why not go from the AST to a |
---|
| 154 | > generic doctree? Or, even from the AST to the final output? |
---|
| 155 | |
---|
| 156 | I want the docutils.readers.python.moduleparser.parse_module() function to |
---|
| 157 | produce a standard documentation-oriented tree that can be used by any tool. |
---|
| 158 | We can develop it together without having to compromise on the rest of our |
---|
| 159 | design (i.e., HappyDoc doesn't have to be made to work like Docutils, and |
---|
| 160 | vice-versa). It would be a higher-level version of what compiler.py provides. |
---|
| 161 | |
---|
| 162 | The Python reader component transforms this generic AST into a Python-specific |
---|
| 163 | doctree (it knows about modules, classes, functions, etc.), but this is |
---|
| 164 | specific to Docutils and cannot be used by HappyDoc or others. The stylist |
---|
| 165 | transform does the final layout, converting Python-specific structures |
---|
| 166 | ("class" sections, etc.) into a generic doctree using primitives (tables, |
---|
| 167 | sections, lists, etc.). This generic doctree does *not* know about Python |
---|
| 168 | structures any more. The advantage is that this doctree can be handed off to |
---|
| 169 | any of the output writers to create any output format we like. |
---|
| 170 | |
---|
| 171 | The latter two transforms are separate because I want to be able to have |
---|
| 172 | multiple independent layout styles (multiple runtime-selectable "stylist |
---|
| 173 | transforms"). Each of the existing tools (HappyDoc, pydoc, epydoc, Crystal, |
---|
| 174 | etc.) has its own fixed format. I personally don't like the tables-based |
---|
| 175 | format produced by these tools, and I'd like to be able to customize the |
---|
| 176 | format easily. That's the goal of stylist transforms, which are independent |
---|
| 177 | from the Reader component itself. One stylist transform could produce |
---|
| 178 | HappyDoc-like output, another could produce output similar to module docs in |
---|
| 179 | the Python library reference manual, and so on. |
---|
| 180 | |
---|
| 181 | It's for exactly this reason:: |
---|
| 182 | |
---|
| 183 | >> It's very important to keep all docstring processing out of this, so that |
---|
| 184 | >> it's a completely generic and not tool-specific. |
---|
| 185 | |
---|
| 186 | ... but it goes past docstring processing. It's also important to keep style |
---|
| 187 | decisions and tool-specific data transforms out of this module parser. |
---|
| 188 | |
---|
| 189 | |
---|
| 190 | Issues |
---|
| 191 | ====== |
---|
| 192 | |
---|
| 193 | * At what point should namespaces be computed? Should they be part of the |
---|
| 194 | basic AST produced by the ASTVisitor walk, or generated by another tree |
---|
| 195 | traversal? |
---|
| 196 | |
---|
| 197 | * At what point should a distinction be made between local variables & |
---|
| 198 | instance attributes in __init__ methods? |
---|
| 199 | |
---|
| 200 | * Docstrings are getting their lineno from their parents. Should the |
---|
| 201 | TokenParser find the real line no's? |
---|
| 202 | |
---|
| 203 | * Comments: include them? How and when? Only full-line comments, or |
---|
| 204 | parameter comments too? (See function "f" above for an example.) |
---|
| 205 | |
---|
| 206 | * Module could use more docstrings & refactoring in places. |
---|
| 207 | |
---|
| 208 | """ |
---|
| 209 | |
---|
| 210 | __docformat__ = 'reStructuredText' |
---|
| 211 | |
---|
| 212 | import sys |
---|
| 213 | import compiler |
---|
| 214 | import compiler.ast |
---|
| 215 | import tokenize |
---|
| 216 | import token |
---|
| 217 | from compiler.consts import OP_ASSIGN |
---|
| 218 | from compiler.visitor import ASTVisitor |
---|
| 219 | from types import StringType, UnicodeType, TupleType |
---|
| 220 | from docutils.readers.python import pynodes |
---|
| 221 | from docutils.nodes import Text |
---|
| 222 | |
---|
| 223 | |
---|
def parse_module(module_text, filename):
    """
    Parse `module_text` and return the module documentation tree.

    The text is parsed with ``compiler.parse()``; a `ModuleVisitor` (given
    `filename` and a `TokenParser` over the same text) then walks the
    resulting AST, and the visitor's ``module`` attribute is returned.
    """
    tree = compiler.parse(module_text)
    module_visitor = ModuleVisitor(filename, TokenParser(module_text))
    # The visitor acts as its own walker.
    compiler.walk(tree, module_visitor, walker=module_visitor)
    return module_visitor.module
---|
| 231 | |
---|
class BaseVisitor(ASTVisitor):

    """
    Common state for the visitors in this module.

    Attributes set by `__init__`:

    - `token_parser`: the token parser handed to the constructor.
    - `context`: a list used as a stack of nodes under construction.
    - `documentable`: the node that a following docstring would be
      attached to, or None.
    """

    def __init__(self, token_parser):
        ASTVisitor.__init__(self)
        self.documentable = None
        self.context = []
        self.token_parser = token_parser

    def default(self, node, *args):
        """
        Handle a node type with no dedicated ``visit*`` method.

        Only clears `documentable`; the children of `node` are not visited.
        """
        self.documentable = None

    def default_visit(self, node, *args):
        """Delegate to ``ASTVisitor.default()`` for `node`."""
        ASTVisitor.default(self, node, *args)
---|
| 248 | |
---|
| 249 | |
---|
class DocstringVisitor(BaseVisitor):

    """Attach string constants to the current `documentable` node."""

    def visitDiscard(self, node):
        """Visit the discarded expression, but only while something is documentable."""
        if not self.documentable:
            return
        self.visit(node.expr)

    def visitConst(self, node):
        """
        Append a docstring node for a str/unicode constant to `documentable`.

        Any other constant clears `documentable`.  Nothing happens when
        there is no documentable node.
        """
        target = self.documentable
        if not target:
            return
        if type(node.value) in (StringType, UnicodeType):
            target.append(make_docstring(node.value, node.lineno))
        else:
            self.documentable = None

    def visitStmt(self, node):
        """Pass statement lists through `default_visit()`."""
        self.default_visit(node)
---|
| 265 | |
---|
| 266 | |
---|
class AssignmentVisitor(DocstringVisitor):

    """Record assignment targets as attributes of the current context node."""

    def visitAssign(self, node):
        """
        Walk `node` with a fresh `AttributeVisitor` and add what it found
        to the node on top of the `context` stack.

        When exactly one attribute was collected it becomes `documentable`;
        otherwise `documentable` is cleared.
        """
        attribute_visitor = AttributeVisitor(self.token_parser)
        compiler.walk(node, attribute_visitor, walker=attribute_visitor)
        found = attribute_visitor.attributes
        if found:
            self.context[-1].extend(found)
        if len(found) == 1:
            self.documentable = found[0]
        else:
            self.documentable = None
---|
| 278 | |
---|
| 279 | |
---|
class ModuleVisitor(AssignmentVisitor):

    """
    Top-level visitor: builds a `pynodes.module_section` in `self.module`.

    `self.module` is None until `visitModule()` has run.
    """

    def __init__(self, filename, token_parser):
        AssignmentVisitor.__init__(self, token_parser)
        self.filename = filename
        self.module = None

    def visitModule(self, node):
        """Create the module section and visit the module body inside it."""
        self.module = module = pynodes.module_section()
        module['filename'] = self.filename
        append_docstring(module, node.doc, node.lineno)
        self.context.append(module)
        self.documentable = module
        self.visit(node.node)
        self.context.pop()

    def visitImport(self, node):
        """Append an import group for ``import ...`` to the current context node."""
        # Uses append(), like visitFrom(), rather than "+=" on the stack slot.
        # NOTE(review): assumes "+=" on a pynodes element appended a single
        # node in place -- confirm against docutils.nodes.Element.
        self.context[-1].append(
            make_import_group(names=node.names, lineno=node.lineno))
        self.documentable = None

    def visitFrom(self, node):
        """Append an import group for ``from ... import ...`` to the current context node."""
        self.context[-1].append(
            make_import_group(names=node.names, from_name=node.modname,
                              lineno=node.lineno))
        self.documentable = None

    def visitFunction(self, node):
        """Walk a function definition with a `FunctionVisitor` and append its result."""
        visitor = FunctionVisitor(self.token_parser,
                                  function_class=pynodes.function_section)
        compiler.walk(node, visitor, walker=visitor)
        self.context[-1].append(visitor.function)

    def visitClass(self, node):
        """Walk a class definition with a `ClassVisitor` and append its result."""
        visitor = ClassVisitor(self.token_parser)
        compiler.walk(node, visitor, walker=visitor)
        self.context[-1].append(visitor.klass)
---|
| 317 | |
---|
| 318 | |
---|
class AttributeVisitor(BaseVisitor):

    """
    Collect the targets of one assignment statement in `self.attributes`.

    Each target becomes an attribute node (or an attribute tuple node for
    tuple targets); `visitAssign()` then appends the right-hand-side text
    to every collected node.
    """

    def __init__(self, token_parser):
        BaseVisitor.__init__(self, token_parser)
        self.attributes = pynodes.class_attribute_section()

    def visitAssign(self, node):
        """Collect the targets, then attach the expression text to each of them."""
        # Don't visit the expression itself, just the attribute nodes:
        for child in node.nodes:
            self.dispatch(child)
        expression_text = self.token_parser.rhs(node.lineno)
        expression = pynodes.expression_value()
        expression.append(Text(expression_text))
        for attribute in self.attributes:
            attribute.append(expression)

    def visitAssName(self, node):
        """Record a plain name target."""
        self.attributes.append(make_attribute(node.name,
                                              lineno=node.lineno))

    def visitAssTuple(self, node):
        """
        Record a tuple target as one attribute tuple node.

        The members are collected into a temporary list while the tuple's
        children are visited.  If no member produced an attribute (the
        children were all handled by `default()`), nothing is recorded;
        previously this case raised IndexError.
        """
        outer = self.attributes
        self.attributes = []
        self.default_visit(node)
        members = self.attributes
        self.attributes = outer
        if not members:
            return
        n = pynodes.attribute_tuple()
        n.extend(members)
        # The tuple takes the line number of its first member.
        n['lineno'] = members[0]['lineno']
        outer.append(n)

    def visitAssAttr(self, node):
        """Start a dotted name: pass the attribute name down as the suffix."""
        self.default_visit(node, node.attrname)

    def visitGetattr(self, node, suffix):
        """Extend the dotted name on the way down to its leftmost part."""
        self.default_visit(node, node.attrname + '.' + suffix)

    def visitName(self, node, suffix):
        """Record the completed dotted name ``<name>.<suffix>``."""
        self.attributes.append(make_attribute(node.name + '.' + suffix,
                                              lineno=node.lineno))
---|
| 359 | |
---|
| 360 | |
---|
class FunctionVisitor(DocstringVisitor):

    """
    Build a function-like section (of class `function_class`) for one
    function definition; the result is stored in `self.function`.
    """

    # Becomes 1 once the outermost function definition has been entered.
    in_function = 0

    def __init__(self, token_parser, function_class):
        DocstringVisitor.__init__(self, token_parser)
        self.function_class = function_class

    def visitFunction(self, node):
        """Process the outermost definition; nested ones only clear `documentable`."""
        if self.in_function:
            # Don't bother with nested function definitions.
            self.documentable = None
            return
        self.in_function = 1
        section = make_function_like_section(
            name=node.name,
            lineno=node.lineno,
            doc=node.doc,
            function_class=self.function_class)
        self.function = section
        self.context.append(section)
        self.documentable = section
        self.parse_parameter_list(node)
        self.visit(node.code)
        self.context.pop()

    def parse_parameter_list(self, node):
        """
        Append a parameter list to `self.function`, if there are parameters.

        Ordinary parameters come first (each with its default's source text
        when it has a default), followed by the excess-positional and
        excess-keyword parameters.
        """
        names = list(node.argnames)
        # ``**kwargs`` is the last name and ``*args`` the one before it, so
        # they are peeled off the end in that order and reversed later.
        extras = []
        if node.kwargs:
            extras.append(make_parameter(names.pop(), excess_keyword=1))
        if node.varargs:
            extras.append(make_parameter(names.pop(), excess_positional=1))
        # Left-pad the defaults with None so they line up with the names.
        values = list(node.defaults)
        values = [None] * (len(names) - len(values)) + values
        default_texts = self.token_parser.function_parameters(node.lineno)
        ordinary = []
        for argname, default in zip(names, values):
            if type(argname) is TupleType:
                parameter = pynodes.parameter_tuple()
                for member in argname:
                    parameter.append(make_parameter(member))
                argname = normalize_parameter_name(argname)
            else:
                parameter = make_parameter(argname)
            if default:
                default_node = pynodes.parameter_default()
                default_node.append(Text(default_texts[argname]))
                parameter.append(default_node)
            ordinary.append(parameter)
        if not (ordinary or extras):
            return
        extras.reverse()
        ordinary.extend(extras)
        parameter_list = pynodes.parameter_list()
        parameter_list.extend(ordinary)
        self.function.append(parameter_list)
---|
| 421 | |
---|
| 422 | |
---|
class ClassVisitor(AssignmentVisitor):

    """
    Build a class section for one class definition; the result is stored
    in `self.klass`.  Base class names are gathered in `self.bases`.
    """

    # Becomes 1 once the outermost class definition has been entered.
    in_class = 0

    def __init__(self, token_parser):
        AssignmentVisitor.__init__(self, token_parser)
        self.bases = []

    def visitClass(self, node):
        """Process the outermost definition; nested ones only clear `documentable`."""
        if self.in_class:
            # Don't bother with nested class definitions.
            self.documentable = None
            return
        self.in_class = 1
        # Visiting the base expressions fills self.bases (see visitName).
        for base in node.bases:
            self.visit(base)
        section = make_class_section(node.name, self.bases,
                                     doc=node.doc,
                                     lineno=node.lineno)
        self.klass = section
        self.context.append(section)
        self.documentable = section
        self.visit(node.code)
        self.context.pop()

    def visitGetattr(self, node, suffix=None):
        """Extend a dotted base name and continue towards its leftmost part."""
        name = node.attrname
        if suffix:
            name = name + '.' + suffix
        self.default_visit(node, name)

    def visitName(self, node, suffix=None):
        """Record a (possibly dotted) base class name."""
        name = node.name
        if suffix:
            name = name + '.' + suffix
        self.bases.append(name)

    def visitFunction(self, node):
        """
        Walk a method definition and append the resulting method section.

        ``__init__`` is walked with an `InitMethodVisitor`, every other
        method with a plain `FunctionVisitor`.
        """
        if node.name == '__init__':
            visitor_class = InitMethodVisitor
        else:
            visitor_class = FunctionVisitor
        visitor = visitor_class(self.token_parser,
                                function_class=pynodes.method_section)
        compiler.walk(node, visitor, walker=visitor)
        self.context[-1].append(visitor.function)
---|
| 473 | |
---|
| 474 | |
---|
class InitMethodVisitor(FunctionVisitor, AssignmentVisitor):

    """A `FunctionVisitor` that also handles assignments (via `AssignmentVisitor`)."""
---|
| 476 | |
---|
| 477 | |
---|
class TokenParser:

    """
    Forward-only cursor over the tokens of a module's source text.

    The current token is exposed as `token` and, unpacked, as `type`,
    `string`, `start`, `end` and `line`.  `rhs()` and
    `function_parameters()` advance the cursor and return
    whitespace-normalized source text; because the cursor never moves
    backwards, they must be called with non-decreasing line numbers.
    """

    def __init__(self, text):
        self.text = text + '\n\n'
        self.lines = self.text.splitlines(1)
        remaining = iter(self.lines)

        def readline():
            # One line per call, then '' once the text is exhausted.
            for line in remaining:
                return line
            return ''

        self.generator = tokenize.generate_tokens(readline)
        # Generators spell "advance" as `next` on Python 2 and `__next__`
        # on Python 3; bind whichever exists once.
        try:
            self._advance = self.generator.next
        except AttributeError:
            self._advance = self.generator.__next__
        self.next()

    def __iter__(self):
        return self

    def next(self):
        """Advance to the next token, unpack it into attributes, and return it."""
        self.token = self._advance()
        self.type, self.string, self.start, self.end, self.line = self.token
        return self.token

    __next__ = next

    def goto_line(self, lineno):
        """
        Advance until the current token starts on line `lineno` or later.

        Return the current token.  (This used to return the `token` module
        by mistake.)
        """
        while self.start[0] < lineno:
            self.next()
        return self.token

    def rhs(self, lineno):
        """
        Return a whitespace-normalized expression string from the right-hand
        side of an assignment at line `lineno`.

        For chained assignments (``a = b = 1``) only the text after the last
        top-level ``=`` is returned.  The cursor is left on the token after
        the terminating NEWLINE or ``;``.
        """
        self.goto_line(lineno)
        while self.string != '=':
            self.next()
        self.stack = None
        while self.type != token.NEWLINE and self.string != ';':
            if self.string == '=' and not self.stack:
                # A top-level "=": (re)start collecting after it.
                self.tokens = []
                self.stack = []
                self._type = None
                self._string = None
                self._backquote = 0
            else:
                self.note_token()
            self.next()
        self.next()
        text = ''.join(self.tokens)
        return text.strip()

    # Bracket bookkeeping and spacing rules used by `note_token()`.
    closers = {')': '(', ']': '[', '}': '{'}
    openers = {'(': 1, '[': 1, '{': 1}
    # Tokens that drop a space before them / take no space after them.
    del_ws_prefix = {'.': 1, '=': 1, ')': 1, ']': 1, '}': 1, ':': 1, ',': 1}
    no_ws_suffix = {'.': 1, '=': 1, '(': 1, '[': 1, '{': 1}

    def note_token(self):
        """
        Append the current token to `self.tokens` with normalized spacing.

        Maintains `self.stack` (open brackets and backquotes) and remembers
        the previous token in `_type` / `_string`.  Non-logical newlines and
        comments are skipped: neither belongs to the expression text.
        """
        if self.type in (tokenize.NL, tokenize.COMMENT):
            return
        del_ws = self.string in self.del_ws_prefix
        append_ws = self.string not in self.no_ws_suffix
        if self.string in self.openers:
            self.stack.append(self.string)
            # No space between a name or closing bracket and the opener
            # that follows it: "f(", "a[", "x()(".
            if (self._type == token.NAME
                or self._string in self.closers):
                del_ws = 1
        elif self.string in self.closers:
            assert self.stack[-1] == self.closers[self.string]
            self.stack.pop()
        elif self.string == '`':
            if self._backquote:
                del_ws = 1
                assert self.stack[-1] == '`'
                self.stack.pop()
            else:
                append_ws = 0
                self.stack.append('`')
            self._backquote = not self._backquote
        if del_ws and self.tokens and self.tokens[-1] == ' ':
            del self.tokens[-1]
        self.tokens.append(self.string)
        self._type = self.type
        self._string = self.string
        if append_ws:
            self.tokens.append(' ')

    def function_parameters(self, lineno):
        """
        Return a dictionary mapping parameters to defaults
        (whitespace-normalized strings).

        Parameters without a default map to None.  The scan starts at the
        first ``def`` at or after line `lineno` and stops at the ``)`` that
        closes its parameter list.
        """
        self.goto_line(lineno)
        while self.string != 'def':
            self.next()
        while self.string != '(':
            self.next()
        name = None
        default = None
        parameter_tuple = None
        self.tokens = []
        parameters = {}
        # The stack holds the opening "(" of the parameter list; depth 1
        # therefore means "directly inside the parameter list".
        self.stack = [self.string]
        self.next()
        while 1:
            if len(self.stack) == 1:
                if parameter_tuple:
                    # Just encountered ")".
                    name = ''.join(self.tokens).strip()
                    self.tokens = []
                    parameter_tuple = None
                if self.string in (')', ','):
                    # End of one parameter: store it with its default text.
                    if name:
                        if self.tokens:
                            default_text = ''.join(self.tokens).strip()
                        else:
                            default_text = None
                        parameters[name] = default_text
                        self.tokens = []
                        name = None
                        default = None
                    if self.string == ')':
                        break
                elif self.type == token.NAME:
                    if name and default:
                        # A name inside the default expression.
                        self.note_token()
                    else:
                        assert name is None, (
                            'token=%r name=%r parameters=%r stack=%r'
                            % (self.token, name, parameters, self.stack))
                        name = self.string
                elif self.string == '=':
                    assert name is not None, 'token=%r' % (self.token,)
                    assert default is None, 'token=%r' % (self.token,)
                    assert self.tokens == [], 'token=%r' % (self.token,)
                    default = 1
                    self._type = None
                    self._string = None
                    self._backquote = 0
                elif name:
                    self.note_token()
                elif self.string == '(':
                    # Start of a tuple parameter; its text becomes the name.
                    parameter_tuple = 1
                    self._type = None
                    self._string = None
                    self._backquote = 0
                    self.note_token()
                else:                   # ignore these tokens:
                    assert (self.string in ('*', '**', '\n')
                            or self.type == tokenize.COMMENT), (
                        'token=%r' % (self.token,))
            else:
                self.note_token()
            self.next()
        return parameters
---|
| 627 | |
---|
| 628 | |
---|
def make_docstring(doc, lineno):
    """
    Return a `pynodes.docstring` node containing `doc` as text.

    The ``lineno`` attribute is only set when `lineno` is true.
    """
    docstring_node = pynodes.docstring()
    if lineno:
        # Really, only module docstrings don't have a line
        # (@@: but maybe they should)
        docstring_node['lineno'] = lineno
    docstring_node.append(Text(doc))
    return docstring_node
---|
| 637 | |
---|
def append_docstring(node, doc, lineno):
    """Append a docstring node for `doc` to `node`; do nothing when `doc` is empty."""
    if not doc:
        return
    node.append(make_docstring(doc, lineno))
---|
| 641 | |
---|
def make_class_section(name, bases, lineno, doc):
    """
    Return a `pynodes.class_section` for the class `name`.

    Children, in order: the object name, one ``class_base`` per entry of
    `bases`, and the docstring (only when `doc` is non-empty).
    """
    section = pynodes.class_section()
    section['lineno'] = lineno
    section.append(make_object_name(name))
    for base_name in bases:
        base_node = pynodes.class_base()
        base_node.append(make_object_name(base_name))
        section.append(base_node)
    append_docstring(section, doc, lineno)
    return section
---|
| 652 | |
---|
def make_object_name(name):
    """Return a `pynodes.object_name` node whose only child is the text `name`."""
    name_node = pynodes.object_name()
    name_node.append(Text(name))
    return name_node
---|
| 657 | |
---|
def make_function_like_section(name, lineno, doc, function_class):
    """
    Return a new `function_class` node for the function or method `name`.

    The node gets a ``lineno`` attribute, an object name child and, when
    `doc` is non-empty, a docstring child.
    """
    section = function_class()
    section['lineno'] = lineno
    section.append(make_object_name(name))
    append_docstring(section, doc, lineno)
    return section
---|
| 664 | |
---|
def make_import_group(names, lineno, from_name=None):
    """
    Return a `pynodes.import_group` for one import statement.

    `names` is a sequence of ``(name, alias)`` pairs; a false alias means
    "no alias".  When `from_name` is given, an ``import_from`` child is
    placed before the imported names.
    """
    group = pynodes.import_group()
    group['lineno'] = lineno
    if from_name:
        source = pynodes.import_from()
        source.append(Text(from_name))
        group.append(source)
    for imported, alias in names:
        entry = pynodes.import_name()
        entry.append(Text(imported))
        if alias:
            alias_node = pynodes.import_alias()
            alias_node.append(Text(alias))
            entry.append(alias_node)
        group.append(entry)
    return group
---|
| 681 | |
---|
def make_class_attribute(name, lineno):
    """Return a `pynodes.class_attribute` node with `lineno` set and `name` as text."""
    attribute_node = pynodes.class_attribute()
    attribute_node['lineno'] = lineno
    attribute_node.append(Text(name))
    return attribute_node
---|
| 687 | |
---|
def make_attribute(name, lineno):
    """Return a `pynodes.attribute` node with `lineno` set and an object name child."""
    attribute_node = pynodes.attribute()
    attribute_node['lineno'] = lineno
    attribute_node.append(make_object_name(name))
    return attribute_node
---|
| 693 | |
---|
def make_parameter(name, excess_keyword=0, excess_positional=0):
    """
    Return a `pynodes.parameter` node for the parameter `name`.

    excess_keyword and excess_positional must be either 1 or 0, and
    not both of them can be 1.  A true flag is recorded as an attribute
    of the same name with the value 1.
    """
    parameter_node = pynodes.parameter()
    parameter_node.append(make_object_name(name))
    assert not (excess_keyword and excess_positional)
    if excess_keyword:
        parameter_node['excess_keyword'] = 1
    if excess_positional:
        parameter_node['excess_positional'] = 1
    return parameter_node
---|
| 707 | |
---|
def trim_docstring(text):
    """
    Return docstring `text` with its indentation and blank edges removed.

    Follows the algorithm in PEP 257: tabs are expanded, the common
    indentation of all lines after the first is removed, trailing
    whitespace is stripped, and leading and trailing blank lines are
    dropped.  An empty `text` is returned unchanged.
    """
    if not text:
        return text
    lines = text.expandtabs().splitlines()
    first, rest = lines[0], lines[1:]
    # Indentation of every non-blank line after the first:
    indents = [len(line) - len(line.lstrip())
               for line in rest if line.lstrip()]
    trimmed = [first.strip()]
    # Without a non-blank continuation line there is nothing worth keeping
    # after the first line.
    if indents:
        margin = min(indents)
        trimmed.extend([line[margin:].rstrip() for line in rest])
    # Drop blank lines at both ends:
    while trimmed and not trimmed[-1]:
        del trimmed[-1]
    while trimmed and not trimmed[0]:
        del trimmed[0]
    return '\n'.join(trimmed)
---|
| 737 | |
---|
def normalize_parameter_name(name):
    """
    Converts a tuple like ``('a', ('b', 'c'), 'd')`` into ``'(a, (b, c), d)'``

    Nested tuples are converted recursively.  Anything that is not a tuple
    (or tuple subclass) is returned unchanged.
    """
    if isinstance(name, tuple):
        return '(%s)' % ', '.join([normalize_parameter_name(n) for n in name])
    return name
---|
| 746 | |
---|
if __name__ == '__main__':
    # Usage: moduleparser.py [-v] <module.py>
    #
    # Without -v, print the module documentation tree for the file.
    # With -v, walk the file's AST with compiler's example visitor in
    # verbose mode instead.
    args = sys.argv[1:]
    verbose = args[:1] == ['-v']
    if verbose:
        args = args[1:]
    if not args:
        # Previously this fell through to an IndexError.
        sys.stderr.write('Usage: %s [-v] <module.py>\n' % sys.argv[0])
        sys.exit(2)
    filename = args[0]
    source_file = open(filename)
    try:
        module_text = source_file.read()
    finally:
        source_file.close()
    if verbose:
        ast = compiler.parse(module_text)
        visitor = compiler.visitor.ExampleASTVisitor()
        compiler.walk(ast, visitor, walker=visitor, verbose=1)
    else:
        print(parse_module(module_text, filename).pformat())
---|
| 760 | |
---|