1 | # Author: David Goodger |
---|
2 | # Contact: goodger@users.sourceforge.net |
---|
3 | # Revision: $Revision: 4242 $ |
---|
4 | # Date: $Date: 2006-01-06 00:28:53 +0100 (Fri, 06 Jan 2006) $ |
---|
5 | # Copyright: This module has been placed in the public domain. |
---|
6 | |
---|
7 | """ |
---|
8 | Parser for Python modules. Requires Python 2.2 or higher. |
---|
9 | |
---|
10 | The `parse_module()` function takes a module's text and file name, |
---|
11 | runs it through the module parser (using compiler.py and tokenize.py) |
---|
12 | and produces a parse tree of the source code, using the nodes as found |
---|
13 | in pynodes.py. For example, given this module (x.py):: |
---|
14 | |
---|
15 | # comment |
---|
16 | |
---|
17 | '''Docstring''' |
---|
18 | |
---|
19 | '''Additional docstring''' |
---|
20 | |
---|
21 | __docformat__ = 'reStructuredText' |
---|
22 | |
---|
23 | a = 1 |
---|
24 | '''Attribute docstring''' |
---|
25 | |
---|
26 | class C(Super): |
---|
27 | |
---|
28 | '''C's docstring''' |
---|
29 | |
---|
30 | class_attribute = 1 |
---|
31 | '''class_attribute's docstring''' |
---|
32 | |
---|
33 | def __init__(self, text=None): |
---|
34 | '''__init__'s docstring''' |
---|
35 | |
---|
36 | self.instance_attribute = (text * 7 |
---|
37 | + ' whaddyaknow') |
---|
38 | '''instance_attribute's docstring''' |
---|
39 | |
---|
40 | |
---|
41 | def f(x, # parameter x |
---|
42 | y=a*5, # parameter y |
---|
43 | *args): # parameter args |
---|
44 | '''f's docstring''' |
---|
45 | return [x + item for item in args] |
---|
46 | |
---|
47 | f.function_attribute = 1 |
---|
48 | '''f.function_attribute's docstring''' |
---|
49 | |
---|
50 | The module parser will produce this module documentation tree:: |
---|
51 | |
---|
52 | <module_section filename="test data"> |
---|
53 | <docstring> |
---|
54 | Docstring |
---|
55 | <docstring lineno="5"> |
---|
56 | Additional docstring |
---|
57 | <attribute lineno="7"> |
---|
58 | <object_name> |
---|
59 | __docformat__ |
---|
60 | <expression_value lineno="7"> |
---|
61 | 'reStructuredText' |
---|
62 | <attribute lineno="9"> |
---|
63 | <object_name> |
---|
64 | a |
---|
65 | <expression_value lineno="9"> |
---|
66 | 1 |
---|
67 | <docstring lineno="10"> |
---|
68 | Attribute docstring |
---|
69 | <class_section lineno="12"> |
---|
70 | <object_name> |
---|
71 | C |
---|
72 | <class_base> |
---|
73 | Super |
---|
74 | <docstring lineno="12"> |
---|
75 | C's docstring |
---|
76 | <attribute lineno="16"> |
---|
77 | <object_name> |
---|
78 | class_attribute |
---|
79 | <expression_value lineno="16"> |
---|
80 | 1 |
---|
81 | <docstring lineno="17"> |
---|
82 | class_attribute's docstring |
---|
83 | <method_section lineno="19"> |
---|
84 | <object_name> |
---|
85 | __init__ |
---|
86 | <docstring lineno="19"> |
---|
87 | __init__'s docstring |
---|
88 | <parameter_list lineno="19"> |
---|
89 | <parameter lineno="19"> |
---|
90 | <object_name> |
---|
91 | self |
---|
92 | <parameter lineno="19"> |
---|
93 | <object_name> |
---|
94 | text |
---|
95 | <parameter_default lineno="19"> |
---|
96 | None |
---|
97 | <attribute lineno="22"> |
---|
98 | <object_name> |
---|
99 | self.instance_attribute |
---|
100 | <expression_value lineno="22"> |
---|
101 | (text * 7 + ' whaddyaknow') |
---|
102 | <docstring lineno="24"> |
---|
103 | instance_attribute's docstring |
---|
104 | <function_section lineno="27"> |
---|
105 | <object_name> |
---|
106 | f |
---|
107 | <docstring lineno="27"> |
---|
108 | f's docstring |
---|
109 | <parameter_list lineno="27"> |
---|
110 | <parameter lineno="27"> |
---|
111 | <object_name> |
---|
112 | x |
---|
113 | <comment> |
---|
114 | # parameter x |
---|
115 | <parameter lineno="27"> |
---|
116 | <object_name> |
---|
117 | y |
---|
118 | <parameter_default lineno="27"> |
---|
119 | a * 5 |
---|
120 | <comment> |
---|
121 | # parameter y |
---|
122 | <parameter excess_positional="1" lineno="27"> |
---|
123 | <object_name> |
---|
124 | args |
---|
125 | <comment> |
---|
126 | # parameter args |
---|
127 | <attribute lineno="33"> |
---|
128 | <object_name> |
---|
129 | f.function_attribute |
---|
130 | <expression_value lineno="33"> |
---|
131 | 1 |
---|
132 | <docstring lineno="34"> |
---|
133 | f.function_attribute's docstring |
---|
134 | |
---|
135 | (Comments are not implemented yet.) |
---|
136 | |
---|
137 | compiler.parse() provides most of what's needed for this doctree, and |
---|
138 | "tokenize" can be used to get the rest. We can determine the line |
---|
139 | number from the compiler.parse() AST, and the TokenParser.rhs(lineno) |
---|
140 | method provides the rest. |
---|
141 | |
---|
142 | The Docutils Python reader component will transform this module doctree into a |
---|
143 | Python-specific Docutils doctree, and then a "stylist transform" will |
---|
144 | further transform it into a generic doctree. Namespaces will have to be |
---|
145 | compiled for each of the scopes, but I'm not certain at what stage of |
---|
146 | processing. |
---|
147 | |
---|
148 | It's very important to keep all docstring processing out of this, so that it's |
---|
149 | completely generic and not tool-specific. |
---|
150 | |
---|
151 | :: |
---|
152 | |
---|
153 | > Why perform all of those transformations? Why not go from the AST to a |
---|
154 | > generic doctree? Or, even from the AST to the final output? |
---|
155 | |
---|
156 | I want the docutils.readers.python.moduleparser.parse_module() function to |
---|
157 | produce a standard documentation-oriented tree that can be used by any tool. |
---|
158 | We can develop it together without having to compromise on the rest of our |
---|
159 | design (i.e., HappyDoc doesn't have to be made to work like Docutils, and |
---|
160 | vice-versa). It would be a higher-level version of what compiler.py provides. |
---|
161 | |
---|
162 | The Python reader component transforms this generic AST into a Python-specific |
---|
163 | doctree (it knows about modules, classes, functions, etc.), but this is |
---|
164 | specific to Docutils and cannot be used by HappyDoc or others. The stylist |
---|
165 | transform does the final layout, converting Python-specific structures |
---|
166 | ("class" sections, etc.) into a generic doctree using primitives (tables, |
---|
167 | sections, lists, etc.). This generic doctree does *not* know about Python |
---|
168 | structures any more. The advantage is that this doctree can be handed off to |
---|
169 | any of the output writers to create any output format we like. |
---|
170 | |
---|
171 | The latter two transforms are separate because I want to be able to have |
---|
172 | multiple independent layout styles (multiple runtime-selectable "stylist |
---|
173 | transforms"). Each of the existing tools (HappyDoc, pydoc, epydoc, Crystal, |
---|
174 | etc.) has its own fixed format. I personally don't like the tables-based |
---|
175 | format produced by these tools, and I'd like to be able to customize the |
---|
176 | format easily. That's the goal of stylist transforms, which are independent |
---|
177 | from the Reader component itself. One stylist transform could produce |
---|
178 | HappyDoc-like output, another could produce output similar to module docs in |
---|
179 | the Python library reference manual, and so on. |
---|
180 | |
---|
181 | It's for exactly this reason:: |
---|
182 | |
---|
183 | >> It's very important to keep all docstring processing out of this, so that |
---|
184 | >> it's a completely generic and not tool-specific. |
---|
185 | |
---|
186 | ... but it goes past docstring processing. It's also important to keep style |
---|
187 | decisions and tool-specific data transforms out of this module parser. |
---|
188 | |
---|
189 | |
---|
190 | Issues |
---|
191 | ====== |
---|
192 | |
---|
193 | * At what point should namespaces be computed? Should they be part of the |
---|
194 | basic AST produced by the ASTVisitor walk, or generated by another tree |
---|
195 | traversal? |
---|
196 | |
---|
197 | * At what point should a distinction be made between local variables & |
---|
198 | instance attributes in __init__ methods? |
---|
199 | |
---|
200 | * Docstrings are getting their lineno from their parents. Should the |
---|
201 | TokenParser find the real line no's? |
---|
202 | |
---|
203 | * Comments: include them? How and when? Only full-line comments, or |
---|
204 | parameter comments too? (See function "f" above for an example.) |
---|
205 | |
---|
206 | * Module could use more docstrings & refactoring in places. |
---|
207 | |
---|
208 | """ |
---|
209 | |
---|
210 | __docformat__ = 'reStructuredText' |
---|
211 | |
---|
212 | import sys |
---|
213 | import compiler |
---|
214 | import compiler.ast |
---|
215 | import tokenize |
---|
216 | import token |
---|
217 | from compiler.consts import OP_ASSIGN |
---|
218 | from compiler.visitor import ASTVisitor |
---|
219 | from types import StringType, UnicodeType, TupleType |
---|
220 | from docutils.readers.python import pynodes |
---|
221 | from docutils.nodes import Text |
---|
222 | |
---|
223 | |
---|
def parse_module(module_text, filename):
    """
    Build and return the module documentation tree for `module_text`.

    The text is compiled to an AST with ``compiler.parse()`` and walked by a
    `ModuleVisitor`, which is handed `filename` and a `TokenParser` created
    over the same text.  The visitor's `module` attribute is the result.
    """
    tree = compiler.parse(module_text)
    visitor = ModuleVisitor(filename, TokenParser(module_text))
    compiler.walk(tree, visitor, walker=visitor)
    return visitor.module
---|
231 | |
---|
class BaseVisitor(ASTVisitor):

    """
    Common base class of the visitors in this module.

    Instance attributes set up here:

    - `token_parser`: the token parser object passed to the constructor.
    - `context`: a list used as a stack; starts out empty.
    - `documentable`: starts as None and is reset to None by `default()`.
    """

    def __init__(self, token_parser):
        ASTVisitor.__init__(self)
        self.documentable = None
        self.context = []
        self.token_parser = token_parser

    def default(self, node, *args):
        """
        Fallback handler: only clear `documentable`.

        ``ASTVisitor.default`` is deliberately not called from here; use
        `default_visit()` when that behaviour is wanted.
        """
        self.documentable = None

    def default_visit(self, node, *args):
        """Forward `node` and `args` to ``ASTVisitor.default``."""
        ASTVisitor.default(self, node, *args)
---|
248 | |
---|
249 | |
---|
class DocstringVisitor(BaseVisitor):

    """
    Visitor that turns string constants into docstring nodes.

    While `documentable` is set, a string (or unicode) constant reached
    through an expression statement is appended to it as a docstring node.
    Any other constant clears `documentable`.
    """

    def visitDiscard(self, node):
        # Expression statements matter only while something is waiting for
        # a docstring.
        if not self.documentable:
            return
        self.visit(node.expr)

    def visitConst(self, node):
        if not self.documentable:
            return
        if type(node.value) not in (StringType, UnicodeType):
            # A non-string constant cannot be a docstring; stop collecting.
            self.documentable = None
            return
        self.documentable.append(make_docstring(node.value, node.lineno))

    def visitStmt(self, node):
        # Statement lists are handed to ASTVisitor.default via default_visit().
        self.default_visit(node)
---|
265 | |
---|
266 | |
---|
class AssignmentVisitor(DocstringVisitor):

    """Docstring visitor that also records assignments as attribute nodes."""

    def visitAssign(self, node):
        """
        Collect the attributes assigned by `node` with an `AttributeVisitor`.

        Collected attributes are added to the innermost context node.  When
        exactly one attribute was found it becomes `documentable`; otherwise
        `documentable` is cleared.
        """
        visitor = AttributeVisitor(self.token_parser)
        compiler.walk(node, visitor, walker=visitor)
        found = visitor.attributes
        if found:
            self.context[-1].extend(found)
        self.documentable = None
        if len(found) == 1:
            self.documentable = found[0]
---|
278 | |
---|
279 | |
---|
class ModuleVisitor(AssignmentVisitor):

    """
    Top-level visitor that builds the ``module_section`` tree of a module.

    After the walk the tree is available as the `module` attribute (None
    until a Module node has been visited).
    """

    def __init__(self, filename, token_parser):
        AssignmentVisitor.__init__(self, token_parser)
        self.filename = filename
        self.module = None

    def visitModule(self, node):
        """Create the module node, attach its docstring and visit the body."""
        self.module = module = pynodes.module_section()
        module['filename'] = self.filename
        append_docstring(module, node.doc, node.lineno)
        self.context.append(module)
        # String statements visited while the module is `documentable` are
        # attached to it as further docstrings.
        self.documentable = module
        self.visit(node.node)
        self.context.pop()

    def visitImport(self, node):
        """Append an import group for ``import ...`` to the current context."""
        # Use append(), exactly as visitFrom() and the other visitors do,
        # instead of an augmented item assignment on the context stack.
        self.context[-1].append(
            make_import_group(names=node.names, lineno=node.lineno))
        self.documentable = None

    def visitFrom(self, node):
        """Append an import group for ``from ... import ...``."""
        self.context[-1].append(
            make_import_group(names=node.names, from_name=node.modname,
                              lineno=node.lineno))
        self.documentable = None

    def visitFunction(self, node):
        """Walk `node` with a `FunctionVisitor`; append the section it built."""
        visitor = FunctionVisitor(self.token_parser,
                                  function_class=pynodes.function_section)
        compiler.walk(node, visitor, walker=visitor)
        self.context[-1].append(visitor.function)

    def visitClass(self, node):
        """Walk `node` with a `ClassVisitor`; append the section it built."""
        visitor = ClassVisitor(self.token_parser)
        compiler.walk(node, visitor, walker=visitor)
        self.context[-1].append(visitor.klass)
---|
317 | |
---|
318 | |
---|
class AttributeVisitor(BaseVisitor):

    """
    Visitor that collects the targets of one assignment in `attributes`.

    Plain names become attribute nodes directly.  Dotted targets are
    assembled by passing the trailing part of the name down as `suffix`
    until the leading name is reached.
    """

    def __init__(self, token_parser):
        BaseVisitor.__init__(self, token_parser)
        self.attributes = pynodes.class_attribute_section()

    def visitAssign(self, node):
        # Only the assignment targets are dispatched; the right-hand side is
        # not visited but taken as text from the token parser.
        for target in node.nodes:
            self.dispatch(target)
        value_text = self.token_parser.rhs(node.lineno)
        value = pynodes.expression_value()
        value.append(Text(value_text))
        for attribute in self.attributes:
            attribute.append(value)

    def visitAssName(self, node):
        attribute = make_attribute(node.name, lineno=node.lineno)
        self.attributes.append(attribute)

    def visitAssTuple(self, node):
        # Collect the members of the tuple in a scratch list, wrap them in
        # one attribute_tuple node, then restore the outer collection.
        outer = self.attributes
        self.attributes = []
        self.default_visit(node)
        members = self.attributes
        group = pynodes.attribute_tuple()
        group.extend(members)
        group['lineno'] = members[0]['lineno']
        outer.append(group)
        self.attributes = outer

    def visitAssAttr(self, node):
        # Start of a dotted target: the attribute name is the first suffix.
        self.default_visit(node, node.attrname)

    def visitGetattr(self, node, suffix):
        dotted = '.'.join([node.attrname, suffix])
        self.default_visit(node, dotted)

    def visitName(self, node, suffix):
        dotted = '.'.join([node.name, suffix])
        self.attributes.append(make_attribute(dotted, lineno=node.lineno))
---|
359 | |
---|
360 | |
---|
class FunctionVisitor(DocstringVisitor):

    """
    Visitor that builds the section node for one function definition.

    `function_class` is the node class to instantiate for the section.  The
    result is stored as the `function` attribute.  Function definitions met
    while already inside the function are not processed.
    """

    # Class-level default; set to 1 on the instance by the first visitFunction.
    in_function = 0

    def __init__(self, token_parser, function_class):
        DocstringVisitor.__init__(self, token_parser)
        self.function_class = function_class

    def visitFunction(self, node):
        if self.in_function:
            # Don't bother with nested function definitions.
            self.documentable = None
            return
        self.in_function = 1
        section = make_function_like_section(
            name=node.name,
            lineno=node.lineno,
            doc=node.doc,
            function_class=self.function_class)
        self.function = section
        self.context.append(section)
        self.documentable = section
        self.parse_parameter_list(node)
        self.visit(node.code)
        self.context.pop()

    def parse_parameter_list(self, node):
        """
        Append a ``parameter_list`` node for `node` to `self.function`.

        Nothing is appended when the function takes no parameters.  Default
        value texts come from the token parser's `function_parameters()`.
        """
        names = list(node.argnames)
        extras = []
        # The excess-keyword name is taken off the end of the argument names
        # first, then the excess-positional name.
        if node.kwargs:
            extras.append(make_parameter(names.pop(), excess_keyword=1))
        if node.varargs:
            extras.append(make_parameter(names.pop(), excess_positional=1))
        # Left-pad the defaults with None so they line up with the names.
        given = list(node.defaults)
        defaults = [None] * (len(names) - len(given)) + given
        default_texts = self.token_parser.function_parameters(node.lineno)
        positional = []
        for argname, default in zip(names, defaults):
            if type(argname) is not TupleType:
                parameter = make_parameter(argname)
            else:
                parameter = pynodes.parameter_tuple()
                for member in argname:
                    parameter.append(make_parameter(member))
                # The token parser's mapping is looked up by the
                # normalized tuple text.
                argname = normalize_parameter_name(argname)
            if default:
                default_node = pynodes.parameter_default()
                default_node.append(Text(default_texts[argname]))
                parameter.append(default_node)
            positional.append(parameter)
        if not (positional or extras):
            return
        # `extras` was filled keyword-first; emit positional excess first.
        extras.reverse()
        positional.extend(extras)
        parameter_list = pynodes.parameter_list()
        parameter_list.extend(positional)
        self.function.append(parameter_list)
---|
421 | |
---|
422 | |
---|
class ClassVisitor(AssignmentVisitor):

    """
    Visitor that builds the ``class_section`` node for one class definition.

    The result is stored as the `klass` attribute.  Base class names are
    gathered in `bases`.  Class definitions met while already inside the
    class are not processed.
    """

    # Class-level default; set to 1 on the instance by the first visitClass.
    in_class = 0

    def __init__(self, token_parser):
        AssignmentVisitor.__init__(self, token_parser)
        self.bases = []

    def visitClass(self, node):
        if self.in_class:
            # Don't bother with nested class definitions.
            self.documentable = None
            return
        self.in_class = 1
        # Visiting the base expressions fills `self.bases` (see visitName).
        for base in node.bases:
            self.visit(base)
        section = make_class_section(node.name, self.bases,
                                     doc=node.doc, lineno=node.lineno)
        self.klass = section
        self.context.append(section)
        self.documentable = section
        self.visit(node.code)
        self.context.pop()

    def visitGetattr(self, node, suffix=None):
        # Build the dotted name right-to-left, passing it down as suffix.
        name = node.attrname
        if suffix:
            name = name + '.' + suffix
        self.default_visit(node, name)

    def visitName(self, node, suffix=None):
        name = node.name
        if suffix:
            name = name + '.' + suffix
        self.bases.append(name)

    def visitFunction(self, node):
        """Build a ``method_section`` for `node` and append it to the class."""
        # __init__ gets the visitor that also records assignments.
        if node.name == '__init__':
            visitor_class = InitMethodVisitor
        else:
            visitor_class = FunctionVisitor
        visitor = visitor_class(self.token_parser,
                                function_class=pynodes.method_section)
        compiler.walk(node, visitor, walker=visitor)
        self.context[-1].append(visitor.function)
---|
473 | |
---|
474 | |
---|
class InitMethodVisitor(FunctionVisitor, AssignmentVisitor):

    """
    Visitor used for ``__init__`` methods.

    Combines `FunctionVisitor` with the assignment handling of
    `AssignmentVisitor`; it adds no behaviour of its own.
    """
---|
476 | |
---|
477 | |
---|
class TokenParser:

    """
    Forward-only cursor over the tokens of a module's source text.

    The current token is available as `token` and, unpacked, as `type`,
    `string`, `start`, `end` and `line`.  The cursor never rewinds:
    `goto_line()`, `rhs()` and `function_parameters()` only ever advance it.
    """

    def __init__(self, text):
        # NOTE(review): the two extra newlines presumably make sure the last
        # statement is terminated even if `text` lacks a final newline.
        self.text = text + '\n\n'
        self.lines = self.text.splitlines(1)
        line_iter = iter(self.lines)
        # Iterators advance through next() or __next__(), depending on the
        # Python version; use whichever one exists.
        readline = getattr(line_iter, '__next__', None) or line_iter.next
        self.generator = tokenize.generate_tokens(readline)
        self.next()

    def __iter__(self):
        return self

    def next(self):
        """Advance to the next token, store it on `self` and return it."""
        generator = self.generator
        advance = getattr(generator, '__next__', None) or generator.next
        self.token = advance()
        self.type, self.string, self.start, self.end, self.line = self.token
        return self.token

    # Same method under the name used by the newer iterator protocol.
    __next__ = next

    def goto_line(self, lineno):
        """
        Advance until the current token starts on line `lineno` or later.

        Return the current token (unchanged if the cursor is already at or
        past that line).
        """
        while self.start[0] < lineno:
            self.next()
        return self.token

    def rhs(self, lineno):
        """
        Return a whitespace-normalized expression string from the right-hand
        side of an assignment at line `lineno`.

        For a chained assignment (``a = b = 1``) only the text after the last
        top-level ``=`` is returned.  The cursor is left on the token after
        the terminating NEWLINE or ``;``.
        """
        self.goto_line(lineno)
        while self.string != '=':
            self.next()
        self.stack = None
        while self.type != token.NEWLINE and self.string != ';':
            if self.string == '=' and not self.stack:
                # A "=" outside any brackets (re)starts the collection.
                self.tokens = []
                self.stack = []
                self._type = None
                self._string = None
                self._backquote = 0
            else:
                self.note_token()
            self.next()
        # Step over the NEWLINE or ";" that ended the statement.
        self.next()
        text = ''.join(self.tokens)
        return text.strip()

    # Lookup tables for note_token(); only the keys matter, except in
    # `closers`, which maps each closing bracket to its opening bracket.
    closers = {')': '(', ']': '[', '}': '{'}
    openers = {'(': 1, '[': 1, '{': 1}
    # Tokens that remove a space emitted just before them:
    del_ws_prefix = {'.': 1, '=': 1, ')': 1, ']': 1, '}': 1, ':': 1, ',': 1}
    # Tokens that are not followed by a space:
    no_ws_suffix = {'.': 1, '=': 1, '(': 1, '[': 1, '{': 1}

    def note_token(self):
        """
        Append the current token to `self.tokens` with normalized spacing.

        Also maintains `self.stack`, the stack of currently open brackets and
        backquotes.  NL tokens are skipped.
        """
        if self.type == tokenize.NL:
            return
        del_ws = self.string in self.del_ws_prefix
        append_ws = self.string not in self.no_ws_suffix
        if self.string in self.openers:
            self.stack.append(self.string)
            # No space between a name or a closing bracket and the bracket
            # that follows it, e.g. "f(" or "a[0](".
            if (self._type == token.NAME
                or self._string in self.closers):
                del_ws = 1
        elif self.string in self.closers:
            assert self.stack[-1] == self.closers[self.string]
            self.stack.pop()
        elif self.string == '`':
            if self._backquote:
                # Closing backquote.
                del_ws = 1
                assert self.stack[-1] == '`'
                self.stack.pop()
            else:
                # Opening backquote.
                append_ws = 0
                self.stack.append('`')
            self._backquote = not self._backquote
        if del_ws and self.tokens and self.tokens[-1] == ' ':
            del self.tokens[-1]
        self.tokens.append(self.string)
        # Remember this token for the spacing decision on the next one.
        self._type = self.type
        self._string = self.string
        if append_ws:
            self.tokens.append(' ')

    def function_parameters(self, lineno):
        """
        Return a dictionary mapping parameters to defaults
        (whitespace-normalized strings).

        The ``def`` is searched for from line `lineno` onwards.  Parameters
        without a default map to None.  A tuple parameter is keyed by its
        normalized text, e.g. ``'(a, b)'``.  The cursor is left on the
        closing parenthesis of the parameter list.
        """
        self.goto_line(lineno)
        while self.string != 'def':
            self.next()
        while self.string != '(':
            self.next()
        name = None
        default = None
        parameter_tuple = None
        self.tokens = []
        parameters = {}
        # The stack holds the open brackets; depth 1 means "directly inside
        # the parameter list's own parentheses".
        self.stack = [self.string]
        self.next()
        while 1:
            if len(self.stack) == 1:
                if parameter_tuple:
                    # Just encountered ")".
                    name = ''.join(self.tokens).strip()
                    self.tokens = []
                    parameter_tuple = None
                if self.string in (')', ','):
                    # End of one parameter: store what was collected.
                    if name:
                        if self.tokens:
                            default_text = ''.join(self.tokens).strip()
                        else:
                            default_text = None
                        parameters[name] = default_text
                        self.tokens = []
                        name = None
                        default = None
                    if self.string == ')':
                        break
                elif self.type == token.NAME:
                    if name and default:
                        # A name inside a default value expression.
                        self.note_token()
                    else:
                        assert name is None, (
                            'token=%r name=%r parameters=%r stack=%r'
                            % (self.token, name, parameters, self.stack))
                        name = self.string
                elif self.string == '=':
                    assert name is not None, 'token=%r' % (self.token,)
                    assert default is None, 'token=%r' % (self.token,)
                    assert self.tokens == [], 'token=%r' % (self.token,)
                    default = 1
                    self._type = None
                    self._string = None
                    self._backquote = 0
                elif name:
                    self.note_token()
                elif self.string == '(':
                    # Start of a tuple parameter; its text becomes the name.
                    parameter_tuple = 1
                    self._type = None
                    self._string = None
                    self._backquote = 0
                    self.note_token()
                else:                   # ignore these tokens:
                    assert (self.string in ('*', '**', '\n')
                            or self.type == tokenize.COMMENT), (
                        'token=%r' % (self.token,))
            else:
                # Inside nested brackets: everything is part of the text.
                self.note_token()
            self.next()
        return parameters
---|
627 | |
---|
628 | |
---|
def make_docstring(doc, lineno):
    """
    Return a ``docstring`` node containing `doc` as text.

    The node's 'lineno' attribute is set only when `lineno` is true.
    """
    node = pynodes.docstring()
    # Really, only module docstrings don't have a line
    # (@@: but maybe they should)
    if lineno:
        node['lineno'] = lineno
    node.append(Text(doc))
    return node
---|
637 | |
---|
def append_docstring(node, doc, lineno):
    """Append a docstring node for `doc` to `node`, unless `doc` is empty."""
    if not doc:
        return
    node.append(make_docstring(doc, lineno))
---|
641 | |
---|
def make_class_section(name, bases, lineno, doc):
    """
    Return a ``class_section`` node for the class `name`.

    Children are added in this order: the object name, one ``class_base``
    per entry of `bases`, then the docstring (if `doc` is non-empty).
    """
    section = pynodes.class_section()
    section['lineno'] = lineno
    section.append(make_object_name(name))
    for base_name in bases:
        base_node = pynodes.class_base()
        base_node.append(make_object_name(base_name))
        section.append(base_node)
    append_docstring(section, doc, lineno)
    return section
---|
652 | |
---|
def make_object_name(name):
    """Return an ``object_name`` node whose only child is the text `name`."""
    node = pynodes.object_name()
    node.append(Text(name))
    return node
---|
657 | |
---|
def make_function_like_section(name, lineno, doc, function_class):
    """
    Return a `function_class` node for the function or method `name`.

    The node gets a 'lineno' attribute, an object name child and, if `doc`
    is non-empty, a docstring child.
    """
    section = function_class()
    section['lineno'] = lineno
    section.append(make_object_name(name))
    append_docstring(section, doc, lineno)
    return section
---|
664 | |
---|
def make_import_group(names, lineno, from_name=None):
    """
    Return an ``import_group`` node for one import statement.

    `names` is a sequence of ``(name, alias)`` pairs; a false `alias` means
    the name is imported without alias.  When `from_name` is given, an
    ``import_from`` child holding it precedes the imported names.
    """
    group = pynodes.import_group()
    group['lineno'] = lineno
    if from_name:
        source = pynodes.import_from()
        source.append(Text(from_name))
        group.append(source)
    for imported, alias in names:
        name_node = pynodes.import_name()
        name_node.append(Text(imported))
        if alias:
            alias_node = pynodes.import_alias()
            alias_node.append(Text(alias))
            name_node.append(alias_node)
        group.append(name_node)
    return group
---|
681 | |
---|
def make_class_attribute(name, lineno):
    """Return a ``class_attribute`` node for `name` with 'lineno' set."""
    node = pynodes.class_attribute()
    node['lineno'] = lineno
    node.append(Text(name))
    return node
---|
687 | |
---|
def make_attribute(name, lineno):
    """
    Return an ``attribute`` node with 'lineno' set and an object name child
    for `name`.
    """
    node = pynodes.attribute()
    node['lineno'] = lineno
    node.append(make_object_name(name))
    return node
---|
693 | |
---|
def make_parameter(name, excess_keyword=0, excess_positional=0):
    """
    Return a ``parameter`` node for `name`.

    `excess_keyword` and `excess_positional` are flags (1 or 0); at most one
    of them may be set.  A set flag is recorded as an attribute of the same
    name with value 1.
    """
    parameter = pynodes.parameter()
    parameter.append(make_object_name(name))
    assert not (excess_keyword and excess_positional)
    if excess_keyword:
        parameter['excess_keyword'] = 1
    if excess_positional:
        parameter['excess_positional'] = 1
    return parameter
---|
707 | |
---|
def trim_docstring(text):
    """
    Trim indentation and blank lines from docstring text & return it.

    The first line is stripped on both sides.  The smallest indentation of
    the remaining non-blank lines is removed from each of them, and trailing
    whitespace is dropped.  Leading and trailing blank lines are then
    removed.  A false `text` (empty string, None) is returned unchanged.

    See PEP 257.
    """
    if not text:
        return text
    # Convert tabs to spaces (following the normal Python rules)
    # and split into a list of lines:
    lines = text.expandtabs().splitlines()
    # Determine minimum indentation (first line doesn't count).  None means
    # "no indented line seen yet"; this avoids depending on `sys.maxint`,
    # which newer Python versions no longer provide.
    indent = None
    for line in lines[1:]:
        stripped = line.lstrip()
        if stripped:
            margin = len(line) - len(stripped)
            if indent is None or margin < indent:
                indent = margin
    # Remove indentation (first line is special):
    trimmed = [lines[0].strip()]
    if indent is not None:
        for line in lines[1:]:
            trimmed.append(line[indent:].rstrip())
    # Strip off trailing and leading blank lines:
    while trimmed and not trimmed[-1]:
        trimmed.pop()
    while trimmed and not trimmed[0]:
        trimmed.pop(0)
    # Return a single string:
    return '\n'.join(trimmed)
---|
737 | |
---|
def normalize_parameter_name(name):
    """
    Converts a tuple like ``('a', ('b', 'c'), 'd')`` into ``'(a, (b, c), d)'``

    Nested tuples are converted recursively.  Anything that is not a tuple
    is returned unchanged.
    """
    # isinstance() also accepts tuple subclasses and needs no `types` alias.
    if isinstance(name, tuple):
        return '(%s)' % ', '.join([normalize_parameter_name(n) for n in name])
    return name
---|
746 | |
---|
if __name__ == '__main__':
    # Command line use: [-v] <python-source-file>
    #
    # Without -v, print the module documentation tree of the file.
    # With -v, walk the file's AST with compiler's ExampleASTVisitor in
    # verbose mode instead.
    args = sys.argv[1:]
    verbose = args[:1] == ['-v']
    if verbose:
        args = args[1:]
    if not args:
        # Report a missing file name instead of failing with an IndexError.
        sys.stderr.write('usage: %s [-v] <python-source-file>\n'
                         % sys.argv[0])
        sys.exit(2)
    filename = args[0]
    source_file = open(filename)
    try:
        module_text = source_file.read()
    finally:
        # Always release the file handle, even if reading fails.
        source_file.close()
    if verbose:
        ast = compiler.parse(module_text)
        visitor = compiler.visitor.ExampleASTVisitor()
        compiler.walk(ast, visitor, walker=visitor, verbose=1)
    else:
        # A single parenthesized argument prints the same text whether
        # `print` is a statement or a function.
        print(parse_module(module_text, filename).pformat())
---|
760 | |
---|