root/galaxy-central/eggs/docutils-0.4-py2.6.egg/docutils/readers/python/moduleparser.py

Revision 3, 25.2 KB (committer: kohda, 14 years ago)


# Author: David Goodger
# Contact: goodger@users.sourceforge.net
# Revision: $Revision: 4242 $
# Date: $Date: 2006-01-06 00:28:53 +0100 (Fri, 06 Jan 2006) $
# Copyright: This module has been placed in the public domain.

"""
Parser for Python modules.  Requires Python 2.2 or higher.

The `parse_module()` function takes a module's text and file name,
runs it through the module parser (using compiler.py and tokenize.py)
and produces a parse tree of the source code, using the nodes as found
in pynodes.py.  For example, given this module (x.py)::

    # comment

    '''Docstring'''

    '''Additional docstring'''

    __docformat__ = 'reStructuredText'

    a = 1
    '''Attribute docstring'''

    class C(Super):

        '''C's docstring'''

        class_attribute = 1
        '''class_attribute's docstring'''

        def __init__(self, text=None):
            '''__init__'s docstring'''

            self.instance_attribute = (text * 7
                                       + ' whaddyaknow')
            '''instance_attribute's docstring'''


    def f(x,                            # parameter x
          y=a*5,                        # parameter y
          *args):                       # parameter args
        '''f's docstring'''
        return [x + item for item in args]

    f.function_attribute = 1
    '''f.function_attribute's docstring'''

The module parser will produce this module documentation tree::

    <module_section filename="test data">
        <docstring>
            Docstring
        <docstring lineno="5">
            Additional docstring
        <attribute lineno="7">
            <object_name>
                __docformat__
            <expression_value lineno="7">
                'reStructuredText'
        <attribute lineno="9">
            <object_name>
                a
            <expression_value lineno="9">
                1
            <docstring lineno="10">
                Attribute docstring
        <class_section lineno="12">
            <object_name>
                C
            <class_base>
                Super
            <docstring lineno="12">
                C's docstring
            <attribute lineno="16">
                <object_name>
                    class_attribute
                <expression_value lineno="16">
                    1
                <docstring lineno="17">
                    class_attribute's docstring
            <method_section lineno="19">
                <object_name>
                    __init__
                <docstring lineno="19">
                    __init__'s docstring
                <parameter_list lineno="19">
                    <parameter lineno="19">
                        <object_name>
                            self
                    <parameter lineno="19">
                        <object_name>
                            text
                        <parameter_default lineno="19">
                            None
                <attribute lineno="22">
                    <object_name>
                        self.instance_attribute
                    <expression_value lineno="22">
                        (text * 7 + ' whaddyaknow')
                    <docstring lineno="24">
                        instance_attribute's docstring
        <function_section lineno="27">
            <object_name>
                f
            <docstring lineno="27">
                f's docstring
            <parameter_list lineno="27">
                <parameter lineno="27">
                    <object_name>
                        x
                    <comment>
                        # parameter x
                <parameter lineno="27">
                    <object_name>
                        y
                    <parameter_default lineno="27">
                        a * 5
                    <comment>
                        # parameter y
                <parameter excess_positional="1" lineno="27">
                    <object_name>
                        args
                    <comment>
                        # parameter args
        <attribute lineno="33">
            <object_name>
                f.function_attribute
            <expression_value lineno="33">
                1
            <docstring lineno="34">
                f.function_attribute's docstring

(Comments are not implemented yet.)

compiler.parse() provides most of what's needed for this doctree, and
"tokenize" can be used to get the rest.  We can determine the line
number from the compiler.parse() AST, and the TokenParser.rhs(lineno)
method provides the rest.
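
For illustration, a minimal driver sketch (assuming the ``x.py`` example
above is saved to disk and Docutils is importable)::

    from docutils.readers.python.moduleparser import parse_module

    module_text = open('x.py').read()
    tree = parse_module(module_text, 'x.py')
    print tree.pformat()    # pretty-print the documentation tree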

The Docutils Python reader component will transform this module doctree into a
Python-specific Docutils doctree, and then a "stylist transform" will
further transform it into a generic doctree.  Namespaces will have to be
compiled for each of the scopes, but I'm not certain at what stage of
processing.

It's very important to keep all docstring processing out of this, so that it's
completely generic and not tool-specific.

::

> Why perform all of those transformations?  Why not go from the AST to a
> generic doctree?  Or, even from the AST to the final output?

I want the docutils.readers.python.moduleparser.parse_module() function to
produce a standard documentation-oriented tree that can be used by any tool.
We can develop it together without having to compromise on the rest of our
design (i.e., HappyDoc doesn't have to be made to work like Docutils, and
vice-versa).  It would be a higher-level version of what compiler.py provides.

The Python reader component transforms this generic AST into a Python-specific
doctree (it knows about modules, classes, functions, etc.), but this is
specific to Docutils and cannot be used by HappyDoc or others.  The stylist
transform does the final layout, converting Python-specific structures
("class" sections, etc.) into a generic doctree using primitives (tables,
sections, lists, etc.).  This generic doctree does *not* know about Python
structures any more.  The advantage is that this doctree can be handed off to
any of the output writers to create any output format we like.

The latter two transforms are separate because I want to be able to have
multiple independent layout styles (multiple runtime-selectable "stylist
transforms").  Each of the existing tools (HappyDoc, pydoc, epydoc, Crystal,
etc.) has its own fixed format.  I personally don't like the tables-based
format produced by these tools, and I'd like to be able to customize the
format easily.  That's the goal of stylist transforms, which are independent
from the Reader component itself.  One stylist transform could produce
HappyDoc-like output, another could produce output similar to module docs in
the Python library reference manual, and so on.

It's for exactly this reason::

>> It's very important to keep all docstring processing out of this, so that
>> it's completely generic and not tool-specific.

... but it goes past docstring processing.  It's also important to keep style
decisions and tool-specific data transforms out of this module parser.


Issues
======

* At what point should namespaces be computed?  Should they be part of the
  basic AST produced by the ASTVisitor walk, or generated by another tree
  traversal?

* At what point should a distinction be made between local variables &
  instance attributes in __init__ methods?

* Docstrings are getting their lineno from their parents.  Should the
  TokenParser find the real line numbers?

* Comments: include them?  How and when?  Only full-line comments, or
  parameter comments too?  (See function "f" above for an example.)

* Module could use more docstrings & refactoring in places.

"""

__docformat__ = 'reStructuredText'

import sys
import compiler
import compiler.ast
import tokenize
import token
from compiler.consts import OP_ASSIGN
from compiler.visitor import ASTVisitor
from types import StringType, UnicodeType, TupleType
from docutils.readers.python import pynodes
from docutils.nodes import Text


def parse_module(module_text, filename):
    """Return a module documentation tree from `module_text`."""
    ast = compiler.parse(module_text)
    token_parser = TokenParser(module_text)
    visitor = ModuleVisitor(filename, token_parser)
    compiler.walk(ast, visitor, walker=visitor)
    return visitor.module

class BaseVisitor(ASTVisitor):

    def __init__(self, token_parser):
        ASTVisitor.__init__(self)
        self.token_parser = token_parser
        self.context = []
        self.documentable = None

    def default(self, node, *args):
        self.documentable = None
        #print 'in default (%s)' % node.__class__.__name__
        #ASTVisitor.default(self, node, *args)

    def default_visit(self, node, *args):
        #print 'in default_visit (%s)' % node.__class__.__name__
        ASTVisitor.default(self, node, *args)


class DocstringVisitor(BaseVisitor):

    def visitDiscard(self, node):
        if self.documentable:
            self.visit(node.expr)

    def visitConst(self, node):
        if self.documentable:
            if type(node.value) in (StringType, UnicodeType):
                self.documentable.append(make_docstring(node.value, node.lineno))
            else:
                self.documentable = None

    def visitStmt(self, node):
        self.default_visit(node)


class AssignmentVisitor(DocstringVisitor):

    def visitAssign(self, node):
        visitor = AttributeVisitor(self.token_parser)
        compiler.walk(node, visitor, walker=visitor)
        if visitor.attributes:
            self.context[-1].extend(visitor.attributes)
        if len(visitor.attributes) == 1:
            self.documentable = visitor.attributes[0]
        else:
            self.documentable = None


class ModuleVisitor(AssignmentVisitor):

    def __init__(self, filename, token_parser):
        AssignmentVisitor.__init__(self, token_parser)
        self.filename = filename
        self.module = None

    def visitModule(self, node):
        self.module = module = pynodes.module_section()
        module['filename'] = self.filename
        append_docstring(module, node.doc, node.lineno)
        self.context.append(module)
        self.documentable = module
        self.visit(node.node)
        self.context.pop()

    def visitImport(self, node):
        self.context[-1] += make_import_group(names=node.names,
                                              lineno=node.lineno)
        self.documentable = None

    def visitFrom(self, node):
        self.context[-1].append(
            make_import_group(names=node.names, from_name=node.modname,
                              lineno=node.lineno))
        self.documentable = None

    def visitFunction(self, node):
        visitor = FunctionVisitor(self.token_parser,
                                  function_class=pynodes.function_section)
        compiler.walk(node, visitor, walker=visitor)
        self.context[-1].append(visitor.function)

    def visitClass(self, node):
        visitor = ClassVisitor(self.token_parser)
        compiler.walk(node, visitor, walker=visitor)
        self.context[-1].append(visitor.klass)


class AttributeVisitor(BaseVisitor):

    def __init__(self, token_parser):
        BaseVisitor.__init__(self, token_parser)
        self.attributes = pynodes.class_attribute_section()

    def visitAssign(self, node):
        # Don't visit the expression itself, just the attribute nodes:
        for child in node.nodes:
            self.dispatch(child)
        expression_text = self.token_parser.rhs(node.lineno)
        expression = pynodes.expression_value()
        expression.append(Text(expression_text))
        for attribute in self.attributes:
            attribute.append(expression)

    def visitAssName(self, node):
        self.attributes.append(make_attribute(node.name,
                                              lineno=node.lineno))

    def visitAssTuple(self, node):
        attributes = self.attributes
        self.attributes = []
        self.default_visit(node)
        n = pynodes.attribute_tuple()
        n.extend(self.attributes)
        n['lineno'] = self.attributes[0]['lineno']
        attributes.append(n)
        self.attributes = attributes
        #self.attributes.append(att_tuple)

    def visitAssAttr(self, node):
        self.default_visit(node, node.attrname)

    def visitGetattr(self, node, suffix):
        self.default_visit(node, node.attrname + '.' + suffix)

    def visitName(self, node, suffix):
        self.attributes.append(make_attribute(node.name + '.' + suffix,
                                              lineno=node.lineno))


class FunctionVisitor(DocstringVisitor):

    in_function = 0

    def __init__(self, token_parser, function_class):
        DocstringVisitor.__init__(self, token_parser)
        self.function_class = function_class

    def visitFunction(self, node):
        if self.in_function:
            self.documentable = None
            # Don't bother with nested function definitions.
            return
        self.in_function = 1
        self.function = function = make_function_like_section(
            name=node.name,
            lineno=node.lineno,
            doc=node.doc,
            function_class=self.function_class)
        self.context.append(function)
        self.documentable = function
        self.parse_parameter_list(node)
        self.visit(node.code)
        self.context.pop()

    def parse_parameter_list(self, node):
        parameters = []
        special = []
        argnames = list(node.argnames)
        if node.kwargs:
            special.append(make_parameter(argnames[-1], excess_keyword=1))
            argnames.pop()
        if node.varargs:
            special.append(make_parameter(argnames[-1],
                                          excess_positional=1))
            argnames.pop()
        defaults = list(node.defaults)
        defaults = [None] * (len(argnames) - len(defaults)) + defaults
        function_parameters = self.token_parser.function_parameters(
            node.lineno)
        #print >>sys.stderr, function_parameters
        for argname, default in zip(argnames, defaults):
            if type(argname) is TupleType:
                parameter = pynodes.parameter_tuple()
                for tuplearg in argname:
                    parameter.append(make_parameter(tuplearg))
                argname = normalize_parameter_name(argname)
            else:
                parameter = make_parameter(argname)
            if default:
                n_default = pynodes.parameter_default()
                n_default.append(Text(function_parameters[argname]))
                parameter.append(n_default)
            parameters.append(parameter)
        if parameters or special:
            special.reverse()
            parameters.extend(special)
            parameter_list = pynodes.parameter_list()
            parameter_list.extend(parameters)
            self.function.append(parameter_list)


class ClassVisitor(AssignmentVisitor):

    in_class = 0

    def __init__(self, token_parser):
        AssignmentVisitor.__init__(self, token_parser)
        self.bases = []

    def visitClass(self, node):
        if self.in_class:
            self.documentable = None
            # Don't bother with nested class definitions.
            return
        self.in_class = 1
        #import mypdb as pdb
        #pdb.set_trace()
        for base in node.bases:
            self.visit(base)
        self.klass = klass = make_class_section(node.name, self.bases,
                                                doc=node.doc,
                                                lineno=node.lineno)
        self.context.append(klass)
        self.documentable = klass
        self.visit(node.code)
        self.context.pop()

    def visitGetattr(self, node, suffix=None):
        if suffix:
            name = node.attrname + '.' + suffix
        else:
            name = node.attrname
        self.default_visit(node, name)

    def visitName(self, node, suffix=None):
        if suffix:
            name = node.name + '.' + suffix
        else:
            name = node.name
        self.bases.append(name)

    def visitFunction(self, node):
        if node.name == '__init__':
            visitor = InitMethodVisitor(self.token_parser,
                                        function_class=pynodes.method_section)
            compiler.walk(node, visitor, walker=visitor)
        else:
            visitor = FunctionVisitor(self.token_parser,
                                      function_class=pynodes.method_section)
            compiler.walk(node, visitor, walker=visitor)
        self.context[-1].append(visitor.function)


class InitMethodVisitor(FunctionVisitor, AssignmentVisitor): pass


class TokenParser:

    def __init__(self, text):
        self.text = text + '\n\n'
        self.lines = self.text.splitlines(1)
        self.generator = tokenize.generate_tokens(iter(self.lines).next)
        self.next()

    def __iter__(self):
        return self

    def next(self):
        self.token = self.generator.next()
        self.type, self.string, self.start, self.end, self.line = self.token
        return self.token

    def goto_line(self, lineno):
        while self.start[0] < lineno:
            self.next()
        return self.token

    def rhs(self, lineno):
        """
        Return a whitespace-normalized expression string from the right-hand
        side of an assignment at line `lineno`.
        """
        self.goto_line(lineno)
        while self.string != '=':
            self.next()
        self.stack = None
        while self.type != token.NEWLINE and self.string != ';':
            if self.string == '=' and not self.stack:
                self.tokens = []
                self.stack = []
                self._type = None
                self._string = None
                self._backquote = 0
            else:
                self.note_token()
            self.next()
        self.next()
        text = ''.join(self.tokens)
        return text.strip()
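
    # Illustrative note: with the x.py example from the module docstring,
    # rhs(9) returns the normalized string "1" for ``a = 1``, and rhs(22)
    # returns "(text * 7 + ' whaddyaknow')" for the multi-line assignment
    # to self.instance_attribute (cf. the <expression_value> nodes above).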

    closers = {')': '(', ']': '[', '}': '{'}
    openers = {'(': 1, '[': 1, '{': 1}
    del_ws_prefix = {'.': 1, '=': 1, ')': 1, ']': 1, '}': 1, ':': 1, ',': 1}
    no_ws_suffix = {'.': 1, '=': 1, '(': 1, '[': 1, '{': 1}

    def note_token(self):
        if self.type == tokenize.NL:
            return
        del_ws = self.del_ws_prefix.has_key(self.string)
        append_ws = not self.no_ws_suffix.has_key(self.string)
        if self.openers.has_key(self.string):
            self.stack.append(self.string)
            if (self._type == token.NAME
                or self.closers.has_key(self._string)):
                del_ws = 1
        elif self.closers.has_key(self.string):
            assert self.stack[-1] == self.closers[self.string]
            self.stack.pop()
        elif self.string == '`':
            if self._backquote:
                del_ws = 1
                assert self.stack[-1] == '`'
                self.stack.pop()
            else:
                append_ws = 0
                self.stack.append('`')
            self._backquote = not self._backquote
        if del_ws and self.tokens and self.tokens[-1] == ' ':
            del self.tokens[-1]
        self.tokens.append(self.string)
        self._type = self.type
        self._string = self.string
        if append_ws:
            self.tokens.append(' ')

    def function_parameters(self, lineno):
        """
        Return a dictionary mapping parameters to defaults
        (whitespace-normalized strings).
        """
        self.goto_line(lineno)
        while self.string != 'def':
            self.next()
        while self.string != '(':
            self.next()
        name = None
        default = None
        parameter_tuple = None
        self.tokens = []
        parameters = {}
        self.stack = [self.string]
        self.next()
        while 1:
            if len(self.stack) == 1:
                if parameter_tuple:
                    # Just encountered ")".
                    #print >>sys.stderr, 'parameter_tuple: %r' % self.tokens
                    name = ''.join(self.tokens).strip()
                    self.tokens = []
                    parameter_tuple = None
                if self.string in (')', ','):
                    if name:
                        if self.tokens:
                            default_text = ''.join(self.tokens).strip()
                        else:
                            default_text = None
                        parameters[name] = default_text
                        self.tokens = []
                        name = None
                        default = None
                    if self.string == ')':
                        break
                elif self.type == token.NAME:
                    if name and default:
                        self.note_token()
                    else:
                        assert name is None, (
                            'token=%r name=%r parameters=%r stack=%r'
                            % (self.token, name, parameters, self.stack))
                        name = self.string
                        #print >>sys.stderr, 'name=%r' % name
                elif self.string == '=':
                    assert name is not None, 'token=%r' % (self.token,)
                    assert default is None, 'token=%r' % (self.token,)
                    assert self.tokens == [], 'token=%r' % (self.token,)
                    default = 1
                    self._type = None
                    self._string = None
                    self._backquote = 0
                elif name:
                    self.note_token()
                elif self.string == '(':
                    parameter_tuple = 1
                    self._type = None
                    self._string = None
                    self._backquote = 0
                    self.note_token()
                else:                   # ignore these tokens:
                    assert (self.string in ('*', '**', '\n')
                            or self.type == tokenize.COMMENT), (
                        'token=%r' % (self.token,))
            else:
                self.note_token()
            self.next()
        return parameters
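
    # Illustrative note: for the function ``f`` in the module docstring's
    # x.py example (``def f(x, y=a*5, *args)`` starting on line 27),
    # function_parameters(27) yields roughly {'x': None, 'y': 'a * 5',
    # 'args': None}; parameters without defaults map to None.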


def make_docstring(doc, lineno):
    n = pynodes.docstring()
    if lineno:
        # Really, only module docstrings don't have a line
        # (@@: but maybe they should)
        n['lineno'] = lineno
    n.append(Text(doc))
    return n

def append_docstring(node, doc, lineno):
    if doc:
        node.append(make_docstring(doc, lineno))

def make_class_section(name, bases, lineno, doc):
    n = pynodes.class_section()
    n['lineno'] = lineno
    n.append(make_object_name(name))
    for base in bases:
        b = pynodes.class_base()
        b.append(make_object_name(base))
        n.append(b)
    append_docstring(n, doc, lineno)
    return n

def make_object_name(name):
    n = pynodes.object_name()
    n.append(Text(name))
    return n

def make_function_like_section(name, lineno, doc, function_class):
    n = function_class()
    n['lineno'] = lineno
    n.append(make_object_name(name))
    append_docstring(n, doc, lineno)
    return n

def make_import_group(names, lineno, from_name=None):
    n = pynodes.import_group()
    n['lineno'] = lineno
    if from_name:
        n_from = pynodes.import_from()
        n_from.append(Text(from_name))
        n.append(n_from)
    for name, alias in names:
        n_name = pynodes.import_name()
        n_name.append(Text(name))
        if alias:
            n_alias = pynodes.import_alias()
            n_alias.append(Text(alias))
            n_name.append(n_alias)
        n.append(n_name)
    return n

def make_class_attribute(name, lineno):
    n = pynodes.class_attribute()
    n['lineno'] = lineno
    n.append(Text(name))
    return n

def make_attribute(name, lineno):
    n = pynodes.attribute()
    n['lineno'] = lineno
    n.append(make_object_name(name))
    return n

def make_parameter(name, excess_keyword=0, excess_positional=0):
    """
    excess_keyword and excess_positional must each be either 1 or 0;
    they cannot both be 1.
    """
    n = pynodes.parameter()
    n.append(make_object_name(name))
    assert not excess_keyword or not excess_positional
    if excess_keyword:
        n['excess_keyword'] = 1
    if excess_positional:
        n['excess_positional'] = 1
    return n

def trim_docstring(text):
    """
    Trim indentation and blank lines from docstring text & return it.

    See PEP 257.
    """
    if not text:
        return text
    # Convert tabs to spaces (following the normal Python rules)
    # and split into a list of lines:
    lines = text.expandtabs().splitlines()
    # Determine minimum indentation (first line doesn't count):
    indent = sys.maxint
    for line in lines[1:]:
        stripped = line.lstrip()
        if stripped:
            indent = min(indent, len(line) - len(stripped))
    # Remove indentation (first line is special):
    trimmed = [lines[0].strip()]
    if indent < sys.maxint:
        for line in lines[1:]:
            trimmed.append(line[indent:].rstrip())
    # Strip off trailing and leading blank lines:
    while trimmed and not trimmed[-1]:
        trimmed.pop()
    while trimmed and not trimmed[0]:
        trimmed.pop(0)
    # Return a single string:
    return '\n'.join(trimmed)
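
# Illustrative sketch of the PEP 257 trimming above (not a doctest):
#     trim_docstring('    First line.\n\n        Indented body.\n    ')
#     returns 'First line.\n\nIndented body.'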

def normalize_parameter_name(name):
    """
    Converts a tuple like ``('a', ('b', 'c'), 'd')`` into ``'(a, (b, c), d)'``
    """
    if type(name) is TupleType:
        return '(%s)' % ', '.join([normalize_parameter_name(n) for n in name])
    else:
        return name

if __name__ == '__main__':
    import sys
    args = sys.argv[1:]
    if args[0] == '-v':
        filename = args[1]
        module_text = open(filename).read()
        ast = compiler.parse(module_text)
        visitor = compiler.visitor.ExampleASTVisitor()
        compiler.walk(ast, visitor, walker=visitor, verbose=1)
    else:
        filename = args[0]
        content = open(filename).read()
        print parse_module(content, filename).pformat()
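
# Command-line sketch (based on the __main__ block above; ``mymodule.py``
# stands for any Python source file):
#     python moduleparser.py mymodule.py      # print the documentation tree
#     python moduleparser.py -v mymodule.py   # verbose AST walk (ExampleASTVisitor)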