root/galaxy-central/eggs/docutils-0.4-py2.6.egg/docutils/transforms/frontmatter.py @ 3

リビジョン 3, 18.5 KB (コミッタ: kohda, 14 年 前)

Install Unix tools  http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

行番号 
# Authors: David Goodger, Ueli Schlaepfer
# Contact: goodger@users.sourceforge.net
# Revision: $Revision: 4242 $
# Date: $Date: 2006-01-06 00:28:53 +0100 (Fri, 06 Jan 2006) $
# Copyright: This module has been placed in the public domain.

"""
Transforms related to the front matter of a document or a section
(information found before the main text):

- `DocTitle`: Used to transform a lone top level section's title to
  the document title, promote a remaining lone top-level section's
  title to the document subtitle, and determine the document's title
  metadata (document['title']) based on the document title and/or the
  "title" setting.

- `SectionSubTitle`: Used to transform a lone subsection into a
  subtitle.

- `DocInfo`: Used to transform a bibliographic field list into docinfo
  elements.
"""
23
24__docformat__ = 'reStructuredText'
25
26import re
27from docutils import nodes, utils
28from docutils.transforms import TransformError, Transform
29
30
class TitlePromoter(Transform):

    """
    Abstract base class for the DocTitle and SectionSubTitle transforms.

    Provides the shared helpers that lift a lone section's title into
    its parent as a title or a subtitle.
    """

    def promote_title(self, node):
        """
        Lift the title of a lone child section into `node`.

        The tree ::

            <node>
                <section>
                    <title>
                    ...

        is rewritten as ::

            <node>
                <title>
                ...

        `node` is normally a document.

        Return 1 if a title was promoted, None if there was no
        promotion candidate.
        """
        # Precondition: `node` must not have a title yet.
        already_titled = len(node) and isinstance(node[0], nodes.title)
        assert not already_titled
        section, index = self.candidate_index(node)
        if index is None:
            return None
        # The section's attributes are carried over to the node.
        node.attributes.update(section.attributes)
        # setup_child is called automatically for all nodes.
        promoted_title = section[:1]
        before_section = node[:index]
        section_body = section[1:]
        node[:] = promoted_title + before_section + section_body
        assert isinstance(node[0], nodes.title)
        return 1

    def promote_subtitle(self, node):
        """
        Turn the title of a lone child section into a subtitle of `node`.

        The tree ::

            <node>
                <title>
                <section>
                    <title>
                    ...

        is rewritten as ::

            <node>
                <title>
                <subtitle>
                ...

        Return 1 if a subtitle was promoted, None if there was no
        promotion candidate.
        """
        subsection, index = self.candidate_index(node)
        if index is None:
            return None
        subtitle = nodes.subtitle()
        # Copy the subsection's attributes onto the new subtitle.
        # This causes trouble with list attributes!  To do: write a
        # test case which catches direct access to the `attributes`
        # dictionary and/or one which shows problems in this
        # particular case.
        subtitle.attributes.update(subsection.attributes)
        # We're losing the subtitle's attributes here!  To do: write a
        # test case which shows this behavior.
        # The subtitle receives the children of the subsection's title.
        subtitle[:] = subsection[0][:]
        title_part = node[:1]
        before_section = node[1:index]
        section_body = subsection[1:]
        node[:] = title_part + [subtitle] + before_section + section_body
        return 1

    def candidate_index(self, node):
        """
        Find and return the promotion candidate and its index.

        The candidate is the first child of `node` that is not a
        `nodes.PreBibliographic`; it qualifies only if it is also the
        last child of `node` and is a section.

        Return (None, None) if no valid candidate was found.
        """
        index = node.first_child_not_matching_class(nodes.PreBibliographic)
        if index is None:
            return None, None
        if len(node) > index + 1:
            # Something follows the candidate, so it is not "lone".
            return None, None
        candidate = node[index]
        if not isinstance(candidate, nodes.section):
            return None, None
        return candidate, index
122
123
class DocTitle(TitlePromoter):

    """
    reStructuredText_ offers no explicit syntax for a document title
    or subtitle.  They are supplied implicitly instead, and this
    two-step transform "raises" or "promotes" the title(s) (together
    with the contents of their sections) to the document level.

    1. If the document contains a single top-level section as its
       first non-comment element, the top-level section's title
       becomes the document's title, and the top-level section's
       contents become the document's immediate contents. The lone
       top-level section header must be the first non-comment element
       in the document.

       For example, take this input text::

           =================
            Top-Level Title
           =================

           A paragraph.

       Once parsed, it looks like this::

           <document>
               <section names="top-level title">
                   <title>
                       Top-Level Title
                   <paragraph>
                       A paragraph.

       After running the DocTitle transform, we have::

           <document names="top-level title">
               <title>
                   Top-Level Title
               <paragraph>
                   A paragraph.

    2. If step 1 successfully determines the document title, we
       continue by checking for a subtitle.

       If the lone top-level section itself contains a single
       second-level section as its first non-comment element, that
       section's title is promoted to the document's subtitle, and
       that section's contents become the document's immediate
       contents. Given this input text::

           =================
            Top-Level Title
           =================

           Second-Level Title
           ~~~~~~~~~~~~~~~~~~

           A paragraph.

       After parsing and running the Section Promotion transform, the
       result is::

           <document names="top-level title">
               <title>
                   Top-Level Title
               <subtitle names="second-level title">
                   Second-Level Title
               <paragraph>
                   A paragraph.

       (Note that the implicit hyperlink target generated by the
       "Second-Level Title" is preserved on the "subtitle" element
       itself.)

    Any comment elements occurring before the document title or
    subtitle are accumulated and inserted as the first body elements
    after the title(s).

    This transform also sets the document's metadata title
    (document['title']).

    .. _reStructuredText: http://docutils.sf.net/rst.html
    """

    default_priority = 320

    def set_metadata(self):
        """
        Fill in the document['title'] metadata title.

        The value comes from the first available of these sources:

        * Existing document['title'] attribute (left untouched).
        * "title" setting.
        * Document title node (as promoted by promote_title).
        """
        document = self.document
        if document.hasattr('title'):
            return
        configured_title = document.settings.title
        if configured_title is not None:
            document['title'] = configured_title
        elif len(document) and isinstance(document[0], nodes.title):
            document['title'] = document[0].astext()

    def apply(self):
        """
        Promote the document title (and then the subtitle) unless the
        "doctitle_xform" setting is false, then set the title metadata.
        """
        document = self.document
        enabled = getattr(document.settings, 'doctitle_xform', 1)
        # promote_(sub)title are defined in the TitlePromoter base class.
        # A subtitle is only attempted once a title has been promoted.
        if enabled and self.promote_title(document):
            self.promote_subtitle(document)
        # Set document['title'].
        self.set_metadata()
234
235
class SectionSubTitle(TitlePromoter):

    """
    The section-level counterpart of document subtitles.  For example, ::

        <section>
            <title>
                Title
            <section>
                <title>
                    Subtitle
                ...

    is transformed into ::

        <section>
            <title>
                Title
            <subtitle>
                Subtitle
            ...

    For details refer to the docstring of DocTitle.
    """

    default_priority = 350

    def apply(self):
        """
        Try to promote a subtitle in every section of the document,
        unless the "sectsubtitle_xform" setting is false.
        """
        if getattr(self.document.settings, 'sectsubtitle_xform', 1):
            for section in self.document.traverse(nodes.section):
                # On our way through the node tree, we are deleting
                # sections, but we call self.promote_subtitle for those
                # sections nonetheless.  To do: write a test case which
                # shows the problem and discuss on Docutils-develop.
                self.promote_subtitle(section)
272
273
class DocInfo(Transform):

    """
    This transform is specific to the reStructuredText_ markup syntax;
    see "Bibliographic Fields" in the `reStructuredText Markup
    Specification`_ for a high-level description. This transform
    should be run *after* the `DocTitle` transform.

    Given a field list as the first non-comment element after the
    document title and subtitle (if present), registered bibliographic
    field names are transformed to the corresponding DTD elements,
    becoming child elements of the "docinfo" element (except for a
    dedication and/or an abstract, which become "topic" elements after
    "docinfo").

    For example, given this document fragment after parsing::

        <document>
            <title>
                Document Title
            <field_list>
                <field>
                    <field_name>
                        Author
                    <field_body>
                        <paragraph>
                            A. Name
                <field>
                    <field_name>
                        Status
                    <field_body>
                        <paragraph>
                            $RCSfile$
            ...

    After running the bibliographic field list transform, the
    resulting document tree would look like this::

        <document>
            <title>
                Document Title
            <docinfo>
                <author>
                    A. Name
                <status>
                    frontmatter.py
            ...

    The "Status" field contained an expanded RCS keyword, which is
    normally (but optionally) cleaned up by the transform. The sole
    contents of the field body must be a paragraph containing an
    expanded RCS keyword of the form "$keyword: expansion text $". Any
    RCS keyword can be processed in any bibliographic field. The
    dollar signs and leading RCS keyword name are removed. Extra
    processing is done for the following RCS keywords:

    - "RCSfile" expands to the name of the file in the RCS or CVS
      repository, which is the name of the source file with a ",v"
      suffix appended. The transform will remove the ",v" suffix.

    - "Date" expands to the format "YYYY/MM/DD hh:mm:ss" (in the UTC
      time zone). The RCS Keywords transform will extract just the
      date itself and transform it to an ISO 8601 format date, as in
      "2000-12-31".

      (Since the source file for this text is itself stored under CVS,
      we can't show an example of the "Date" RCS keyword because we
      can't prevent any RCS keywords used in this explanation from
      being expanded. Only the "RCSfile" keyword is stable; its
      expansion text changes only if the file name changes.)

    .. _reStructuredText: http://docutils.sf.net/rst.html
    .. _reStructuredText Markup Specification:
       http://docutils.sf.net/docs/ref/rst/restructuredtext.html
    """

    default_priority = 340

    biblio_nodes = {
          'author': nodes.author,
          'authors': nodes.authors,
          'organization': nodes.organization,
          'address': nodes.address,
          'contact': nodes.contact,
          'version': nodes.version,
          'revision': nodes.revision,
          'status': nodes.status,
          'date': nodes.date,
          'copyright': nodes.copyright,
          'dedication': nodes.topic,
          'abstract': nodes.topic}
    """Canonical field name (lowcased) to node class name mapping for
    bibliographic fields (field_list)."""

    def apply(self):
        """
        Replace a leading bibliographic field list with docinfo nodes.

        Does nothing when the "docinfo_xform" setting is false, when
        the document has no child that is not `PreBibliographic`, or
        when the first such child is not a field list.
        """
        if not getattr(self.document.settings, 'docinfo_xform', 1):
            return
        document = self.document
        index = document.first_child_not_matching_class(
              nodes.PreBibliographic)
        if index is None:
            return
        candidate = document[index]
        if isinstance(candidate, nodes.field_list):
            # The insertion point is computed before the field list is
            # removed from the document.
            biblioindex = document.first_child_not_matching_class(
                  (nodes.Titular, nodes.Decorative))
            nodelist = self.extract_bibliographic(candidate)
            del document[index]         # untransformed field list (candidate)
            document[biblioindex:biblioindex] = nodelist

    def extract_bibliographic(self, field_list):
        """
        Convert the fields of `field_list` into bibliographic nodes.

        Each field whose normalized name is a registered bibliographic
        field is converted to the node class given by `biblio_nodes`.
        A field that cannot be converted (a `TransformError` is raised
        while processing it) is appended to the docinfo unchanged,
        apart from RCS keyword cleanup of a sole paragraph body.

        Return a list holding the docinfo node (if it is non-empty)
        followed by the "dedication" and "abstract" topics, if any.
        """
        docinfo = nodes.docinfo()
        bibliofields = self.language.bibliographic_fields
        labels = self.language.labels
        topics = {'dedication': None, 'abstract': None}
        for field in field_list:
            try:
                name = field[0][0].astext()
                normedname = nodes.fully_normalize_name(name)
                # `in` replaces the Python-2-only dict.has_key().
                if not (len(field) == 2 and normedname in bibliofields
                        and self.check_empty_biblio_field(field, name)):
                    raise TransformError
                canonical = bibliofields[normedname]
                biblioclass = self.biblio_nodes[canonical]
                if issubclass(biblioclass, nodes.TextElement):
                    if not self.check_compound_biblio_field(field, name):
                        raise TransformError
                    utils.clean_rcs_keywords(
                          field[1][0], self.rcs_keyword_substitutions)
                    docinfo.append(biblioclass('', '', *field[1][0]))
                elif issubclass(biblioclass, nodes.authors):
                    self.extract_authors(field, name, docinfo)
                elif issubclass(biblioclass, nodes.topic):
                    if topics[canonical]:
                        # Only the first dedication/abstract is used;
                        # a repeat gets a warning and stays a field.
                        field[-1] += self.document.reporter.warning(
                            'There can only be one "%s" field.' % name,
                            base_node=field)
                        raise TransformError
                    title = nodes.title(name, labels[canonical])
                    topics[canonical] = biblioclass(
                        '', title, classes=[canonical], *field[1].children)
                else:
                    docinfo.append(biblioclass('', *field[1].children))
            except TransformError:
                # Fallback: keep the field as-is inside the docinfo.
                if len(field[-1]) == 1 \
                       and isinstance(field[-1][0], nodes.paragraph):
                    utils.clean_rcs_keywords(
                        field[-1][0], self.rcs_keyword_substitutions)
                docinfo.append(field)
        nodelist = []
        # Explicit length test: emptiness is what matters here, not the
        # truth value of the node object.
        if len(docinfo) != 0:
            nodelist.append(docinfo)
        for name in ('dedication', 'abstract'):
            if topics[name]:
                nodelist.append(topics[name])
        return nodelist

    def check_empty_biblio_field(self, field, name):
        """
        Return 1 if the body of `field` has content.

        Otherwise append a warning to the field body and return None.
        """
        if len(field[-1]) < 1:
            field[-1] += self.document.reporter.warning(
                  'Cannot extract empty bibliographic field "%s".' % name,
                  base_node=field)
            return None
        return 1

    def check_compound_biblio_field(self, field, name):
        """
        Return 1 if the body of `field` is exactly one paragraph.

        Otherwise append a warning to the field body and return None.
        """
        if len(field[-1]) > 1:
            field[-1] += self.document.reporter.warning(
                  'Cannot extract compound bibliographic field "%s".' % name,
                  base_node=field)
            return None
        if not isinstance(field[-1][0], nodes.paragraph):
            field[-1] += self.document.reporter.warning(
                  'Cannot extract bibliographic field "%s" containing '
                  'anything other than a single paragraph.' % name,
                  base_node=field)
            return None
        return 1

    # (pattern, replacement) pairs passed to utils.clean_rcs_keywords.
    rcs_keyword_substitutions = [
          (re.compile(r'\$' r'Date: (\d\d\d\d)[-/](\d\d)[-/](\d\d)[ T][\d:]+'
                      r'[^$]* \$', re.IGNORECASE), r'\1-\2-\3'),
          (re.compile(r'\$' r'RCSfile: (.+),v \$', re.IGNORECASE), r'\1'),
          (re.compile(r'\$[a-zA-Z]+: (.+) \$'), r'\1'),]

    def extract_authors(self, field, name, docinfo):
        """
        Append an "authors" node built from `field` to `docinfo`.

        The field body may be a single paragraph, a single bullet
        list, or several paragraphs.  If no author can be extracted,
        a warning is appended to the field body and the
        `TransformError` is re-raised for the caller to handle.
        """
        try:
            if len(field[1]) == 1:
                if isinstance(field[1][0], nodes.paragraph):
                    authors = self.authors_from_one_paragraph(field)
                elif isinstance(field[1][0], nodes.bullet_list):
                    authors = self.authors_from_bullet_list(field)
                else:
                    raise TransformError
            else:
                authors = self.authors_from_paragraphs(field)
            authornodes = [nodes.author('', '', *author)
                           for author in authors if author]
            if len(authornodes) >= 1:
                docinfo.append(nodes.authors('', *authornodes))
            else:
                raise TransformError
        except TransformError:
            field[-1] += self.document.reporter.warning(
                  'Bibliographic field "%s" incompatible with extraction: '
                  'it must contain either a single paragraph (with authors '
                  'separated by one of "%s"), multiple paragraphs (one per '
                  'author), or a bullet list with one paragraph (one author) '
                  'per item.'
                  % (name, ''.join(self.language.author_separators)),
                  base_node=field)
            raise

    def authors_from_one_paragraph(self, field):
        """
        Split the text of a single-paragraph field body into authors.

        The separators in `self.language.author_separators` are tried
        in order; the first one that yields more than one name wins.
        Return a list of one-element lists of `nodes.Text`.  Raise
        `TransformError` if the paragraph text is empty.
        """
        text = field[1][0].astext().strip()
        if not text:
            raise TransformError
        # Default to a single author so that an empty separator list
        # does not leave `authornames` unbound.
        authornames = [text]
        for authorsep in self.language.author_separators:
            authornames = text.split(authorsep)
            if len(authornames) > 1:
                break
        authornames = [author.strip() for author in authornames]
        authors = [[nodes.Text(author)] for author in authornames if author]
        return authors

    def authors_from_bullet_list(self, field):
        """
        Return the children of each list item's sole paragraph.

        Raise `TransformError` if an item is not exactly one paragraph
        or if the list is empty.
        """
        authors = []
        for item in field[1][0]:
            if len(item) != 1 or not isinstance(item[0], nodes.paragraph):
                raise TransformError
            authors.append(item[0].children)
        if not authors:
            raise TransformError
        return authors

    def authors_from_paragraphs(self, field):
        """
        Return the children of each paragraph in the field body.

        Raise `TransformError` if any body element is not a paragraph.
        """
        for item in field[1]:
            if not isinstance(item, nodes.paragraph):
                raise TransformError
        authors = [item.children for item in field[1]]
        return authors
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。