# Authors: David Goodger, Ueli Schlaepfer
# Contact: goodger@users.sourceforge.net
# Revision: $Revision: 4242 $
# Date: $Date: 2006-01-06 00:28:53 +0100 (Fri, 06 Jan 2006) $
# Copyright: This module has been placed in the public domain.

"""
Transforms related to the front matter of a document or a section
(information found before the main text):

- `DocTitle`: Used to transform a lone top-level section's title to
  the document title, promote a remaining lone top-level section's
  title to the document subtitle, and determine the document's title
  metadata (document['title']) based on the document title and/or the
  "title" setting.

- `SectionSubTitle`: Used to transform a lone subsection into a
  subtitle.

- `DocInfo`: Used to transform a bibliographic field list into docinfo
  elements.
"""
23 | |
---|
24 | __docformat__ = 'reStructuredText' |
---|
25 | |
---|
26 | import re |
---|
27 | from docutils import nodes, utils |
---|
28 | from docutils.transforms import TransformError, Transform |
---|
29 | |
---|
30 | |
---|
class TitlePromoter(Transform):

    """
    Abstract base class holding the promotion logic shared by the
    `DocTitle` and `SectionSubTitle` transforms.
    """

    def promote_title(self, node):
        """
        Hoist a lone section's title and contents up into `node`.

        A tree shaped like this::

            <node>
                <section>
                    <title>
                    ...

        is rewritten as::

            <node>
                <title>
                ...

        `node` is normally a document.

        Return 1 if a section was promoted, or None if
        `candidate_index` reported no valid candidate.
        """
        # Precondition: `node` must not already start with a title.
        assert not (len(node) and isinstance(node[0], nodes.title))
        section, index = self.candidate_index(node)
        if index is None:
            return None
        # The node takes over the section's attributes.
        node.attributes.update(section.attributes)
        promoted_title = section[:1]
        preceding = node[:index]       # whatever came before the section
        section_contents = section[1:]
        # setup_child is called automatically for all nodes.
        node[:] = promoted_title + preceding + section_contents
        assert isinstance(node[0], nodes.title)
        return 1

    def promote_subtitle(self, node):
        """
        Turn a lone subsection's title into a subtitle of `node`.

        A tree shaped like this::

            <node>
                <title>
                <section>
                    <title>
                    ...

        is rewritten as::

            <node>
                <title>
                <subtitle>
                ...

        Return 1 if a subsection was promoted, or None if
        `candidate_index` reported no valid candidate.
        """
        subsection, index = self.candidate_index(node)
        if index is None:
            return None
        subtitle = nodes.subtitle()
        # Copy the subsection's attributes onto the new subtitle.
        # This causes trouble with list attributes!  To do: Write a
        # test case which catches direct access to the `attributes`
        # dictionary and/or write a test case which shows problems in
        # this particular case.
        subtitle.attributes.update(subsection.attributes)
        # We're losing the subtitle's attributes here!  To do: Write a
        # test case which shows this behavior.
        # The children of the subsection's title become the children
        # of the subtitle.
        subsection_title = subsection[0]
        subtitle[:] = subsection_title[:]
        node_title = node[:1]
        preceding = node[1:index]      # whatever came before the subsection
        subsection_contents = subsection[1:]
        node[:] = (node_title + [subtitle] + preceding
                   + subsection_contents)
        return 1

    def candidate_index(self, node):
        """
        Locate the child of `node` that may be promoted.

        The candidate is the child at the index reported by
        ``node.first_child_not_matching_class(nodes.PreBibliographic)``.
        It only qualifies if it is the last child of `node` and is a
        `nodes.section`.

        Return a ``(candidate, index)`` pair, or ``(None, None)`` if
        no valid candidate was found.
        """
        index = node.first_child_not_matching_class(
            nodes.PreBibliographic)
        if index is None:
            return None, None
        if len(node) > (index + 1):
            # Further children follow the candidate, so it is not alone.
            return None, None
        candidate = node[index]
        if not isinstance(candidate, nodes.section):
            return None, None
        return candidate, index
122 | |
---|
123 | |
---|
class DocTitle(TitlePromoter):

    """
    reStructuredText_ offers no explicit syntax for a document title
    or subtitle.  They are given implicitly instead, and this two-step
    transform "raises" or "promotes" the title(s), together with the
    contents of their sections, to the document level.

    1. When the first non-comment element of the document is a single
       top-level section, that section's title becomes the document's
       title and the section's contents become the document's
       immediate contents.  The lone top-level section header must be
       the first non-comment element in the document.

       Take this input text as an example::

           =================
            Top-Level Title
           =================

           A paragraph.

       The parser turns it into::

           <document>
               <section names="top-level title">
                   <title>
                       Top-Level Title
                   <paragraph>
                       A paragraph.

       and the DocTitle transform then produces::

           <document names="top-level title">
               <title>
                   Top-Level Title
               <paragraph>
                   A paragraph.

    2. A subtitle is looked for only if step 1 found a document title.

       When the lone top-level section in turn has a single
       second-level section as its first non-comment element, that
       section's title is promoted to the document's subtitle and its
       contents become the document's immediate contents.  With this
       input text::

           =================
            Top-Level Title
           =================

           Second-Level Title
           ~~~~~~~~~~~~~~~~~~

           A paragraph.

       parsing followed by this transform gives::

           <document names="top-level title">
               <title>
                   Top-Level Title
               <subtitle names="second-level title">
                   Second-Level Title
               <paragraph>
                   A paragraph.

       (The implicit hyperlink target generated by the "Second-Level
       Title" is kept on the "subtitle" element itself.)

    Comment elements found before the document title or subtitle are
    collected and placed as the first body elements after the
    title(s).

    The transform also sets the document's metadata title
    (document['title']).

    .. _reStructuredText: http://docutils.sf.net/rst.html
    """

    default_priority = 320

    def set_metadata(self):
        """
        Fill in the document['title'] metadata title.

        The sources are consulted in this order of priority:

        * An already present document['title'] attribute.
        * The "title" setting.
        * The document title node (as promoted by promote_title).
        """
        document = self.document
        if document.hasattr('title'):
            # An existing metadata title always wins.
            return
        configured_title = document.settings.title
        if configured_title is not None:
            document['title'] = configured_title
        elif len(document) and isinstance(document[0], nodes.title):
            document['title'] = document[0].astext()

    def apply(self):
        """
        Promote the title and subtitle, then set the metadata title.

        Promotion is skipped when the "doctitle_xform" setting is
        false (it counts as enabled when the setting is absent); the
        metadata title is set in either case.
        """
        document = self.document
        enabled = getattr(document.settings, 'doctitle_xform', 1)
        # promote_(sub)title are defined in the TitlePromoter base
        # class.  A subtitle is only attempted once a title has been
        # promoted.
        if enabled and self.promote_title(document):
            self.promote_subtitle(document)
        # Set document['title'].
        self.set_metadata()
234 | |
---|
235 | |
---|
class SectionSubTitle(TitlePromoter):

    """
    The section-level counterpart of document subtitles.  As an
    example, ::

        <section>
            <title>
                Title
            <section>
                <title>
                    Subtitle
                ...

    becomes ::

        <section>
            <title>
                Title
            <subtitle>
                Subtitle
            ...

    See the docstring of DocTitle for the details.
    """

    default_priority = 350

    def apply(self):
        """
        Try subtitle promotion on every section in the document.

        Nothing is done when the "sectsubtitle_xform" setting is
        false; the transform counts as enabled when the setting is
        absent.
        """
        settings = self.document.settings
        if getattr(settings, 'sectsubtitle_xform', 1):
            for section in self.document.traverse(nodes.section):
                # On our way through the node tree, we are deleting
                # sections, but we call self.promote_subtitle for
                # those sections nonetheless.  To do: Write a test
                # case which shows the problem and discuss on
                # Docutils-develop.
                self.promote_subtitle(section)
272 | |
---|
273 | |
---|
class DocInfo(Transform):

    """
    This transform is specific to the reStructuredText_ markup syntax;
    see "Bibliographic Fields" in the `reStructuredText Markup
    Specification`_ for a high-level description.  This transform
    should be run *after* the `DocTitle` transform.

    Given a field list as the first non-comment element after the
    document title and subtitle (if present), registered bibliographic
    field names are transformed to the corresponding DTD elements,
    becoming child elements of the "docinfo" element (except for a
    dedication and/or an abstract, which become "topic" elements after
    "docinfo").

    For example, given this document fragment after parsing::

        <document>
            <title>
                Document Title
            <field_list>
                <field>
                    <field_name>
                        Author
                    <field_body>
                        <paragraph>
                            A. Name
                <field>
                    <field_name>
                        Status
                    <field_body>
                        <paragraph>
                            $RCSfile$
            ...

    After running the bibliographic field list transform, the
    resulting document tree would look like this::

        <document>
            <title>
                Document Title
            <docinfo>
                <author>
                    A. Name
                <status>
                    frontmatter.py
            ...

    The "Status" field contained an expanded RCS keyword, which is
    normally (but optionally) cleaned up by the transform.  The sole
    contents of the field body must be a paragraph containing an
    expanded RCS keyword of the form "$keyword: expansion text $".  Any
    RCS keyword can be processed in any bibliographic field.  The
    dollar signs and leading RCS keyword name are removed.  Extra
    processing is done for the following RCS keywords:

    - "RCSfile" expands to the name of the file in the RCS or CVS
      repository, which is the name of the source file with a ",v"
      suffix appended.  The transform will remove the ",v" suffix.

    - "Date" expands to the format "YYYY/MM/DD hh:mm:ss" (in the UTC
      time zone).  The RCS Keywords transform will extract just the
      date itself and transform it to an ISO 8601 format date, as in
      "2000-12-31".

      (Since the source file for this text is itself stored under CVS,
      we can't show an example of the "Date" RCS keyword because we
      can't prevent any RCS keywords used in this explanation from
      being expanded.  Only the "RCSfile" keyword is stable; its
      expansion text changes only if the file name changes.)

    .. _reStructuredText: http://docutils.sf.net/rst.html
    .. _reStructuredText Markup Specification:
       http://docutils.sf.net/docs/ref/rst/restructuredtext.html
    """

    default_priority = 340

    biblio_nodes = {
          'author': nodes.author,
          'authors': nodes.authors,
          'organization': nodes.organization,
          'address': nodes.address,
          'contact': nodes.contact,
          'version': nodes.version,
          'revision': nodes.revision,
          'status': nodes.status,
          'date': nodes.date,
          'copyright': nodes.copyright,
          'dedication': nodes.topic,
          'abstract': nodes.topic}
    """Canonical field name (lowcased) to node class name mapping for
    bibliographic fields (field_list)."""

    def apply(self):
        """
        Replace a leading bibliographic field list with docinfo nodes.

        Nothing is done when the "docinfo_xform" setting is false (the
        transform counts as enabled when the setting is absent), when
        the document has no child other than `nodes.PreBibliographic`
        ones, or when the first such child is not a field list.
        """
        if not getattr(self.document.settings, 'docinfo_xform', 1):
            return
        document = self.document
        index = document.first_child_not_matching_class(
              nodes.PreBibliographic)
        if index is None:
            return
        candidate = document[index]
        if isinstance(candidate, nodes.field_list):
            # The new nodes are inserted at the first child that is
            # neither Titular nor Decorative, which may lie before the
            # position of the field list itself.
            biblioindex = document.first_child_not_matching_class(
                  (nodes.Titular, nodes.Decorative))
            nodelist = self.extract_bibliographic(candidate)
            del document[index]         # untransformed field list (candidate)
            document[biblioindex:biblioindex] = nodelist

    def extract_bibliographic(self, field_list):
        """
        Convert the fields of `field_list` into bibliographic nodes.

        Each field whose normalized name is a registered bibliographic
        field is turned into the node class given by `biblio_nodes`.
        A field that cannot be converted is appended to the docinfo
        unchanged, apart from RCS keyword cleanup when its body is a
        single paragraph.

        Return a list holding the docinfo node (if it has any
        children) followed by the "dedication" and "abstract" topics,
        in that order, for those that were found.
        """
        docinfo = nodes.docinfo()
        bibliofields = self.language.bibliographic_fields
        labels = self.language.labels
        topics = {'dedication': None, 'abstract': None}
        for field in field_list:
            try:
                name = field[0][0].astext()
                normedname = nodes.fully_normalize_name(name)
                # `in` replaces dict.has_key(), which no longer exists
                # on Python 3.
                if not (len(field) == 2 and normedname in bibliofields
                        and self.check_empty_biblio_field(field, name)):
                    raise TransformError
                canonical = bibliofields[normedname]
                biblioclass = self.biblio_nodes[canonical]
                if issubclass(biblioclass, nodes.TextElement):
                    if not self.check_compound_biblio_field(field, name):
                        raise TransformError
                    utils.clean_rcs_keywords(
                          field[1][0], self.rcs_keyword_substitutions)
                    docinfo.append(biblioclass('', '', *field[1][0]))
                elif issubclass(biblioclass, nodes.authors):
                    self.extract_authors(field, name, docinfo)
                elif issubclass(biblioclass, nodes.topic):
                    if topics[canonical]:
                        # A second dedication/abstract is reported and
                        # then handled as an unconvertible field.
                        field[-1] += self.document.reporter.warning(
                            'There can only be one "%s" field.' % name,
                            base_node=field)
                        raise TransformError
                    title = nodes.title(name, labels[canonical])
                    topics[canonical] = biblioclass(
                        '', title, classes=[canonical], *field[1].children)
                else:
                    docinfo.append(biblioclass('', *field[1].children))
            except TransformError:
                if len(field[-1]) == 1 \
                       and isinstance(field[-1][0], nodes.paragraph):
                    utils.clean_rcs_keywords(
                        field[-1][0], self.rcs_keyword_substitutions)
                docinfo.append(field)
        nodelist = []
        if len(docinfo) != 0:
            nodelist.append(docinfo)
        for name in ('dedication', 'abstract'):
            if topics[name]:
                nodelist.append(topics[name])
        return nodelist

    def check_empty_biblio_field(self, field, name):
        """
        Check that the body of `field` is not empty.

        Return 1 if the body has at least one child.  Otherwise append
        a warning to the field body and return None.
        """
        if len(field[-1]) < 1:
            field[-1] += self.document.reporter.warning(
                  'Cannot extract empty bibliographic field "%s".' % name,
                  base_node=field)
            return None
        return 1

    def check_compound_biblio_field(self, field, name):
        """
        Check that the body of `field` is exactly one paragraph.

        Return 1 if it is.  Otherwise append a warning to the field
        body and return None.  The body must not be empty when this is
        called; `extract_bibliographic` checks that first.
        """
        if len(field[-1]) > 1:
            field[-1] += self.document.reporter.warning(
                  'Cannot extract compound bibliographic field "%s".' % name,
                  base_node=field)
            return None
        if not isinstance(field[-1][0], nodes.paragraph):
            field[-1] += self.document.reporter.warning(
                  'Cannot extract bibliographic field "%s" containing '
                  'anything other than a single paragraph.' % name,
                  base_node=field)
            return None
        return 1

    # (pattern, replacement) pairs handed to utils.clean_rcs_keywords.
    # NOTE(review): the patterns are written as two adjacent literals
    # (r'\$' r'Date: ...'), presumably so that the version control
    # system does not expand the keyword inside this file -- confirm
    # before joining them.
    rcs_keyword_substitutions = [
          (re.compile(r'\$' r'Date: (\d\d\d\d)[-/](\d\d)[-/](\d\d)[ T][\d:]+'
                      r'[^$]* \$', re.IGNORECASE), r'\1-\2-\3'),
          (re.compile(r'\$' r'RCSfile: (.+),v \$', re.IGNORECASE), r'\1'),
          (re.compile(r'\$[a-zA-Z]+: (.+) \$'), r'\1'),]

    def extract_authors(self, field, name, docinfo):
        """
        Append an "authors" node built from `field` to `docinfo`.

        The field body may be a single paragraph, a single bullet list
        or several paragraphs; each form is handled by its own
        ``authors_from_*`` method.

        Raise TransformError, after appending a warning to the field
        body, if the body has another form or yields no author.
        """
        try:
            if len(field[1]) == 1:
                if isinstance(field[1][0], nodes.paragraph):
                    authors = self.authors_from_one_paragraph(field)
                elif isinstance(field[1][0], nodes.bullet_list):
                    authors = self.authors_from_bullet_list(field)
                else:
                    raise TransformError
            else:
                authors = self.authors_from_paragraphs(field)
            authornodes = [nodes.author('', '', *author)
                           for author in authors if author]
            if len(authornodes) >= 1:
                docinfo.append(nodes.authors('', *authornodes))
            else:
                raise TransformError
        except TransformError:
            field[-1] += self.document.reporter.warning(
                  'Bibliographic field "%s" incompatible with extraction: '
                  'it must contain either a single paragraph (with authors '
                  'separated by one of "%s"), multiple paragraphs (one per '
                  'author), or a bullet list with one paragraph (one author) '
                  'per item.'
                  % (name, ''.join(self.language.author_separators)),
                  base_node=field)
            # Re-raised so that extract_bibliographic keeps the field
            # as it is.
            raise

    def authors_from_one_paragraph(self, field):
        """
        Split the text of a single-paragraph field body into authors.

        The language's author separators are tried in order and the
        first one that splits the text into more than one part is
        used.  Names are stripped and empty ones dropped.

        Return a list with one ``[nodes.Text]`` list per author.
        Raise TransformError if the paragraph text is empty.
        """
        text = field[1][0].astext().strip()
        if not text:
            raise TransformError
        # Start from the whole text as one name so that the result is
        # defined even if the language module lists no separators.
        authornames = [text]
        for authorsep in self.language.author_separators:
            authornames = text.split(authorsep)
            if len(authornames) > 1:
                break
        authornames = [author.strip() for author in authornames]
        authors = [[nodes.Text(author)] for author in authornames if author]
        return authors

    def authors_from_bullet_list(self, field):
        """
        Read one author from each item of a bullet-list field body.

        Return a list with the children of each item's paragraph.
        Raise TransformError if an item is not exactly one paragraph
        or if the list has no items.
        """
        authors = []
        for item in field[1][0]:
            if len(item) != 1 or not isinstance(item[0], nodes.paragraph):
                raise TransformError
            authors.append(item[0].children)
        if not authors:
            raise TransformError
        return authors

    def authors_from_paragraphs(self, field):
        """
        Read one author from each paragraph of the field body.

        Return a list with the children of each paragraph.  Raise
        TransformError if any child of the body is not a paragraph.
        """
        for item in field[1]:
            if not isinstance(item, nodes.paragraph):
                raise TransformError
        authors = [item.children for item in field[1]]
        return authors