| 1 | # Author: David Goodger |
|---|
| 2 | # Contact: goodger@users.sourceforge.net |
|---|
| 3 | # Revision: $Revision: 3416 $ |
|---|
| 4 | # Date: $Date: 2005-06-01 15:52:43 +0200 (Wed, 01 Jun 2005) $ |
|---|
| 5 | # Copyright: This module has been placed in the public domain. |
|---|
| 6 | |
|---|
| 7 | """ |
|---|
| 8 | This is the ``docutils.parsers.rst`` package. It exports a single class, `Parser`, |
|---|
| 9 | the reStructuredText parser. |
|---|
| 10 | |
|---|
| 11 | |
|---|
| 12 | Usage |
|---|
| 13 | ===== |
|---|
| 14 | |
|---|
| 15 | 1. Create a parser:: |
|---|
| 16 | |
|---|
| 17 | parser = docutils.parsers.rst.Parser() |
|---|
| 18 | |
|---|
| 19 | Several optional arguments may be passed to modify the parser's behavior. |
|---|
| 20 | Please see `Customizing the Parser`_ below for details. |
|---|
| 21 | |
|---|
| 22 | 2. Gather input (a multi-line string), by reading a file or the standard |
|---|
| 23 | input:: |
|---|
| 24 | |
|---|
| 25 | input = sys.stdin.read() |
|---|
| 26 | |
|---|
| 27 | 3. Create a new empty `docutils.nodes.document` tree:: |
|---|
| 28 | |
|---|
| 29 | document = docutils.utils.new_document(source, settings) |
|---|
| 30 | |
|---|
| 31 | See `docutils.utils.new_document()` for parameter details. |
|---|
| 32 | |
|---|
| 33 | 4. Run the parser, populating the document tree:: |
|---|
| 34 | |
|---|
| 35 | parser.parse(input, document) |
|---|
| 36 | |
|---|
| 37 | |
|---|
| 38 | Parser Overview |
|---|
| 39 | =============== |
|---|
| 40 | |
|---|
| 41 | The reStructuredText parser is implemented as a state machine, examining its |
|---|
| 42 | input one line at a time. To understand how the parser works, please first |
|---|
| 43 | become familiar with the `docutils.statemachine` module, then see the |
|---|
| 44 | `states` module. |
|---|
| 45 | |
|---|
| 46 | |
|---|
| 47 | Customizing the Parser |
|---|
| 48 | ---------------------- |
|---|
| 49 | |
|---|
| 50 | Anything that isn't already customizable is that way simply because that type |
|---|
| 51 | of customizability hasn't been implemented yet. Patches welcome! |
|---|
| 52 | |
|---|
| 53 | When instantiating an object of the `Parser` class, two parameters may be |
|---|
| 54 | passed: ``rfc2822`` and ``inliner``. Pass ``rfc2822=1`` to enable an initial |
|---|
| 55 | RFC-2822 style header block, parsed as a "field_list" element (with "class" |
|---|
| 56 | attribute set to "rfc2822"). Currently this is the only body-level element |
|---|
| 57 | which is customizable without subclassing. (Tip: subclass `Parser` and change |
|---|
| 58 | its "state_classes" and "initial_state" attributes to refer to new classes. |
|---|
| 59 | Contact the author if you need more details.) |
|---|
| 60 | |
|---|
| 61 | The ``inliner`` parameter takes an instance of `states.Inliner` or a subclass. |
|---|
| 62 | It handles inline markup recognition. A common extension is the addition of |
|---|
| 63 | further implicit hyperlinks, like "RFC 2822". This can be done by subclassing |
|---|
| 64 | `states.Inliner`, adding a new method for the implicit markup, and adding a |
|---|
| 65 | ``(pattern, method)`` pair to the "implicit_dispatch" attribute of the |
|---|
| 66 | subclass. See `states.Inliner.implicit_inline()` for details. Explicit |
|---|
| 67 | inline markup can be customized in a `states.Inliner` subclass via the |
|---|
| 68 | ``patterns.initial`` and ``dispatch`` attributes (and new methods as |
|---|
| 69 | appropriate). |
|---|
| 70 | """ |
|---|
| 71 | |
|---|
| 72 | __docformat__ = 'reStructuredText' |
|---|
| 73 | |
|---|
| 74 | |
|---|
| 75 | import docutils.parsers |
|---|
| 76 | import docutils.statemachine |
|---|
| 77 | from docutils.parsers.rst import states |
|---|
| 78 | from docutils import frontend |
|---|
| 79 | |
|---|
| 80 | |
|---|
class Parser(docutils.parsers.Parser):

    """
    The reStructuredText parser.

    Instances hold three attributes set by `__init__()`:

    - ``initial_state``: name of the state the state machine starts in
      ('Body', or 'RFC2822Body' when ``rfc2822`` is true).
    - ``state_classes``: the state classes handed to the state machine
      (taken from `states.state_classes`).
    - ``inliner``: the object passed as ``inliner`` to the state machine's
      ``run()`` method (may be None).

    `parse()` additionally stores the state machine it creates in
    ``self.statemachine``.
    """

    supported = ('restructuredtext', 'rst', 'rest', 'restx', 'rtxt', 'rstx')
    """Aliases this parser supports."""

    # Layout: (option group title, group description, tuple of option specs).
    # Each option spec is (help text, list of option strings, keyword dict).
    #
    # Every boolean flag carries ``frontend.validate_boolean`` so that all
    # on/off settings are validated the same way.  This includes the
    # "file_insertion_enabled" and "raw_enabled" settings: they gate the
    # "include" and "raw" directives, so a value that is not a real boolean
    # must not slip through unchecked.
    settings_spec = (
        'reStructuredText Parser Options',
        None,
        (('Recognize and link to standalone PEP references (like "PEP 258").',
          ['--pep-references'],
          {'action': 'store_true', 'validator': frontend.validate_boolean}),
         ('Base URL for PEP references '
          '(default "http://www.python.org/peps/").',
          ['--pep-base-url'],
          {'metavar': '<URL>', 'default': 'http://www.python.org/peps/',
           'validator': frontend.validate_url_trailing_slash}),
         ('Recognize and link to standalone RFC references (like "RFC 822").',
          ['--rfc-references'],
          {'action': 'store_true', 'validator': frontend.validate_boolean}),
         ('Base URL for RFC references (default "http://www.faqs.org/rfcs/").',
          ['--rfc-base-url'],
          {'metavar': '<URL>', 'default': 'http://www.faqs.org/rfcs/',
           'validator': frontend.validate_url_trailing_slash}),
         ('Set number of spaces for tab expansion (default 8).',
          ['--tab-width'],
          {'metavar': '<width>', 'type': 'int', 'default': 8,
           'validator': frontend.validate_nonnegative_int}),
         ('Remove spaces before footnote references.',
          ['--trim-footnote-reference-space'],
          {'action': 'store_true', 'validator': frontend.validate_boolean}),
         ('Leave spaces before footnote references.',
          ['--leave-footnote-reference-space'],
          {'action': 'store_false', 'dest': 'trim_footnote_reference_space',
           'validator': frontend.validate_boolean}),
         ('Disable directives that insert the contents of external file '
          '("include" & "raw"); replaced with a "warning" system message.',
          ['--no-file-insertion'],
          {'action': 'store_false', 'default': 1,
           'dest': 'file_insertion_enabled',
           'validator': frontend.validate_boolean}),
         ('Enable directives that insert the contents of external file '
          '("include" & "raw"). Enabled by default.',
          ['--file-insertion-enabled'],
          {'action': 'store_true', 'dest': 'file_insertion_enabled',
           'validator': frontend.validate_boolean}),
         ('Disable the "raw" directives; replaced with a "warning" '
          'system message.',
          ['--no-raw'],
          {'action': 'store_false', 'default': 1, 'dest': 'raw_enabled',
           'validator': frontend.validate_boolean}),
         ('Enable the "raw" directive. Enabled by default.',
          ['--raw-enabled'],
          {'action': 'store_true', 'dest': 'raw_enabled',
           'validator': frontend.validate_boolean}),))

    config_section = 'restructuredtext parser'
    config_section_dependencies = ('parsers',)

    def __init__(self, rfc2822=None, inliner=None):
        """
        Set up the parser.

        Parameters:

        - `rfc2822`: if true, the state machine starts in the 'RFC2822Body'
          state instead of 'Body'.
        - `inliner`: stored as ``self.inliner`` and later passed to the
          state machine's ``run()`` method by `parse()`; may be None.
        """
        if rfc2822:
            self.initial_state = 'RFC2822Body'
        else:
            self.initial_state = 'Body'
        self.state_classes = states.state_classes
        self.inliner = inliner

    def parse(self, inputstring, document):
        """
        Parse `inputstring` and populate `document`, a document tree.

        Reads ``document.reporter.debug_flag`` (state machine debug switch)
        and ``document.settings.tab_width`` (tab expansion width).  The
        state machine created for this run is kept in ``self.statemachine``.
        """
        self.setup_parse(inputstring, document)
        self.statemachine = states.RSTStateMachine(
              state_classes=self.state_classes,
              initial_state=self.initial_state,
              debug=document.reporter.debug_flag)
        # Split the input into lines, expanding tabs to the configured width.
        inputlines = docutils.statemachine.string2lines(
              inputstring, tab_width=document.settings.tab_width,
              convert_whitespace=1)
        self.statemachine.run(inputlines, document, inliner=self.inliner)
        self.finish_parse()