1 | # Author: David Goodger |
---|
2 | # Contact: goodger@users.sourceforge.net |
---|
3 | # Revision: $Revision: 3654 $ |
---|
4 | # Date: $Date: 2005-07-03 17:02:15 +0200 (Sun, 03 Jul 2005) $ |
---|
5 | # Copyright: This module has been placed in the public domain. |
---|
6 | |
---|
7 | """ |
---|
8 | I/O classes provide a uniform API for low-level input and output. Subclasses |
---|
9 | will exist for a variety of input/output mechanisms. |
---|
10 | """ |
---|
11 | |
---|
12 | __docformat__ = 'reStructuredText' |
---|
13 | |
---|
14 | import sys |
---|
15 | try: |
---|
16 | import locale |
---|
17 | except: |
---|
18 | pass |
---|
19 | from types import UnicodeType |
---|
20 | from docutils import TransformSpec |
---|
21 | |
---|
22 | |
---|
class Input(TransformSpec):

    """
    Abstract base class for input wrappers.

    Subclasses supply `read()`; `decode()` converts raw string data to
    Unicode.
    """

    component_type = 'input'

    default_source_path = None

    def __init__(self, source=None, source_path=None, encoding=None,
                 error_handler='strict'):
        """
        :Parameters:
            - `source`: the source of input data; stored unchanged.
            - `source_path`: a text reference to the source; when false,
              the class attribute `default_source_path` is used instead.
            - `encoding`: the text encoding of the input, or `None` to let
              `decode()` apply its heuristics.
            - `error_handler`: the decoding error handler passed to the
              codec.
        """
        self.encoding = encoding
        """Text encoding for the input source."""

        self.error_handler = error_handler
        """Text decoding error handler."""

        self.source = source
        """The source of input data."""

        self.source_path = source_path or self.default_source_path
        """A text reference to the source."""

        self.successful_encoding = None
        """The encoding that successfully decoded the source data."""

    def __repr__(self):
        return '%s: source=%r, source_path=%r' % (self.__class__, self.source,
                                                  self.source_path)

    def read(self):
        """Return the input data; must be implemented by subclasses."""
        raise NotImplementedError

    def decode(self, data):
        """
        Decode a string, `data`, heuristically.
        Raise UnicodeError if unsuccessful.

        Unicode input is returned unchanged.  Otherwise each candidate
        encoding is tried in turn; the first one that works is recorded in
        `self.successful_encoding` and the decoded text is returned with
        all U+FEFF (BOM) characters removed.

        The client application should call ``locale.setlocale`` at the
        beginning of processing::

            locale.setlocale(locale.LC_ALL, '')
        """
        if self.encoding and self.encoding.lower() == 'unicode':
            assert isinstance(data, UnicodeType), (
                'input encoding is "unicode" '
                'but input is not a unicode object')
        if isinstance(data, UnicodeType):
            # Accept unicode even if self.encoding != 'unicode'.
            return data
        candidates = self._candidate_encodings()
        error = None
        for enc in candidates:
            try:
                decoded = unicode(data, enc, self.error_handler)
            except (UnicodeError, LookupError):
                # Remember the most recent failure for the final report.
                error = sys.exc_info()[1]
            else:
                self.successful_encoding = enc
                # Return decoded, removing BOMs.
                return decoded.replace(u'\ufeff', u'')
        error_details = ''
        if error is not None:
            error_details = '\n(%s: %s)' % (error.__class__.__name__, error)
        raise UnicodeError(
            'Unable to decode input data.  Tried the following encodings: '
            '%s.%s'
            % (', '.join([repr(enc) for enc in candidates]),
               error_details))

    def _candidate_encodings(self):
        """Return the encodings `decode()` should try, in order, no repeats."""
        if self.encoding:
            # Apply heuristics only if no encoding is explicitly given.
            return [self.encoding]
        guesses = ['utf-8']
        # The module-level import of `locale` is optional and any of these
        # look-ups may fail; they are best-effort, so skip whatever is
        # unavailable.  `Exception` (rather than a bare ``except``) keeps
        # interpreter-exit signals from being swallowed.
        probes = (lambda: locale.nl_langinfo(locale.CODESET),
                  lambda: locale.getlocale()[1],
                  lambda: locale.getdefaultlocale()[1])
        for probe in probes:
            try:
                guesses.append(probe())
            except Exception:
                pass
        guesses.append('latin-1')
        # Drop empty results and case-insensitive repeats so the same codec
        # is not run over the data more than once.
        unique = []
        seen = []
        for enc in guesses:
            if not enc:
                continue
            key = enc.lower()
            if key not in seen:
                seen.append(key)
                unique.append(enc)
        return unique
---|
113 | |
---|
114 | |
---|
class Output(TransformSpec):

    """
    Abstract base class for output wrappers.

    Subclasses supply `write()`; `encode()` converts Unicode text to the
    configured output encoding.
    """

    component_type = 'output'

    default_destination_path = None

    def __init__(self, destination=None, destination_path=None,
                 encoding=None, error_handler='strict'):
        """
        Store the destination, its textual reference, the output encoding
        and the encoding error handler (a false handler means 'strict').
        """
        self.encoding = encoding
        """Text encoding for the output destination."""

        self.error_handler = error_handler or 'strict'
        """Text encoding error handler."""

        self.destination = destination
        """The destination for output data."""

        # A false path falls back to the class-level default.
        self.destination_path = (destination_path
                                 or self.default_destination_path)
        """A text reference to the destination."""

    def __repr__(self):
        details = (self.__class__, self.destination, self.destination_path)
        return '%s: destination=%r, destination_path=%r' % details

    def write(self, data):
        """`data` is a Unicode string, to be encoded by `self.encode`."""
        raise NotImplementedError

    def encode(self, data):
        """
        Return `data` encoded with `self.encoding`.

        With the pseudo-encoding "unicode", or when `data` is not a Unicode
        string, `data` is returned as is.
        """
        if self.encoding and self.encoding.lower() == 'unicode':
            assert isinstance(data, UnicodeType), (
                'the encoding given is "unicode" but the output is not '
                'a Unicode string')
            return data
        if not isinstance(data, UnicodeType):
            # Non-unicode (e.g. binary) output.
            return data
        try:
            return data.encode(self.encoding, self.error_handler)
        except ValueError:
            # ValueError is raised if there are unencodable chars in data
            # and the error_handler isn't found.
            if self.error_handler != 'xmlcharrefreplace':
                raise
            # We are using xmlcharrefreplace with a Python version that
            # doesn't support it (2.1 or 2.2), so we emulate its behavior
            # one character at a time.
            pieces = map(self.xmlcharref_encode, data)
            return ''.join(pieces)

    def xmlcharref_encode(self, char):
        """Emulate Python 2.3's 'xmlcharrefreplace' encoding error handler."""
        try:
            encoded = char.encode(self.encoding, 'strict')
        except UnicodeError:
            # Unencodable: fall back to a numeric character reference.
            encoded = '&#%i;' % ord(char)
        return encoded
---|
180 | |
---|
181 | |
---|
class FileInput(Input):

    """
    Input for single, simple file-like objects.
    """

    def __init__(self, source=None, source_path=None,
                 encoding=None, error_handler='strict',
                 autoclose=1, handle_io_errors=1):
        """
        :Parameters:
            - `source`: either a file-like object (which is read directly), or
              `None` (which implies `sys.stdin` if no `source_path` given).
            - `source_path`: a path to a file, which is opened and then read.
            - `encoding`: the expected text encoding of the input file.
            - `error_handler`: the encoding error handler to use.
            - `autoclose`: close automatically after read (boolean); always
              false if `sys.stdin` is the source.
            - `handle_io_errors`: summarize I/O errors here, and exit?
              When false, the `IOError` from opening the file propagates.
        """
        Input.__init__(self, source, source_path, encoding, error_handler)
        self.autoclose = autoclose
        self.handle_io_errors = handle_io_errors
        if source is None:
            if source_path:
                try:
                    self.source = open(source_path)
                except IOError:
                    if not handle_io_errors:
                        raise
                    error = sys.exc_info()[1]
                    sys.stderr.write(
                        '%s: %s\n' % (error.__class__.__name__, error))
                    sys.stderr.write(
                        'Unable to open source file for reading (%r).  '
                        'Exiting.\n' % source_path)
                    sys.exit(1)
            else:
                self.source = sys.stdin
        if self.source is sys.stdin:
            # Honour the documented contract even when the caller passed
            # `sys.stdin` explicitly: reading must never close it.
            self.autoclose = None
        if not source_path:
            # Fall back to the file object's own name, if it has one.
            try:
                self.source_path = self.source.name
            except AttributeError:
                pass

    def read(self):
        """
        Read and decode a single file and return the data (Unicode string).

        The source is closed afterwards when `self.autoclose` is true, even
        if reading raised an exception.
        """
        try:
            data = self.source.read()
        finally:
            if self.autoclose:
                self.close()
        return self.decode(data)

    def close(self):
        """Close the underlying source object."""
        self.source.close()
---|
240 | |
---|
241 | |
---|
class FileOutput(Output):

    """
    Output for single, simple file-like objects.
    """

    def __init__(self, destination=None, destination_path=None,
                 encoding=None, error_handler='strict', autoclose=1,
                 handle_io_errors=1):
        """
        :Parameters:
            - `destination`: either a file-like object (which is written
              directly) or `None` (which implies `sys.stdout` if no
              `destination_path` given).
            - `destination_path`: a path to a file, which is opened and then
              written.
            - `encoding`: the text encoding used for the output.
            - `error_handler`: the encoding error handler to use.
            - `autoclose`: close automatically after write (boolean); always
              false if `sys.stdout` is the destination.
            - `handle_io_errors`: summarize I/O errors here, and exit?
              When false, the `IOError` from opening the file propagates.
        """
        Output.__init__(self, destination, destination_path,
                        encoding, error_handler)
        self.opened = 1
        self.autoclose = autoclose
        self.handle_io_errors = handle_io_errors
        if destination is None:
            if destination_path:
                # Opening is deferred until the first `write()`.
                self.opened = None
            else:
                self.destination = sys.stdout
        if self.destination is sys.stdout:
            # Honour the documented contract even when the caller passed
            # `sys.stdout` explicitly: writing must never close it.
            self.autoclose = None
        if not destination_path:
            # Fall back to the file object's own name, if it has one.
            try:
                self.destination_path = self.destination.name
            except AttributeError:
                pass

    def open(self):
        """
        Open `self.destination_path` for writing and mark the output opened.

        On `IOError`, either re-raise (when `self.handle_io_errors` is
        false) or report the problem on stderr and exit with status 1.
        """
        try:
            self.destination = open(self.destination_path, 'w')
        except IOError:
            if not self.handle_io_errors:
                raise
            error = sys.exc_info()[1]
            sys.stderr.write('%s: %s\n' % (error.__class__.__name__, error))
            sys.stderr.write('Unable to open destination file for writing '
                             '(%r).  Exiting.\n' % self.destination_path)
            sys.exit(1)
        self.opened = 1

    def write(self, data):
        """Encode `data`, write it to a single file, and return it."""
        output = self.encode(data)
        if not self.opened:
            self.open()
        try:
            self.destination.write(output)
        finally:
            if self.autoclose:
                self.close()
        return output

    def close(self):
        """Close the destination and mark the output as not opened."""
        self.destination.close()
        self.opened = None
---|
306 | |
---|
307 | |
---|
class StringInput(Input):

    """
    Input wrapper whose source is a string passed in directly.
    """

    default_source_path = '<string>'

    def read(self):
        """Return `self.source` after passing it through `self.decode`."""
        text = self.decode(self.source)
        return text
---|
319 | |
---|
320 | |
---|
class StringOutput(Output):

    """
    Output wrapper that keeps the encoded result as a string.
    """

    default_destination_path = '<string>'

    def write(self, data):
        """Encode `data`, keep the result in `self.destination`, return it."""
        encoded = self.encode(data)
        self.destination = encoded
        return self.destination
---|
333 | |
---|
334 | |
---|
class NullInput(Input):

    """
    Degenerate input: there is nothing to read.
    """

    default_source_path = 'null input'

    def read(self):
        """Return an empty Unicode string; the source is never consulted."""
        return u''
---|
346 | |
---|
347 | |
---|
class NullOutput(Output):

    """
    Degenerate output: nothing is written anywhere.
    """

    default_destination_path = 'null output'

    def write(self, data):
        """Discard `data` without encoding or storing it; return `None`."""
        return None
---|
359 | |
---|
360 | |
---|
class DocTreeInput(Input):

    """
    Adapter for document tree input.

    The document tree must be passed in the ``source`` parameter; it is
    handed back unchanged, with no decoding applied.
    """

    default_source_path = 'doctree input'

    def read(self):
        """Return the document tree stored in `self.source`."""
        doctree = self.source
        return doctree
---|