root/galaxy-central/eggs/Babel-0.9.4-py2.6.egg/babel/messages/pofile.py

リビジョン 3, 15.5 KB (コミッタ: kohda, 14 年 前)

Install Unix tools  http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

行番号 
1# -*- coding: utf-8 -*-
2#
3# Copyright (C) 2007 Edgewall Software
4# All rights reserved.
5#
6# This software is licensed as described in the file COPYING, which
7# you should have received as part of this distribution. The terms
8# are also available at http://babel.edgewall.org/wiki/License.
9#
10# This software consists of voluntary contributions made by many
11# individuals. For the exact contribution history, see the revision
12# history and logs, available at http://babel.edgewall.org/log/.
13
14"""Reading and writing of files in the ``gettext`` PO (portable object)
15format.
16
17:see: `The Format of PO Files
18       <http://www.gnu.org/software/gettext/manual/gettext.html#PO-Files>`_
19"""
20
21from datetime import date, datetime
22import os
23import re
24try:
25    set
26except NameError:
27    from sets import Set as set
28
29from babel import __version__ as VERSION
30from babel.messages.catalog import Catalog, Message
31from babel.util import wraptext, LOCALTZ
32
33__all__ = ['read_po', 'write_po']
34__docformat__ = 'restructuredtext en'
35
def unescape(string):
    r"""Reverse `escape` the given string.

    >>> print unescape('"Say:\\n  \\"hello, world!\\"\\n"')
    Say:
      "hello, world!"
    <BLANKLINE>

    Escape sequences are decoded in a single left-to-right pass, so an
    escaped backslash that happens to be followed by ``n``, ``t``, ``r`` or a
    double quote yields a literal backslash plus that character rather than
    being decoded a second time:

    >>> print unescape('"C:\\\\new"')
    C:\new

    :param string: the string to unescape; its first and last characters
                   (the enclosing double quotes) are dropped
    :return: the unescaped string
    :rtype: `str` or `unicode`
    """
    # Escapes that map to a control character; ``\\`` and ``\"`` simply map
    # to the character following the backslash.
    control_chars = {'n': '\n', 't': '\t', 'r': '\r'}

    def _decode(match):
        char = match.group(1)
        return control_chars.get(char, char)

    # A regex substitution consumes each escape exactly once.  Chained
    # ``str.replace`` calls would re-scan text produced by earlier
    # replacements and mis-decode sequences such as ``\\n``.
    return re.sub(r'\\([\\trn"])', _decode, string[1:-1])
53
def denormalize(string):
    r"""Reverse the normalization done by the `normalize` function.

    >>> print denormalize(r'''""
    ... "Say:\n"
    ... "  \"hello, world!\"\n"''')
    Say:
      "hello, world!"
    <BLANKLINE>

    >>> print denormalize(r'''""
    ... "Say:\n"
    ... "  \"Lorem ipsum dolor sit "
    ... "amet, consectetur adipisicing"
    ... " elit, \"\n"''')
    Say:
      "Lorem ipsum dolor sit amet, consectetur adipisicing elit, "
    <BLANKLINE>

    A multi-line value does not have to start with an empty ``""`` line; the
    first quoted segment may already carry text:

    >>> print denormalize(r'''"Say: "
    ... "hello"''')
    Say: hello

    :param string: the string to denormalize; either a single quoted segment
                   or several quoted segments separated by line breaks
    :return: the denormalized string
    :rtype: `unicode` or `str`
    """
    if string.startswith('""'):
        # Leading empty segment as emitted by `normalize`: skip it.
        segments = string.splitlines()[1:]
    elif '\n' in string:
        # Continuation lines after a non-empty first segment.  Each line is
        # its own quoted segment and must be unquoted separately, otherwise
        # the inner quotes and the line break would leak into the result.
        segments = string.splitlines()
    else:
        return unescape(string)
    return ''.join([unescape(segment) for segment in segments])
84
def read_po(fileobj, locale=None, domain=None, ignore_obsolete=False):
    """Read messages from a ``gettext`` PO (portable object) file from the given
    file-like object and return a `Catalog`.

    >>> from StringIO import StringIO
    >>> buf = StringIO('''
    ... #: main.py:1
    ... #, fuzzy, python-format
    ... msgid "foo %(name)s"
    ... msgstr ""
    ...
    ... # A user comment
    ... #. An auto comment
    ... #: main.py:3
    ... msgid "bar"
    ... msgid_plural "baz"
    ... msgstr[0] ""
    ... msgstr[1] ""
    ... ''')
    >>> catalog = read_po(buf)
    >>> catalog.revision_date = datetime(2007, 04, 01)

    >>> for message in catalog:
    ...     if message.id:
    ...         print (message.id, message.string)
    ...         print ' ', (message.locations, message.flags)
    ...         print ' ', (message.user_comments, message.auto_comments)
    (u'foo %(name)s', '')
      ([(u'main.py', 1)], set([u'fuzzy', u'python-format']))
      ([], [])
    ((u'bar', u'baz'), ('', ''))
      ([(u'main.py', 3)], set([]))
      ([u'A user comment'], [u'An auto comment'])

    :param fileobj: the file-like object to read the PO file from; lines may
                    be byte strings (decoded using the catalog charset) or
                    already-decoded `unicode` strings
    :param locale: the locale identifier or `Locale` object, or `None`
                   if the catalog is not bound to a locale (which basically
                   means it's a template)
    :param domain: the message domain
    :param ignore_obsolete: whether to ignore obsolete messages in the input
    :return: the catalog populated with the messages that were read
    :rtype: `Catalog`
    """
    catalog = Catalog(locale=locale, domain=domain)

    # Parser state for the entry currently being assembled.  Scalar values
    # are kept in one-element lists so the nested helpers can update them
    # (a Python 2 closure cannot rebind a name of the enclosing function).
    counter = [0]        # number of entries added so far
    offset = [0]         # 0-based index of the line holding the current msgid
    messages = []        # raw msgid / msgid_plural values
    translations = []    # [plural index, raw msgstr value] pairs
    locations = []
    flags = []
    user_comments = []
    auto_comments = []
    obsolete = [False]
    in_msgid = [False]
    in_msgstr = [False]

    def _add_message():
        # Turn the collected state into a `Message`, store it in the catalog
        # and reset the state for the next entry.
        translations.sort()
        if len(messages) > 1:
            msgid = tuple([denormalize(m) for m in messages])
        else:
            msgid = denormalize(messages[0])
        if isinstance(msgid, (list, tuple)):
            # Plural entry: pad missing plural forms with empty strings.
            string = []
            for idx in range(catalog.num_plurals):
                try:
                    string.append(translations[idx])
                except IndexError:
                    string.append((idx, ''))
            string = tuple([denormalize(t[1]) for t in string])
        else:
            string = denormalize(translations[0][1])
        message = Message(msgid, string, list(locations), set(flags),
                          auto_comments, user_comments, lineno=offset[0] + 1)
        if obsolete[0]:
            if not ignore_obsolete:
                catalog.obsolete[msgid] = message
        else:
            catalog[msgid] = message
        del messages[:]
        del translations[:]
        del locations[:]
        del flags[:]
        del auto_comments[:]
        del user_comments[:]
        obsolete[0] = False
        counter[0] += 1

    def _process_message_line(lineno, line):
        # Handle a ``msgid``/``msgid_plural``/``msgstr`` keyword line or a
        # quoted continuation line.
        if line.startswith('msgid_plural'):
            in_msgid[0] = True
            msg = line[12:].lstrip()
            messages.append(msg)
        elif line.startswith('msgid'):
            in_msgid[0] = True
            offset[0] = lineno
            txt = line[5:].lstrip()
            if messages:
                # A new msgid starts: flush the previous entry first.
                _add_message()
            messages.append(txt)
        elif line.startswith('msgstr'):
            in_msgid[0] = False
            in_msgstr[0] = True
            msg = line[6:].lstrip()
            if msg.startswith('['):
                # Split on the first ``]`` only; the translation text itself
                # may legitimately contain further ``]`` characters.
                idx, msg = msg[1:].split(']', 1)
                translations.append([int(idx), msg.lstrip()])
            else:
                translations.append([0, msg])
        elif line.startswith('"'):
            if in_msgid[0]:
                messages[-1] += u'\n' + line.rstrip()
            elif in_msgstr[0]:
                translations[-1][1] += u'\n' + line.rstrip()

    for lineno, line in enumerate(fileobj.readlines()):
        line = line.strip()
        # Only byte strings need decoding; calling ``decode`` on a `unicode`
        # line would first encode it implicitly and fail on non-ASCII text.
        if not isinstance(line, unicode):
            line = line.decode(catalog.charset)
        if line.startswith('#'):
            in_msgid[0] = in_msgstr[0] = False
            if messages and translations:
                _add_message()
            if line[1:].startswith(':'):
                for location in line[2:].lstrip().split():
                    pos = location.rfind(':')
                    if pos >= 0:
                        try:
                            location_lineno = int(location[pos + 1:])
                        except ValueError:
                            # Not a ``file:line`` reference; skip it.
                            continue
                        locations.append((location[:pos], location_lineno))
            elif line[1:].startswith(','):
                for flag in line[2:].lstrip().split(','):
                    flags.append(flag.strip())
            elif line[1:].startswith('~'):
                obsolete[0] = True
                _process_message_line(lineno, line[2:].lstrip())
            elif line[1:].startswith('.'):
                # These are called auto-comments
                comment = line[2:].strip()
                if comment: # Just check that we're not adding empty comments
                    auto_comments.append(comment)
            else:
                # These are called user comments
                user_comments.append(line[1:].strip())
        else:
            _process_message_line(lineno, line)

    if messages:
        _add_message()

    # No actual messages found, but there was some info in comments, from which
    # we'll construct an empty header message
    elif not counter[0] and (flags or user_comments or auto_comments):
        messages.append(u'')
        translations.append([0, u''])
        _add_message()

    return catalog
240
# Pattern used by `normalize` to cut an over-long line into chunks.  The
# alternatives sit inside one capturing group, so ``WORD_SEP.split`` returns
# the matched separators alongside the text between them.
WORD_SEP = re.compile('(%s)' % '|'.join([
    r'\s+',                                  # any whitespace
    r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])',  # hyphenated words
    r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w)',   # em-dash
]))
246
def escape(string):
    r"""Quote a string for use as a double-quoted value in a ``PO`` file.

    Backslashes, tabs, carriage returns, newlines and double quotes are
    replaced by their backslash escape sequences and the result is wrapped
    in double quotes.

    >>> escape('''Say:
    ...   "hello, world!"
    ... ''')
    '"Say:\\n  \\"hello, world!\\"\\n"'

    :param string: the string to escape
    :return: the escaped string
    :rtype: `str` or `unicode`
    """
    # The backslash has to come first, otherwise the backslashes introduced
    # by the later substitutions would themselves be doubled.
    substitutions = (
        ('\\', '\\\\'),
        ('\t', '\\t'),
        ('\r', '\\r'),
        ('\n', '\\n'),
        ('\"', '\\"'),
    )
    quoted = string
    for raw, replacement in substitutions:
        quoted = quoted.replace(raw, replacement)
    return '"%s"' % quoted
265
def normalize(string, prefix='', width=76):
    r"""Render a string the way it should appear as a value in a .po file.

    A value that ends up as a single line is returned as one quoted string.
    Anything longer becomes an empty ``""`` line followed by one quoted
    string per line, each preceded by `prefix`.

    >>> print normalize('''Say:
    ...   "hello, world!"
    ... ''', width=None)
    ""
    "Say:\n"
    "  \"hello, world!\"\n"

    >>> print normalize('''Say:
    ...   "Lorem ipsum dolor sit amet, consectetur adipisicing elit, "
    ... ''', width=32)
    ""
    "Say:\n"
    "  \"Lorem ipsum dolor sit "
    "amet, consectetur adipisicing"
    " elit, \"\n"

    :param string: the string to normalize
    :param prefix: a string that should be prepended to every line
    :param width: the maximum line width; use `None`, 0, or a negative number
                  to completely disable line wrapping
    :return: the normalized string
    :rtype: `unicode`
    """
    if not (width and width > 0):
        # Wrapping disabled: only the line breaks already present count.
        lines = string.splitlines(True)
    else:
        indent = len(prefix)
        lines = []
        for source_line in string.splitlines(True):
            if len(escape(source_line)) + indent <= width:
                lines.append(source_line)
                continue
            # Too long: break into word-level chunks and refill greedily.
            # The list is reversed so chunks can be taken off the end in
            # their original order.
            pending = WORD_SEP.split(source_line)
            pending.reverse()
            while pending:
                pieces = []
                used = 2  # the two enclosing quote characters
                while pending:
                    cost = len(escape(pending[-1])) - 2 + indent
                    if used + cost >= width:
                        if not pieces:
                            # A chunk that is too long on its own gets a
                            # line to itself.
                            pieces.append(pending.pop())
                        break
                    pieces.append(pending.pop())
                    used += cost
                lines.append(u''.join(pieces))

    if len(lines) <= 1:
        return escape(string)

    # Remove empty trailing line
    if not lines[-1]:
        del lines[-1]
        lines[-1] += '\n'
    quoted_lines = [(prefix + escape(entry)) for entry in lines]
    return u'""\n' + u'\n'.join(quoted_lines)
327
def write_po(fileobj, catalog, width=76, no_location=False, omit_header=False,
             sort_output=False, sort_by_file=False, ignore_obsolete=False,
             include_previous=False):
    r"""Write a ``gettext`` PO (portable object) template file for a given
    message catalog to the provided file-like object.

    >>> catalog = Catalog()
    >>> catalog.add(u'foo %(name)s', locations=[('main.py', 1)],
    ...             flags=('fuzzy',))
    >>> catalog.add((u'bar', u'baz'), locations=[('main.py', 3)])
    >>> from StringIO import StringIO
    >>> buf = StringIO()
    >>> write_po(buf, catalog, omit_header=True)
    >>> print buf.getvalue()
    #: main.py:1
    #, fuzzy, python-format
    msgid "foo %(name)s"
    msgstr ""
    <BLANKLINE>
    #: main.py:3
    msgid "bar"
    msgid_plural "baz"
    msgstr[0] ""
    msgstr[1] ""
    <BLANKLINE>
    <BLANKLINE>

    :param fileobj: the file-like object to write to
    :param catalog: the `Catalog` instance
    :param width: the maximum line width for the generated output; use `None`,
                  0, or a negative number to completely disable line wrapping
    :param no_location: do not emit a location comment for every message
    :param omit_header: do not include the ``msgid ""`` entry at the top of the
                        output
    :param sort_output: whether to sort the messages in the output by msgid
    :param sort_by_file: whether to sort the messages in the output by their
                         locations
    :param ignore_obsolete: whether to ignore obsolete messages and not include
                            them in the output; by default they are included as
                            comments
    :param include_previous: include the old msgid as a comment when
                             updating the catalog
    """
    wrap_enabled = bool(width and width > 0)

    def _normalize(key, prefix=''):
        # Quote/wrap a msgid or msgstr value and encode it for output.
        return normalize(key, prefix=prefix, width=width) \
            .encode(catalog.charset, 'backslashreplace')

    def _write(text):
        if isinstance(text, unicode):
            text = text.encode(catalog.charset)
        fileobj.write(text)

    def _write_comment(comment, prefix=''):
        # Emit `comment` as one or more ``#<prefix> ...`` lines.
        if wrap_enabled:
            lines = wraptext(comment, width)
        else:
            # Without wrapping, still emit one comment line per line of
            # text.  Iterating over the string itself would produce one
            # comment line per *character*.
            lines = comment.splitlines()
        for line in lines:
            _write('#%s %s\n' % (prefix, line.strip()))

    def _write_message(message, prefix=''):
        if isinstance(message.id, (list, tuple)):
            _write('%smsgid %s\n' % (prefix, _normalize(message.id[0], prefix)))
            _write('%smsgid_plural %s\n' % (
                prefix, _normalize(message.id[1], prefix)
            ))

            for idx in range(catalog.num_plurals):
                try:
                    string = message.string[idx]
                except IndexError:
                    # Fewer translations than plural forms: pad with blanks.
                    string = ''
                _write('%smsgstr[%d] %s\n' % (
                    prefix, idx, _normalize(string, prefix)
                ))
        else:
            _write('%smsgid %s\n' % (prefix, _normalize(message.id, prefix)))
            _write('%smsgstr %s\n' % (
                prefix, _normalize(message.string or '', prefix)
            ))

    messages = list(catalog)
    if sort_output:
        messages.sort()
    elif sort_by_file:
        messages.sort(lambda x,y: cmp(x.locations, y.locations))

    for message in messages:
        if not message.id: # This is the header "message"
            if omit_header:
                continue
            header_lines = []
            for line in catalog.header_comment.splitlines():
                if wrap_enabled:
                    header_lines += wraptext(line, width=width,
                                             subsequent_indent='# ')
                else:
                    header_lines.append(line)
            # Always terminate the header comment with a newline, also when
            # wrapping is disabled, so that whatever is written next starts
            # on a line of its own.
            _write(u'\n'.join(header_lines) + u'\n')

        for comment in message.user_comments:
            _write_comment(comment)
        for comment in message.auto_comments:
            _write_comment(comment, prefix='.')

        if not no_location:
            locs = u' '.join([u'%s:%d' % (filename.replace(os.sep, '/'), lineno)
                              for filename, lineno in message.locations])
            _write_comment(locs, prefix=':')
        if message.flags:
            # The leading empty element yields the ``#, flag, flag`` form.
            _write('#%s\n' % ', '.join([''] + list(message.flags)))

        if message.previous_id and include_previous:
            _write_comment('msgid %s' % _normalize(message.previous_id[0]),
                           prefix='|')
            if len(message.previous_id) > 1:
                _write_comment('msgid_plural %s' % _normalize(
                    message.previous_id[1]
                ), prefix='|')

        _write_message(message)
        _write('\n')

    if not ignore_obsolete:
        for message in catalog.obsolete.values():
            for comment in message.user_comments:
                _write_comment(comment)
            _write_message(message, prefix='#~ ')
            _write('\n')
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。