root/galaxy-central/eggs/Mako-0.2.5-py2.6.egg/mako/filters.py

リビジョン 3, 5.3 KB (コミッタ: kohda, 14 年 前)

Install Unix tools  http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

行番号 
1# filters.py
2# Copyright (C) 2006, 2007, 2008, 2009 Geoffrey T. Dairiki <dairiki@dairiki.org> and Michael Bayer <mike_mp@zzzcomputing.com>
3#
4# This module is part of Mako and is released under
5# the MIT License: http://www.opensource.org/licenses/mit-license.php
6
7
8import re, cgi, urllib, htmlentitydefs, codecs
9from StringIO import StringIO
10
# Replacement table consulted by xml_escape(): every markup-significant
# character maps to the reference that is emitted in its place.  Both quote
# characters use numeric references rather than the named forms.
xml_escapes = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&#34;',   # numeric equivalent of &quot;
    "'": '&#39;',   # numeric equivalent of &apos;
}
18# XXX: &quot; is valid in HTML and XML
19#      &apos; is not valid HTML, but is valid XML
20
21def html_escape(string):
22    return cgi.escape(string, True)
23
def xml_escape(string):
    """Return ``string`` with ``&``, ``<``, ``>``, ``"`` and ``'`` replaced.

    Each matched character is substituted with its entry in the
    module-level ``xml_escapes`` table.
    """
    def _replacement(match):
        return xml_escapes[match.group()]

    return re.sub(r'([&<"\'>])', _replacement, string)
26
27def url_escape(string):
28    # convert into a list of octets
29    string = string.encode("utf8")
30    return urllib.quote_plus(string)
31
def url_unescape(string):
    """Reverse ``urllib.quote_plus`` quoting of ``string``.

    If the unquoted result is not a pure-ASCII ``str`` (as judged by
    ``is_ascii_str``) it is decoded as UTF-8 before being returned.
    """
    unquoted = urllib.unquote_plus(string)
    if is_ascii_str(unquoted):
        return unquoted
    return unquoted.decode("utf8")
37
def trim(string):
    """Return ``string`` with leading and trailing whitespace removed."""
    stripped = string.strip()
    return stripped
40
41
42class Decode(object):
43    def __getattr__(self, key):
44        def decode(x):
45            if isinstance(x, unicode):
46                return x
47            elif not isinstance(x, str):
48                return unicode(str(x), encoding=key)
49            else:
50                return unicode(x, encoding=key)
51        return decode
52decode = Decode()
53       
54           
# Matches a string made up solely of characters in the range 0x00-0x7f
# (the empty string included).
_ASCII_re = re.compile(r'\A[\x00-\x7f]*\Z')


def is_ascii_str(text):
    """Return a true value if ``text`` is a ``str`` holding only ASCII.

    The result is ``False`` when ``text`` is not a ``str``; otherwise it
    is whatever ``_ASCII_re.match`` returns (a match object or ``None``).
    """
    if not isinstance(text, str):
        return False
    return _ASCII_re.match(text)
59
60################################################################   
61
62class XMLEntityEscaper(object):
63    def __init__(self, codepoint2name, name2codepoint):
64        self.codepoint2entity = dict([(c, u'&%s;' % n)
65                                      for c,n in codepoint2name.iteritems()])
66        self.name2codepoint = name2codepoint
67
68    def escape_entities(self, text):
69        """Replace characters with their character entity references.
70
71        Only characters corresponding to a named entity are replaced.
72        """
73        return unicode(text).translate(self.codepoint2entity)
74
75    def __escape(self, m):
76        codepoint = ord(m.group())
77        try:
78            return self.codepoint2entity[codepoint]
79        except (KeyError, IndexError):
80            return '&#x%X;' % codepoint
81
82
83    __escapable = re.compile(r'["&<>]|[^\x00-\x7f]')
84
85    def escape(self, text):
86        """Replace characters with their character references.
87
88        Replace characters by their named entity references.
89        Non-ASCII characters, if they do not have a named entity reference,
90        are replaced by numerical character references.
91
92        The return value is guaranteed to be ASCII.
93        """
94        return self.__escapable.sub(self.__escape, unicode(text)
95                                    ).encode('ascii')
96
97    # XXX: This regexp will not match all valid XML entity names__.
98    # (It punts on details involving involving CombiningChars and Extenders.)
99    #
100    # .. __: http://www.w3.org/TR/2000/REC-xml-20001006#NT-EntityRef
101    __characterrefs = re.compile(r'''& (?:
102                                          \#(\d+)
103                                          | \#x([\da-f]+)
104                                          | ( (?!\d) [:\w] [-.:\w]+ )
105                                          ) ;''',
106                                 re.X | re.UNICODE)
107   
108    def __unescape(self, m):
109        dval, hval, name = m.groups()
110        if dval:
111            codepoint = int(dval)
112        elif hval:
113            codepoint = int(hval, 16)
114        else:
115            codepoint = self.name2codepoint.get(name, 0xfffd)
116            # U+FFFD = "REPLACEMENT CHARACTER"
117        if codepoint < 128:
118            return chr(codepoint)
119        return unichr(codepoint)
120   
121    def unescape(self, text):
122        """Unescape character references.
123
124        All character references (both entity references and numerical
125        character references) are unescaped.
126        """
127        return self.__characterrefs.sub(self.__unescape, text)
128
129
# Shared escaper instance built from the htmlentitydefs lookup tables.
_html_entities_escaper = XMLEntityEscaper(
    codepoint2name=htmlentitydefs.codepoint2name,
    name2codepoint=htmlentitydefs.name2codepoint)

# Module-level filter functions are bound methods of the shared escaper.
html_entities_unescape = _html_entities_escaper.unescape
html_entities_escape = _html_entities_escaper.escape_entities
135
136
def htmlentityreplace_errors(ex):
    """Codec error handler that substitutes character references.

    Registered with `codecs`_ under the name ``htmlentityreplace``.  The
    unencodable span is replaced with HTML entities, or, where no HTML
    entity exists for a character, with XML character references.  Any
    exception other than ``UnicodeEncodeError`` is re-raised.

    >>> u'The cost was \u20ac12.'.encode('latin1', 'htmlentityreplace')
    'The cost was &euro;12.'
    """
    if not isinstance(ex, UnicodeEncodeError):
        raise ex
    # Slice out exactly the characters the codec could not encode.
    unencodable = ex.object[ex.start:ex.end]
    replacement = _html_entities_escaper.escape(unencodable)
    # Error handlers return (replacement text, position to resume at).
    return (unicode(replacement), ex.end)


codecs.register_error('htmlentityreplace', htmlentityreplace_errors)
155
156
157# TODO: options to make this dynamic per-compilation will be added in a later release
# Default filter names, each mapped to a string naming its implementation.
DEFAULT_ESCAPES = {
    'x': 'filters.xml_escape',
    'h': 'filters.html_escape',
    'u': 'filters.url_escape',
    'trim': 'filters.trim',
    'entity': 'filters.html_entities_escape',
    'unicode': 'unicode',
    'decode': 'decode',
    'str': 'str',
    'n': 'n',
}
169   
170
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。