1 | # (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org) |
---|
2 | # Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php |
---|
3 | |
---|
4 | import cgi |
---|
5 | import htmlentitydefs |
---|
6 | import urllib |
---|
7 | import re |
---|
8 | |
---|
# Names exported by ``from <module> import *``.
__all__ = [
    'html_quote',
    'html_unquote',
    'url_quote',
    'url_unquote',
    'strip_html',
]

# Fallback codec used by html_quote() and html_unquote() when the caller
# does not supply an ``encoding``.
default_encoding = 'UTF-8'
---|
13 | |
---|
def html_quote(v, encoding=None):
    r"""
    Quote the value (turned to a string) as HTML.  This quotes ``&``,
    ``<``, ``>`` and double quotes (``"``); single quotes are left
    alone.

    ``None`` becomes the empty string.  Byte strings are escaped as-is.
    Unicode strings, and any other object (converted with ``unicode()``
    first), are encoded with ``encoding`` -- falling back to the
    module-level ``default_encoding`` -- before being escaped, so the
    result is always a byte string:

        >>> html_quote(1)
        '1'
        >>> html_quote(None)
        ''
        >>> html_quote('<hey!>')
        '&lt;hey!&gt;'
        >>> html_quote('"a" & b')
        '&quot;a&quot; &amp; b'
        >>> html_quote(u'\u1029')
        '\xe1\x80\xa9'
    """
    encoding = encoding or default_encoding
    if v is None:
        return ''
    if isinstance(v, str):
        # Already a byte string: escape it without re-encoding.
        return cgi.escape(v, 1)
    if not isinstance(v, unicode):
        # Arbitrary objects are rendered through their unicode form.
        v = unicode(v)
    # Second argument 1 asks cgi.escape to quote '"' as well.
    return cgi.escape(v.encode(encoding), 1)
---|
37 | |
---|
# Named character references such as ``&amp;`` or ``&frac12;``.  Entity
# names start with a letter but may contain digits afterwards (``sup2``,
# ``frac12``, ``there4``), so digits have to be accepted after the first
# character or those entities are never decoded.
_unquote_re = re.compile(r'&([a-zA-Z][a-zA-Z0-9]*);')


def _entity_subber(match, name2c=htmlentitydefs.name2codepoint):
    """
    ``re.sub`` callback: replace one matched named entity by its character.

    ``match`` must carry the entity name (without ``&`` and ``;``) in
    group 1.  ``name2c`` maps entity names to Unicode code points; it is
    bound as a default argument so the table is looked up once, when the
    function is defined.

    Returns the unicode character for a known name, or the matched text
    unchanged when the name has no (non-zero) entry in ``name2c``.
    """
    code = name2c.get(match.group(1))
    if not code:
        # Unknown entity: keep the original ``&name;`` text in place.
        return match.group(0)
    return unichr(code)
---|
45 | |
---|
def html_unquote(s, encoding=None):
    r"""
    Decode named HTML entities in ``s`` and return a unicode string.

    Byte strings are first decoded with ``encoding`` (falling back to
    the module-level ``default_encoding``); unicode input is used as-is.
    Entity names that are not recognised are left untouched, and numeric
    references such as ``&#160;`` are not decoded.

        >>> html_unquote('&lt;hey&nbsp;you&gt;')
        u'<hey\xa0you>'
        >>> html_unquote('')
        u''
        >>> html_unquote('&blahblah;')
        u'&blahblah;'
        >>> html_unquote('\xe1\x80\xa9')
        u'\u1029'
    """
    if isinstance(s, str):
        if not s:
            # Short-circuit so that empty byte input always comes back
            # as unicode, regardless of how re.sub treats an empty
            # subject string.
            return u''
        s = s.decode(encoding or default_encoding)
    return _unquote_re.sub(_entity_subber, s)
---|
62 | |
---|
def strip_html(s):
    """
    Return ``s`` with markup tags removed and a few entities decoded.

    Everything from a ``<`` up to the next ``>`` on the same line is
    dropped (``.`` does not match newlines here), then ``&nbsp;``,
    ``&lt;``, ``&gt;`` and ``&amp;`` are replaced by the characters they
    stand for.  Any other entity is left as-is.
    """
    # should this use html_unquote?
    s = re.sub(r'<.*?>', '', s)
    # ``&amp;`` is decoded last so that an escaped entity such as
    # ``&amp;lt;`` yields the text ``&lt;`` rather than ``<``.
    s = s.replace('&nbsp;', ' ').replace('&lt;', '<')
    s = s.replace('&gt;', '>').replace('&amp;', '&')
    return s
---|
69 | |
---|
def no_quote(s):
    """
    Identity "quoter": hand ``s`` back exactly as it was given.
    """
    return s
---|
75 | |
---|
# URL quoting and unquoting are the standard library functions, re-exported
# under this module's naming scheme.
url_quote, url_unquote = urllib.quote, urllib.unquote
---|
78 | |
---|
if __name__ == '__main__':
    # Executed as a script: run the doctest examples in this module.
    from doctest import testmod
    testmod()
---|