| 1 | # (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org) |
|---|
| 2 | # Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php |
|---|
| 3 | |
|---|
| 4 | import cgi |
|---|
| 5 | import htmlentitydefs |
|---|
| 6 | import urllib |
|---|
| 7 | import re |
|---|
| 8 | |
|---|
# Names exported by ``from <module> import *``.
__all__ = [
    'html_quote',
    'html_unquote',
    'url_quote',
    'url_unquote',
    'strip_html',
]

# Codec used by html_quote/html_unquote when no encoding is passed.
default_encoding = 'UTF-8'
|---|
| 13 | |
|---|
def html_quote(v, encoding=None):
    r"""
    Return ``v`` as an HTML-escaped byte string.

    ``None`` becomes the empty string.  Byte strings are escaped
    as-is; unicode strings are encoded with ``encoding`` (falling back
    to the module-level ``default_encoding``) before escaping; any
    other object is first converted with ``unicode()``.  Escaping is
    delegated to ``cgi.escape`` with its quote flag enabled:

        >>> html_quote(1)
        '1'
        >>> html_quote(None)
        ''
        >>> html_quote('<hey!>')
        '&lt;hey!&gt;'
        >>> html_quote(u'\u1029')
        '\xe1\x80\xa9'
    """
    if v is None:
        return ''
    if isinstance(v, str):
        # Already a byte string: escape without re-encoding.
        return cgi.escape(v, 1)
    if isinstance(v, unicode):
        text = v
    else:
        # Arbitrary objects go through their unicode representation.
        text = unicode(v)
    return cgi.escape(text.encode(encoding or default_encoding), 1)
|---|
| 37 | |
|---|
| 38 | _unquote_re = re.compile(r'&([a-zA-Z]+);') |
|---|
| 39 | def _entity_subber(match, name2c=htmlentitydefs.name2codepoint): |
|---|
| 40 | code = name2c.get(match.group(1)) |
|---|
| 41 | if code: |
|---|
| 42 | return unichr(code) |
|---|
| 43 | else: |
|---|
| 44 | return match.group(0) |
|---|
| 45 | |
|---|
def html_unquote(s, encoding=None):
    r"""
    Decode named HTML entities in ``s`` and return a unicode string.

    Byte strings are first decoded with ``encoding`` (falling back to
    the module-level ``default_encoding``); unicode input is used
    as-is.  Entity references that are not recognised are left in
    place.

        >>> html_unquote('&lt;hey&nbsp;you&gt;')
        u'<hey\xa0you>'
        >>> html_unquote('')
        u''
        >>> html_unquote('&blahblah;')
        u'&blahblah;'
        >>> html_unquote('\xe1\x80\xa9')
        u'\u1029'
    """
    if isinstance(s, str):
        if not s:
            # Return unicode explicitly for empty input so the result
            # type never depends on how the interpreter's re.sub()
            # treats an empty unicode subject (reported to differ on
            # old Python 2.5 releases -- TODO confirm if still relevant).
            return u''
        s = s.decode(encoding or default_encoding)
    return _unquote_re.sub(_entity_subber, s)
|---|
| 62 | |
|---|
def strip_html(s):
    """
    Remove HTML tags from ``s`` and decode a few common entities.

    Every ``<...>`` run that does not span a newline is dropped, then
    ``&nbsp;``, ``&lt;``, ``&gt;`` and ``&amp;`` are replaced by a
    space, ``<``, ``>`` and ``&`` respectively.  Other entities are
    left untouched.

        >>> strip_html('<b>fish &amp; chips</b>')
        'fish & chips'
    """
    # should this use html_unquote?
    s = re.sub('<.*?>', '', s)
    # Tags are stripped first so that a decoded ``&lt;``/``&gt;`` is
    # never mistaken for markup.
    s = s.replace('&nbsp;', ' ').replace('&lt;', '<')
    # ``&amp;`` goes last; otherwise ``&amp;lt;`` would be decoded twice.
    s = s.replace('&gt;', '>').replace('&amp;', '&')
    return s
|---|
| 69 | |
|---|
def no_quote(s):
    """
    Identity "quoting" function: hand ``s`` back unchanged.
    """
    return s
|---|
| 75 | |
|---|
| 76 | url_quote = urllib.quote |
|---|
| 77 | url_unquote = urllib.unquote |
|---|
| 78 | |
|---|
if __name__ == '__main__':
    # Executed as a script: run the doctests in this module's docstrings.
    from doctest import testmod
    testmod()
|---|