| 1 | ''' |
|---|
| 2 | Filters for the #filter directive as well as #transform |
|---|
| 3 | |
|---|
| 4 | #filter applies an output filter to each of Cheetah's $placeholders. |
|---|
| 5 | #transform results in a filter on the entirety of the output |
|---|
| 6 | ''' |
|---|
| 7 | import sys |
|---|
| 8 | |
|---|
# Additional entities WebSafe knows how to transform.  No need to include
# '<', '>' or '&' since those will have been done already.
# Each value must be the HTML entity reference for its key; mapping a
# character to itself would make the WebSafe ``also`` option a no-op.
webSafeEntities = {' ': '&nbsp;', '"': '&quot;'}
|---|
| 12 | |
|---|
class Filter(object):
    """A baseclass for the Cheetah Filters."""

    def __init__(self, template=None):
        """Setup a reference to the template that is using the filter instance.

        This reference isn't used by any of the standard filters, but is
        available to Filter subclasses, should they need it.

        Subclasses should call this method.
        """
        self.template = template

    def filter(self, val, encoding=None, str=str, **kw):
        """Convert ``val`` to a string suitable for output.

        Pass Unicode strings through unmolested, unless an encoding is
        specified.

        Arguments:
            val -- the value to convert.
            encoding -- when truthy, unicode values are encoded with this
                codec; it is not applied to any other kind of value.
            str -- callable used to stringify non-unicode values.
            **kw -- extra keyword arguments; accepted and ignored here.

        Returns:
            u'' when ``val`` is None; the (optionally encoded) value when it
            is unicode; otherwise ``str(val)``, or ``unicode(val)`` if
            ``str(val)`` raises UnicodeEncodeError.
        """
        if val is None:
            return u''
        if isinstance(val, unicode):
            if encoding:
                return val.encode(encoding)
            return val
        try:
            return str(val)
        except UnicodeEncodeError:
            # str() could not produce a byte string for this value, so hand
            # back its unicode form instead.
            return unicode(val)


# Alias: RawOrEncodedUnicode is the very same class object as Filter.
RawOrEncodedUnicode = Filter
|---|
| 44 | |
|---|
class EncodeUnicode(Filter):
    def filter(self, val,
               encoding='utf8',
               str=str,
               **kw):
        """Encode Unicode strings, by default in UTF-8.

        None becomes the empty string, unicode values are encoded with
        ``encoding`` and everything else is passed through ``str``.

        >>> import Cheetah.Template
        >>> t = Cheetah.Template.Template('''
        ... $myvar
        ... ${myvar, encoding='utf16'}
        ... ''', searchList=[{'myvar': u'Asni\xe8res'}],
        ... filter='EncodeUnicode')
        >>> print t
        """
        if val is None:
            return ''
        if not isinstance(val, unicode):
            return str(val)
        return val.encode(encoding)
|---|
| 65 | |
|---|
| 66 | |
|---|
class Markdown(EncodeUnicode):
    '''
    Markdown renders Markdown-formatted strings
    (http://daringfireball.net/projects/markdown/)

    Such that:
        My Header
        =========
    Becomes:
        <h1>My Header</h1>

    and so on.

    Markdown is meant to be used with the #transform
    tag, as its usefulness with #filter is marginal at
    best
    '''
    def filter(self, value, **kwargs):
        # The module is imported on each call rather than at file level, so
        # a missing "markdown" package only matters when this filter runs.
        try:
            import markdown
        except ImportError:
            print('>>> Exception raised importing the "markdown" module')
            print('>>> Are you sure you have the ElementTree module installed?')
            print(' http://effbot.org/downloads/#elementtree')
            raise

        # Let EncodeUnicode normalise the value first, then render it.
        text = super(Markdown, self).filter(value, **kwargs)
        rendered = markdown.markdown(text)
        return rendered
|---|
| 96 | |
|---|
class CodeHighlighter(EncodeUnicode):
    '''
    The CodeHighlighter filter depends on the "pygments" module which you can
    download and install from: http://pygments.org

    The CodeHighlighter assumes the string that it's receiving is source
    code and uses pygments.lexers.guess_lexer() to try to guess which parser
    to use when highlighting it.

    CodeHighlighter will return the HTML and CSS to render the code block, syntax
    highlighted, in a browser

    NOTE: I had an issue installing pygments on Linux/amd64/Python 2.6 dealing with
    importing of pygments.lexers, I was able to correct the failure by adding:
        raise ImportError
    to line 39 of pygments/plugin.py (since importing pkg_resources was causing issues)
    '''
    def filter(self, source, **kwargs):
        """Return ``source`` syntax-highlighted as HTML, preceded by its CSS.

        The value is first normalised by EncodeUnicode.filter.  If pygments
        cannot be imported, a notice is printed and the normalised value is
        returned without highlighting.
        """
        encoded = super(CodeHighlighter, self).filter(source, **kwargs)
        try:
            from pygments import highlight
            from pygments import lexers
            from pygments import formatters
        except ImportError as ex:
            print('<%s> - Failed to import pygments! (%s)' % (self.__class__.__name__, ex))
            print('-- You may need to install it from: http://pygments.org')
            return encoded

        try:
            # Guess from the normalised text, i.e. the same text that is
            # highlighted below; the raw ``source`` may be None or some
            # other non-string object.
            lexer = lexers.guess_lexer(encoded)
        except lexers.ClassNotFound:
            lexer = lexers.PythonLexer()

        formatter = formatters.HtmlFormatter(cssclass='code_highlighter')
        highlighted = highlight(encoded, lexer, formatter)
        css = formatter.get_style_defs('.code_highlighter')
        template = '<style type="text/css"><!--\n%(css)s\n--></style>%(source)s'
        return template % {'css': css, 'source': highlighted}
|---|
| 137 | |
|---|
| 138 | |
|---|
| 139 | |
|---|
class MaxLen(Filter):
    def filter(self, val, **kw):
        """Replace None with '' and cut off at maxlen.

        The value is first converted by Filter.filter.  When a ``maxlen``
        keyword argument is supplied and the converted value is longer than
        it, only the first ``maxlen`` characters are returned; otherwise the
        converted value is returned unchanged.
        """
        output = super(MaxLen, self).filter(val, **kw)
        # ``in`` replaces the deprecated dict.has_key().
        if 'maxlen' in kw and len(output) > kw['maxlen']:
            return output[:kw['maxlen']]
        return output
|---|
| 148 | |
|---|
class WebSafe(Filter):
    """Escape HTML entities in $placeholders.
    """
    def filter(self, val, **kw):
        """Return ``val`` as a string with HTML special characters escaped.

        '&', '<' and '>' are always replaced by their entity references.
        An optional ``also`` keyword argument supplies an iterable of extra
        characters to escape: each is replaced by its entry in
        webSafeEntities when it has one, otherwise by a numeric character
        reference built from ord().
        """
        s = super(WebSafe, self).filter(val, **kw)
        # These substitutions are copied from cgi.escape().
        s = s.replace("&", "&amp;")  # Must be done first!
        s = s.replace("<", "&lt;")
        s = s.replace(">", "&gt;")
        # Process the additional transformations if any.
        if 'also' in kw:
            entities = webSafeEntities  # Global variable.
            for k in kw['also']:
                if k in entities:
                    v = entities[k]
                else:
                    v = "&#%s;" % ord(k)
                s = s.replace(k, v)
        return s
|---|
| 169 | |
|---|
| 170 | |
|---|
class Strip(Filter):
    """Strip leading/trailing whitespace but preserve newlines.

    This filter goes through the value line by line, removing leading and
    trailing whitespace on each line.  It does not strip newlines, so every
    input line corresponds to one output line, with its trailing newline intact.

    The value is cut on each individual '\\n' with split('\\n').  Unlike a
    bare split(), an explicit separator does not merge consecutive
    delimiters, so consecutive blank lines are kept as empty lines.

    This filter is intended to be usable both with the #filter directive and
    with the proposed #sed directive (which has not been ratified yet.)
    """
    def filter(self, val, **kw):
        """Return ``val`` as a string with every line stripped of whitespace."""
        s = super(Strip, self).filter(val, **kw)
        # One piece per line, plus whatever follows the last newline (which
        # may be empty); joining on '\n' puts every newline back in place.
        return "\n".join([line.strip() for line in s.split("\n")])
|---|
| 202 | |
|---|
class StripSqueeze(Filter):
    """Canonicalizes every chunk of whitespace to a single space.

    Strips leading/trailing whitespace.  Removes all newlines, so multi-line
    input is joined into one long line with NO trailing newline.
    """
    def filter(self, val, **kw):
        """Return ``val`` as a string with its words separated by single spaces."""
        text = super(StripSqueeze, self).filter(val, **kw)
        # A bare split() cuts on any run of whitespace and drops empty pieces.
        words = text.split()
        return " ".join(words)
|---|
| 213 | |
|---|
| 214 | ################################################## |
|---|
| 215 | ## MAIN ROUTINE -- testing |
|---|
| 216 | |
|---|
def test():
    """Print the results of a few filters on sample input as a manual check."""
    markup = "abc <=> &"
    ragged = " asdf \n\t 1 2 3\n"
    print("WebSafe INPUT: %r" % (markup,))
    print(" WebSafe: %r" % (WebSafe().filter(markup),))

    print('')
    print(" Strip INPUT: %r" % (ragged,))
    print(" Strip: %r" % (Strip().filter(ragged),))
    print("StripSqueeze: %r" % (StripSqueeze().filter(ragged),))

    print("Unicode: %r" % (EncodeUnicode().filter(u'aoeu12345\u1234'),))


if __name__ == "__main__":
    test()
|---|
| 232 | |
|---|
| 233 | # vim: shiftwidth=4 tabstop=4 expandtab |
|---|