| 1 | # (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org) |
|---|
| 2 | # Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php |
|---|
| 3 | """ |
|---|
| 4 | Middleware that tests the validity of all generated HTML using the |
|---|
| 5 | `WDG HTML Validator <http://www.htmlhelp.com/tools/validator/>`_ |
|---|
| 6 | """ |
|---|
| 7 | |
|---|
| 8 | from cStringIO import StringIO |
|---|
| 9 | try: |
|---|
| 10 | import subprocess |
|---|
| 11 | except ImportError: |
|---|
| 12 | from paste.util import subprocess24 as subprocess |
|---|
| 13 | from paste import wsgilib |
|---|
| 14 | import re |
|---|
| 15 | import cgi |
|---|
| 16 | |
|---|
| 17 | __all__ = ['WDGValidateMiddleware'] |
|---|
| 18 | |
|---|
class WDGValidateMiddleware(object):

    """
    Middleware that checks HTML and appends messages about the validity of
    the HTML.  Uses: http://www.htmlhelp.com/tools/validator/ -- interacts
    with the command line client.  Use the configuration ``wdg_path`` to
    override the path (default: looks for ``validate`` in $PATH).

    To install, in your web context's __init__.py::

        def urlparser_wrap(environ, start_response, app):
            return wdg_validate.WDGValidateMiddleware(app)(
                environ, start_response)

    Or in your configuration::

        middleware.append('paste.wdg_validate.WDGValidateMiddleware')

    The complete response body of the wrapped application is buffered in
    memory before anything is handed to the server, because the validator
    needs the whole page and its output is spliced into the page.
    """

    # Locates the closing body tag (case-insensitively) so that validator
    # output can be inserted just before it.
    _end_body_regex = re.compile(r'</body>', re.I)

    def __init__(self, app, global_conf=None, wdg_path='validate'):
        """
        ``app`` is the WSGI application to wrap; ``wdg_path`` is the
        validator executable to run.  ``global_conf`` is accepted but
        not used.
        """
        self.app = app
        self.wdg_path = wdg_path

    def __call__(self, environ, start_response):
        """
        Run the wrapped application, validate HTML/XHTML responses and
        return the (possibly annotated) body as a one-element list.

        Responses whose content type is not HTML or XHTML are passed
        through unchanged.
        """
        output = StringIO()
        response = []

        def writer_start_response(status, headers, exc_info=None):
            # Only record the call; the server's start_response is
            # deferred until the final body is known, so that the
            # headers can be corrected if validator output is added.
            # Slice assignment keeps just the most recent call, as the
            # application may call again with exc_info.
            response[:] = [status, headers, exc_info]
            return output.write

        app_iter = self.app(environ, writer_start_response)
        try:
            for s in app_iter:
                output.write(s)
        finally:
            if hasattr(app_iter, 'close'):
                app_iter.close()
        page = output.getvalue()
        status, headers, exc_info = response
        body = page
        v = wsgilib.header_value(headers, 'content-type') or ''
        # @@: Should validate CSS too... but using what?
        if (v.startswith('text/html')
            or v.startswith('text/xhtml')
            or v.startswith('application/xhtml')):
            ops = []
            if (v.startswith('text/xhtml+xml')
                or v.startswith('application/xhtml+xml')):
                ops.append('--xml')
            # @@: Should capture encoding too
            html_errors = self.call_wdg_validate(
                self.wdg_path, ops, page)
            if html_errors:
                body = ''.join(self.add_error(page, html_errors))
                # The body grew, so an existing Content-Length would
                # make clients cut off the appended messages.
                fixed_headers = []
                for name, value in headers:
                    if name.lower() == 'content-length':
                        value = str(len(body))
                    fixed_headers.append((name, value))
                headers = fixed_headers
        try:
            start_response(status, headers, exc_info)
        finally:
            # Do not keep the traceback alive through this frame.
            exc_info = None
        return [body]

    def call_wdg_validate(self, wdg_path, ops, page):
        """
        Run the executable ``wdg_path`` with the option list ``ops``,
        feeding ``page`` on stdin.  Returns everything the process wrote
        to stdout and stderr (stderr is merged into stdout).

        Raises ValueError if no subprocess module is available.
        """
        if subprocess is None:
            raise ValueError(
                "This middleware requires the subprocess module from "
                "Python 2.4")
        proc = subprocess.Popen([wdg_path] + ops,
                                shell=False,
                                close_fds=True,
                                stdout=subprocess.PIPE,
                                stdin=subprocess.PIPE,
                                stderr=subprocess.STDOUT)
        # communicate() writes the page, reads all output and waits for
        # the process to exit, so no separate wait() is needed.
        return proc.communicate(page)[0]

    def add_error(self, html_page, html_errors):
        """
        Return ``[page]`` where ``page`` is ``html_page`` with the escaped
        ``html_errors`` text inserted in a ``<pre>`` block immediately
        before the first ``</body>`` tag, or appended at the end if the
        page has no such tag.
        """
        add_text = ('<pre style="background-color: #ffd; color: #600; '
                    'border: 1px solid #000;">%s</pre>'
                    % cgi.escape(html_errors))
        match = self._end_body_regex.search(html_page)
        if match:
            # Split at the start of the tag on both sides so the closing
            # </body> itself is preserved after the inserted block.
            return [html_page[:match.start()]
                    + add_text
                    + html_page[match.start():]]
        else:
            return [html_page + add_text]
|---|
| 105 | |
|---|
def make_wdg_validate_middleware(
    app, global_conf, wdg_path='validate'):
    """
    Wraps the application in the WDG validator from
    http://www.htmlhelp.com/tools/validator/

    Validation errors are appended to the text of each page.
    The only setting is ``wdg_path``, the path to the validate
    executable (by default picked up from $PATH).
    """
    middleware = WDGValidateMiddleware(app, global_conf, wdg_path=wdg_path)
    return middleware
|---|