root/galaxy-central/eggs/twill-0.9-py2.6.egg/twill/other_packages/_mechanize_dist/_headersutil.py @ 3

リビジョン 3, 7.9 KB (コミッタ: kohda, 14 年 前)

Install Unix tools  http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

行番号 
1"""Utility functions for HTTP header value parsing and construction.
2
3Copyright 1997-1998, Gisle Aas
4Copyright 2002-2006, John J. Lee
5
6This code is free software; you can redistribute it and/or modify it
7under the terms of the BSD or ZPL 2.1 licenses (see the file
8COPYING.txt included with the distribution).
9
10"""
11
12import os, re
13from types import StringType
14from types import UnicodeType
15STRING_TYPES = StringType, UnicodeType
16
17from _util import http2time
18import _rfc3986
19
def is_html(ct_headers, url, allow_xhtml=False):
    """Return True if the response looks like HTML.

    ct_headers: Sequence of Content-Type headers
    url: Response URL (used to guess from the path extension when no
        Content-Type headers are present)
    allow_xhtml: also accept XHTML/XML media types and the .xhtml extension

    """
    if not ct_headers:
        # No Content-Type header: guess from the URL path's extension.
        # Lowercase so ".HTML" etc. are recognised too.
        ext = os.path.splitext(_rfc3986.urlsplit(url)[2])[1].lower()
        html_exts = [".htm", ".html"]
        if allow_xhtml:
            html_exts += [".xhtml"]
        return ext in html_exts
    # use first header; media types are case-insensitive (RFC 2045),
    # so normalise to lower case before comparing
    ct = split_header_words(ct_headers)[0][0][0].lower()
    html_types = ["text/html"]
    if allow_xhtml:
        html_types += [
            "text/xhtml", "text/xml",
            "application/xml", "application/xhtml+xml",
            ]
    return ct in html_types
42
def unmatched(match):
    """Return the part of match.string that the match did not consume.

    Concatenates the text before the match with the text after it.
    """
    s = match.string
    return s[:match.start()] + s[match.end():]
47
token_re =        re.compile(r"^\s*([^=\s;,]+)")
quoted_value_re = re.compile(r"^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"")
value_re =        re.compile(r"^\s*=\s*([^\s;,]*)")
escape_re = re.compile(r"\\(.)")
# leading junk (stray "=", ";" or whitespace) skipped by split_header_words;
# compiled once here, as a raw string, like the patterns above
junk_re = re.compile(r"^[=\s;]*")
def split_header_words(header_values):
    r"""Parse header values into a list of lists containing key,value pairs.

    The function knows how to deal with ",", ";" and "=" as well as quoted
    values after "=".  A list of space separated tokens are parsed as if they
    were separated by ";".

    If the header_values passed as argument contains multiple values, then they
    are treated as if they were a single value separated by comma ",".

    This means that this function is useful for parsing header fields that
    follow this syntax (BNF as from the HTTP/1.1 specification, but we relax
    the requirement for tokens).

      headers           = #header
      header            = (token | parameter) *( [";"] (token | parameter))

      token             = 1*<any CHAR except CTLs or separators>
      separators        = "(" | ")" | "<" | ">" | "@"
                        | "," | ";" | ":" | "\" | <">
                        | "/" | "[" | "]" | "?" | "="
                        | "{" | "}" | SP | HT

      quoted-string     = ( <"> *(qdtext | quoted-pair ) <"> )
      qdtext            = <any TEXT except <">>
      quoted-pair       = "\" CHAR

      parameter         = attribute "=" value
      attribute         = token
      value             = token | quoted-string

    Each header is represented by a list of key/value pairs.  The value for a
    simple token (not part of a parameter) is None.  Syntactically incorrect
    headers will not necessarily be parsed as you would want.

    This is easier to describe with some examples:

    >>> split_header_words(['foo="bar"; port="80,81"; discard, bar=baz'])
    [[('foo', 'bar'), ('port', '80,81'), ('discard', None)], [('bar', 'baz')]]
    >>> split_header_words(['text/html; charset="iso-8859-1"'])
    [[('text/html', None), ('charset', 'iso-8859-1')]]
    >>> split_header_words([r'Basic realm="\"foo\bar\""'])
    [[('Basic', None), ('realm', '"foobar"')]]

    """
    assert type(header_values) not in STRING_TYPES
    result = []
    for text in header_values:
        orig_text = text
        pairs = []
        while text:
            m = token_re.search(text)
            if m:
                text = unmatched(m)
                name = m.group(1)
                m = quoted_value_re.search(text)
                if m:  # quoted value
                    text = unmatched(m)
                    value = m.group(1)
                    # unescape \" and \\ inside the quoted string
                    value = escape_re.sub(r"\1", value)
                else:
                    m = value_re.search(text)
                    if m:  # unquoted value
                        text = unmatched(m)
                        value = m.group(1)
                        value = value.rstrip()
                    else:
                        # no value, a lone token
                        value = None
                pairs.append((name, value))
            elif text.lstrip().startswith(","):
                # concatenated headers, as per RFC 2616 section 4.2
                text = text.lstrip()[1:]
                if pairs: result.append(pairs)
                pairs = []
            else:
                # skip junk
                non_junk, nr_junk_chars = junk_re.subn("", text)
                assert nr_junk_chars > 0, (
                    "split_header_words bug: '%s', '%s', %s" %
                    (orig_text, text, pairs))
                text = non_junk
        if pairs: result.append(pairs)
    return result
136
join_escape_re = re.compile(r"([\"\\])")
def join_header_words(lists):
    """Do the inverse of the conversion done by split_header_words.

    Takes a list of lists of (key, value) pairs and produces a single header
    value.  Attribute values are quoted if needed.

    >>> join_header_words([[("text/plain", None), ("charset", "iso-8859/1")]])
    'text/plain; charset="iso-8859/1"'
    >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859/1")]])
    'text/plain, charset="iso-8859/1"'

    """
    headers = []
    for pairs in lists:
        attributes = []
        for key, value in pairs:
            if value is not None:
                if re.search(r"^\w+$", value) is None:
                    # value contains non-word characters: escape any
                    # embedded '"' or '\' and wrap it in double quotes
                    value = '"%s"' % join_escape_re.sub(r"\\\1", value)
                if key is None:
                    # Netscape cookies may have no name
                    key = value
                else:
                    key = "%s=%s" % (key, value)
            attributes.append(key)
        if attributes:
            headers.append("; ".join(attributes))
    return ", ".join(headers)
165
def parse_ns_headers(ns_headers):
    """Ad-hoc parser for Netscape protocol cookie-attributes.

    The old Netscape cookie format for Set-Cookie can for instance contain
    an unquoted "," in the expires field, so we have to use this ad-hoc
    parser instead of split_header_words.

    XXX This may not make the best possible effort to parse all the crap
    that Netscape Cookie headers contain.  Ronald Tschalar's HTTPClient
    parser is probably better, so could do worse than following that if
    this ever gives any trouble.

    Currently, this is also used for parsing RFC 2109 cookies.

    ns_headers: sequence of Set-Cookie header values
    Returns a list (one entry per header) of lists of (name, value) pairs;
    a lone attribute with no "=" gets value None.

    """
    known_attrs = ("expires", "domain", "path", "secure",
                   # RFC 2109 attrs (may turn up in Netscape cookies, too)
                   # "version" included so "Version=1" is recognised
                   # case-insensitively (as in stdlib cookielib)
                   "version", "port", "max-age")

    result = []
    for ns_header in ns_headers:
        pairs = []
        version_set = False
        for ii, param in enumerate(re.split(r";\s*", ns_header)):
            param = param.rstrip()
            if param == "": continue
            if "=" not in param:
                k, v = param, None
            else:
                k, v = re.split(r"\s*=\s*", param, 1)
                k = k.lstrip()
            if ii != 0:
                # attribute names (but not the cookie name itself, ii == 0)
                # are case-insensitive
                lc = k.lower()
                if lc in known_attrs:
                    k = lc
                if k == "version":
                    # This is an RFC 2109 cookie.
                    version_set = True
                if k == "expires":
                    # convert expires date to seconds since epoch;
                    # a lone "expires" token has no value, so guard
                    # against v being None (would crash on .startswith)
                    if v is not None:
                        if v.startswith('"'): v = v[1:]
                        if v.endswith('"'): v = v[:-1]
                        v = http2time(v)  # None if invalid
            pairs.append((k, v))

        if pairs:
            if not version_set:
                pairs.append(("version", "0"))
            result.append(pairs)

    return result
219
220
def _test():
    """Run this module's doctests (the examples in the docstrings above)."""
    import doctest, _headersutil
    return doctest.testmod(_headersutil)


if __name__ == "__main__":
    _test()
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。