root/galaxy-central/eggs/twill-0.9-py2.6.egg/twill/other_packages/_mechanize_dist/_headersutil.py @ 3

リビジョン 3, 7.9 KB (コミッタ: kohda, 14 年 前)

Install Unix tools  http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

行番号 
1"""Utility functions for HTTP header value parsing and construction.
2
3Copyright 1997-1998, Gisle Aas
4Copyright 2002-2006, John J. Lee
5
6This code is free software; you can redistribute it and/or modify it
7under the terms of the BSD or ZPL 2.1 licenses (see the file
8COPYING.txt included with the distribution).
9
10"""
11
12import os, re
13from types import StringType
14from types import UnicodeType
15STRING_TYPES = StringType, UnicodeType
16
17from _util import http2time
18import _rfc3986
19
def is_html(ct_headers, url, allow_xhtml=False):
    """Return True if the response looks like HTML.

    ct_headers: Sequence of Content-Type headers
    url: Response URL (used to guess from the path extension when no
        Content-Type headers are present)
    allow_xhtml: also accept XHTML/XML media types and the .xhtml extension

    """
    if not ct_headers:
        # No Content-Type header: guess from the URL path's extension.
        # Lowercase so ".HTML" etc. are recognised too.
        ext = os.path.splitext(_rfc3986.urlsplit(url)[2])[1].lower()
        html_exts = [".htm", ".html"]
        if allow_xhtml:
            html_exts += [".xhtml"]
        return ext in html_exts
    # use first header; media types are case-insensitive (RFC 2045),
    # so normalise to lower case before comparing
    ct = split_header_words(ct_headers)[0][0][0].lower()
    html_types = ["text/html"]
    if allow_xhtml:
        html_types += [
            "text/xhtml", "text/xml",
            "application/xml", "application/xhtml+xml",
            ]
    return ct in html_types
42
def unmatched(match):
    """Return the part of match.string that the match did not consume.

    Concatenates the text before the match with the text after it.
    """
    s = match.string
    return s[:match.start()] + s[match.end():]
47
token_re =        re.compile(r"^\s*([^=\s;,]+)")
quoted_value_re = re.compile(r"^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"")
value_re =        re.compile(r"^\s*=\s*([^\s;,]*)")
escape_re = re.compile(r"\\(.)")
# leading junk (stray "=", ";" or whitespace) skipped by split_header_words;
# compiled once here, as a raw string, like the patterns above
junk_re = re.compile(r"^[=\s;]*")
def split_header_words(header_values):
    r"""Parse header values into a list of lists containing key,value pairs.

    The function knows how to deal with ",", ";" and "=" as well as quoted
    values after "=".  A list of space separated tokens are parsed as if they
    were separated by ";".

    If the header_values passed as argument contains multiple values, then they
    are treated as if they were a single value separated by comma ",".

    This means that this function is useful for parsing header fields that
    follow this syntax (BNF as from the HTTP/1.1 specification, but we relax
    the requirement for tokens).

      headers           = #header
      header            = (token | parameter) *( [";"] (token | parameter))

      token             = 1*<any CHAR except CTLs or separators>
      separators        = "(" | ")" | "<" | ">" | "@"
                        | "," | ";" | ":" | "\" | <">
                        | "/" | "[" | "]" | "?" | "="
                        | "{" | "}" | SP | HT

      quoted-string     = ( <"> *(qdtext | quoted-pair ) <"> )
      qdtext            = <any TEXT except <">>
      quoted-pair       = "\" CHAR

      parameter         = attribute "=" value
      attribute         = token
      value             = token | quoted-string

    Each header is represented by a list of key/value pairs.  The value for a
    simple token (not part of a parameter) is None.  Syntactically incorrect
    headers will not necessarily be parsed as you would want.

    This is easier to describe with some examples:

    >>> split_header_words(['foo="bar"; port="80,81"; discard, bar=baz'])
    [[('foo', 'bar'), ('port', '80,81'), ('discard', None)], [('bar', 'baz')]]
    >>> split_header_words(['text/html; charset="iso-8859-1"'])
    [[('text/html', None), ('charset', 'iso-8859-1')]]
    >>> split_header_words([r'Basic realm="\"foo\bar\""'])
    [[('Basic', None), ('realm', '"foobar"')]]

    """
    assert type(header_values) not in STRING_TYPES
    result = []
    for text in header_values:
        orig_text = text
        pairs = []
        while text:
            m = token_re.search(text)
            if m:
                text = unmatched(m)
                name = m.group(1)
                m = quoted_value_re.search(text)
                if m:  # quoted value
                    text = unmatched(m)
                    value = m.group(1)
                    # unescape \" and \\ inside the quoted string
                    value = escape_re.sub(r"\1", value)
                else:
                    m = value_re.search(text)
                    if m:  # unquoted value
                        text = unmatched(m)
                        value = m.group(1)
                        value = value.rstrip()
                    else:
                        # no value, a lone token
                        value = None
                pairs.append((name, value))
            elif text.lstrip().startswith(","):
                # concatenated headers, as per RFC 2616 section 4.2
                text = text.lstrip()[1:]
                if pairs: result.append(pairs)
                pairs = []
            else:
                # skip junk
                non_junk, nr_junk_chars = junk_re.subn("", text)
                assert nr_junk_chars > 0, (
                    "split_header_words bug: '%s', '%s', %s" %
                    (orig_text, text, pairs))
                text = non_junk
        if pairs: result.append(pairs)
    return result
136
join_escape_re = re.compile(r"([\"\\])")
def join_header_words(lists):
    """Do the inverse of the conversion done by split_header_words.

    Takes a list of lists of (key, value) pairs and produces a single header
    value.  Attribute values are quoted if needed.

    >>> join_header_words([[("text/plain", None), ("charset", "iso-8859/1")]])
    'text/plain; charset="iso-8859/1"'
    >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859/1")]])
    'text/plain, charset="iso-8859/1"'

    """
    headers = []
    for pairs in lists:
        attributes = []
        for key, value in pairs:
            if value is not None:
                if re.search(r"^\w+$", value) is None:
                    # value contains non-word characters: escape any
                    # embedded '"' or '\' and wrap it in double quotes
                    value = '"%s"' % join_escape_re.sub(r"\\\1", value)
                if key is None:
                    # Netscape cookies may have no name
                    key = value
                else:
                    key = "%s=%s" % (key, value)
            attributes.append(key)
        if attributes:
            headers.append("; ".join(attributes))
    return ", ".join(headers)
165
def parse_ns_headers(ns_headers):
    """Ad-hoc parser for Netscape protocol cookie-attributes.

    The old Netscape cookie format for Set-Cookie can for instance contain
    an unquoted "," in the expires field, so we have to use this ad-hoc
    parser instead of split_header_words.

    XXX This may not make the best possible effort to parse all the crap
    that Netscape Cookie headers contain.  Ronald Tschalar's HTTPClient
    parser is probably better, so could do worse than following that if
    this ever gives any trouble.

    Currently, this is also used for parsing RFC 2109 cookies.

    ns_headers: sequence of Set-Cookie header values
    Returns a list (one entry per header) of lists of (name, value) pairs;
    a lone attribute with no "=" gets value None.

    """
    known_attrs = ("expires", "domain", "path", "secure",
                   # RFC 2109 attrs (may turn up in Netscape cookies, too)
                   # "version" included so "Version=1" is recognised
                   # case-insensitively (as in stdlib cookielib)
                   "version", "port", "max-age")

    result = []
    for ns_header in ns_headers:
        pairs = []
        version_set = False
        for ii, param in enumerate(re.split(r";\s*", ns_header)):
            param = param.rstrip()
            if param == "": continue
            if "=" not in param:
                k, v = param, None
            else:
                k, v = re.split(r"\s*=\s*", param, 1)
                k = k.lstrip()
            if ii != 0:
                # attribute names (but not the cookie name itself, ii == 0)
                # are case-insensitive
                lc = k.lower()
                if lc in known_attrs:
                    k = lc
                if k == "version":
                    # This is an RFC 2109 cookie.
                    version_set = True
                if k == "expires":
                    # convert expires date to seconds since epoch;
                    # a lone "expires" token has no value, so guard
                    # against v being None (would crash on .startswith)
                    if v is not None:
                        if v.startswith('"'): v = v[1:]
                        if v.endswith('"'): v = v[:-1]
                        v = http2time(v)  # None if invalid
            pairs.append((k, v))

        if pairs:
            if not version_set:
                pairs.append(("version", "0"))
            result.append(pairs)

    return result
219
220
def _test():
    """Run this module's doctests (the examples in the docstrings above)."""
    import doctest, _headersutil
    return doctest.testmod(_headersutil)


if __name__ == "__main__":
    _test()
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。