Context Navigation

_headersutil.py @ 3

リビジョン 3, 7.9 KB (コミッタ: kohda, 15 年前)
Install Unix tools http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

Rev	行番号
[3]	1	"""Utility functions for HTTP header value parsing and construction.
	2
	3	Copyright 1997-1998, Gisle Aas
	4	Copyright 2002-2006, John J. Lee
	5
	6	This code is free software; you can redistribute it and/or modify it
	7	under the terms of the BSD or ZPL 2.1 licenses (see the file
	8	COPYING.txt included with the distribution).
	9
	10	"""
	11
	12	import os, re
	13	from types import StringType
	14	from types import UnicodeType
	15	STRING_TYPES = StringType, UnicodeType
	16
	17	from _util import http2time
	18	import _rfc3986
	19
	20	def is_html(ct_headers, url, allow_xhtml=False):
	21	"""
	22	ct_headers: Sequence of Content-Type headers
	23	url: Response URL
	24
	25	"""
	26	if not ct_headers:
	27	# guess
	28	ext = os.path.splitext(_rfc3986.urlsplit(url)[2])[1]
	29	html_exts = [".htm", ".html"]
	30	if allow_xhtml:
	31	html_exts += [".xhtml"]
	32	return ext in html_exts
	33	# use first header
	34	ct = split_header_words(ct_headers)[0][0][0]
	35	html_types = ["text/html"]
	36	if allow_xhtml:
	37	html_types += [
	38	"text/xhtml", "text/xml",
	39	"application/xml", "application/xhtml+xml",
	40	]
	41	return ct in html_types
	42
	43	def unmatched(match):
	44	"""Return unmatched part of re.Match object."""
	45	start, end = match.span(0)
	46	return match.string[:start]+match.string[end:]
	47
	48	token_re = re.compile(r"^\s*([^=\s;,]+)")
	49	quoted_value_re = re.compile(r"^\s=\s\"([^\"\\](?:\\.[^\"\\])*)\"")
	50	value_re = re.compile(r"^\s=\s([^\s;,]*)")
	51	escape_re = re.compile(r"\\(.)")
	52	def split_header_words(header_values):
	53	r"""Parse header values into a list of lists containing key,value pairs.
	54
	55	The function knows how to deal with ",", ";" and "=" as well as quoted
	56	values after "=". A list of space separated tokens are parsed as if they
	57	were separated by ";".
	58
	59	If the header_values passed as argument contains multiple values, then they
	60	are treated as if they were a single value separated by comma ",".
	61
	62	This means that this function is useful for parsing header fields that
	63	follow this syntax (BNF as from the HTTP/1.1 specification, but we relax
	64	the requirement for tokens).
	65
	66	headers = #header
	67	header = (token \| parameter) *( [";"] (token \| parameter))
	68
	69	token = 1*<any CHAR except CTLs or separators>
	70	separators = "(" \| ")" \| "<" \| ">" \| "@"
	71	\| "," \| ";" \| ":" \| "\" \| <">
	72	\| "/" \| "[" \| "]" \| "?" \| "="
	73	\| "{" \| "}" \| SP \| HT
	74
	75	quoted-string = ( <"> *(qdtext \| quoted-pair ) <"> )
	76	qdtext = <any TEXT except <">>
	77	quoted-pair = "\" CHAR
	78
	79	parameter = attribute "=" value
	80	attribute = token
	81	value = token \| quoted-string
	82
	83	Each header is represented by a list of key/value pairs. The value for a
	84	simple token (not part of a parameter) is None. Syntactically incorrect
	85	headers will not necessarily be parsed as you would want.
	86
	87	This is easier to describe with some examples:
	88
	89	>>> split_header_words(['foo="bar"; port="80,81"; discard, bar=baz'])
	90	[[('foo', 'bar'), ('port', '80,81'), ('discard', None)], [('bar', 'baz')]]
	91	>>> split_header_words(['text/html; charset="iso-8859-1"'])
	92	[[('text/html', None), ('charset', 'iso-8859-1')]]
	93	>>> split_header_words([r'Basic realm="\"foo\bar\""'])
	94	[[('Basic', None), ('realm', '"foobar"')]]
	95
	96	"""
	97	assert type(header_values) not in STRING_TYPES
	98	result = []
	99	for text in header_values:
	100	orig_text = text
	101	pairs = []
	102	while text:
	103	m = token_re.search(text)
	104	if m:
	105	text = unmatched(m)
	106	name = m.group(1)
	107	m = quoted_value_re.search(text)
	108	if m: # quoted value
	109	text = unmatched(m)
	110	value = m.group(1)
	111	value = escape_re.sub(r"\1", value)
	112	else:
	113	m = value_re.search(text)
	114	if m: # unquoted value
	115	text = unmatched(m)
	116	value = m.group(1)
	117	value = value.rstrip()
	118	else:
	119	# no value, a lone token
	120	value = None
	121	pairs.append((name, value))
	122	elif text.lstrip().startswith(","):
	123	# concatenated headers, as per RFC 2616 section 4.2
	124	text = text.lstrip()[1:]
	125	if pairs: result.append(pairs)
	126	pairs = []
	127	else:
	128	# skip junk
	129	non_junk, nr_junk_chars = re.subn("^[=\s;]*", "", text)
	130	assert nr_junk_chars > 0, (
	131	"split_header_words bug: '%s', '%s', %s" %
	132	(orig_text, text, pairs))
	133	text = non_junk
	134	if pairs: result.append(pairs)
	135	return result
	136
	137	join_escape_re = re.compile(r"([\"\\])")
	138	def join_header_words(lists):
	139	"""Do the inverse of the conversion done by split_header_words.
	140
	141	Takes a list of lists of (key, value) pairs and produces a single header
	142	value. Attribute values are quoted if needed.
	143
	144	>>> join_header_words([[("text/plain", None), ("charset", "iso-8859/1")]])
	145	'text/plain; charset="iso-8859/1"'
	146	>>> join_header_words([[("text/plain", None)], [("charset", "iso-8859/1")]])
	147	'text/plain, charset="iso-8859/1"'
	148
	149	"""
	150	headers = []
	151	for pairs in lists:
	152	attr = []
	153	for k, v in pairs:
	154	if v is not None:
	155	if not re.search(r"^\w+$", v):
	156	v = join_escape_re.sub(r"\\\1", v) # escape " and \
	157	v = '"%s"' % v
	158	if k is None: # Netscape cookies may have no name
	159	k = v
	160	else:
	161	k = "%s=%s" % (k, v)
	162	attr.append(k)
	163	if attr: headers.append("; ".join(attr))
	164	return ", ".join(headers)
	165
	166	def parse_ns_headers(ns_headers):
	167	"""Ad-hoc parser for Netscape protocol cookie-attributes.
	168
	169	The old Netscape cookie format for Set-Cookie can for instance contain
	170	an unquoted "," in the expires field, so we have to use this ad-hoc
	171	parser instead of split_header_words.
	172
	173	XXX This may not make the best possible effort to parse all the crap
	174	that Netscape Cookie headers contain. Ronald Tschalar's HTTPClient
	175	parser is probably better, so could do worse than following that if
	176	this ever gives any trouble.
	177
	178	Currently, this is also used for parsing RFC 2109 cookies.
	179
	180	"""
	181	known_attrs = ("expires", "domain", "path", "secure",
	182	# RFC 2109 attrs (may turn up in Netscape cookies, too)
	183	"port", "max-age")
	184
	185	result = []
	186	for ns_header in ns_headers:
	187	pairs = []
	188	version_set = False
	189	params = re.split(r";\s*", ns_header)
	190	for ii in range(len(params)):
	191	param = params[ii]
	192	param = param.rstrip()
	193	if param == "": continue
	194	if "=" not in param:
	195	k, v = param, None
	196	else:
	197	k, v = re.split(r"\s=\s", param, 1)
	198	k = k.lstrip()
	199	if ii != 0:
	200	lc = k.lower()
	201	if lc in known_attrs:
	202	k = lc
	203	if k == "version":
	204	# This is an RFC 2109 cookie.
	205	version_set = True
	206	if k == "expires":
	207	# convert expires date to seconds since epoch
	208	if v.startswith('"'): v = v[1:]
	209	if v.endswith('"'): v = v[:-1]
	210	v = http2time(v) # None if invalid
	211	pairs.append((k, v))
	212
	213	if pairs:
	214	if not version_set:
	215	pairs.append(("version", "0"))
	216	result.append(pairs)
	217
	218	return result
	219
	220
	221	def _test():
	222	import doctest, _headersutil
	223	return doctest.testmod(_headersutil)
	224
	225	if __name__ == "__main__":
	226	_test()

Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。

Context Navigation

root/galaxy-central/eggs/twill-0.9-py2.6.egg/twill/other_packages/_mechanize_dist/_headersutil.py @ 3

異なるフォーマットでダウンロード: