Context Navigation

_rfc3986.py

リビジョン 3, 7.4 KB (コミッタ: kohda, 15 年前)
Install Unix tools http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

行番号
1	"""RFC 3986 URI parsing and relative reference resolution / absolutization.
2
3	(aka splitting and joining)
4
5	Copyright 2006 John J. Lee <jjl@pobox.com>
6
7	This code is free software; you can redistribute it and/or modify it under
8	the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt
9	included with the distribution).
10
11	"""
12
13	# XXX Wow, this is ugly. Overly-direct translation of the RFC ATM.
14
15	import sys, re, posixpath, urllib
16
17	## def chr_range(a, b):
18	## return "".join(map(chr, range(ord(a), ord(b)+1)))
19
20	## UNRESERVED_URI_CHARS = ("ABCDEFGHIJKLMNOPQRSTUVWXYZ"
21	## "abcdefghijklmnopqrstuvwxyz"
22	## "0123456789"
23	## "-_.~")
24	## RESERVED_URI_CHARS = "!*'();:@&=+$,/?#[]"
25	## URI_CHARS = RESERVED_URI_CHARS+UNRESERVED_URI_CHARS+'%'
# Matches any single character that is not allowed in a URI, i.e. anything
# outside the unreserved set, the reserved set and '%'.
# The pattern is a raw string so that the backslashes in "\-" and "\]" reach
# the regex engine verbatim instead of relying on Python leaving unknown
# string escapes untouched (which newer interpreters warn about).
BAD_URI_CHARS_RE = re.compile(r"[^A-Za-z0-9\-_.~!*'();:@&=+$,/?%#[\]]")
28
29
def clean_url(url, encoding):
    """Percent-encode the characters of url that are illegal in a URI.

    url may be a byte string or a unicode string.  A byte string is first
    decoded with encoding (undecodable bytes are replaced).  Surrounding
    whitespace is stripped, the text is encoded back to bytes with encoding
    and every byte that is not a legal URI character is percent-encoded.
    Existing percent-escapes are left alone because '%' is treated as safe.

    Returns the quoted URL as a native string.
    """
    # Trying to come up with test cases for this gave me a headache, revisit
    # when do switch to unicode.
    # Somebody else's comments (lost the attribution):
    ## - IE will return you the url in the encoding you send it
    ## - Mozilla/Firefox will send you latin-1 if there's no non latin-1
    ##   characters in your link. It will send you utf-8 however if there are...

    # quote() lives in urllib on Python 2 and in urllib.parse on Python 3.
    try:
        from urllib import quote
    except ImportError:
        from urllib.parse import quote

    # isinstance() rather than an exact type comparison, so byte-string
    # subclasses are decoded too ("bytes" is an alias of "str" on Python 2).
    if isinstance(url, bytes):
        url = url.decode(encoding, "replace")
    url = url.strip()
    # for second param to quote(), we want URI_CHARS, minus the
    # 'always_safe' characters that quote() never percent-encodes
    return quote(url.encode(encoding), "!*'();:@&=+$,/?%#[]~")
44
def is_clean_uri(uri):
    r"""Return True when uri holds no character matched by BAD_URI_CHARS_RE.

    >>> is_clean_uri("ABC!")
    True
    >>> is_clean_uri(u"ABC!")
    True
    >>> is_clean_uri("ABC\|")
    False
    >>> is_clean_uri(u"ABC\|")
    False
    >>> is_clean_uri("http://example.com/0")
    True
    >>> is_clean_uri(u"http://example.com/0")
    True
    """
    # Note: module re treats bytestrings as though they were decoded as
    # latin-1, so this function accepts both unicode and bytestrings.
    return BAD_URI_CHARS_RE.search(uri) is None
63
64
# Regular expression from RFC 3986 Appendix B.  Every component is either
# optional or may be empty, so the pattern matches any string.  The "*"
# quantifiers are essential: without them each component would be limited
# to exactly one character.
SPLIT_MATCH = re.compile(
    r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?").match


def urlsplit(absolute_uri):
    """Return scheme, authority, path, query, fragment.

    The result is a 5-tuple.  path is always a string (possibly empty);
    each of the other components is None when it is absent from the
    input, which is distinct from being present but empty (e.g. the query
    of "a?" is "" while the query of "a" is None).
    """
    # Group numbers follow RFC 3986 Appendix B: 2 = scheme, 4 = authority,
    # 5 = path, 7 = query, 9 = fragment (zero-based below via groups()).
    g = SPLIT_MATCH(absolute_uri).groups()
    return g[1], g[3], g[4], g[6], g[8]
73
def urlunsplit(parts):
    """Join a (scheme, authority, path, query, fragment) tuple into a string.

    A component that is None is omitted together with its delimiter; a
    component that is an empty string still contributes its delimiter.
    The path is always emitted.
    """
    scheme, authority, path, query, fragment = parts
    pieces = []
    if scheme is not None:
        pieces.extend((scheme, ":"))
    if authority is not None:
        pieces.extend(("//", authority))
    pieces.append(path)
    if query is not None:
        pieces.extend(("?", query))
    if fragment is not None:
        pieces.extend(("#", fragment))
    return "".join(pieces)
92
def urljoin(base_uri, uri_reference):
    """Resolve uri_reference against base_uri and return the joined string.

    Both arguments are split with urlsplit(), combined component-wise by
    urljoin_parts(), and the result is reassembled with urlunsplit().
    """
    base_parts = urlsplit(base_uri)
    reference_parts = urlsplit(uri_reference)
    return urlunsplit(urljoin_parts(base_parts, reference_parts))
96
97	# oops, this doesn't do the same thing as the literal translation
98	# from the RFC below
99	## def urljoin_parts(base_parts, reference_parts):
100	## scheme, authority, path, query, fragment = base_parts
101	## rscheme, rauthority, rpath, rquery, rfragment = reference_parts
102
103	## # compute target URI path
104	## if rpath == "":
105	## tpath = path
106	## else:
107	## tpath = rpath
108	## if not tpath.startswith("/"):
109	## tpath = merge(authority, path, tpath)
110	## tpath = posixpath.normpath(tpath)
111
112	## if rscheme is not None:
113	## return (rscheme, rauthority, tpath, rquery, rfragment)
114	## elif rauthority is not None:
115	## return (scheme, rauthority, tpath, rquery, rfragment)
116	## elif rpath == "":
117	## if rquery is not None:
118	## tquery = rquery
119	## else:
120	## tquery = query
121	## return (scheme, authority, tpath, tquery, rfragment)
122	## else:
123	## return (scheme, authority, tpath, rquery, rfragment)
124
def urljoin_parts(base_parts, reference_parts):
    """Combine split base and reference URIs into the split target URI.

    Both arguments and the result are (scheme, authority, path, query,
    fragment) tuples.  A reference scheme equal to the base scheme is
    treated as if it were absent.  The fragment always comes from the
    reference; the base fragment is ignored.
    """
    scheme, authority, path, query, _ = base_parts
    rscheme, rauthority, rpath, rquery, rfragment = reference_parts

    # Reference carries its own (different) scheme: take everything from it.
    if rscheme is not None and rscheme != scheme:
        return (rscheme, rauthority, remove_dot_segments(rpath), rquery,
                rfragment)

    # Reference carries an authority: only the scheme is inherited.
    if rauthority is not None:
        return (scheme, rauthority, remove_dot_segments(rpath), rquery,
                rfragment)

    # Empty reference path: keep the base path, and the base query too
    # unless the reference supplies one.
    if rpath == "":
        tquery = query if rquery is None else rquery
        return (scheme, authority, path, tquery, rfragment)

    # Absolute reference path is used directly; a relative one is merged
    # with the base path first.
    if rpath.startswith("/"):
        unresolved = rpath
    else:
        unresolved = merge(authority, path, rpath)
    return (scheme, authority, remove_dot_segments(unresolved), rquery,
            rfragment)
157
158	# um, something vaguely like this is what I want, but I have to generate
159	# lots of test cases first, if only to understand what it is that
160	# remove_dot_segments really does...
161	## def remove_dot_segments(path):
162	## if path == '':
163	## return ''
164	## comps = path.split('/')
165	## new_comps = []
166	## for comp in comps:
167	## if comp in ['.', '']:
168	## if not new_comps or new_comps[-1]:
169	## new_comps.append('')
170	## continue
171	## if comp != '..':
172	## new_comps.append(comp)
173	## elif new_comps:
174	## new_comps.pop()
175	## return '/'.join(new_comps)
176
177
def remove_dot_segments(path):
    """Return path with its "." and ".." segments resolved.

    The input is consumed from the left while completed segments are
    collected in an output buffer; the lettered comments correspond to the
    steps of the remove_dot_segments routine in RFC 3986.  A ".." that has
    no preceding segment to cancel is simply dropped.
    """
    segments = []
    while path:
        if path.startswith("../"):
            # A: drop a leading "../"
            path = path[3:]
        elif path.startswith("./"):
            # A: drop a leading "./"
            path = path[2:]
        elif path.startswith("/./"):
            # B: "/./" collapses to "/"
            path = path[2:]
        elif path == "/.":
            # B: a trailing "/." collapses to "/"
            path = "/"
        elif path.startswith("/../") or path == "/..":
            # C: "/../" (or a trailing "/..") collapses to "/" and cancels
            # the most recently emitted segment, if there is one
            path = path[3:] or "/"
            del segments[-1:]
        elif path in (".", ".."):
            # D: a lone "." or ".." disappears
            path = ""
        else:
            # E: move the first segment (including its leading "/", if
            # any) from the input to the output
            search_from = 1 if path.startswith("/") else 0
            cut = path.find("/", search_from)
            if cut < 0:
                segments.append(path)
                break
            segments.append(path[:cut])
            path = path[cut:]
    return "".join(segments)
225
def merge(base_authority, base_path, ref_path):
    """Merge a relative reference path with the base path.

    An empty base path yields "/" + ref_path.  Otherwise everything after
    the last "/" of base_path is replaced by ref_path; if base_path holds
    no "/" at all, ref_path is returned unchanged.

    base_authority is accepted but not consulted.
    """
    # XXXX Oddly, the sample Perl implementation of this by Roy Fielding
    # doesn't even take base_authority as a parameter, despite the wording in
    # the RFC suggesting otherwise.  Perhaps I'm missing some obvious identity.
    #if base_authority is not None and base_path == "":
    if base_path == "":
        return "/" + ref_path
    # rpartition gives ("", "", base_path) when there is no "/", so the
    # concatenation below then reduces to ref_path alone.
    directory, slash, _ = base_path.rpartition("/")
    return directory + slash + ref_path
237
if __name__ == "__main__":
    # When executed as a script, run the doctests embedded in this module.
    from doctest import testmod
    testmod()

Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。

Context Navigation

root/galaxy-central/eggs/twill-0.9-py2.6.egg/twill/other_packages/_mechanize_dist/_rfc3986.py

異なるフォーマットでダウンロード: