root/galaxy-central/eggs/twill-0.9-py2.6.egg/twill/other_packages/_mechanize_dist/_rfc3986.py

リビジョン 3, 7.4 KB (コミッタ: kohda, 14 年 前)

Install Unix tools  http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

行番号 
"""RFC 3986 URI parsing and relative reference resolution / absolutization.

(aka splitting and joining)

Copyright 2006 John J. Lee <jjl@pobox.com>

This code is free software; you can redistribute it and/or modify it under
the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt
included with the distribution).

"""
12
13# XXX Wow, this is ugly.  Overly-direct translation of the RFC ATM.
14
15import sys, re, posixpath, urllib
16
17## def chr_range(a, b):
18##     return "".join(map(chr, range(ord(a), ord(b)+1)))
19
20## UNRESERVED_URI_CHARS = ("ABCDEFGHIJKLMNOPQRSTUVWXYZ"
21##                         "abcdefghijklmnopqrstuvwxyz"
22##                         "0123456789"
23##                         "-_.~")
24## RESERVED_URI_CHARS = "!*'();:@&=+$,/?#[]"
25## URI_CHARS = RESERVED_URI_CHARS+UNRESERVED_URI_CHARS+'%'
# Matches any single character that is not a legal URI character, i.e. that
# is neither reserved ("!*'();:@&=+$,/?#[]"), unreserved (ASCII letters,
# digits, "-_.~") nor "%".  Written as a raw string so that the "\-" and
# "\]" escapes reach the regex engine untouched.
BAD_URI_CHARS_RE = re.compile(r"[^A-Za-z0-9\-_.~!*'();:@&=+$,/?%#[\]]")
28
29
def clean_url(url, encoding):
    """Percent-encode the characters of *url* that are illegal in a URI.

    url: byte string or unicode string.  A byte string is first decoded
        with *encoding* (undecodable bytes are replaced) so that leading
        and trailing whitespace can be stripped from the text.
    encoding: codec used to decode byte-string input and to encode the
        stripped text again before quoting.

    Returns the quoted URL.  "%" is treated as safe, so input that is
    already percent-escaped is not escaped a second time.

    Raises UnicodeEncodeError if the text cannot be represented in
    *encoding* (the encode step uses strict error handling).
    """
    # quote() lives in urllib on Python 2 and in urllib.parse on Python 3.
    try:
        from urllib import quote
    except ImportError:
        from urllib.parse import quote

    # Trying to come up with test cases for this gave me a headache, revisit
    # when do switch to unicode.
    # Somebody else's comments (lost the attribution):
    #   - IE will return you the url in the encoding you send it
    #   - Mozilla/Firefox will send you latin-1 if there's no non latin-1
    #     characters in your link. It will send you utf-8 however if there
    #     are...
    if isinstance(url, bytes):
        url = url.decode(encoding, "replace")
    url = url.strip()
    # for second param to quote(), we want URI_CHARS, minus the
    # 'always_safe' characters that quote() never percent-encodes
    return quote(url.encode(encoding), "!*'();:@&=+$,/?%#[]~")
44
def is_clean_uri(uri):
    """Return True if *uri* contains only characters that are legal in a URI.

    >>> is_clean_uri("ABC!")
    True
    >>> is_clean_uri(u"ABC!")
    True
    >>> is_clean_uri("ABC|")
    False
    >>> is_clean_uri(u"ABC|")
    False
    >>> is_clean_uri("http://example.com/0")
    True
    >>> is_clean_uri(u"http://example.com/0")
    True
    """
    # note module re treats bytestrings as though they were decoded as latin-1
    # so this function accepts both unicode and bytestrings
    return BAD_URI_CHARS_RE.search(uri) is None
63
64
# Bound match() of the URI-splitting regular expression given in RFC 3986,
# appendix B.  Groups 2, 4, 5, 7 and 9 capture scheme, authority, path,
# query and fragment respectively.
SPLIT_MATCH = re.compile(
    r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?").match


def urlsplit(absolute_uri):
    """Return scheme, authority, path, query, fragment.

    The result is a 5-tuple of strings.  A component that is absent from
    *absolute_uri* is None, except for the path, which is "" when absent.
    A component that is present but empty (e.g. the query in "http://a/?")
    is "".

    Returns None if the pattern does not match at all.
    """
    match = SPLIT_MATCH(absolute_uri)
    if match is None:
        return None
    return match.group(2, 4, 5, 7, 9)
73
def urlunsplit(parts):
    """Join a (scheme, authority, path, query, fragment) tuple into a URI.

    A component that is None is omitted together with its delimiter; a
    component that is "" still gets its delimiter (so a query of "" yields
    a trailing "?").  The path is always emitted as given.
    """
    scheme, authority, path, query, fragment = parts
    pieces = []
    if scheme is not None:
        pieces.extend((scheme, ":"))
    if authority is not None:
        pieces.extend(("//", authority))
    pieces.append(path)
    if query is not None:
        pieces.extend(("?", query))
    if fragment is not None:
        pieces.extend(("#", fragment))
    return "".join(pieces)
92
def urljoin(base_uri, uri_reference):
    """Resolve *uri_reference* against *base_uri* and return the result.

    Both arguments are split with urlsplit(), combined with
    urljoin_parts(), and the resulting components are reassembled with
    urlunsplit().
    """
    base_parts = urlsplit(base_uri)
    reference_parts = urlsplit(uri_reference)
    return urlunsplit(urljoin_parts(base_parts, reference_parts))
96
97# oops, this doesn't do the same thing as the literal translation
98# from the RFC below
99## def urljoin_parts(base_parts, reference_parts):
100##     scheme, authority, path, query, fragment = base_parts
101##     rscheme, rauthority, rpath, rquery, rfragment = reference_parts
102
103##     # compute target URI path
104##     if rpath == "":
105##         tpath = path
106##     else:
107##         tpath = rpath
108##         if not tpath.startswith("/"):
109##             tpath = merge(authority, path, tpath)
110##         tpath = posixpath.normpath(tpath)
111
112##     if rscheme is not None:
113##         return (rscheme, rauthority, tpath, rquery, rfragment)
114##     elif rauthority is not None:
115##         return (scheme, rauthority, tpath, rquery, rfragment)
116##     elif rpath == "":
117##         if rquery is not None:
118##             tquery = rquery
119##         else:
120##             tquery = query
121##         return (scheme, authority, tpath, tquery, rfragment)
122##     else:
123##         return (scheme, authority, tpath, rquery, rfragment)
124
def urljoin_parts(base_parts, reference_parts):
    """Resolve split reference components against split base components.

    base_parts, reference_parts: (scheme, authority, path, query, fragment)
        tuples as returned by urlsplit().

    Returns the target URI as a tuple of the same shape.  The fragment of
    the result always comes from the reference; the base fragment is
    never used.
    """
    scheme, authority, path, query, fragment = base_parts
    rscheme, rauthority, rpath, rquery, rfragment = reference_parts

    # A reference that repeats the base scheme is treated as if it had no
    # scheme at all, so it is resolved like a relative reference.
    if rscheme == scheme:
        rscheme = None

    # Reference carries its own scheme: only its path needs normalizing.
    if rscheme is not None:
        return (rscheme, rauthority, remove_dot_segments(rpath), rquery,
                rfragment)

    # Reference carries its own authority: inherit just the base scheme.
    if rauthority is not None:
        return (scheme, rauthority, remove_dot_segments(rpath), rquery,
                rfragment)

    if rpath == "":
        # Empty path: keep the base path, and the base query too unless
        # the reference supplies one.
        tpath = path
        if rquery is not None:
            tquery = rquery
        else:
            tquery = query
    else:
        if rpath.startswith("/"):
            tpath = remove_dot_segments(rpath)
        else:
            # Relative path: graft it onto the base path first.
            tpath = remove_dot_segments(merge(authority, path, rpath))
        tquery = rquery
    return (scheme, authority, tpath, tquery, rfragment)
157
158# um, something *vaguely* like this is what I want, but I have to generate
159# lots of test cases first, if only to understand what it is that
160# remove_dot_segments really does...
161## def remove_dot_segments(path):
162##     if path == '':
163##         return ''
164##     comps = path.split('/')
165##     new_comps = []
166##     for comp in comps:
167##         if comp in ['.', '']:
168##             if not new_comps or new_comps[-1]:
169##                 new_comps.append('')
170##             continue
171##         if comp != '..':
172##             new_comps.append(comp)
173##         elif new_comps:
174##             new_comps.pop()
175##     return '/'.join(new_comps)
176
177
def remove_dot_segments(path):
    """Return *path* with its "." and ".." segments resolved.

    The input is consumed from the left, one rule per iteration; the
    lettered comments mark the rules in the order they are tried.
    Segments that survive are collected and joined at the end.  A ".."
    that has no preceding segment left to cancel is simply dropped.
    """
    output = []
    while path:
        # A: strip a leading "../" or "./"
        if path.startswith("../"):
            path = path[3:]
        elif path.startswith("./"):
            path = path[2:]
        # B: "/./" (or a final "/.") collapses to "/"
        elif path.startswith("/./"):
            path = path[2:]
        elif path == "/.":
            path = "/"
        # C: "/../" (or a final "/..") collapses to "/" and discards the
        # most recently collected segment, if there is one
        elif path.startswith("/../"):
            path = path[3:]
            if output:
                output.pop()
        elif path == "/..":
            path = "/"
            if output:
                output.pop()
        # D: a lone "." or ".." is dropped
        elif path in (".", ".."):
            path = ""
        # E: move the first segment (with its leading "/", if any) to the
        # output
        else:
            search_from = 0
            if path.startswith("/"):
                search_from = 1
            end = path.find("/", search_from)
            if end < 0:
                # No further "/": the rest of the input is the last segment.
                output.append(path)
                break
            output.append(path[:end])
            path = path[end:]
    return "".join(output)
225
def merge(base_authority, base_path, ref_path):
    """Graft the relative *ref_path* onto *base_path*.

    If *base_path* is empty the result is "/" + ref_path.  Otherwise
    everything after the last "/" of *base_path* is replaced by
    *ref_path*; a base path containing no "/" contributes nothing.

    *base_authority* is accepted but not consulted (see the note below).
    """
    # XXXX Oddly, the sample Perl implementation of this by Roy Fielding
    # doesn't even take base_authority as a parameter, despite the wording in
    # the RFC suggesting otherwise.  Perhaps I'm missing some obvious identity.
    #if base_authority is not None and base_path == "":
    if base_path == "":
        return "/" + ref_path
    # rfind() gives -1 when there is no "/", which makes the kept prefix
    # empty and the result just ref_path.
    last_slash = base_path.rfind("/")
    return base_path[:last_slash + 1] + ref_path
237
# When run as a script, execute the doctests embedded in this module.
if __name__ == "__main__":
    import doctest
    doctest.testmod()
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。