1 | """Integration with Python standard library module urllib2: Request class. |
---|
2 | |
---|
3 | Copyright 2004-2006 John J Lee <jjl@pobox.com> |
---|
4 | |
---|
5 | This code is free software; you can redistribute it and/or modify it |
---|
6 | under the terms of the BSD or ZPL 2.1 licenses (see the file |
---|
7 | COPYING.txt included with the distribution). |
---|
8 | |
---|
9 | """ |
---|
10 | |
---|
11 | import urllib2, urllib, logging |
---|
12 | |
---|
13 | from _clientcookie import request_host |
---|
14 | import _rfc3986 |
---|
15 | |
---|
# Look the package logger up once and derive everything else from it.
_mechanize_logger = logging.getLogger("mechanize")

# Raise the threshold to ERROR so that the warning-level messages sent through
# ``warn`` are dropped by default; this keeps the logging module from
# complaining about a missing handler when the application configured none.
_mechanize_logger.setLevel(logging.ERROR)

# Module-wide shortcut used to report suspicious (non-fatal) input.
warn = _mechanize_logger.warning
---|
19 | |
---|
20 | |
---|
class Request(urllib2.Request):
    """urllib2.Request extended with the attributes cookie handling needs.

    On top of the base class this stores the RFC 2965 ``unverifiable`` flag
    and origin request-host, keeps a separate dict of headers that must not
    be copied onto redirected requests, and carries an opaque ``visit``
    value for the caller.

    NOTE: the class must keep the name ``Request``.  ``get_selector`` reads
    ``self.__r_host``, which Python mangles to ``_Request__r_host``; that
    only resolves to the attribute maintained by ``urllib2.Request`` because
    both classes share the same name.
    """

    def __init__(self, url, data=None, headers=None,
                 origin_req_host=None, unverifiable=False, visit=None):
        """Create a request for *url*.

        url -- unicode or bytestring URL.  A warning is logged (the request
            is still created) when it contains characters that are not
            legal in a URI.
        data -- request body passed through to urllib2.Request; when set,
            get_method() reports "POST".
        headers -- optional mapping of header names to values, passed to
            urllib2.Request.  ``None`` (the default) means no headers.
        origin_req_host -- request-host of the origin transaction (RFC
            2965); computed with request_host(self) when omitted.
        unverifiable -- RFC 2965 "unverifiable" flag for this request.
        visit -- stored unchanged as ``self.visit``; not interpreted here.
        """
        # Use a None sentinel rather than a shared mutable ``{}`` default.
        if headers is None:
            headers = {}

        # In mechanize 0.2, the interpretation of a unicode url argument will
        # change: A unicode url argument will be interpreted as an IRI, and a
        # bytestring as a URI. For now, we accept unicode or bytestring.  We
        # don't insist that the value is always a URI (specifically, must only
        # contain characters which are legal), because that might break working
        # code (who knows what bytes some servers want to see, especially with
        # browser plugins for internationalised URIs).
        if not _rfc3986.is_clean_uri(url):
            # Pass url as a logging argument: formatting is then skipped when
            # the record is discarded, and a tuple value cannot break the
            # %-interpolation.
            warn("url argument is not a URI "
                 "(contains illegal characters) %r", url)

        # urllib2.Request is initialised by an explicit base-class call.
        urllib2.Request.__init__(self, url, data, headers)
        self.selector = None
        self.unredirected_hdrs = {}
        self.visit = visit

        # All the terminology below comes from RFC 2965.
        self.unverifiable = unverifiable
        # Set request-host of origin transaction.
        # The origin request-host is needed in order to decide whether
        # unverifiable sub-requests (automatic redirects, images embedded
        # in HTML, etc.) are to third-party hosts.  If they are, the
        # resulting transactions might need to be conducted with cookies
        # turned off.
        if origin_req_host is None:
            origin_req_host = request_host(self)
        self.origin_req_host = origin_req_host

    def get_selector(self):
        """Return the selector part of the URL with any #fragment removed."""
        # ``__r_host`` is name-mangled to ``_Request__r_host`` (see the class
        # docstring); presumably the part of the URL following the host, as
        # maintained by urllib2.Request -- confirm against the stdlib source.
        return urllib.splittag(self.__r_host)[0]

    def get_origin_req_host(self):
        """Return the request-host of the origin transaction (RFC 2965)."""
        return self.origin_req_host

    def is_unverifiable(self):
        """Return the RFC 2965 "unverifiable" flag given at construction."""
        return self.unverifiable

    def add_unredirected_header(self, key, val):
        """Add a header that will not be added to a redirected request."""
        # Keys are normalised with str.capitalize(), e.g. "Content-type".
        self.unredirected_hdrs[key.capitalize()] = val

    def has_header(self, header_name):
        """True iff request has named header (regular or unredirected).

        The lookup is an exact match on the stored key; unredirected headers
        are stored in str.capitalize() form, so pass the name in that form.
        """
        return (header_name in self.headers or
                header_name in self.unredirected_hdrs)

    def get_header(self, header_name, default=None):
        """Return the value of the named header, or *default* if absent.

        Regular headers are consulted first, then unredirected headers.  As
        with has_header(), the name is matched exactly against stored keys.
        """
        return self.headers.get(
            header_name,
            self.unredirected_hdrs.get(header_name, default))

    def header_items(self):
        """Return (name, value) pairs for all headers of this request.

        Regular and unredirected headers are merged; when the same key is
        present in both, the regular header's value wins.
        """
        hdrs = self.unredirected_hdrs.copy()
        hdrs.update(self.headers)
        return hdrs.items()

    def __str__(self):
        """Return a short description containing the full request URL."""
        return "<Request for %s>" % self.get_full_url()

    def get_method(self):
        """Return "POST" if the request carries data, otherwise "GET"."""
        if self.has_data():
            return "POST"
        else:
            return "GET"
---|