[3] | 1 | """Integration with Python standard library module urllib2: Request class. |
---|
| 2 | |
---|
| 3 | Copyright 2004-2006 John J Lee <jjl@pobox.com> |
---|
| 4 | |
---|
| 5 | This code is free software; you can redistribute it and/or modify it |
---|
| 6 | under the terms of the BSD or ZPL 2.1 licenses (see the file |
---|
| 7 | COPYING.txt included with the distribution). |
---|
| 8 | |
---|
| 9 | """ |
---|
| 10 | |
---|
| 11 | import urllib2, urllib, logging |
---|
| 12 | |
---|
| 13 | from _clientcookie import request_host |
---|
| 14 | import _rfc3986 |
---|
| 15 | |
---|
# Keep the "mechanize" logger at ERROR so that, unless the application
# reconfigures it, warnings from this module do not trigger complaints
# about a missing logging handler.
logging.getLogger("mechanize").setLevel(logging.ERROR)
# Module-level shortcut used to report questionable input.
warn = logging.getLogger("mechanize").warning
| 19 | |
---|
| 20 | |
---|
class Request(urllib2.Request):
    """urllib2.Request carrying the extra state needed for RFC 2965 cookies.

    On top of the base class this records the origin request-host, whether
    the request is unverifiable, a separate set of headers that are not
    carried over to redirected requests, and a caller-supplied ``visit``
    value that is stored untouched.
    """

    def __init__(self, url, data=None, headers=None,
                 origin_req_host=None, unverifiable=False, visit=None):
        """Initialise the request.

        url -- unicode or bytestring URL; a warning is logged (but the value
            is still accepted) if it is not a clean URI.
        data -- passed through to urllib2.Request.
        headers -- mapping of header names to values, passed through to
            urllib2.Request.  Defaults to a fresh empty dict per call.
        origin_req_host -- request-host of the origin transaction; when None
            it is computed from this request with request_host().
        unverifiable -- stored as-is; see RFC 2965.
        visit -- stored as-is on the instance.
        """
        # A None sentinel replaces the former ``headers={}`` default so that
        # no single dict object is shared between every call that omits it.
        if headers is None:
            headers = {}

        # In mechanize 0.2, the interpretation of a unicode url argument will
        # change: A unicode url argument will be interpreted as an IRI, and a
        # bytestring as a URI. For now, we accept unicode or bytestring.  We
        # don't insist that the value is always a URI (specifically, must only
        # contain characters which are legal), because that might break working
        # code (who knows what bytes some servers want to see, especially with
        # browser plugins for internationalised URIs).
        if not _rfc3986.is_clean_uri(url):
            # Pass url as a logging argument so the message is only formatted
            # when the record is actually emitted (the logger is set to ERROR
            # by default, so normally it is not).
            warn("url argument is not a URI "
                 "(contains illegal characters) %r", url)
        # The base class is called explicitly rather than through super().
        urllib2.Request.__init__(self, url, data, headers)
        self.selector = None
        self.unredirected_hdrs = {}
        self.visit = visit

        # All the terminology below comes from RFC 2965.
        self.unverifiable = unverifiable
        # Set request-host of origin transaction.
        # The origin request-host is needed in order to decide whether
        # unverifiable sub-requests (automatic redirects, images embedded
        # in HTML, etc.) are to third-party hosts.  If they are, the
        # resulting transactions might need to be conducted with cookies
        # turned off.
        if origin_req_host is None:
            origin_req_host = request_host(self)
        self.origin_req_host = origin_req_host

    def get_selector(self):
        """Return the selector with any '#fragment' part removed."""
        # Inside a class named Request, ``self.__r_host`` is name-mangled to
        # ``self._Request__r_host``; this relies on urllib2.Request (which
        # shares the class name) providing that attribute.
        return urllib.splittag(self.__r_host)[0]

    def get_origin_req_host(self):
        """Return the request-host of the origin transaction."""
        return self.origin_req_host

    def is_unverifiable(self):
        """Return the ``unverifiable`` flag given at construction."""
        return self.unverifiable

    def add_unredirected_header(self, key, val):
        """Add a header that will not be added to a redirected request."""
        # Keys are stored in str.capitalize() form.
        self.unredirected_hdrs[key.capitalize()] = val

    def has_header(self, header_name):
        """True iff request has named header (regular or unredirected).

        The lookup is by exact key; header_name is not normalised here.
        """
        return (header_name in self.headers or
                header_name in self.unredirected_hdrs)

    def get_header(self, header_name, default=None):
        """Return the value of the named header.

        Regular headers are consulted first, then unredirected headers;
        ``default`` is returned when neither contains header_name.
        """
        fallback = self.unredirected_hdrs.get(header_name, default)
        return self.headers.get(header_name, fallback)

    def header_items(self):
        """Return (name, value) pairs for all headers.

        When a name appears in both sets, the regular header's value wins.
        """
        hdrs = self.unredirected_hdrs.copy()
        hdrs.update(self.headers)
        return hdrs.items()

    def __str__(self):
        return "<Request for %s>" % self.get_full_url()

    def get_method(self):
        """Return "POST" if the request has data, otherwise "GET"."""
        if self.has_data():
            return "POST"
        return "GET"