root/galaxy-central/eggs/twill-0.9-py2.6.egg/twill/other_packages/_mechanize_dist/ClientForm.py

リビジョン 3, 121.3 KB (コミッタ: kohda, 14 年 前)

Install Unix tools  http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

行番号 
1"""HTML form handling for web clients.
2
3ClientForm is a Python module for handling HTML forms on the client
4side, useful for parsing HTML forms, filling them in and returning the
5completed forms to the server.  It has developed from a port of Gisle
6Aas' Perl module HTML::Form, from the libwww-perl library, but the
7interface is not the same.
8
9The most useful docstring is the one for HTMLForm.
10
11RFC 1866: HTML 2.0
12RFC 1867: Form-based File Upload in HTML
13RFC 2388: Returning Values from Forms: multipart/form-data
14HTML 3.2 Specification, W3C Recommendation 14 January 1997 (for ISINDEX)
15HTML 4.01 Specification, W3C Recommendation 24 December 1999
16
17
18Copyright 2002-2006 John J. Lee <jjl@pobox.com>
19Copyright 2005 Gary Poster
20Copyright 2005 Zope Corporation
21Copyright 1998-2000 Gisle Aas.
22
23This code is free software; you can redistribute it and/or modify it
24under the terms of the BSD or ZPL 2.1 licenses (see the file
25COPYING.txt included with the distribution).
26
27"""
28
29# XXX
30# add an __all__
31# Remove parser testing hack
32# safeUrl()-ize action
33# Switch to unicode throughout (would be 0.3.x)
34#  See Wichert Akkerman's 2004-01-22 message to c.l.py.
35# Add charset parameter to Content-type headers?  How to find value??
36# Add some more functional tests
37#  Especially single and multiple file upload on the internet.
38#  Does file upload work when name is missing?  Sourceforge tracker form
39#   doesn't like it.  Check standards, and test with Apache.  Test
40#   binary upload with Apache.
41# mailto submission & enctype text/plain
42# I'm not going to fix this unless somebody tells me what real servers
43#  that want this encoding actually expect: If enctype is
44#  application/x-www-form-urlencoded and there's a FILE control present.
45#  Strictly, it should be 'name=data' (see HTML 4.01 spec., section
46#  17.13.2), but I send "name=" ATM.  What about multiple file upload??
47
48# Would be nice, but I'm not going to do it myself:
49# -------------------------------------------------
50# Maybe a 0.4.x?
51#   Replace by_label etc. with moniker / selector concept. Allows, eg.,
52#    a choice between selection by value / id / label / element
53#    contents.  Or choice between matching labels exactly or by
54#    substring.  Etc.
55#   Remove deprecated methods.
56#   ...what else?
57# Work on DOMForm.
58# XForms?  Don't know if there's a need here.
59
60
# Compatibility shim: if the interpreter has no True/False names, bind
# integer stand-ins so the rest of the module can use them.
try: True
except NameError:
    True = 1
    False = 0

# Likewise, define a minimal bool() when the builtin does not exist.
try: bool
except NameError:
    def bool(expr):
        # Map any truth value onto the True/False names defined above.
        if expr: return True
        else: return False
71
try:
    import logging
except ImportError:
    def debug(msg, *args, **kwds):
        """No-op stand-in used when the logging module cannot be imported."""
        pass
else:
    _logger = logging.getLogger("ClientForm")
    # While true, debug() returns immediately so that the frame inspection
    # below costs nothing; _show_debug_messages() switches it off.
    OPTIMIZATION_HACK = True

    def debug(msg, *args, **kwds):
        """Log msg at DEBUG level on the "ClientForm" logger.

        The message is prefixed with the name of the function two frames
        above this one (the caller's caller).  If the stack is shallower
        than that, the nearest available frame is used instead of failing.
        args and kwds are passed through to Logger.debug().  Does nothing
        while OPTIMIZATION_HACK is true.
        """
        if OPTIMIZATION_HACK:
            return

        # Raise and catch a throwaway exception purely to get hold of a
        # traceback, and through it the current frame.
        try:
            raise Exception()
        except Exception:
            frame = sys.exc_info()[2].tb_frame
        # Step back up to two frames, stopping early at the top of the stack.
        for _ in (1, 2):
            if frame.f_back is None:
                break
            frame = frame.f_back
        caller_name = frame.f_code.co_name
        extended_msg = '%%s %s' % msg
        extended_args = (caller_name,)+args
        _logger.debug(extended_msg, *extended_args, **kwds)

    def _show_debug_messages():
        """Enable debug() output and send it to sys.stdout.

        Clears OPTIMIZATION_HACK, sets the "ClientForm" logger to DEBUG and
        attaches a DEBUG-level StreamHandler writing to sys.stdout.  Each
        call adds another handler.
        """
        global OPTIMIZATION_HACK
        OPTIMIZATION_HACK = False
        _logger.setLevel(logging.DEBUG)
        handler = logging.StreamHandler(sys.stdout)
        handler.setLevel(logging.DEBUG)
        _logger.addHandler(handler)
101
102import sys, urllib, urllib2, types, mimetools, copy, urlparse, \
103       htmlentitydefs, re, random
104from cStringIO import StringIO
105
106import sgmllib
# monkeypatch to fix http://www.python.org/sf/803422 :-(
# The replacement pattern captures an optional leading "x" followed by
# hexadecimal digits, so hexadecimal character references are matched as
# well as decimal ones.
sgmllib.charref = re.compile("&#(x?[0-9a-fA-F]+)[^0-9a-fA-F]")
109
# HTMLParser.HTMLParser is recent, so live without it if it's not available
# (also, sgmllib.SGMLParser is much more tolerant of bad HTML)
# HAVE_MODULE_HTMLPARSER records whether the import succeeded; it is
# consulted further down when ParseError and XHTMLCompatibleFormParser are
# defined.
try:
    import HTMLParser
except ImportError:
    HAVE_MODULE_HTMLPARSER = False
else:
    HAVE_MODULE_HTMLPARSER = True
118
try:
    import warnings
except ImportError:
    def deprecation(message):
        """Do nothing: the warnings module is not available."""
        return None
else:
    def deprecation(message):
        """Issue message as a DeprecationWarning.

        stacklevel=2 attributes the warning to the code that called this
        function rather than to this function itself.
        """
        warnings.warn(message, category=DeprecationWarning, stacklevel=2)
127
# Version string of this module.
VERSION = "0.2.7"

CHUNK = 1024  # size of chunks fed to parser, in bytes

# Default value of the `encoding` argument taken by unescape() and by the
# form parser constructors in this file.
DEFAULT_ENCODING = "latin-1"

# Empty marker class.
# NOTE(review): presumably used as a "no value supplied" sentinel; its uses
# are not visible in this part of the file -- confirm.
class Missing: pass
135
_compress_re = re.compile(r"\s+")
def compress_text(text):
    """Strip text, then collapse every run of whitespace to a single space."""
    trimmed = text.strip()
    return _compress_re.sub(" ", trimmed)
138
def normalize_line_endings(text):
    """Return text with every lone LF or lone CR replaced by CRLF.

    Existing CRLF pairs are left as they are.
    """
    lone_lf_or_cr = r"(?:(?<!\r)\n)|(?:\r(?!\n))"
    return re.sub(lone_lf_or_cr, "\r\n", text)
141
142
143# This version of urlencode is from my Python 1.5.2 back-port of the
144# Python 2.1 CVS maintenance branch of urllib.  It will accept a sequence
145# of pairs instead of a mapping -- the 2.0 version only accepts a mapping.
def urlencode(query, doseq=False):
    """Encode a mapping or a sequence of (key, value) tuples as a query string.

    query -- an object with an items() method, or a sequence whose elements
        are two-element tuples.  For a sequence of tuples, the parameters
        appear in the output in input order.
    doseq -- when true, a value that is a sequence other than a string
        yields one "key=value" parameter per element; when false, every
        value is passed through str() as a whole.

    Returns the "key=value" pairs, quoted with urllib.quote_plus and joined
    with "&" ("" for an empty query).

    Raises TypeError if query is neither a mapping nor a sequence of tuples
    (for instance a non-empty string, or an object without len()).
    """

    if hasattr(query, "items"):
        # mapping objects
        query = query.items()
    else:
        # it's a bother at times that strings and string-like objects are
        # sequences...
        try:
            # Non-sequences fail len(); non-empty strings pass len() but
            # their first element is not a tuple.  Zero-length sequences of
            # any type are accepted, for consistency with empty mappings.
            if len(query) and not isinstance(query[0], types.TupleType):
                raise TypeError()
        except TypeError:
            # Raise with the message only: the traceback object must not be
            # stored in the exception's args.
            raise TypeError("not a valid non-string sequence or mapping "
                            "object")

    pairs = []
    if not doseq:
        # preserve old behavior
        for k, v in query:
            k = urllib.quote_plus(str(k))
            v = urllib.quote_plus(str(v))
            pairs.append(k + '=' + v)
    else:
        for k, v in query:
            k = urllib.quote_plus(str(k))
            if isinstance(v, types.StringType):
                pairs.append(k + '=' + urllib.quote_plus(v))
            elif isinstance(v, types.UnicodeType):
                # is there a reasonable way to convert to ASCII?
                # encode generates a string, but "replace" or "ignore"
                # lose information and "strict" can raise UnicodeError
                v = urllib.quote_plus(v.encode("ASCII", "replace"))
                pairs.append(k + '=' + v)
            else:
                try:
                    # is this a sufficient test for sequence-ness?
                    len(v)
                except TypeError:
                    # not a sequence
                    pairs.append(k + '=' + urllib.quote_plus(str(v)))
                else:
                    # one parameter per element of the sequence
                    for elt in v:
                        pairs.append(k + '=' + urllib.quote_plus(str(elt)))
    return '&'.join(pairs)
211
def unescape(data, entities, encoding=DEFAULT_ENCODING):
    """Expand entity and character references found in data.

    data -- string to process; returned unchanged when it is None or
        contains no "&".
    entities -- mapping from complete reference text (e.g. "&amp;") to its
        replacement.
    encoding -- handed to unescape_charref() for "&#...;" references, and
        used to encode replacements that are not plain strings.

    A reference with no entry in entities, or whose replacement cannot be
    encoded, is left in the output exactly as written.
    """
    if data is None or "&" not in data:
        return data

    def replace_entities(match, entities=entities, encoding=encoding):
        ref = match.group()
        if ref[1] == "#":
            # character reference: pass on the text between "&#" and ";"
            return unescape_charref(ref[2:-1], encoding)

        replacement = entities.get(ref)
        if replacement is None:
            return ref
        if type(replacement) == type(""):
            return replacement
        try:
            return replacement.encode(encoding)
        except UnicodeError:
            return ref

    return re.sub(r"&#?[A-Za-z0-9]+?;", replace_entities, data)
234
def unescape_charref(data, encoding):
    """Convert the body of a numeric character reference to a character.

    data -- the text between "&#" and ";": decimal digits, or "x" followed
        by hexadecimal digits.
    encoding -- encoding for the result; if None, the unicode character is
        returned unencoded.

    Returns the character, or the reference re-spelled as "&#<data>;" when
    data is not a valid number, names a code point unichr() rejects, or
    cannot be represented in encoding.  Malformed references therefore pass
    through as literal text instead of raising.
    """
    literal = "&#%s;" % data
    name, base = data, 10
    if name.startswith("x"):
        name, base = name[1:], 16
    # Parse the number on its own first, so that garbage such as "&#abc;"
    # is rejected here.
    try:
        codepoint = int(name, base)
    except ValueError:
        return literal
    try:
        uc = unichr(codepoint)
    except (ValueError, OverflowError):
        # negative or too large to be a code point
        return literal
    if encoding is None:
        return uc
    try:
        return uc.encode(encoding)
    except UnicodeError:
        return literal
248
def get_entitydefs():
    """Return a dict mapping "&name;" to the entity's unicode character.

    Built from htmlentitydefs.name2codepoint when that attribute exists.
    Otherwise built from htmlentitydefs.entitydefs, whose values are decoded
    as latin-1 and, where a value is itself a "&#...;" reference, resolved
    with unescape_charref().
    """
    import htmlentitydefs
    from codecs import latin_1_decode

    table = {}
    if hasattr(htmlentitydefs, "name2codepoint"):
        for name, codepoint in htmlentitydefs.name2codepoint.items():
            table["&%s;" % name] = unichr(codepoint)
        return table

    # htmlentitydefs without name2codepoint: fall back to entitydefs
    for name, char in htmlentitydefs.entitydefs.items():
        uc = latin_1_decode(char)[0]
        if uc.startswith("&#") and uc.endswith(";"):
            uc = unescape_charref(uc[2:-1], None)
        table["&%s;" % name] = uc
    return table
266
267
def issequence(x):
    """Return True if x can be indexed with an integer.

    x[0] raising IndexError (for example an empty list) still counts as a
    sequence; TypeError or KeyError means x is not one.
    """
    try:
        x[0]
    except IndexError:
        return True
    except (TypeError, KeyError):
        return False
    else:
        return True
276
def isstringlike(x):
    """Return True if the expression x + "" evaluates without any error."""
    try:
        x + ""
    except:
        return False
    return True
281
282
def choose_boundary():
    """Return a string usable as a multipart boundary.

    The result is 27 hyphens followed by three random non-negative integers
    written out in decimal.
    """
    # follow IE and firefox
    digits = []
    for _ in range(3):
        digits.append(str(random.randint(0, sys.maxint-1)))
    return "-" * 27 + "".join(digits)
288
# This MimeWriter, cut-and-pasted from the standard library, is here so
# that headers can be added to the HTTP headers rather than to the message
# body when appropriate.  It also uses \r\n in place of \n.  This is a bit
# nasty.
class MimeWriter:

    """Generic MIME writer that can divert headers into an HTTP header list.

    Methods:

    __init__()
    addheader()
    flushheaders()
    startbody()
    startmultipartbody()
    nextpart()
    lastpart()

    The writer never seeks on the output file: parts must be written in the
    order in which they are to appear.  Headers are buffered until the body
    is started (or flushheaders() is called), so they can be added in any
    order before that.

    General usage is:

    f = <open the output file>
    w = MimeWriter(f)
    ...call w.addheader(key, value) 0 or more times...

    followed by either:

    f = w.startbody(content_type)
    ...call f.write(data) for body data...

    or:

    w.startmultipartbody(subtype)
    for each part:
        subwriter = w.nextpart()
        ...use the subwriter's methods to create the subpart...
    w.lastpart()

    Each subwriter is another instance of the same class writing to the
    same file, and is used in the same way as the top-level writer.

    Warning: don't forget to call lastpart()!

    XXX There should be more state so calls made in the wrong order
    are detected.

    Notes:

    - startbody() and startmultipartbody() both return the file passed to
      the constructor; don't rely on that, as it may change.

    - flushheaders() writes out and forgets the headers buffered so far,
      which is useful for a part that needs no body.

    - The 'prefix' argument of addheader() and start*body() chooses where a
      buffered header goes: false appends it, true inserts it first.
      addheader() appends by default; start*body() insert the Content-type
      header first by default.

    - With add_to_http_hdrs true, a header is appended as a (key, value)
      tuple to the http_hdrs list given to the constructor instead of being
      buffered for the file.

    """

    def __init__(self, fp, http_hdrs=None):
        self._http_hdrs = http_hdrs
        self._fp = fp
        self._headers = []
        self._boundary = []
        self._first_part = True

    def addheader(self, key, value, prefix=0,
                  add_to_http_hdrs=0):
        """Buffer a header, or append it to the HTTP header list.

        value is split on CRLF and empty lines at either end are dropped.
        prefix is ignored if add_to_http_hdrs is true.
        """
        parts = value.split("\r\n")
        while parts and not parts[-1]:
            parts.pop()
        while parts and not parts[0]:
            parts.pop(0)
        if add_to_http_hdrs:
            # HTTP header values are kept on a single line
            self._http_hdrs.append((key, "".join(parts)))
            return
        # continuation lines are stripped and indented by four spaces
        folded = parts[:1] + ["    " + part.strip() for part in parts[1:]]
        header = key + ": " + "\r\n".join(folded) + "\r\n"
        if prefix:
            self._headers.insert(0, header)
        else:
            self._headers.append(header)

    def flushheaders(self):
        """Write the buffered headers to the file and forget them."""
        self._fp.writelines(self._headers)
        self._headers = []

    def startbody(self, ctype=None, plist=[], prefix=1,
                  add_to_http_hdrs=0, content_type=1):
        """Emit the headers and return the file to write the body to.

        If content_type and ctype are both true, a Content-type header is
        added first, built from ctype plus the (name, value) pairs in plist.
        The blank line ending the headers is written only when
        add_to_http_hdrs is false.
        prefix is ignored if add_to_http_hdrs is true.
        """
        if content_type and ctype:
            header_value = ctype
            for param, param_value in plist:
                header_value = header_value + ';\r\n %s=%s' % (
                    param, param_value)
            self.addheader("Content-type", header_value, prefix=prefix,
                           add_to_http_hdrs=add_to_http_hdrs)
        self.flushheaders()
        if not add_to_http_hdrs:
            self._fp.write("\r\n")
        self._first_part = True
        return self._fp

    def startmultipartbody(self, subtype, boundary=None, plist=[], prefix=1,
                           add_to_http_hdrs=0, content_type=1):
        """Start a multipart/<subtype> body and return the file.

        boundary defaults to choose_boundary(); it is remembered for
        nextpart() / lastpart() and sent as the first Content-type
        parameter, ahead of those in plist.
        """
        if not boundary:
            boundary = choose_boundary()
        self._boundary.append(boundary)
        params = [("boundary", boundary)] + plist
        return self.startbody("multipart/" + subtype,
                              params,
                              prefix=prefix,
                              add_to_http_hdrs=add_to_http_hdrs,
                              content_type=content_type)

    def nextpart(self):
        """Write the delimiter for a new part and return a writer for it."""
        boundary = self._boundary[-1]
        # every part after the first is preceded by a CRLF
        if not self._first_part:
            self._fp.write("\r\n")
        self._first_part = False
        self._fp.write("--" + boundary + "\r\n")
        return self.__class__(self._fp)

    def lastpart(self):
        """Write the closing delimiter of the current multipart body.

        If no part was started, an opening delimiter is written first.
        """
        if self._first_part:
            self.nextpart()
        boundary = self._boundary.pop()
        self._fp.write("\r\n--" + boundary + "--\r\n")
433
434
class LocateError(ValueError):
    """Common base class of the three lookup errors defined below."""


class AmbiguityError(LocateError):
    """LocateError subclass.

    NOTE(review): presumably raised when a lookup matches more than one
    candidate; the raise sites are not in this part of the file.
    """


class ControlNotFoundError(LocateError):
    """LocateError subclass.

    NOTE(review): presumably raised when no control matches; confirm
    against the raise sites.
    """


class ItemNotFoundError(LocateError):
    """LocateError subclass.

    NOTE(review): presumably raised when no item matches; confirm against
    the raise sites.
    """


class ItemCountError(ValueError):
    """ValueError subclass that is not a LocateError.

    NOTE(review): presumably raised for a wrong number of items; confirm
    against the raise sites.
    """
441
# for backwards compatibility, ParseError derives from exceptions that were
# raised by versions of ClientForm <= 0.2.5
# SGMLLIB_PARSEERROR is the exception class the sgmllib-based parsers catch
# in feed() and convert to ParseError.
if HAVE_MODULE_HTMLPARSER:
    SGMLLIB_PARSEERROR = sgmllib.SGMLParseError
    class ParseError(sgmllib.SGMLParseError,
                     HTMLParser.HTMLParseError,
                     ):
        """Parse failure; catchable as either underlying parser's error."""
elif hasattr(sgmllib, "SGMLParseError"):
    SGMLLIB_PARSEERROR = sgmllib.SGMLParseError
    class ParseError(sgmllib.SGMLParseError):
        """Parse failure; catchable as sgmllib.SGMLParseError."""
else:
    # this sgmllib has no SGMLParseError: fall back to RuntimeError
    SGMLLIB_PARSEERROR = RuntimeError
    class ParseError(RuntimeError):
        """Parse failure; catchable as RuntimeError."""
459
460
class _AbstractFormParser:
    """Parser mix-in that records FORM elements and their controls.

    The tag handlers below are driven by a concrete parser class mixed in
    alongside this one.  While parsing, each entry appended to self.forms is
    a tuple ((name, action, method, enctype), attrs, controls): attrs is a
    dict of the FORM tag's attributes and controls is a list of
    (type, name, attrs) tuples.  forms[0] is a synthetic "global" form that
    collects the controls found outside any FORM element.  self.labels
    holds the attribute dicts of LABEL elements that have a non-empty "for"
    attribute, and self.base holds the href of the last BASE element seen.

    The original docstring states that the forms attribute contains
    HTMLForm instances on completion; that conversion is not performed in
    this class.
    """
    # thanks to Moshe Zadka for an example of sgmllib/htmllib usage
    def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING):
        """Reset all parse state and create the global form.

        entitydefs -- mapping from reference text (e.g. "&amp;") to its
            replacement; defaults to get_entitydefs().
        encoding -- encoding passed to unescape() and unescape_charref().
        """
        if entitydefs is None:
            entitydefs = get_entitydefs()
        self._entitydefs = entitydefs
        self._encoding = encoding

        self.base = None
        self.forms = []
        self.labels = []
        self._current_label = None
        self._current_form = None
        self._select = None
        self._optgroup = None
        self._option = None
        self._textarea = None

        # forms[0] will contain all controls that are outside of any form
        # self._global_form is an alias for self.forms[0]
        self._global_form = None
        self.start_form([])
        self.end_form()
        self._current_form = self._global_form = self.forms[0]

    def do_base(self, attrs):
        """Record the href attribute of a BASE element in self.base."""
        debug("%s", attrs)
        for key, value in attrs:
            if key == "href":
                self.base = value

    def end_body(self):
        """Close any LABEL and FORM still open when BODY ends."""
        debug("")
        if self._current_label is not None:
            self.end_label()
        if self._current_form is not self._global_form:
            self.end_form()

    def start_form(self, attrs):
        """Begin a new form described by the FORM tag's attrs.

        method is upper-cased and enctype lower-cased; they default to
        "GET" and "application/x-www-form-urlencoded".  Raises ParseError
        if another FORM is already open.
        """
        debug("%s", attrs)
        if self._current_form is not self._global_form:
            raise ParseError("nested FORMs")
        name = None
        action = None
        enctype = "application/x-www-form-urlencoded"
        method = "GET"
        d = {}
        for key, value in attrs:
            if key == "name":
                name = value
            elif key == "action":
                action = value
            elif key == "method":
                method = value.upper()
            elif key == "enctype":
                enctype = value.lower()
            d[key] = value
        controls = []
        self._current_form = (name, action, method, enctype), d, controls

    def end_form(self):
        """Append the open form to self.forms and return to the global form.

        Any open LABEL is closed first.  Raises ParseError if no FORM is
        open.
        """
        debug("")
        if self._current_label is not None:
            self.end_label()
        if self._current_form is self._global_form:
            raise ParseError("end of FORM before start")
        self.forms.append(self._current_form)
        self._current_form = self._global_form

    def start_select(self, attrs):
        """Open a SELECT and record a placeholder control for it.

        Raises ParseError if a SELECT or TEXTAREA is already open.
        """
        debug("%s", attrs)
        if self._select is not None:
            raise ParseError("nested SELECTs")
        if self._textarea is not None:
            raise ParseError("SELECT inside TEXTAREA")
        d = {}
        for key, val in attrs:
            d[key] = val

        self._select = d
        self._add_label(d)

        self._append_select_control({"__select": d})

    def end_select(self):
        """Close the open SELECT, first closing any OPTION still open.

        A SELECT outside any FORM is closed like any other, so that its
        state does not leak into later elements.  An end tag with no open
        SELECT is ignored outside a FORM and raises ParseError inside one.
        """
        debug("")
        if self._select is None:
            if self._current_form is self._global_form:
                # stray </select> outside any FORM: tolerated
                return
            raise ParseError("end of SELECT before start")

        if self._option is not None:
            self._end_option()

        self._select = None

    def start_optgroup(self, attrs):
        """Remember the attributes of an OPTGROUP inside a SELECT.

        Raises ParseError if no SELECT is open.
        """
        debug("%s", attrs)
        if self._select is None:
            raise ParseError("OPTGROUP outside of SELECT")
        d = {}
        for key, val in attrs:
            d[key] = val

        self._optgroup = d

    def end_optgroup(self):
        """Forget the current OPTGROUP; ParseError if none is open."""
        debug("")
        if self._optgroup is None:
            raise ParseError("end of OPTGROUP before start")
        self._optgroup = None

    def _start_option(self, attrs):
        """Open an OPTION, implicitly closing a previous one.

        An option inside a disabled OPTGROUP is marked disabled too.
        Raises ParseError if no SELECT is open.
        """
        debug("%s", attrs)
        if self._select is None:
            raise ParseError("OPTION outside of SELECT")
        if self._option is not None:
            self._end_option()

        d = {}
        for key, val in attrs:
            d[key] = val

        self._option = {}
        self._option.update(d)
        if (self._optgroup and self._optgroup.has_key("disabled") and
            not self._option.has_key("disabled")):
            self._option["disabled"] = None

    def _end_option(self):
        """Finish the open OPTION and record it as a select control.

        The stripped text content becomes the default for the "value" and
        "label" attributes.  Raises ParseError if no OPTION is open.
        """
        debug("")
        if self._option is None:
            raise ParseError("end of OPTION before start")

        contents = self._option.get("contents", "").strip()
        self._option["contents"] = contents
        if not self._option.has_key("value"):
            self._option["value"] = contents
        if not self._option.has_key("label"):
            self._option["label"] = contents
        # stuff dict of SELECT HTML attrs into a special private key
        #  (gets deleted again later)
        self._option["__select"] = self._select
        self._append_select_control(self._option)
        self._option = None

    def _append_select_control(self, attrs):
        """Append a ("select", name, attrs) entry to the current form."""
        debug("%s", attrs)
        controls = self._current_form[2]
        name = self._select.get("name")
        controls.append(("select", name, attrs))

    def start_textarea(self, attrs):
        """Open a TEXTAREA; its text is then collected by handle_data().

        Raises ParseError if a TEXTAREA or SELECT is already open.
        """
        debug("%s", attrs)
        if self._textarea is not None:
            raise ParseError("nested TEXTAREAs")
        if self._select is not None:
            raise ParseError("TEXTAREA inside SELECT")
        d = {}
        for key, val in attrs:
            d[key] = val
        self._add_label(d)

        self._textarea = d

    def end_textarea(self):
        """Record the open TEXTAREA as a control of the current form.

        A TEXTAREA outside any FORM is recorded on the global form, like
        other controls found there.  An end tag with no open TEXTAREA is
        ignored outside a FORM and raises ParseError inside one.
        """
        debug("")
        if self._textarea is None:
            if self._current_form is self._global_form:
                # stray </textarea> outside any FORM: tolerated
                return
            raise ParseError("end of TEXTAREA before start")
        controls = self._current_form[2]
        name = self._textarea.get("name")
        controls.append(("textarea", name, self._textarea))
        self._textarea = None

    def start_label(self, attrs):
        """Open a LABEL, closing any LABEL already open.

        A label with a non-empty "for" attribute is appended to
        self.labels immediately and marked as taken.
        """
        debug("%s", attrs)
        if self._current_label:
            self.end_label()
        d = {}
        for key, val in attrs:
            d[key] = val
        taken = bool(d.get("for"))  # empty id is invalid
        d["__text"] = ""
        d["__taken"] = taken
        if taken:
            self.labels.append(d)
        self._current_label = d

    def end_label(self):
        """Close the open LABEL, if any, dropping its "__taken" flag."""
        debug("")
        label = self._current_label
        if label is None:
            # something is ugly in the HTML, but we're ignoring it
            return
        self._current_label = None
        # if it is staying around, it is True in all cases
        del label["__taken"]

    def _add_label(self, d):
        """Associate the open LABEL, if any, with the control attrs d.

        A label that is already taken (by a "for" attribute or an earlier
        control) is closed instead; otherwise it is marked taken and stored
        in d under "__label".
        """
        #debug("%s", d)
        if self._current_label is not None:
            if self._current_label["__taken"]:
                self.end_label()  # be fuzzy
            else:
                self._current_label["__taken"] = True
                d["__label"] = self._current_label

    def handle_data(self, data):
        """Accumulate text for the open OPTION, TEXTAREA or LABEL.

        OPTION takes precedence over TEXTAREA, which takes precedence over
        LABEL; text outside all three is discarded.  TEXTAREA text has its
        line endings normalized to CRLF.
        """
        debug("%s", data)

        # according to http://www.w3.org/TR/html4/appendix/notes.html#h-B.3.1
        # line break immediately after start tags or immediately before end
        # tags must be ignored, but real browsers only ignore a line break
        # after a start tag, so we'll do that.
        # Exactly one leading line break is removed: CRLF counts as one.
        if data[0:2] == "\r\n":
            data = data[2:]
        elif data[0:1] in ["\n", "\r"]:
            data = data[1:]

        if self._option is not None:
            # self._option is a dictionary of the OPTION element's HTML
            # attributes, but it has two special keys, one of which is the
            # special "contents" key contains text between OPTION tags (the
            # other is the "__select" key: see the end_option method)
            target = self._option
            key = "contents"
        elif self._textarea is not None:
            target = self._textarea
            key = "value"
            data = normalize_line_endings(data)
        # not if within option or textarea
        elif self._current_label is not None:
            target = self._current_label
            key = "__text"
        else:
            return

        if not target.has_key(key):
            target[key] = data
        else:
            target[key] = target[key] + data

    def do_button(self, attrs):
        """Record a BUTTON element as a control of the current form.

        The recorded type is the TYPE attribute (default "submit") with
        "button" appended.
        """
        debug("%s", attrs)
        d = {}
        d["type"] = "submit"  # default
        for key, val in attrs:
            d[key] = val
        controls = self._current_form[2]

        name = d.get("name")
        # we don't want to lose information, so use a type string that
        # doesn't clash with INPUT TYPE={SUBMIT,RESET,BUTTON}
        # e.g. type for BUTTON/RESET is "resetbutton"
        #     (type for INPUT/RESET is "reset")
        control_type = d["type"]+"button"
        self._add_label(d)
        controls.append((control_type, name, d))

    def do_input(self, attrs):
        """Record an INPUT element (type defaults to "text") as a control."""
        debug("%s", attrs)
        d = {}
        d["type"] = "text"  # default
        for key, val in attrs:
            d[key] = val
        controls = self._current_form[2]

        control_type = d["type"]
        name = d.get("name")
        self._add_label(d)
        controls.append((control_type, name, d))

    def do_isindex(self, attrs):
        """Record an ISINDEX element as a nameless "isindex" control."""
        debug("%s", attrs)
        d = {}
        for key, val in attrs:
            d[key] = val
        controls = self._current_form[2]

        self._add_label(d)
        # isindex doesn't have type or name HTML attributes
        controls.append(("isindex", None, d))

    def handle_entityref(self, name):
        """Feed the expansion of the entity reference &name; as data."""
        #debug("%s", name)
        self.handle_data(unescape(
            '&%s;' % name, self._entitydefs, self._encoding))

    def handle_charref(self, name):
        """Feed the expansion of the character reference &#name; as data."""
        #debug("%s", name)
        self.handle_data(unescape_charref(name, self._encoding))

    def unescape_attr(self, name):
        """Return name with entity and character references expanded."""
        #debug("%s", name)
        return unescape(name, self._entitydefs, self._encoding)

    def unescape_attrs(self, attrs):
        """Return a copy of the dict attrs with every value unescaped.

        Values that are themselves dicts are processed recursively.
        """
        #debug("%s", attrs)
        escaped_attrs = {}
        for key, val in attrs.items():
            try:
                val.items
            except AttributeError:
                escaped_attrs[key] = self.unescape_attr(val)
            else:
                # e.g. "__select" -- yuck!
                escaped_attrs[key] = self.unescape_attrs(val)
        return escaped_attrs

    # References the underlying parser does not recognise are passed
    # through as literal text.
    def unknown_entityref(self, ref): self.handle_data("&%s;" % ref)
    def unknown_charref(self, ref): self.handle_data("&#%s;" % ref)
777
778
779if not HAVE_MODULE_HTMLPARSER:
780    class XHTMLCompatibleFormParser:
781        def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING):
782            raise ValueError("HTMLParser could not be imported")
783else:
784    class XHTMLCompatibleFormParser(_AbstractFormParser, HTMLParser.HTMLParser):
785        """Good for XHTML, bad for tolerance of incorrect HTML."""
786        # thanks to Michael Howitz for this!
787        def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING):
788            HTMLParser.HTMLParser.__init__(self)
789            _AbstractFormParser.__init__(self, entitydefs, encoding)
790
791        def feed(self, data):
792            try:
793                HTMLParser.HTMLParser.feed(self, data)
794            except HTMLParser.HTMLParseError, exc:
795                raise ParseError(exc)
796
797        def start_option(self, attrs):
798            _AbstractFormParser._start_option(self, attrs)
799
800        def end_option(self):
801            _AbstractFormParser._end_option(self)
802
803        def handle_starttag(self, tag, attrs):
804            try:
805                method = getattr(self, "start_" + tag)
806            except AttributeError:
807                try:
808                    method = getattr(self, "do_" + tag)
809                except AttributeError:
810                    pass  # unknown tag
811                else:
812                    method(attrs)
813            else:
814                method(attrs)
815
816        def handle_endtag(self, tag):
817            try:
818                method = getattr(self, "end_" + tag)
819            except AttributeError:
820                pass  # unknown tag
821            else:
822                method()
823
824        def unescape(self, name):
825            # Use the entitydefs passed into constructor, not
826            # HTMLParser.HTMLParser's entitydefs.
827            return self.unescape_attr(name)
828
829        def unescape_attr_if_required(self, name):
830            return name  # HTMLParser.HTMLParser already did it
831        def unescape_attrs_if_required(self, attrs):
832            return attrs  # ditto
833
834
class _AbstractSgmllibParser(_AbstractFormParser):
    """Form-parser behaviour shared by the sgmllib-based parser classes."""

    def do_option(self, attrs):
        # OPTION is handled through a do_ method; the open option is closed
        # by the next OPTION or by the end of the SELECT in the base class.
        _AbstractFormParser._start_option(self, attrs)

    if sys.version_info[:2] >= (2,5):
        # we override this attr to decode hex charrefs
        entity_or_charref = re.compile(
            '&(?:([a-zA-Z][-.a-zA-Z0-9]*)|#(x?[0-9a-fA-F]+))(;?)')
        # Conversions use the entitydefs and encoding given to the
        # constructor.
        def convert_entityref(self, name):
            return unescape("&%s;" % name, self._entitydefs, self._encoding)
        def convert_charref(self, name):
            return unescape_charref("%s" % name, self._encoding)
        def unescape_attr_if_required(self, name):
            return name  # sgmllib already did it
        def unescape_attrs_if_required(self, attrs):
            return attrs  # ditto
    else:
        # On older Pythons the attribute values are unescaped here.
        def unescape_attr_if_required(self, name):
            return self.unescape_attr(name)
        def unescape_attrs_if_required(self, attrs):
            return self.unescape_attrs(attrs)
857
858
class FormParser(_AbstractSgmllibParser, sgmllib.SGMLParser):
    """Good for tolerance of incorrect HTML, bad for XHTML."""
    def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING):
        # Initialise both bases explicitly: the SGML parser state first,
        # then the form-collecting state.
        sgmllib.SGMLParser.__init__(self)
        _AbstractFormParser.__init__(self, entitydefs, encoding)

    def feed(self, data):
        # Re-raise sgmllib's parse error as this module's ParseError.
        try:
            sgmllib.SGMLParser.feed(self, data)
        except SGMLLIB_PARSEERROR, exc:
            raise ParseError(exc)
870
871
872
873# sigh, must support mechanize by allowing dynamic creation of classes based on
874# its bundled copy of BeautifulSoup (which was necessary because of dependency
875# problems)
876
877def _create_bs_classes(bs,
878                       icbinbs,
879                       ):
880    class _AbstractBSFormParser(_AbstractSgmllibParser):
881        bs_base_class = None
882        def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING):
883            _AbstractFormParser.__init__(self, entitydefs, encoding)
884            self.bs_base_class.__init__(self)
885        def handle_data(self, data):
886            _AbstractFormParser.handle_data(self, data)
887            self.bs_base_class.handle_data(self, data)
888        def feed(self, data):
889            try:
890                self.bs_base_class.feed(self, data)
891            except SGMLLIB_PARSEERROR, exc:
892                raise ParseError(exc)
893
894
895    class RobustFormParser(_AbstractBSFormParser, bs):
896        """Tries to be highly tolerant of incorrect HTML."""
897        pass
898    RobustFormParser.bs_base_class = bs
899    class NestingRobustFormParser(_AbstractBSFormParser, icbinbs):
900        """Tries to be highly tolerant of incorrect HTML.
901
902        Different from RobustFormParser in that it more often guesses nesting
903        above missing end tags (see BeautifulSoup docs).
904
905        """
906        pass
907    NestingRobustFormParser.bs_base_class = icbinbs
908
909    return RobustFormParser, NestingRobustFormParser
910
try:
    # BeautifulSoup uses generators, so treat it as unavailable on Pythons
    # older than 2.2
    if sys.version_info[:2] < (2, 2):
        raise ImportError
    import BeautifulSoup
except ImportError:
    # without BeautifulSoup the robust parser classes are not defined
    pass
else:
    RobustFormParser, NestingRobustFormParser = _create_bs_classes(
        bs=BeautifulSoup.BeautifulSoup,
        icbinbs=BeautifulSoup.ICantBelieveItsBeautifulSoup,
        )
921
922
923#FormParser = XHTMLCompatibleFormParser  # testing hack
924#FormParser = RobustFormParser  # testing hack
925
926
def ParseResponseEx(response,
                    select_default=False,
                    form_parser_class=FormParser,
                    request_class=urllib2.Request,
                    entitydefs=None,
                    encoding=DEFAULT_ENCODING,

                    # private
                    _urljoin=urlparse.urljoin,
                    _urlparse=urlparse.urlparse,
                    _urlunparse=urlparse.urlunparse,
                    ):
    """Parse an HTTP response, keeping controls that lie outside any FORM.

    Identical to ParseResponse, except that:

    1. The returned list contains an extra item.  The first form in the list
    contains all controls not contained in any FORM element.

    2. The arguments ignore_errors and backwards_compat have been removed.

    3. Backwards-compatibility mode (backwards_compat=True) is not available.

    """
    base_uri = response.geturl()
    # ignore_errors and backwards_compat are always switched off here
    return _ParseFileEx(response, base_uri,
                        select_default=select_default,
                        ignore_errors=False,
                        form_parser_class=form_parser_class,
                        request_class=request_class,
                        entitydefs=entitydefs,
                        backwards_compat=False,
                        encoding=encoding,
                        _urljoin=_urljoin,
                        _urlparse=_urlparse,
                        _urlunparse=_urlunparse,
                        )
960
def ParseFileEx(file, base_uri,
                select_default=False,
                form_parser_class=FormParser,
                request_class=urllib2.Request,
                entitydefs=None,
                encoding=DEFAULT_ENCODING,

                # private
                _urljoin=urlparse.urljoin,
                _urlparse=urlparse.urlparse,
                _urlunparse=urlparse.urlunparse,
                ):
    """Parse HTML from a file, keeping controls that lie outside any FORM.

    Identical to ParseFile, except that:

    1. The returned list contains an extra item.  The first form in the list
    contains all controls not contained in any FORM element.

    2. The arguments ignore_errors and backwards_compat have been removed.

    3. Backwards-compatibility mode (backwards_compat=True) is not available.

    """
    # ignore_errors and backwards_compat are always switched off here
    return _ParseFileEx(file, base_uri,
                        select_default=select_default,
                        ignore_errors=False,
                        form_parser_class=form_parser_class,
                        request_class=request_class,
                        entitydefs=entitydefs,
                        backwards_compat=False,
                        encoding=encoding,
                        _urljoin=_urljoin,
                        _urlparse=_urlparse,
                        _urlunparse=_urlunparse,
                        )
994
def ParseResponse(response, *args, **kwds):
    """Parse HTTP response and return a list of HTMLForm instances.

    The object returned by urllib2.urlopen can conveniently be passed as the
    response parameter.

    ClientForm.ParseError is raised on parse errors.

    response: file-like object (supporting read() method) with a method
     geturl(), returning the URI of the HTTP response
    select_default: for multiple-selection SELECT controls and RADIO controls,
     pick the first item as the default if none are selected in the HTML
    form_parser_class: class to instantiate and use to parse the HTML
    request_class: class to return from .click() method (default is
     urllib2.Request)
    entitydefs: mapping like {"&amp;": "&", ...} containing HTML entity
     definitions (a sensible default is used)
    encoding: character encoding used for encoding numeric character references
     when matching link text.  ClientForm does not attempt to find the encoding
     in a META HTTP-EQUIV attribute in the document itself (mechanize, for
     example, does do that and will pass the correct value to ClientForm using
     this parameter).

    backwards_compat: boolean that determines whether the returned HTMLForm
     objects are backwards-compatible with old code.  If backwards_compat is
     true:

     - ClientForm 0.1 code will continue to work as before.

     - Label searches that do not specify a nr (number or count) will always
       get the first match, even if other controls match.  If
       backwards_compat is False, label searches that have ambiguous results
       will raise an AmbiguityError.

     - Item label matching is done by strict string comparison rather than
       substring matching.

     - De-selecting individual list items is allowed even if the Item is
       disabled.

    The backwards_compat argument will be deprecated in a future release.

    Pass a true value for select_default if you want the behaviour specified by
    RFC 1866 (the HTML 2.0 standard), which is to select the first item in a
    RADIO or multiple-selection SELECT control if none were selected in the
    HTML.  Most browsers (including Microsoft Internet Explorer (IE) and
    Netscape Navigator) instead leave all items unselected in these cases.  The
    W3C HTML 4.0 standard leaves this behaviour undefined in the case of
    multiple-selection SELECT controls, but insists that at least one RADIO
    button should be checked at all times, in contradiction to browser
    behaviour.

    There is a choice of parsers.  ClientForm.XHTMLCompatibleFormParser (uses
    HTMLParser.HTMLParser) works best for XHTML, ClientForm.FormParser (uses
    sgmllib.SGMLParser) (the default) works better for ordinary grubby HTML.
    Note that HTMLParser is only available in Python 2.2 and later.  You can
    pass your own class in here as a hack to work around bad HTML, but at your
    own risk: there is no well-defined interface.

    """
    all_forms = _ParseFileEx(response, response.geturl(), *args, **kwds)
    # the first entry is left out of the result
    return all_forms[1:]
1056
def ParseFile(file, base_uri, *args, **kwds):
    """Parse HTML and return a list of HTMLForm instances.

    ClientForm.ParseError is raised on parse errors.

    file: file-like object (supporting read() method) containing HTML with zero
     or more forms to be parsed
    base_uri: the URI of the document (note that the base URI used to submit
     the form will be that given in the BASE element if present, not that of
     the document)

    For the other arguments and further details, see ParseResponse.__doc__.

    """
    all_forms = _ParseFileEx(file, base_uri, *args, **kwds)
    # the first entry is left out of the result
    return all_forms[1:]
1072
1073def _ParseFileEx(file, base_uri,
1074                 select_default=False,
1075                 ignore_errors=False,
1076                 form_parser_class=FormParser,
1077                 request_class=urllib2.Request,
1078                 entitydefs=None,
1079                 backwards_compat=True,
1080                 encoding=DEFAULT_ENCODING,
1081                 _urljoin=urlparse.urljoin,
1082                 _urlparse=urlparse.urlparse,
1083                 _urlunparse=urlparse.urlunparse,
1084                 ):
1085    if backwards_compat:
1086        deprecation("operating in backwards-compatibility mode")
1087    fp = form_parser_class(entitydefs, encoding)
1088   
1089    file.seek(0)
1090   
1091    while 1:
1092        data = file.read(CHUNK)
1093        try:
1094            fp.feed(data)
1095        except ParseError, e:
1096            e.base_uri = base_uri
1097            raise
1098        if len(data) != CHUNK: break
1099    if fp.base is not None:
1100        # HTML BASE element takes precedence over document URI
1101        base_uri = fp.base
1102    labels = []  # Label(label) for label in fp.labels]
1103    id_to_labels = {}
1104    for l in fp.labels:
1105        label = Label(l)
1106        labels.append(label)
1107        for_id = l["for"]
1108        coll = id_to_labels.get(for_id)
1109        if coll is None:
1110            id_to_labels[for_id] = [label]
1111        else:
1112            coll.append(label)
1113    forms = []
1114    for (name, action, method, enctype), attrs, controls in fp.forms:
1115        if action is None:
1116            action = base_uri
1117        else:
1118            action = _urljoin(base_uri, action)
1119        action = fp.unescape_attr_if_required(action)
1120        name = fp.unescape_attr_if_required(name)
1121        attrs = fp.unescape_attrs_if_required(attrs)
1122        # would be nice to make HTMLForm class (form builder) pluggable
1123        form = HTMLForm(
1124            action, method, enctype, name, attrs, request_class,
1125            forms, labels, id_to_labels, backwards_compat)
1126        form._urlparse = _urlparse
1127        form._urlunparse = _urlunparse
1128        for ii in range(len(controls)):
1129            type, name, attrs = controls[ii]
1130            attrs = fp.unescape_attrs_if_required(attrs)
1131            name = fp.unescape_attr_if_required(name)
1132            # index=ii*10 allows ImageControl to return multiple ordered pairs
1133            form.new_control(type, name, attrs, select_default=select_default,
1134                             index=ii*10)
1135        forms.append(form)
1136    for form in forms:
1137        form.fixup()
1138    return forms
1139
1140
class Label:
    """Text label attached to a form control.

    Public attributes:

    id: value of the label's "for" attribute (None if absent)
    text: label text (read-only); the stripped text in backwards-compatible
     mode, otherwise the result of compress_text() on the stripped text
    attrs: the attribute dictionary the label was built from

    """
    def __init__(self, attrs):
        self.id = attrs.get("for")
        stripped = attrs.get("__text").strip()
        self._text = stripped
        self._ctext = compress_text(stripped)
        self.attrs = attrs
        self._backwards_compat = False  # maintained by HTMLForm

    def __getattr__(self, name):
        # only reached for names missing from the instance dictionary
        if name != "text":
            return getattr(Label, name)
        if self._backwards_compat:
            return self._text
        return self._ctext

    def __setattr__(self, name, value):
        if name == "text":
            # don't see any need for this, so make it read-only
            raise AttributeError("text attribute is read-only")
        self.__dict__[name] = value

    def __str__(self):
        details = (self.id, self.text)
        return "<Label(id=%r, text=%r)>" % details
1165
1166
1167def _get_label(attrs):
1168    text = attrs.get("__label")
1169    if text is not None:
1170        return Label(text)
1171    else:
1172        return None
1173
class Control:
    """An HTML form control.

    An HTMLForm holds a sequence of Controls; reach them through the
    HTMLForm.find_control method or the HTMLForm.controls attribute.

    Control instances are usually constructed using the ParseFile /
    ParseResponse functions, in which case the rest of this paragraph can be
    ignored.  A Control is only properly initialised after the fixup method
    has been called.  In fact, this is only strictly necessary for
    ListControl instances: ListControls are built up from ListControls each
    containing only a single item, so their initial value(s) can only be
    known after the sequence is complete.

    The types and values that are acceptable for assignment to the value
    attribute are defined by subclasses.

    If the disabled attribute is true, this represents the state typically
    represented by browsers by 'greying out' a control.  A disabled Control
    raises AttributeError if an attempt is made to change its value.  In
    addition, the control will not be considered 'successful' as defined by
    the W3C HTML 4 standard -- ie. it will contribute no data to the return
    value of the HTMLForm.click* methods.  To enable a control, set the
    disabled attribute to a false value.

    If the readonly attribute is true, the Control will raise AttributeError if
    an attempt is made to change its value.  To make a control writable, set
    the readonly attribute to a false value.

    All controls have the disabled and readonly attributes, not only those that
    may have the HTML attributes of the same names.

    On assignment to the value attribute, the following exceptions are raised:
    TypeError, AttributeError (if the value attribute should not be assigned
    to, because the control is disabled, for example) and ValueError.

    If the name or value attributes are None, or the value is an empty list, or
    if the control is disabled, the control is not successful.

    Public attributes:

    type: string describing type of control (see the keys of the
     HTMLForm.type2class dictionary for the allowable values) (readonly)
    name: name of control (readonly)
    value: current value of control (subclasses may allow a single value, a
     sequence of values, or either)
    disabled: disabled state
    readonly: readonly state
    id: value of id HTML attribute

    """
    def __init__(self, type, name, attrs, index=None):
        """
        type: string describing type of control (see the keys of the
         HTMLForm.type2class dictionary for the allowable values)
        name: control name
        attrs: HTML attributes of control's HTML element

        """
        raise NotImplementedError()

    def add_to_form(self, form):
        """Remember form as this control's form and append self to it."""
        self._form = form
        form.controls.append(self)

    def fixup(self):
        """Do nothing; subclasses may override."""
        pass

    def is_of_kind(self, kind):
        raise NotImplementedError()

    def clear(self):
        raise NotImplementedError()

    # attribute access is defined by subclasses
    def __getattr__(self, name):
        raise NotImplementedError()

    def __setattr__(self, name, value):
        raise NotImplementedError()

    def pairs(self):
        """Return list of (key, value) pairs suitable for passing to urlencode.
        """
        result = []
        for index, key, value in self._totally_ordered_pairs():
            result.append((key, value))
        return result

    def _totally_ordered_pairs(self):
        """Return list of (index, key, value) tuples.

        Like pairs, but allows preserving correct ordering even where several
        controls are involved.

        """
        raise NotImplementedError()

    def _write_mime_data(self, mw, name, value):
        """Write data for a subitem of this control to a MimeWriter."""
        # called by HTMLForm
        part = mw.nextpart()
        disposition = 'form-data; name="%s"' % name
        part.addheader("Content-disposition", disposition, 1)
        body = part.startbody(prefix=0)
        body.write(value)

    def __str__(self):
        raise NotImplementedError()

    def get_labels(self):
        """Return all labels (Label instances) for this control.

        If the control was surrounded by a <label> tag, that will be the first
        label; all other labels, connected by 'for' and 'id', are in the order
        that appear in the HTML.

        """
        found = []
        surrounding = self._label
        if surrounding:
            found.append(surrounding)
        own_id = self.id
        if own_id:
            found.extend(self._form._id_to_labels.get(own_id, ()))
        return found
1292
1293
1294#---------------------------------------------------
class ScalarControl(Control):
    """Control whose value is not restricted to one of a prescribed set.

    Some ScalarControls don't accept any value attribute.  Otherwise, takes a
    single value, which must be string-like.

    Additional read-only public attribute:

    attrs: dictionary mapping the names of original HTML attributes of the
     control to their values

    """
    def __init__(self, type, name, attrs, index=None):
        self._index = index
        self._label = _get_label(attrs)
        # type and name are read-only through __setattr__, so they are
        # stored directly in the instance dictionary
        self.__dict__["type"] = type.lower()
        self.__dict__["name"] = name
        self._value = attrs.get("value")
        self.disabled = attrs.has_key("disabled")
        self.readonly = attrs.has_key("readonly")
        self.id = attrs.get("id")

        # keep a private copy of the HTML attributes
        self.attrs = attrs.copy()

        self._clicked = False

        self._urlparse = urlparse.urlparse
        self._urlunparse = urlparse.urlunparse

    def __getattr__(self, name):
        if name != "value":
            raise AttributeError("%s instance has no attribute '%s'" %
                                 (self.__class__.__name__, name))
        return self.__dict__["_value"]

    def __setattr__(self, name, value):
        if name in ("name", "type"):
            raise AttributeError("%s attribute is readonly" % name)
        if name != "value":
            self.__dict__[name] = value
            return
        # assignment to value: check the new value and the control's state
        if not isstringlike(value):
            raise TypeError("must assign a string")
        if self.readonly:
            raise AttributeError("control '%s' is readonly" % self.name)
        if self.disabled:
            raise AttributeError("control '%s' is disabled" % self.name)
        self.__dict__["_value"] = value

    def _totally_ordered_pairs(self):
        """Return [(index, name, value)], or [] if the control is unsuccessful."""
        name = self.name
        value = self.value
        if name is None or value is None or self.disabled:
            return []
        return [(self._index, name, value)]

    def clear(self):
        """Set the value to None; raise AttributeError if readonly."""
        if self.readonly:
            raise AttributeError("control '%s' is readonly" % self.name)
        self.__dict__["_value"] = None

    def __str__(self):
        name = self.name
        value = self.value
        if name is None:
            name = "<None>"
        if value is None:
            value = "<None>"

        flags = [flag for flag in ("disabled", "readonly")
                 if getattr(self, flag)]
        suffix = ""
        if flags:
            suffix = " (%s)" % ", ".join(flags)

        return "<%s(%s=%s)%s>" % (self.__class__.__name__, name, value, suffix)
1370
1371
1372#---------------------------------------------------
class TextControl(ScalarControl):
    """Textual input control.

    Covers:

    INPUT/TEXT
    INPUT/PASSWORD
    INPUT/HIDDEN
    TEXTAREA

    """
    def __init__(self, type, name, attrs, index=None):
        ScalarControl.__init__(self, type, name, attrs, index)
        # hidden controls are made read-only
        if self.type == "hidden":
            self.readonly = True
        # a missing value attribute becomes the empty string
        if self._value is None:
            self._value = ""

    def is_of_kind(self, kind):
        return kind == "text"
1391
1392#---------------------------------------------------
class FileControl(ScalarControl):
    """File upload with INPUT TYPE=FILE.

    The value attribute of a FileControl is always None.  Use add_file instead.

    Additional public method: add_file

    """

    def __init__(self, type, name, attrs, index=None):
        ScalarControl.__init__(self, type, name, attrs, index)
        self._value = None
        # list of (file_object, content_type, filename) tuples
        self._upload_data = []

    def is_of_kind(self, kind):
        return kind == "file"

    def clear(self):
        """Forget all added files; raise AttributeError if readonly."""
        if self.readonly:
            raise AttributeError("control '%s' is readonly" % self.name)
        self._upload_data = []

    def __setattr__(self, name, value):
        if name in ("value", "name", "type"):
            raise AttributeError("%s attribute is readonly" % name)
        self.__dict__[name] = value

    def add_file(self, file_object, content_type=None, filename=None):
        """Add a file to be uploaded with this control.

        file_object: object with a read method
        content_type: None (meaning "application/octet-stream") or string-like
        filename: None or string-like

        TypeError is raised if an argument is of the wrong kind.

        """
        if not hasattr(file_object, "read"):
            raise TypeError("file-like object must have read method")
        if content_type is not None and not isstringlike(content_type):
            raise TypeError("content type must be None or string-like")
        if filename is not None and not isstringlike(filename):
            raise TypeError("filename must be None or string-like")
        if content_type is None:
            content_type = "application/octet-stream"
        upload = (file_object, content_type, filename)
        self._upload_data.append(upload)

    def _totally_ordered_pairs(self):
        # XXX should it be successful even if unnamed?
        if self.name is None or self.disabled:
            return []
        return [(self._index, self.name, "")]

    def _write_mime_data(self, mw, _name, _value):
        # called by HTMLForm
        # assert _name == self.name and _value == ''
        upload_count = len(self._upload_data)
        if upload_count == 1:
            # single file
            file_object, content_type, filename = self._upload_data[0]
            part = mw.nextpart()
            if filename:
                fn_part = '; filename="%s"' % filename
            else:
                fn_part = ""
            disp = 'form-data; name="%s"%s' % (self.name, fn_part)
            part.addheader("Content-disposition", disp, prefix=1)
            body = part.startbody(content_type, prefix=0)
            body.write(file_object.read())
        elif upload_count != 0:
            # multiple files: one multipart/mixed part holding a subpart
            # per file
            outer = mw.nextpart()
            disp = 'form-data; name="%s"' % self.name
            outer.addheader("Content-disposition", disp, prefix=1)
            outer.startmultipartbody("mixed", prefix=0)
            for file_object, content_type, filename in self._upload_data:
                inner = outer.nextpart()
                if filename:
                    fn_part = '; filename="%s"' % filename
                else:
                    fn_part = ""
                disp = "file%s" % fn_part
                inner.addheader("Content-disposition", disp, prefix=1)
                body = inner.startbody(content_type, prefix=0)
                body.write(file_object.read())
            outer.lastpart()

    def __str__(self):
        name = self.name
        if name is None:
            name = "<None>"

        if self._upload_data:
            shown = []
            for file, ctype, filename in self._upload_data:
                if filename is None:
                    shown.append("<Unnamed file>")
                else:
                    shown.append(filename)
            value = ", ".join(shown)
        else:
            value = "<No files added>"

        flags = [flag for flag in ("disabled", "readonly")
                 if getattr(self, flag)]
        suffix = ""
        if flags:
            suffix = " (%s)" % ", ".join(flags)

        return "<%s(%s=%s)%s>" % (self.__class__.__name__, name, value, suffix)
1486
1487
1488#---------------------------------------------------
class IsindexControl(ScalarControl):
    """ISINDEX control.

    ISINDEX is the odd-one-out of HTML form controls.  In fact, it isn't really
    part of regular HTML forms at all, and predates it.  You're only allowed
    one ISINDEX per HTML document.  ISINDEX and regular form submission are
    mutually exclusive -- either submit a form, or the ISINDEX.

    Having said this, since ISINDEX controls may appear in forms (which is
    probably bad HTML), ParseFile / ParseResponse will include them in the
    HTMLForm instances it returns.  You can set the ISINDEX's value, as with
    any other control (but note that ISINDEX controls have no name, so you'll
    need to use the type argument of set_value!).  When you submit the form,
    the ISINDEX will not be successful (ie., no data will get returned to the
    server as a result of its presence), unless you click on the ISINDEX
    control, in which case the ISINDEX gets submitted instead of the form:

    form.set_value("my isindex value", type="isindex")
    urllib2.urlopen(form.click(type="isindex"))

    ISINDEX elements outside of FORMs are ignored.  If you want to submit one
    by hand, do it like so:

    url = urlparse.urljoin(page_uri, "?"+urllib.quote_plus("my isindex value"))
    result = urllib2.urlopen(url)

    """
    def __init__(self, type, name, attrs, index=None):
        ScalarControl.__init__(self, type, name, attrs, index)
        # a missing value attribute becomes the empty string
        if self._value is None:
            self._value = ""

    def is_of_kind(self, kind):
        return kind in ("text", "clickable")

    def _totally_ordered_pairs(self):
        # never contributes data to an ordinary form submission
        return []

    def _click(self, form, coord, return_type, request_class=urllib2.Request):
        # Relative URL for ISINDEX submission: instead of "foo=bar+baz",
        # want "bar+baz".
        # This doesn't seem to be specified in HTML 4.01 spec. (ISINDEX is
        # deprecated in 4.01, but it should still say how to submit it).
        # Submission of ISINDEX is explained in the HTML 3.2 spec, though.
        pieces = self._urlparse(form.action)
        leading = pieces[:-2]
        # the query and fragment of the form's action are discarded
        _query, _frag = pieces[-2:]
        new_query = urllib.quote_plus(self.value)
        url = self._urlunparse(leading + (new_query, None))

        if return_type == "pairs":
            return []
        if return_type == "request_data":
            return url, None, []
        return request_class(url)

    def __str__(self):
        value = self.value
        if value is None:
            value = "<None>"

        flags = [flag for flag in ("disabled", "readonly")
                 if getattr(self, flag)]
        suffix = ""
        if flags:
            suffix = " (%s)" % ", ".join(flags)

        return "<%s(%s)%s>" % (self.__class__.__name__, value, suffix)
1556
1557
1558#---------------------------------------------------
class IgnoreControl(ScalarControl):
    """Control that we're not interested in.

    Covers:

    INPUT/RESET
    BUTTON/RESET
    INPUT/BUTTON
    BUTTON/BUTTON

    These controls are always unsuccessful, in the terminology of HTML 4 (ie.
    they never require any information to be returned to the server).

    BUTTON/BUTTON is used to generate events for script embedded in HTML.

    The value attribute of IgnoreControl is always None.

    """
    def __init__(self, type, name, attrs, index=None):
        ScalarControl.__init__(self, type, name, attrs, index)
        # whatever the HTML said, the value is discarded
        self._value = None

    def is_of_kind(self, kind):
        return False

    def __setattr__(self, name, value):
        if name == "value":
            raise AttributeError(
                "control '%s' is ignored, hence read-only" % self.name)
        if name in ("name", "type"):
            raise AttributeError("%s attribute is readonly" % name)
        self.__dict__[name] = value
1591
1592
1593#---------------------------------------------------
1594# ListControls
1595
1596# helpers and subsidiary classes
1597
class Item:
    """A single item of a list control.

    Only the selected and disabled attributes can be assigned to; assigning
    any other attribute raises AttributeError.

    """
    def __init__(self, control, attrs, index=None):
        label = _get_label(attrs)
        if label:
            labels = [label]
        else:
            labels = []
        # __setattr__ rejects most names, so fill the instance dictionary
        # directly
        state = {
            "name": attrs["value"],
            "_labels": labels,
            "attrs": attrs,
            "_control": control,
            "disabled": attrs.has_key("disabled"),
            "_selected": False,
            "id": attrs.get("id"),
            "_index": index,
            }
        self.__dict__.update(state)
        control.items.append(self)

    def get_labels(self):
        """Return all labels (Label instances) for this item.

        For items that represent radio buttons or checkboxes, if the item was
        surrounded by a <label> tag, that will be the first label; all other
        labels, connected by 'for' and 'id', are in the order that appear in
        the HTML.

        For items that represent select options, if the option had a label
        attribute, that will be the first label.  If the option has contents
        (text within the option tags) and it is not the same as the label
        attribute (if any), that will be a label.  There is nothing in the
        spec to my knowledge that makes an option with an id unable to be the
        target of a label's for attribute, so those are included, if any, for
        the sake of consistency and completeness.

        """
        found = list(self._labels)
        own_id = self.id
        if own_id:
            found.extend(self._control._form._id_to_labels.get(own_id, ()))
        return found

    def __getattr__(self, name):
        if name != "selected":
            raise AttributeError(name)
        return self._selected

    def __setattr__(self, name, value):
        if name == "selected":
            # the owning control decides how selection changes are applied
            self._control._set_selected_state(self, value)
        elif name == "disabled":
            self.__dict__["disabled"] = bool(value)
        else:
            raise AttributeError(name)

    def __str__(self):
        text = self.name
        if self.selected:
            text = "*" + text
        if self.disabled:
            text = "(%s)" % text
        return text

    def __repr__(self):
        pairs = [("name", self.name), ("id", self.id)]
        pairs.extend(self.attrs.items())
        rendered = " ".join(["%s=%r" % (key, val) for key, val in pairs])
        return "<%s %s>" % (self.__class__.__name__, rendered)
1663
def disambiguate(items, nr, **kwds):
    """Return the single item of items selected by nr.

    items: sequence of candidate items
    nr: index of the wanted item, or None to require exactly one candidate
    kwds: search criteria, used only to build the exception message

    ItemNotFoundError is raised if items is empty or has no item at index nr;
    AmbiguityError is raised if nr is None and there are several candidates.

    """
    msg = " ".join(["%s=%r" % (key, value) for key, value in kwds.items()])
    if not items:
        raise ItemNotFoundError(msg)
    count = len(items)
    if nr is None:
        if count > 1:
            raise AmbiguityError(msg)
        nr = 0
    if count <= nr:
        raise ItemNotFoundError(msg)
    return items[nr]
1678
1679class ListControl(Control):
1680    """Control representing a sequence of items.
1681
1682    The value attribute of a ListControl represents the successful list items
1683    in the control.  The successful list items are those that are selected and
1684    not disabled.
1685
1686    ListControl implements both list controls that take a length-1 value
1687    (single-selection) and those that take length >1 values
1688    (multiple-selection).
1689
1690    ListControls accept sequence values only.  Some controls only accept
1691    sequences of length 0 or 1 (RADIO, and single-selection SELECT).
1692    In those cases, ItemCountError is raised if len(sequence) > 1.  CHECKBOXes
1693    and multiple-selection SELECTs (those having the "multiple" HTML attribute)
1694    accept sequences of any length.
1695
1696    Note the following mistake:
1697
1698    control.value = some_value
1699    assert control.value == some_value    # not necessarily true
1700
1701    The reason for this is that the value attribute always gives the list items
1702    in the order they were listed in the HTML.
1703
1704    ListControl items can also be referred to by their labels instead of names.
1705    Use the label argument to .get(), and the .set_value_by_label(),
1706    .get_value_by_label() methods.
1707
1708    Note that, rather confusingly, though SELECT controls are represented in
1709    HTML by SELECT elements (which contain OPTION elements, representing
1710    individual list items), CHECKBOXes and RADIOs are not represented by *any*
1711    element.  Instead, those controls are represented by a collection of INPUT
1712    elements.  For example, this is a SELECT control, named "control1":
1713
1714    <select name="control1">
1715     <option>foo</option>
1716     <option value="1">bar</option>
1717    </select>
1718
1719    and this is a CHECKBOX control, named "control2":
1720
1721    <input type="checkbox" name="control2" value="foo" id="cbe1">
1722    <input type="checkbox" name="control2" value="bar" id="cbe2">
1723
1724    The id attribute of a CHECKBOX or RADIO ListControl is always that of its
1725    first element (for example, "cbe1" above).
1726
1727
1728    Additional read-only public attribute: multiple.
1729
1730    """
1731
1732    # ListControls are built up by the parser from their component items by
1733    # creating one ListControl per item, consolidating them into a single
1734    # master ListControl held by the HTMLForm:
1735
1736    # -User calls form.new_control(...)
1737    # -Form creates Control, and calls control.add_to_form(self).
1738    # -Control looks for a Control with the same name and type in the form,
1739    #  and if it finds one, merges itself with that control by calling
1740    #  control.merge_control(self).  The first Control added to the form, of
1741    #  a particular name and type, is the only one that survives in the
1742    #  form.
1743    # -Form calls control.fixup for all its controls.  ListControls in the
1744    #  form know they can now safely pick their default values.
1745
1746    # To create a ListControl without an HTMLForm, use:
1747
1748    # control.merge_control(new_control)
1749
1750    # (actually, it's much easier just to use ParseFile)
1751
1752    _label = None
1753
1754    def __init__(self, type, name, attrs={}, select_default=False,
1755                 called_as_base_class=False, index=None):
1756        """
1757        select_default: for RADIO and multiple-selection SELECT controls, pick
1758         the first item as the default if no 'selected' HTML attribute is
1759         present
1760
1761        """
1762        if not called_as_base_class:
1763            raise NotImplementedError()
1764
1765        self.__dict__["type"] = type.lower()
1766        self.__dict__["name"] = name
1767        self._value = attrs.get("value")
1768        self.disabled = False
1769        self.readonly = False
1770        self.id = attrs.get("id")
1771
1772        # As Controls are merged in with .merge_control(), self.attrs will
1773        # refer to each Control in turn -- always the most recently merged
1774        # control.  Each merged-in Control instance corresponds to a single
1775        # list item: see ListControl.__doc__.
1776        self.items = []
1777        self._form = None
1778
1779        self._select_default = select_default
1780        self._clicked = False
1781
1782    def clear(self):
1783        self.value = []
1784
1785    def is_of_kind(self, kind):
1786        if kind  == "list":
1787            return True
1788        elif kind == "multilist":
1789            return bool(self.multiple)
1790        elif kind == "singlelist":
1791            return not self.multiple
1792        else:
1793            return False
1794
1795    def get_items(self, name=None, label=None, id=None,
1796                  exclude_disabled=False):
1797        """Return matching items by name or label.
1798
1799        For argument docs, see the docstring for .get()
1800
1801        """
1802        if name is not None and not isstringlike(name):
1803            raise TypeError("item name must be string-like")
1804        if label is not None and not isstringlike(label):
1805            raise TypeError("item label must be string-like")
1806        if id is not None and not isstringlike(id):
1807            raise TypeError("item id must be string-like")
1808        items = []  # order is important
1809        compat = self._form.backwards_compat
1810        for o in self.items:
1811            if exclude_disabled and o.disabled:
1812                continue
1813            if name is not None and o.name != name:
1814                continue
1815            if label is not None:
1816                for l in o.get_labels():
1817                    if ((compat and l.text == label) or
1818                        (not compat and l.text.find(label) > -1)):
1819                        break
1820                else:
1821                    continue
1822            if id is not None and o.id != id:
1823                continue
1824            items.append(o)
1825        return items
1826
1827    def get(self, name=None, label=None, id=None, nr=None,
1828            exclude_disabled=False):
1829        """Return item by name or label, disambiguating if necessary with nr.
1830
1831        All arguments must be passed by name, with the exception of 'name',
1832        which may be used as a positional argument.
1833
1834        If name is specified, then the item must have the indicated name.
1835
1836        If label is specified, then the item must have a label whose
1837        whitespace-compressed, stripped, text substring-matches the indicated
1838        label string (eg. label="please choose" will match
1839        "  Do  please  choose an item ").
1840
1841        If id is specified, then the item must have the indicated id.
1842
1843        nr is an optional 0-based index of the items matching the query.
1844
1845        If nr is the default None value and more than item is found, raises
1846        AmbiguityError (unless the HTMLForm instance's backwards_compat
1847        attribute is true).
1848
1849        If no item is found, or if items are found but nr is specified and not
1850        found, raises ItemNotFoundError.
1851
1852        Optionally excludes disabled items.
1853
1854        """
1855        if nr is None and self._form.backwards_compat:
1856            nr = 0  # :-/
1857        items = self.get_items(name, label, id, exclude_disabled)
1858        return disambiguate(items, nr, name=name, label=label, id=id)
1859
1860    def _get(self, name, by_label=False, nr=None, exclude_disabled=False):
1861        # strictly for use by deprecated methods
1862        if by_label:
1863            name, label = None, name
1864        else:
1865            name, label = name, None
1866        return self.get(name, label, nr, exclude_disabled)
1867
1868    def toggle(self, name, by_label=False, nr=None):
1869        """Deprecated: given a name or label and optional disambiguating index
1870        nr, toggle the matching item's selection.
1871
1872        Selecting items follows the behavior described in the docstring of the
1873        'get' method.
1874
1875        if the item is disabled, or this control is disabled or readonly,
1876        raise AttributeError.
1877
1878        """
1879        deprecation(
1880            "item = control.get(...); item.selected = not item.selected")
1881        o = self._get(name, by_label, nr)
1882        self._set_selected_state(o, not o.selected)
1883
1884    def set(self, selected, name, by_label=False, nr=None):
1885        """Deprecated: given a name or label and optional disambiguating index
1886        nr, set the matching item's selection to the bool value of selected.
1887
1888        Selecting items follows the behavior described in the docstring of the
1889        'get' method.
1890
1891        if the item is disabled, or this control is disabled or readonly,
1892        raise AttributeError.
1893
1894        """
1895        deprecation(
1896            "control.get(...).selected = <boolean>")
1897        self._set_selected_state(self._get(name, by_label, nr), selected)
1898
1899    def _set_selected_state(self, item, action):
1900        # action:
1901        # bool False: off
1902        # bool True: on
1903        if self.disabled:
1904            raise AttributeError("control '%s' is disabled" % self.name)
1905        if self.readonly:
1906            raise AttributeError("control '%s' is readonly" % self.name)
1907        action == bool(action)
1908        compat = self._form.backwards_compat
1909        if not compat and item.disabled:
1910            raise AttributeError("item is disabled")
1911        else:
1912            if compat and item.disabled and action:
1913                raise AttributeError("item is disabled")
1914            if self.multiple:
1915                item.__dict__["_selected"] = action
1916            else:
1917                if not action:
1918                    item.__dict__["_selected"] = False
1919                else:
1920                    for o in self.items:
1921                        o.__dict__["_selected"] = False
1922                    item.__dict__["_selected"] = True
1923
1924    def toggle_single(self, by_label=None):
1925        """Deprecated: toggle the selection of the single item in this control.
1926       
1927        Raises ItemCountError if the control does not contain only one item.
1928       
1929        by_label argument is ignored, and included only for backwards
1930        compatibility.
1931
1932        """
1933        deprecation(
1934            "control.items[0].selected = not control.items[0].selected")
1935        if len(self.items) != 1:
1936            raise ItemCountError(
1937                "'%s' is not a single-item control" % self.name)
1938        item = self.items[0]
1939        self._set_selected_state(item, not item.selected)
1940
1941    def set_single(self, selected, by_label=None):
1942        """Deprecated: set the selection of the single item in this control.
1943       
1944        Raises ItemCountError if the control does not contain only one item.
1945       
1946        by_label argument is ignored, and included only for backwards
1947        compatibility.
1948
1949        """
1950        deprecation(
1951            "control.items[0].selected = <boolean>")
1952        if len(self.items) != 1:
1953            raise ItemCountError(
1954                "'%s' is not a single-item control" % self.name)
1955        self._set_selected_state(self.items[0], selected)
1956
1957    def get_item_disabled(self, name, by_label=False, nr=None):
1958        """Get disabled state of named list item in a ListControl."""
1959        deprecation(
1960            "control.get(...).disabled")
1961        return self._get(name, by_label, nr).disabled
1962
1963    def set_item_disabled(self, disabled, name, by_label=False, nr=None):
1964        """Set disabled state of named list item in a ListControl.
1965
1966        disabled: boolean disabled state
1967
1968        """
1969        deprecation(
1970            "control.get(...).disabled = <boolean>")
1971        self._get(name, by_label, nr).disabled = disabled
1972
1973    def set_all_items_disabled(self, disabled):
1974        """Set disabled state of all list items in a ListControl.
1975
1976        disabled: boolean disabled state
1977
1978        """
1979        for o in self.items:
1980            o.disabled = disabled
1981
1982    def get_item_attrs(self, name, by_label=False, nr=None):
1983        """Return dictionary of HTML attributes for a single ListControl item.
1984
1985        The HTML element types that describe list items are: OPTION for SELECT
1986        controls, INPUT for the rest.  These elements have HTML attributes that
1987        you may occasionally want to know about -- for example, the "alt" HTML
1988        attribute gives a text string describing the item (graphical browsers
1989        usually display this as a tooltip).
1990
1991        The returned dictionary maps HTML attribute names to values.  The names
1992        and values are taken from the original HTML.
1993
1994        """
1995        deprecation(
1996            "control.get(...).attrs")
1997        return self._get(name, by_label, nr).attrs
1998
1999    def add_to_form(self, form):
2000        assert self._form is None or form == self._form, (
2001            "can't add control to more than one form")
2002        self._form = form
2003        if self.name is None:
2004            # always count nameless elements as separate controls
2005            Control.add_to_form(self, form)
2006        else:
2007            try:
2008                control = form.find_control(self.name, self.type)
2009            except (ControlNotFoundError, AmbiguityError):
2010                Control.add_to_form(self, form)
2011            else:
2012                control.merge_control(self)
2013
2014    def merge_control(self, control):
2015        assert bool(control.multiple) == bool(self.multiple)
2016        # usually, isinstance(control, self.__class__)
2017        self.items.extend(control.items)
2018
2019    def fixup(self):
2020        """
2021        ListControls are built up from component list items (which are also
2022        ListControls) during parsing.  This method should be called after all
2023        items have been added.  See ListControl.__doc__ for the reason this is
2024        required.
2025
2026        """
2027        # Need to set default selection where no item was indicated as being
2028        # selected by the HTML:
2029
2030        # CHECKBOX:
2031        #  Nothing should be selected.
2032        # SELECT/single, SELECT/multiple and RADIO:
2033        #  RFC 1866 (HTML 2.0): says first item should be selected.
2034        #  W3C HTML 4.01 Specification: says that client behaviour is
2035        #   undefined in this case.  For RADIO, exactly one must be selected,
2036        #   though which one is undefined.
2037        #  Both Netscape and Microsoft Internet Explorer (IE) choose first
2038        #   item for SELECT/single.  However, both IE5 and Mozilla (both 1.0
2039        #   and Firebird 0.6) leave all items unselected for RADIO and
2040        #   SELECT/multiple.
2041
2042        # Since both Netscape and IE all choose the first item for
2043        # SELECT/single, we do the same.  OTOH, both Netscape and IE
2044        # leave SELECT/multiple with nothing selected, in violation of RFC 1866
2045        # (but not in violation of the W3C HTML 4 standard); the same is true
2046        # of RADIO (which *is* in violation of the HTML 4 standard).  We follow
2047        # RFC 1866 if the _select_default attribute is set, and Netscape and IE
2048        # otherwise.  RFC 1866 and HTML 4 are always violated insofar as you
2049        # can deselect all items in a RadioControl.
2050       
2051        for o in self.items:
2052            # set items' controls to self, now that we've merged
2053            o.__dict__["_control"] = self
2054
2055    def __getattr__(self, name):
2056        if name == "value":
2057            compat = self._form.backwards_compat
2058            if self.name is None:
2059                return []
2060            return [o.name for o in self.items if o.selected and
2061                    (not o.disabled or compat)]
2062        else:
2063            raise AttributeError("%s instance has no attribute '%s'" %
2064                                 (self.__class__.__name__, name))
2065
2066    def __setattr__(self, name, value):
2067        if name == "value":
2068            if self.disabled:
2069                raise AttributeError("control '%s' is disabled" % self.name)
2070            if self.readonly:
2071                raise AttributeError("control '%s' is readonly" % self.name)
2072            self._set_value(value)
2073        elif name in ("name", "type", "multiple"):
2074            raise AttributeError("%s attribute is readonly" % name)
2075        else:
2076            self.__dict__[name] = value
2077
2078    def _set_value(self, value):
2079        if value is None or isstringlike(value):
2080            raise TypeError("ListControl, must set a sequence")
2081        if not value:
2082            compat = self._form.backwards_compat
2083            for o in self.items:
2084                if not o.disabled or compat:
2085                    o.selected = False
2086        elif self.multiple:
2087            self._multiple_set_value(value)
2088        elif len(value) > 1:
2089            raise ItemCountError(
2090                "single selection list, must set sequence of "
2091                "length 0 or 1")
2092        else:
2093            self._single_set_value(value)
2094
2095    def _get_items(self, name, target=1):
2096        all_items = self.get_items(name)
2097        items = [o for o in all_items if not o.disabled]
2098        if len(items) < target:
2099            if len(all_items) < target:
2100                raise ItemNotFoundError(
2101                    "insufficient items with name %r" % name)
2102            else:
2103                raise AttributeError(
2104                    "insufficient non-disabled items with name %s" % name)
2105        on = []
2106        off = []
2107        for o in items:
2108            if o.selected:
2109                on.append(o)
2110            else:
2111                off.append(o)
2112        return on, off
2113
2114    def _single_set_value(self, value):
2115        assert len(value) == 1
2116        on, off = self._get_items(value[0])
2117        assert len(on) <= 1
2118        if not on:
2119            off[0].selected = True
2120
2121    def _multiple_set_value(self, value):
2122        compat = self._form.backwards_compat
2123        turn_on = []  # transactional-ish
2124        turn_off = [item for item in self.items if
2125                    item.selected and (not item.disabled or compat)]
2126        names = {}
2127        for nn in value:
2128            if nn in names.keys():
2129                names[nn] += 1
2130            else:
2131                names[nn] = 1
2132        for name, count in names.items():
2133            on, off = self._get_items(name, count)
2134            for i in range(count):
2135                if on:
2136                    item = on[0]
2137                    del on[0]
2138                    del turn_off[turn_off.index(item)]
2139                else:
2140                    item = off[0]
2141                    del off[0]
2142                    turn_on.append(item)
2143        for item in turn_off:
2144            item.selected = False
2145        for item in turn_on:
2146            item.selected = True
2147
2148    def set_value_by_label(self, value):
2149        """Set the value of control by item labels.
2150
2151        value is expected to be an iterable of strings that are substrings of
2152        the item labels that should be selected.  Before substring matching is
2153        performed, the original label text is whitespace-compressed
2154        (consecutive whitespace characters are converted to a single space
2155        character) and leading and trailing whitespace is stripped.  Ambiguous
2156        labels are accepted without complaint if the form's backwards_compat is
2157        True; otherwise, it will not complain as long as all ambiguous labels
2158        share the same item name (e.g. OPTION value).
2159
2160        """
2161        if isstringlike(value):
2162            raise TypeError(value)
2163        if not self.multiple and len(value) > 1:
2164            raise ItemCountError(
2165                "single selection list, must set sequence of "
2166                "length 0 or 1")
2167        items = []
2168        for nn in value:
2169            found = self.get_items(label=nn)
2170            if len(found) > 1:
2171                if not self._form.backwards_compat:
2172                    # ambiguous labels are fine as long as item names (e.g.
2173                    # OPTION values) are same
2174                    opt_name = found[0].name
2175                    if [o for o in found[1:] if o.name != opt_name]:
2176                        raise AmbiguityError(nn)
2177                else:
2178                    # OK, we'll guess :-(  Assume first available item.
2179                    found = found[:1]
2180            for o in found:
2181                # For the multiple-item case, we could try to be smarter,
2182                # saving them up and trying to resolve, but that's too much.
2183                if self._form.backwards_compat or o not in items:
2184                    items.append(o)
2185                    break
2186            else:  # all of them are used
2187                raise ItemNotFoundError(nn)
2188        # now we have all the items that should be on
2189        # let's just turn everything off and then back on.
2190        self.value = []
2191        for o in items:
2192            o.selected = True
2193
2194    def get_value_by_label(self):
2195        """Return the value of the control as given by normalized labels."""
2196        res = []
2197        compat = self._form.backwards_compat
2198        for o in self.items:
2199            if (not o.disabled or compat) and o.selected:
2200                for l in o.get_labels():
2201                    if l.text:
2202                        res.append(l.text)
2203                        break
2204                else:
2205                    res.append(None)
2206        return res
2207
2208    def possible_items(self, by_label=False):
2209        """Deprecated: return the names or labels of all possible items.
2210
2211        Includes disabled items, which may be misleading for some use cases.
2212
2213        """
2214        deprecation(
2215            "[item.name for item in self.items]")
2216        if by_label:
2217            res = []
2218            for o in self.items:
2219                for l in o.get_labels():
2220                    if l.text:
2221                        res.append(l.text)
2222                        break
2223                else:
2224                    res.append(None)
2225            return res
2226        return [o.name for o in self.items]
2227
2228    def _totally_ordered_pairs(self):
2229        if self.disabled or self.name is None:
2230            return []
2231        else:
2232            return [(o._index, self.name, o.name) for o in self.items
2233                    if o.selected and not o.disabled]
2234
2235    def __str__(self):
2236        name = self.name
2237        if name is None: name = "<None>"
2238
2239        display = [str(o) for o in self.items]
2240
2241        infos = []
2242        if self.disabled: infos.append("disabled")
2243        if self.readonly: infos.append("readonly")
2244        info = ", ".join(infos)
2245        if info: info = " (%s)" % info
2246
2247        return "<%s(%s=[%s])%s>" % (self.__class__.__name__,
2248                                    name, ", ".join(display), info)
2249
2250
class RadioControl(ListControl):
    """
    Covers:

    INPUT/RADIO

    """
    def __init__(self, type, name, attrs, select_default=False, index=None):
        """Build a single-selection control holding one item.

        attrs is the HTML attributes mapping of the INPUT element.  Note that
        it is modified in place: a missing "value" is filled in as "on".  The
        item starts out selected if attrs has a "checked" key.

        """
        attrs.setdefault("value", "on")
        ListControl.__init__(self, type, name, attrs, select_default,
                             called_as_base_class=True, index=index)
        # "multiple" is read-only through __setattr__
        self.__dict__["multiple"] = False
        o = Item(self, attrs, index)
        o.__dict__["_selected"] = "checked" in attrs

    def fixup(self):
        """Settle the default selection once all items have been merged in.

        If no non-disabled item is selected, the first non-disabled item is
        selected, but only when _select_default is set.  If several are
        selected, only the last one is kept.

        """
        ListControl.fixup(self)
        found = [o for o in self.items if o.selected and not o.disabled]
        if found:
            # Ensure only one item selected.  Choose the last one,
            # following IE and Firefox.
            for o in found[:-1]:
                o.selected = False
        elif self._select_default:
            for o in self.items:
                if not o.disabled:
                    o.selected = True
                    break

    def get_labels(self):
        """Return an empty list: the control as a whole has no labels."""
        return []
2283
class CheckboxControl(ListControl):
    """
    Covers:

    INPUT/CHECKBOX

    """
    def __init__(self, type, name, attrs, select_default=False, index=None):
        """Build a multiple-selection control holding one item.

        attrs is the HTML attributes mapping of the INPUT element.  Note that
        it is modified in place: a missing "value" is filled in as "on".  The
        item starts out selected if attrs has a "checked" key.

        """
        attrs.setdefault("value", "on")
        ListControl.__init__(self, type, name, attrs, select_default,
                             called_as_base_class=True, index=index)
        # "multiple" is read-only through __setattr__
        self.__dict__["multiple"] = True
        o = Item(self, attrs, index)
        o.__dict__["_selected"] = "checked" in attrs

    def get_labels(self):
        """Return an empty list: the control as a whole has no labels."""
        return []
2301
2302
class SelectControl(ListControl):
    """
    Covers:

    SELECT (and OPTION)


    OPTION 'values', in HTML parlance, are Item 'names' in ClientForm parlance.

    SELECT control values and labels are subject to some messy defaulting
    rules.  For example, if the HTML representation of the control is:

    <SELECT name=year>
      <OPTION value=0 label="2002">current year</OPTION>
      <OPTION value=1>2001</OPTION>
      <OPTION>2000</OPTION>
    </SELECT>

    The items, in order, have labels "2002", "2001" and "2000", whereas their
    names (the OPTION values) are "0", "1" and "2000" respectively.  Note that
    the value of the last OPTION in this example defaults to its contents, as
    specified by RFC 1866, as do the labels of the second and third OPTIONs.

    The OPTION labels are sometimes more meaningful than the OPTION values,
    which can make for more maintainable code.

    Additional read-only public attribute: attrs

    The attrs attribute is a dictionary of the original HTML attributes of the
    SELECT element.  Other ListControls do not have this attribute, because in
    other cases the control as a whole does not correspond to any single HTML
    element.  control.get(...).attrs may be used as usual to get at the HTML
    attributes of the HTML elements corresponding to individual list items (for
    SELECT controls, these are OPTION elements).

    Another special case is that the Item.attrs dictionaries have a special key
    "contents" which does not correspond to any real HTML attribute, but rather
    contains the contents of the OPTION element:

    <OPTION>this bit</OPTION>

    """
    # HTML attributes here are treated slightly differently from other list
    # controls:
    # -The SELECT HTML attributes dictionary is stuffed into the OPTION
    #  HTML attributes dictionary under the "__select" key.
    # -The content of each OPTION element is stored under the special
    #  "contents" key of the dictionary.
    # After all this, the dictionary is passed to the SelectControl constructor
    # as the attrs argument, as usual.  However:
    # -The first SelectControl constructed when building up a SELECT control
    #  has a constructor attrs argument containing only the __select key -- so
    #  this SelectControl represents an empty SELECT control.
    # -Subsequent SelectControls have both OPTION HTML-attribute in attrs and
    #  the __select dictionary containing the SELECT HTML-attributes.

    def __init__(self, type, name, attrs, select_default=False, index=None):
        """Build a SELECT control from one OPTION's attributes.

        attrs: the OPTION HTML attributes, plus the SELECT attributes under
         the "__select" key and the OPTION contents under "contents" (see the
         comment above).  The mapping passed in is not modified.

        An item is only created if attrs has a "value" key.

        """
        # fish out the SELECT HTML attributes from the OPTION HTML attributes
        # dictionary
        self.attrs = attrs["__select"].copy()
        self.__dict__["_label"] = _get_label(self.attrs)
        self.__dict__["id"] = self.attrs.get("id")
        self.__dict__["multiple"] = "multiple" in self.attrs
        # the majority of the contents, label, and value dance already happened
        contents = attrs.get("contents")
        attrs = attrs.copy()
        del attrs["__select"]

        ListControl.__init__(self, type, name, self.attrs, select_default,
                             called_as_base_class=True, index=index)
        self.disabled = "disabled" in self.attrs
        self.readonly = "readonly" in self.attrs
        if "value" in attrs:
            # otherwise it is a marker 'select started' token
            o = Item(self, attrs, index)
            o.__dict__["_selected"] = "selected" in attrs
            # add 'label' label and contents label, if different.  If both are
            # provided, the 'label' label is used for display in HTML
            # 4.0-compliant browsers (and any lower spec? not sure) while the
            # contents are used for display in older or less-compliant
            # browsers.  We make label objects for both, if the values are
            # different.
            label = attrs.get("label")
            if label:
                o._labels.append(Label({"__text": label}))
                if contents and contents != label:
                    o._labels.append(Label({"__text": contents}))
            elif contents:
                o._labels.append(Label({"__text": contents}))

    def fixup(self):
        """Settle the default selection once all items have been merged in.

        If nothing is selected, the first non-disabled item is selected for
        single-selection controls (and for multiple-selection ones when
        _select_default is set).  If a single-selection control has several
        items selected, only the last one is kept.

        """
        ListControl.fixup(self)
        # Firefox doesn't exclude disabled items from those considered here
        # (i.e. from 'found', for both branches of the if below).  Note that
        # IE6 doesn't support the disabled attribute on OPTIONs at all.
        found = [o for o in self.items if o.selected]
        if not found:
            if not self.multiple or self._select_default:
                for o in self.items:
                    if not o.disabled:
                        # Selecting an item is refused while the control is
                        # disabled, so lift the control's disabled state for
                        # the duration of the assignment and then put it
                        # back on the control (not on the item).
                        was_disabled = self.disabled
                        self.disabled = False
                        try:
                            o.selected = True
                        finally:
                            self.disabled = was_disabled
                        break
        elif not self.multiple:
            # Ensure only one item selected.  Choose the last one,
            # following IE and Firefox.
            for o in found[:-1]:
                o.selected = False
2415
2416
2417#---------------------------------------------------
class SubmitControl(ScalarControl):
    """
    Covers:

    INPUT/SUBMIT
    BUTTON/SUBMIT

    """
    def __init__(self, type, name, attrs, index=None):
        """Initialise as a ScalarControl, default the value to "" and mark
        the control readonly."""
        ScalarControl.__init__(self, type, name, attrs, index)
        # IE5 defaults SUBMIT value to "Submit Query"; Firebird 0.6 leaves it
        # blank, Konqueror 3.1 defaults to "Submit".  HTML spec. doesn't seem
        # to define this.
        if self.value is None: self.value = ""
        self.readonly = True

    def get_labels(self):
        """Return the control's labels, led by one made from its value.

        The value-derived Label is only included when the value is non-empty;
        the labels reported by ScalarControl.get_labels() follow it.

        """
        res = []
        if self.value:
            res.append(Label({"__text": self.value}))
        res.extend(ScalarControl.get_labels(self))
        return res

    def is_of_kind(self, kind): return kind == "clickable"

    def _click(self, form, coord, return_type, request_class=urllib2.Request):
        """Mark this control as clicked at coord while form._switch_click()
        runs, and return that call's result.

        The clicked marker is cleared again even if _switch_click() raises,
        so a failed click cannot leave the control looking clicked.

        """
        self._clicked = coord
        try:
            return form._switch_click(return_type, request_class)
        finally:
            self._clicked = False

    def _totally_ordered_pairs(self):
        """Return the ScalarControl pairs, but only while being clicked."""
        if not self._clicked:
            return []
        return ScalarControl._totally_ordered_pairs(self)
2453
2454
2455#---------------------------------------------------
class ImageControl(SubmitControl):
    """
    Covers:

    INPUT/IMAGE

    Coordinates are specified using one of the HTMLForm.click* methods.

    """
    def __init__(self, type, name, attrs, index=None):
        SubmitControl.__init__(self, type, name, attrs, index)
        # SubmitControl.__init__ sets readonly to True; undo that here.
        self.readonly = False

    def _totally_ordered_pairs(self):
        """Return (index, key, value) triples for a clicked image control.

        Nothing is returned when the control is disabled, has not been
        clicked, or has no name.  Otherwise "<name>.x" and "<name>.y" carry
        the click coordinates, followed by the plain name/value pair when the
        control has a non-empty value.

        """
        coord = self._clicked
        if self.disabled or not coord:
            return []
        name = self.name
        if name is None:
            return []
        first = self._index
        triples = [
            (first, "%s.x" % name, str(coord[0])),
            (first+1, "%s.y" % name, str(coord[1])),
            ]
        own_value = self._value
        if own_value:
            triples.append((first+2, name, own_value))
        return triples

    # Skip SubmitControl.get_labels: use the ScalarControl version directly.
    get_labels = ScalarControl.get_labels
2485
2486# aliases, just to make str(control) and str(form) clearer
class PasswordControl(TextControl):
    """TextControl alias registered for the "password" control type."""
class HiddenControl(TextControl):
    """TextControl alias registered for the "hidden" control type."""
class TextareaControl(TextControl):
    """TextControl alias registered for the "textarea" control type."""
class SubmitButtonControl(SubmitControl):
    """SubmitControl alias registered for the "submitbutton" control type."""
2491
2492
def is_listcontrol(control):
    """Return whether control reports itself as being of kind "list"."""
    return control.is_of_kind("list")
2494
2495
2496class HTMLForm:
2497    """Represents a single HTML <form> ... </form> element.
2498
2499    A form consists of a sequence of controls that usually have names, and
2500    which can take on various values.  The values of the various types of
2501    controls represent variously: text, zero-or-one-of-many or many-of-many
2502    choices, and files to be uploaded.  Some controls can be clicked on to
2503    submit the form, and clickable controls' values sometimes include the
2504    coordinates of the click.
2505
2506    Forms can be filled in with data to be returned to the server, and then
2507    submitted, using the click method to generate a request object suitable for
2508    passing to urllib2.urlopen (or the click_request_data or click_pairs
2509    methods if you're not using urllib2).
2510
2511    import ClientForm
2512    forms = ClientForm.ParseFile(html, base_uri)
2513    form = forms[0]
2514
2515    form["query"] = "Python"
2516    form.find_control("nr_results").get("lots").selected = True
2517
2518    response = urllib2.urlopen(form.click())
2519
2520    Usually, HTMLForm instances are not created directly.  Instead, the
2521    ParseFile or ParseResponse factory functions are used.  If you do construct
2522    HTMLForm objects yourself, however, note that an HTMLForm instance is only
2523    properly initialised after the fixup method has been called (ParseFile and
2524    ParseResponse do this for you).  See ListControl.__doc__ for the reason
2525    this is required.
2526
2527    Indexing a form (form["control_name"]) returns the named Control's value
2528    attribute.  Assignment to a form index (form["control_name"] = something)
2529    is equivalent to assignment to the named Control's value attribute.  If you
2530    need to be more specific than just supplying the control's name, use the
2531    set_value and get_value methods.
2532
2533    ListControl values are lists of item names (specifically, the names of the
2534    items that are selected and not disabled, and hence are "successful" -- ie.
2535    cause data to be returned to the server).  The list item's name is the
    value of the corresponding HTML element's "value" attribute.
2537
2538    Example:
2539
2540      <INPUT type="CHECKBOX" name="cheeses" value="leicester"></INPUT>
2541      <INPUT type="CHECKBOX" name="cheeses" value="cheddar"></INPUT>
2542
2543    defines a CHECKBOX control with name "cheeses" which has two items, named
2544    "leicester" and "cheddar".
2545
2546    Another example:
2547
2548      <SELECT name="more_cheeses">
2549        <OPTION>1</OPTION>
2550        <OPTION value="2" label="CHEDDAR">cheddar</OPTION>
2551      </SELECT>
2552
2553    defines a SELECT control with name "more_cheeses" which has two items,
2554    named "1" and "2" (because the OPTION element's value HTML attribute
2555    defaults to the element contents -- see SelectControl.__doc__ for more on
2556    these defaulting rules).
2557
2558    To select, deselect or otherwise manipulate individual list items, use the
2559    HTMLForm.find_control() and ListControl.get() methods.  To set the whole
2560    value, do as for any other control: use indexing or the set_/get_value
2561    methods.
2562
2563    Example:
2564
2565    # select *only* the item named "cheddar"
2566    form["cheeses"] = ["cheddar"]
2567    # select "cheddar", leave other items unaffected
2568    form.find_control("cheeses").get("cheddar").selected = True
2569
2570    Some controls (RADIO and SELECT without the multiple attribute) can only
2571    have zero or one items selected at a time.  Some controls (CHECKBOX and
2572    SELECT with the multiple attribute) can have multiple items selected at a
2573    time.  To set the whole value of a ListControl, assign a sequence to a form
2574    index:
2575
2576    form["cheeses"] = ["cheddar", "leicester"]
2577
2578    If the ListControl is not multiple-selection, the assigned list must be of
2579    length one.
2580
2581    To check if a control has an item, if an item is selected, or if an item is
2582    successful (selected and not disabled), respectively:
2583
2584    "cheddar" in [item.name for item in form.find_control("cheeses").items]
    "cheddar" in [item.name for item in form.find_control("cheeses").items
                  if item.selected]
2587    "cheddar" in form["cheeses"]  # (or "cheddar" in form.get_value("cheeses"))
2588
2589    Note that some list items may be disabled (see below).
2590
2591    Note the following mistake:
2592
2593    form[control_name] = control_value
2594    assert form[control_name] == control_value  # not necessarily true
2595
2596    The reason for this is that form[control_name] always gives the list items
2597    in the order they were listed in the HTML.
2598
2599    List items (hence list values, too) can be referred to in terms of list
2600    item labels rather than list item names using the appropriate label
2601    arguments.  Note that each item may have several labels.
2602
2603    The question of default values of OPTION contents, labels and values is
2604    somewhat complicated: see SelectControl.__doc__ and
2605    ListControl.get_item_attrs.__doc__ if you think you need to know.
2606
2607    Controls can be disabled or readonly.  In either case, the control's value
2608    cannot be changed until you clear those flags (see example below).
2609    Disabled is the state typically represented by browsers by 'greying out' a
2610    control.  Disabled controls are not 'successful' -- they don't cause data
2611    to get returned to the server.  Readonly controls usually appear in
2612    browsers as read-only text boxes.  Readonly controls are successful.  List
2613    items can also be disabled.  Attempts to select or deselect disabled items
2614    fail with AttributeError.
2615
2616    If a lot of controls are readonly, it can be useful to do this:
2617
2618    form.set_all_readonly(False)
2619
2620    To clear a control's value attribute, so that it is not successful (until a
2621    value is subsequently set):
2622
2623    form.clear("cheeses")
2624
2625    More examples:
2626
2627    control = form.find_control("cheeses")
2628    control.disabled = False
2629    control.readonly = False
2630    control.get("gruyere").disabled = True
2631    control.items[0].selected = True
2632
2633    See the various Control classes for further documentation.  Many methods
2634    take name, type, kind, id, label and nr arguments to specify the control to
2635    be operated on: see HTMLForm.find_control.__doc__.
2636
2637    ControlNotFoundError (subclass of ValueError) is raised if the specified
2638    control can't be found.  This includes occasions where a non-ListControl
2639    is found, but the method (set, for example) requires a ListControl.
2640    ItemNotFoundError (subclass of ValueError) is raised if a list item can't
2641    be found.  ItemCountError (subclass of ValueError) is raised if an attempt
2642    is made to select more than one item and the control doesn't allow that, or
2643    set/get_single are called and the control contains more than one item.
2644    AttributeError is raised if a control or item is readonly or disabled and
2645    an attempt is made to alter its value.
2646
2647    Security note: Remember that any passwords you store in HTMLForm instances
2648    will be saved to disk in the clear if you pickle them (directly or
2649    indirectly).  The simplest solution to this is to avoid pickling HTMLForm
2650    objects.  You could also pickle before filling in any password, or just set
2651    the password to "" before pickling.
2652
2653
2654    Public attributes:
2655
2656    action: full (absolute URI) form action
2657    method: "GET" or "POST"
2658    enctype: form transfer encoding MIME type
2659    name: name of form (None if no name was specified)
2660    attrs: dictionary mapping original HTML form attributes to their values
2661
2662    controls: list of Control instances; do not alter this list
2663     (instead, call form.new_control to make a Control and add it to the
2664     form, or control.add_to_form if you already have a Control instance)
2665
2666
2667
2668    Methods for form filling:
2669    -------------------------
2670
    Most of these methods have very similar arguments.  See
2672    HTMLForm.find_control.__doc__ for details of the name, type, kind, label
2673    and nr arguments.
2674
2675    def find_control(self,
2676                     name=None, type=None, kind=None, id=None, predicate=None,
2677                     nr=None, label=None)
2678
2679    get_value(name=None, type=None, kind=None, id=None, nr=None,
2680              by_label=False,  # by_label is deprecated
2681              label=None)
2682    set_value(value,
2683              name=None, type=None, kind=None, id=None, nr=None,
2684              by_label=False,  # by_label is deprecated
2685              label=None)
2686
2687    clear_all()
2688    clear(name=None, type=None, kind=None, id=None, nr=None, label=None)
2689
2690    set_all_readonly(readonly)
2691
2692
2693    Method applying only to FileControls:
2694
2695    add_file(file_object,
2696             content_type="application/octet-stream", filename=None,
2697             name=None, id=None, nr=None, label=None)
2698
2699
2700    Methods applying only to clickable controls:
2701
2702    click(name=None, type=None, id=None, nr=0, coord=(1,1), label=None)
2703    click_request_data(name=None, type=None, id=None, nr=0, coord=(1,1),
2704                       label=None)
2705    click_pairs(name=None, type=None, id=None, nr=0, coord=(1,1), label=None)
2706
2707    """
2708
    # Maps a lower-cased control type name to the Control class that
    # new_control instantiates for it.  Types missing from this table fall
    # back to IgnoreControl or TextControl, depending on new_control's
    # ignore_unknown argument.
    type2class = {
        "text": TextControl,
        "password": PasswordControl,
        "hidden": HiddenControl,
        "textarea": TextareaControl,

        "isindex": IsindexControl,

        "file": FileControl,

        "button": IgnoreControl,
        "buttonbutton": IgnoreControl,
        "reset": IgnoreControl,
        "resetbutton": IgnoreControl,

        "submit": SubmitControl,
        "submitbutton": SubmitButtonControl,
        "image": ImageControl,

        "radio": RadioControl,
        "checkbox": CheckboxControl,
        "select": SelectControl,
        }
2732
2733#---------------------------------------------------
2734# Initialisation.  Use ParseResponse / ParseFile instead.
2735
2736    def __init__(self, action, method="GET",
2737                 enctype="application/x-www-form-urlencoded",
2738                 name=None, attrs=None,
2739                 request_class=urllib2.Request,
2740                 forms=None, labels=None, id_to_labels=None,
2741                 backwards_compat=True):
2742        """
2743        In the usual case, use ParseResponse (or ParseFile) to create new
2744        HTMLForm objects.
2745
2746        action: full (absolute URI) form action
2747        method: "GET" or "POST"
2748        enctype: form transfer encoding MIME type
2749        name: name of form
2750        attrs: dictionary mapping original HTML form attributes to their values
2751
2752        """
2753        self.action = action
2754        self.method = method
2755        self.enctype = enctype
2756        self.name = name
2757        if attrs is not None:
2758            self.attrs = attrs.copy()
2759        else:
2760            self.attrs = {}
2761        self.controls = []
2762        self._request_class = request_class
2763
2764        # these attributes are used by zope.testbrowser
2765        self._forms = forms  # this is a semi-public API!
2766        self._labels = labels  # this is a semi-public API!
2767        self._id_to_labels = id_to_labels  # this is a semi-public API!
2768
2769        self.backwards_compat = backwards_compat  # note __setattr__
2770
2771        self._urlunparse = urlparse.urlunparse
2772        self._urlparse = urlparse.urlparse
2773
2774    def __getattr__(self, name):
2775        if name == "backwards_compat":
2776            return self._backwards_compat
2777        return getattr(HTMLForm, name)
2778
2779    def __setattr__(self, name, value):
2780        # yuck
2781        if name == "backwards_compat":
2782            name = "_backwards_compat"
2783            value = bool(value)
2784            for cc in self.controls:
2785                try:
2786                    items = cc.items
2787                except AttributeError:
2788                    continue
2789                else:
2790                    for ii in items:
2791                        for ll in ii.get_labels():
2792                            ll._backwards_compat = value
2793        self.__dict__[name] = value
2794
2795    def new_control(self, type, name, attrs,
2796                    ignore_unknown=False, select_default=False, index=None):
2797        """Adds a new control to the form.
2798
2799        This is usually called by ParseFile and ParseResponse.  Don't call it
2800        youself unless you're building your own Control instances.
2801
2802        Note that controls representing lists of items are built up from
2803        controls holding only a single list item.  See ListControl.__doc__ for
2804        further information.
2805
2806        type: type of control (see Control.__doc__ for a list)
2807        attrs: HTML attributes of control
2808        ignore_unknown: if true, use a dummy Control instance for controls of
2809         unknown type; otherwise, use a TextControl
2810        select_default: for RADIO and multiple-selection SELECT controls, pick
2811         the first item as the default if no 'selected' HTML attribute is
2812         present (this defaulting happens when the HTMLForm.fixup method is
2813         called)
2814        index: index of corresponding element in HTML (see
2815         MoreFormTests.test_interspersed_controls for motivation)
2816
2817        """
2818        type = type.lower()
2819        klass = self.type2class.get(type)
2820        if klass is None:
2821            if ignore_unknown:
2822                klass = IgnoreControl
2823            else:
2824                klass = TextControl
2825
2826        a = attrs.copy()
2827        if issubclass(klass, ListControl):
2828            control = klass(type, name, a, select_default, index)
2829        else:
2830            control = klass(type, name, a, index)
2831        control.add_to_form(self)
2832        control._urlparse = self._urlparse
2833        control._urlunparse = self._urlunparse
2834
2835    def fixup(self):
2836        """Normalise form after all controls have been added.
2837
2838        This is usually called by ParseFile and ParseResponse.  Don't call it
2839        youself unless you're building your own Control instances.
2840
2841        This method should only be called once, after all controls have been
2842        added to the form.
2843
2844        """
2845        for control in self.controls:
2846            control.fixup()
2847        self.backwards_compat = self._backwards_compat
2848
2849#---------------------------------------------------
2850    def __str__(self):
2851        header = "%s%s %s %s" % (
2852            (self.name and self.name+" " or ""),
2853            self.method, self.action, self.enctype)
2854        rep = [header]
2855        for control in self.controls:
2856            rep.append("  %s" % str(control))
2857        return "<%s>" % "\n".join(rep)
2858
2859#---------------------------------------------------
2860# Form-filling methods.
2861
2862    def __getitem__(self, name):
2863        return self.find_control(name).value
2864    def __contains__(self, name):
2865        return bool(self.find_control(name))
2866    def __setitem__(self, name, value):
2867        control = self.find_control(name)
2868        try:
2869            control.value = value
2870        except AttributeError, e:
2871            raise ValueError(str(e))
2872
2873    def get_value(self,
2874                  name=None, type=None, kind=None, id=None, nr=None,
2875                  by_label=False,  # by_label is deprecated
2876                  label=None):
2877        """Return value of control.
2878
2879        If only name and value arguments are supplied, equivalent to
2880
2881        form[name]
2882
2883        """
2884        if by_label:
2885            deprecation("form.get_value_by_label(...)")
2886        c = self.find_control(name, type, kind, id, label=label, nr=nr)
2887        if by_label:
2888            try:
2889                meth = c.get_value_by_label
2890            except AttributeError:
2891                raise NotImplementedError(
2892                    "control '%s' does not yet support by_label" % c.name)
2893            else:
2894                return meth()
2895        else:
2896            return c.value
2897    def set_value(self, value,
2898                  name=None, type=None, kind=None, id=None, nr=None,
2899                  by_label=False,  # by_label is deprecated
2900                  label=None):
2901        """Set value of control.
2902
2903        If only name and value arguments are supplied, equivalent to
2904
2905        form[name] = value
2906
2907        """
2908        if by_label:
2909            deprecation("form.get_value_by_label(...)")
2910        c = self.find_control(name, type, kind, id, label=label, nr=nr)
2911        if by_label:
2912            try:
2913                meth = c.set_value_by_label
2914            except AttributeError:
2915                raise NotImplementedError(
2916                    "control '%s' does not yet support by_label" % c.name)
2917            else:
2918                meth(value)
2919        else:
2920            c.value = value
2921    def get_value_by_label(
2922        self, name=None, type=None, kind=None, id=None, label=None, nr=None):
2923        """
2924
2925        All arguments should be passed by name.
2926
2927        """
2928        c = self.find_control(name, type, kind, id, label=label, nr=nr)
2929        return c.get_value_by_label()
2930
2931    def set_value_by_label(
2932        self, value,
2933        name=None, type=None, kind=None, id=None, label=None, nr=None):
2934        """
2935
2936        All arguments should be passed by name.
2937
2938        """
2939        c = self.find_control(name, type, kind, id, label=label, nr=nr)
2940        c.set_value_by_label(value)
2941
2942    def set_all_readonly(self, readonly):
2943        for control in self.controls:
2944            control.readonly = bool(readonly)
2945
2946    def clear_all(self):
2947        """Clear the value attributes of all controls in the form.
2948
2949        See HTMLForm.clear.__doc__.
2950
2951        """
2952        for control in self.controls:
2953            control.clear()
2954
2955    def clear(self,
2956              name=None, type=None, kind=None, id=None, nr=None, label=None):
2957        """Clear the value attribute of a control.
2958
2959        As a result, the affected control will not be successful until a value
2960        is subsequently set.  AttributeError is raised on readonly controls.
2961
2962        """
2963        c = self.find_control(name, type, kind, id, label=label, nr=nr)
2964        c.clear()
2965
2966
2967#---------------------------------------------------
2968# Form-filling methods applying only to ListControls.
2969
2970    def possible_items(self,  # deprecated
2971                       name=None, type=None, kind=None, id=None,
2972                       nr=None, by_label=False, label=None):
2973        """Return a list of all values that the specified control can take."""
2974        c = self._find_list_control(name, type, kind, id, label, nr)
2975        return c.possible_items(by_label)
2976
2977    def set(self, selected, item_name,  # deprecated
2978            name=None, type=None, kind=None, id=None, nr=None,
2979            by_label=False, label=None):
2980        """Select / deselect named list item.
2981
2982        selected: boolean selected state
2983
2984        """
2985        self._find_list_control(name, type, kind, id, label, nr).set(
2986            selected, item_name, by_label)
2987    def toggle(self, item_name,  # deprecated
2988               name=None, type=None, kind=None, id=None, nr=None,
2989               by_label=False, label=None):
2990        """Toggle selected state of named list item."""
2991        self._find_list_control(name, type, kind, id, label, nr).toggle(
2992            item_name, by_label)
2993
2994    def set_single(self, selected,  # deprecated
2995                   name=None, type=None, kind=None, id=None,
2996                   nr=None, by_label=None, label=None):
2997        """Select / deselect list item in a control having only one item.
2998
2999        If the control has multiple list items, ItemCountError is raised.
3000
3001        This is just a convenience method, so you don't need to know the item's
3002        name -- the item name in these single-item controls is usually
3003        something meaningless like "1" or "on".
3004
3005        For example, if a checkbox has a single item named "on", the following
3006        two calls are equivalent:
3007
3008        control.toggle("on")
3009        control.toggle_single()
3010
3011        """  # by_label ignored and deprecated
3012        self._find_list_control(
3013            name, type, kind, id, label, nr).set_single(selected)
3014    def toggle_single(self, name=None, type=None, kind=None, id=None,
3015                      nr=None, by_label=None, label=None):  # deprecated
3016        """Toggle selected state of list item in control having only one item.
3017
3018        The rest is as for HTMLForm.set_single.__doc__.
3019
3020        """  # by_label ignored and deprecated
3021        self._find_list_control(name, type, kind, id, label, nr).toggle_single()
3022
3023#---------------------------------------------------
3024# Form-filling method applying only to FileControls.
3025
3026    def add_file(self, file_object, content_type=None, filename=None,
3027                 name=None, id=None, nr=None, label=None):
3028        """Add a file to be uploaded.
3029
3030        file_object: file-like object (with read method) from which to read
3031         data to upload
3032        content_type: MIME content type of data to upload
3033        filename: filename to pass to server
3034
3035        If filename is None, no filename is sent to the server.
3036
3037        If content_type is None, the content type is guessed based on the
3038        filename and the data from read from the file object.
3039
3040        XXX
3041        At the moment, guessed content type is always application/octet-stream.
3042        Use sndhdr, imghdr modules.  Should also try to guess HTML, XML, and
3043        plain text.
3044
3045        Note the following useful HTML attributes of file upload controls (see
3046        HTML 4.01 spec, section 17):
3047
3048        accept: comma-separated list of content types that the server will
3049         handle correctly; you can use this to filter out non-conforming files
3050        size: XXX IIRC, this is indicative of whether form wants multiple or
3051         single files
3052        maxlength: XXX hint of max content length in bytes?
3053
3054        """
3055        self.find_control(name, "file", id=id, label=label, nr=nr).add_file(
3056            file_object, content_type, filename)
3057
3058#---------------------------------------------------
3059# Form submission methods, applying only to clickable controls.
3060
3061    def click(self, name=None, type=None, id=None, nr=0, coord=(1,1),
3062              request_class=urllib2.Request,
3063              label=None):
3064        """Return request that would result from clicking on a control.
3065
3066        The request object is a urllib2.Request instance, which you can pass to
3067        urllib2.urlopen (or ClientCookie.urlopen).
3068
3069        Only some control types (INPUT/SUBMIT & BUTTON/SUBMIT buttons and
3070        IMAGEs) can be clicked.
3071
3072        Will click on the first clickable control, subject to the name, type
3073        and nr arguments (as for find_control).  If no name, type, id or number
3074        is specified and there are no clickable controls, a request will be
3075        returned for the form in its current, un-clicked, state.
3076
3077        IndexError is raised if any of name, type, id or nr is specified but no
3078        matching control is found.  ValueError is raised if the HTMLForm has an
3079        enctype attribute that is not recognised.
3080
3081        You can optionally specify a coordinate to click at, which only makes a
3082        difference if you clicked on an image.
3083
3084        """
3085        return self._click(name, type, id, label, nr, coord, "request",
3086                           self._request_class)
3087
3088    def click_request_data(self,
3089                           name=None, type=None, id=None,
3090                           nr=0, coord=(1,1),
3091                           request_class=urllib2.Request,
3092                           label=None):
3093        """As for click method, but return a tuple (url, data, headers).
3094
3095        You can use this data to send a request to the server.  This is useful
3096        if you're using httplib or urllib rather than urllib2.  Otherwise, use
3097        the click method.
3098
3099        # Untested.  Have to subclass to add headers, I think -- so use urllib2
3100        # instead!
3101        import urllib
3102        url, data, hdrs = form.click_request_data()
3103        r = urllib.urlopen(url, data)
3104
3105        # Untested.  I don't know of any reason to use httplib -- you can get
3106        # just as much control with urllib2.
3107        import httplib, urlparse
3108        url, data, hdrs = form.click_request_data()
3109        tup = urlparse(url)
3110        host, path = tup[1], urlparse.urlunparse((None, None)+tup[2:])
3111        conn = httplib.HTTPConnection(host)
3112        if data:
3113            httplib.request("POST", path, data, hdrs)
3114        else:
3115            httplib.request("GET", path, headers=hdrs)
3116        r = conn.getresponse()
3117
3118        """
3119        return self._click(name, type, id, label, nr, coord, "request_data",
3120                           self._request_class)
3121
3122    def click_pairs(self, name=None, type=None, id=None,
3123                    nr=0, coord=(1,1),
3124                    label=None):
3125        """As for click_request_data, but returns a list of (key, value) pairs.
3126
3127        You can use this list as an argument to ClientForm.urlencode.  This is
3128        usually only useful if you're using httplib or urllib rather than
3129        urllib2 or ClientCookie.  It may also be useful if you want to manually
3130        tweak the keys and/or values, but this should not be necessary.
3131        Otherwise, use the click method.
3132
3133        Note that this method is only useful for forms of MIME type
3134        x-www-form-urlencoded.  In particular, it does not return the
3135        information required for file upload.  If you need file upload and are
3136        not using urllib2, use click_request_data.
3137
3138        Also note that Python 2.0's urllib.urlencode is slightly broken: it
3139        only accepts a mapping, not a sequence of pairs, as an argument.  This
3140        messes up any ordering in the argument.  Use ClientForm.urlencode
3141        instead.
3142
3143        """
3144        return self._click(name, type, id, label, nr, coord, "pairs",
3145                           self._request_class)
3146
3147#---------------------------------------------------
3148
3149    def find_control(self,
3150                     name=None, type=None, kind=None, id=None,
3151                     predicate=None, nr=None,
3152                     label=None):
3153        """Locate and return some specific control within the form.
3154
3155        At least one of the name, type, kind, predicate and nr arguments must
3156        be supplied.  If no matching control is found, ControlNotFoundError is
3157        raised.
3158
3159        If name is specified, then the control must have the indicated name.
3160
3161        If type is specified then the control must have the specified type (in
3162        addition to the types possible for <input> HTML tags: "text",
3163        "password", "hidden", "submit", "image", "button", "radio", "checkbox",
3164        "file" we also have "reset", "buttonbutton", "submitbutton",
3165        "resetbutton", "textarea", "select" and "isindex").
3166
3167        If kind is specified, then the control must fall into the specified
3168        group, each of which satisfies a particular interface.  The types are
3169        "text", "list", "multilist", "singlelist", "clickable" and "file".
3170
3171        If id is specified, then the control must have the indicated id.
3172
3173        If predicate is specified, then the control must match that function.
3174        The predicate function is passed the control as its single argument,
3175        and should return a boolean value indicating whether the control
3176        matched.
3177
3178        nr, if supplied, is the sequence number of the control (where 0 is the
3179        first).  Note that control 0 is the first control matching all the
3180        other arguments (if supplied); it is not necessarily the first control
3181        in the form.  If no nr is supplied, AmbiguityError is raised if
3182        multiple controls match the other arguments (unless the
3183        .backwards-compat attribute is true).
3184
3185        If label is specified, then the control must have this label.  Note
3186        that radio controls and checkboxes never have labels: their items do.
3187
3188        """
3189        if ((name is None) and (type is None) and (kind is None) and
3190            (id is None) and (label is None) and (predicate is None) and
3191            (nr is None)):
3192            raise ValueError(
3193                "at least one argument must be supplied to specify control")
3194        return self._find_control(name, type, kind, id, label, predicate, nr)
3195
3196#---------------------------------------------------
3197# Private methods.
3198
3199    def _find_list_control(self,
3200                           name=None, type=None, kind=None, id=None,
3201                           label=None, nr=None):
3202        if ((name is None) and (type is None) and (kind is None) and
3203            (id is None) and (label is None) and (nr is None)):
3204            raise ValueError(
3205                "at least one argument must be supplied to specify control")
3206
3207        return self._find_control(name, type, kind, id, label,
3208                                  is_listcontrol, nr)
3209
3210    def _find_control(self, name, type, kind, id, label, predicate, nr):
3211        if ((name is not None) and (name is not Missing) and
3212            not isstringlike(name)):
3213            raise TypeError("control name must be string-like")
3214        if (type is not None) and not isstringlike(type):
3215            raise TypeError("control type must be string-like")
3216        if (kind is not None) and not isstringlike(kind):
3217            raise TypeError("control kind must be string-like")
3218        if (id is not None) and not isstringlike(id):
3219            raise TypeError("control id must be string-like")
3220        if (label is not None) and not isstringlike(label):
3221            raise TypeError("control label must be string-like")
3222        if (predicate is not None) and not callable(predicate):
3223            raise TypeError("control predicate must be callable")
3224        if (nr is not None) and nr < 0:
3225            raise ValueError("control number must be a positive integer")
3226
3227        orig_nr = nr
3228        found = None
3229        ambiguous = False
3230        if nr is None and self.backwards_compat:
3231            nr = 0
3232
3233        for control in self.controls:
3234            if ((name is not None and name != control.name) and
3235                (name is not Missing or control.name is not None)):
3236                continue
3237            if type is not None and type != control.type:
3238                continue
3239            if kind is not None and not control.is_of_kind(kind):
3240                continue
3241            if id is not None and id != control.id:
3242                continue
3243            if predicate and not predicate(control):
3244                continue
3245            if label:
3246                for l in control.get_labels():
3247                    if l.text.find(label) > -1:
3248                        break
3249                else:
3250                    continue
3251            if nr is not None:
3252                if nr == 0:
3253                    return control  # early exit: unambiguous due to nr
3254                nr -= 1
3255                continue
3256            if found:
3257                ambiguous = True
3258                break
3259            found = control
3260
3261        if found and not ambiguous:
3262            return found
3263
3264        description = []
3265        if name is not None: description.append("name %s" % repr(name))
3266        if type is not None: description.append("type '%s'" % type)
3267        if kind is not None: description.append("kind '%s'" % kind)
3268        if id is not None: description.append("id '%s'" % id)
3269        if label is not None: description.append("label '%s'" % label)
3270        if predicate is not None:
3271            description.append("predicate %s" % predicate)
3272        if orig_nr: description.append("nr %d" % orig_nr)
3273        description = ", ".join(description)
3274
3275        if ambiguous:
3276            raise AmbiguityError("more than one control matching "+description)
3277        elif not found:
3278            raise ControlNotFoundError("no control matching "+description)
3279        assert False
3280
3281    def _click(self, name, type, id, label, nr, coord, return_type,
3282               request_class=urllib2.Request):
3283        try:
3284            control = self._find_control(
3285                name, type, "clickable", id, label, None, nr)
3286        except ControlNotFoundError:
3287            if ((name is not None) or (type is not None) or (id is not None) or
3288                (nr != 0)):
3289                raise
3290            # no clickable controls, but no control was explicitly requested,
3291            # so return state without clicking any control
3292            return self._switch_click(return_type, request_class)
3293        else:
3294            return control._click(self, coord, return_type, request_class)
3295
3296    def _pairs(self):
3297        """Return sequence of (key, value) pairs suitable for urlencoding."""
3298        return [(k, v) for (i, k, v, c_i) in self._pairs_and_controls()]
3299
3300
3301    def _pairs_and_controls(self):
3302        """Return sequence of (index, key, value, control_index)
3303        of totally ordered pairs suitable for urlencoding.
3304
3305        control_index is the index of the control in self.controls
3306        """
3307        pairs = []
3308        for control_index in range(len(self.controls)):
3309            control = self.controls[control_index]
3310            for ii, key, val in control._totally_ordered_pairs():
3311                pairs.append((ii, key, val, control_index))
3312
3313        # stable sort by ONLY first item in tuple
3314        pairs.sort()
3315
3316        return pairs
3317
3318    def _request_data(self):
3319        """Return a tuple (url, data, headers)."""
3320        method = self.method.upper()
3321        #scheme, netloc, path, parameters, query, frag = urlparse.urlparse(self.action)
3322        parts = self._urlparse(self.action)
3323        rest, (query, frag) = parts[:-2], parts[-2:]
3324
3325        if method == "GET":
3326            if self.enctype != "application/x-www-form-urlencoded":
3327                raise ValueError(
3328                    "unknown GET form encoding type '%s'" % self.enctype)
3329            parts = rest + (urlencode(self._pairs()), None)
3330            uri = self._urlunparse(parts)
3331            return uri, None, []
3332        elif method == "POST":
3333            parts = rest + (query, None)
3334            uri = self._urlunparse(parts)
3335            if self.enctype == "application/x-www-form-urlencoded":
3336                return (uri, urlencode(self._pairs()),
3337                        [("Content-type", self.enctype)])
3338            elif self.enctype == "multipart/form-data":
3339                data = StringIO()
3340                http_hdrs = []
3341                mw = MimeWriter(data, http_hdrs)
3342                f = mw.startmultipartbody("form-data", add_to_http_hdrs=True,
3343                                          prefix=0)
3344                for ii, k, v, control_index in self._pairs_and_controls():
3345                    self.controls[control_index]._write_mime_data(mw, k, v)
3346                mw.lastpart()
3347                return uri, data.getvalue(), http_hdrs
3348            else:
3349                raise ValueError(
3350                    "unknown POST form encoding type '%s'" % self.enctype)
3351        else:
3352            raise ValueError("Unknown method '%s'" % method)
3353
3354    def _switch_click(self, return_type, request_class=urllib2.Request):
3355        # This is called by HTMLForm and clickable Controls to hide switching
3356        # on return_type.
3357        if return_type == "pairs":
3358            return self._pairs()
3359        elif return_type == "request_data":
3360            return self._request_data()
3361        else:
3362            req_data = self._request_data()
3363            req = request_class(req_data[0], req_data[1])
3364            for key, val in req_data[2]:
3365                add_hdr = req.add_header
3366                if key.lower() == "content-type":
3367                    try:
3368                        add_hdr = req.add_unredirected_header
3369                    except AttributeError:
3370                        # pre-2.4 and not using ClientCookie
3371                        pass
3372                add_hdr(key, val)
3373            return req
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。