"""HTML form handling for web clients.

ClientForm is a Python module for handling HTML forms on the client
side, useful for parsing HTML forms, filling them in and returning the
completed forms to the server.  It has developed from a port of Gisle
Aas' Perl module HTML::Form, from the libwww-perl library, but the
interface is not the same.

The most useful docstring is the one for HTMLForm.

RFC 1866: HTML 2.0
RFC 1867: Form-based File Upload in HTML
RFC 2388: Returning Values from Forms: multipart/form-data
HTML 3.2 Specification, W3C Recommendation 14 January 1997 (for ISINDEX)
HTML 4.01 Specification, W3C Recommendation 24 December 1999


Copyright 2002-2006 John J. Lee <jjl@pobox.com>
Copyright 2005 Gary Poster
Copyright 2005 Zope Corporation
Copyright 1998-2000 Gisle Aas.

This code is free software; you can redistribute it and/or modify it
under the terms of the BSD or ZPL 2.1 licenses (see the file
COPYING.txt included with the distribution).

"""
---|
28 | |
---|
# XXX
# add an __all__
# Remove parser testing hack
# safeUrl()-ize action
# Switch to unicode throughout (would be 0.3.x)
#  See Wichert Akkerman's 2004-01-22 message to c.l.py.
# Add charset parameter to Content-type headers?  How to find value??
# Add some more functional tests
#  Especially single and multiple file upload on the internet.
#  Does file upload work when name is missing?  Sourceforge tracker form
#   doesn't like it.  Check standards, and test with Apache.  Test
#   binary upload with Apache.
# mailto submission & enctype text/plain
# I'm not going to fix this unless somebody tells me what real servers
#  that want this encoding actually expect: If enctype is
#  application/x-www-form-urlencoded and there's a FILE control present.
#  Strictly, it should be 'name=data' (see HTML 4.01 spec., section
#  17.13.2), but I send "name=" ATM.  What about multiple file upload??

# Would be nice, but I'm not going to do it myself:
# -------------------------------------------------
# Maybe a 0.4.x?
#   Replace by_label etc. with moniker / selector concept.  Allows, eg.,
#    a choice between selection by value / id / label / element
#    contents.  Or choice between matching labels exactly or by
#    substring.  Etc.
#   Remove deprecated methods.
#   ...what else?
# Work on DOMForm.
# XForms?  Don't know if there's a need here.
---|
59 | |
---|
60 | |
---|
# Compatibility shim: bind True/False when the running interpreter does not
# already provide those names.
try: True
except NameError:
    True = 1
    False = 0

# Likewise supply a minimal bool() when the builtin is missing.
try: bool
except NameError:
    def bool(expr):
        """Return True if expr is truthy, otherwise False."""
        if expr: return True
        else: return False
---|
71 | |
---|
try:
    import logging
except ImportError:
    def debug(msg, *args, **kwds):
        """No-op stand-in used when the logging module cannot be imported."""
        pass
else:
    _logger = logging.getLogger("ClientForm")
    # While true, debug() returns before doing any work.  Cleared by
    # _show_debug_messages().
    OPTIMIZATION_HACK = True

    def debug(msg, *args, **kwds):
        """Log msg % args at DEBUG level on the "ClientForm" logger.

        The message is prefixed with the name of the function found two
        frames above this one (i.e. the caller of the function that called
        debug()).  Does nothing while OPTIMIZATION_HACK is true.

        Extra keyword arguments are passed through to Logger.debug().
        """
        if OPTIMIZATION_HACK:
            return

        # Raise-and-catch is used to get hold of the current frame.
        try:
            raise Exception()
        except Exception:
            frame = sys.exc_info()[2].tb_frame
        # Walk (up to) two frames up; stop early on a shallow stack instead
        # of failing with AttributeError on None.
        for _hop in (0, 1):
            if frame.f_back is None:
                break
            frame = frame.f_back
        caller_name = frame.f_code.co_name
        del frame  # don't keep frame objects alive longer than needed
        extended_msg = '%%s %s' % msg
        extended_args = (caller_name,)+args
        _logger.debug(extended_msg, *extended_args, **kwds)

    def _show_debug_messages():
        """Enable debug() output and send it to sys.stdout.

        Clears OPTIMIZATION_HACK, sets the "ClientForm" logger to DEBUG and
        attaches a DEBUG-level StreamHandler writing to sys.stdout.  Each
        call adds another handler.
        """
        global OPTIMIZATION_HACK
        OPTIMIZATION_HACK = False
        _logger.setLevel(logging.DEBUG)
        handler = logging.StreamHandler(sys.stdout)
        handler.setLevel(logging.DEBUG)
        _logger.addHandler(handler)
---|
101 | |
---|
102 | import sys, urllib, urllib2, types, mimetools, copy, urlparse, \ |
---|
103 | htmlentitydefs, re, random |
---|
104 | from cStringIO import StringIO |
---|
105 | |
---|
106 | import sgmllib |
---|
107 | # monkeypatch to fix http://www.python.org/sf/803422 :-( |
---|
108 | sgmllib.charref = re.compile("&#(x?[0-9a-fA-F]+)[^0-9a-fA-F]") |
---|
109 | |
---|
110 | # HTMLParser.HTMLParser is recent, so live without it if it's not available |
---|
111 | # (also, sgmllib.SGMLParser is much more tolerant of bad HTML) |
---|
112 | try: |
---|
113 | import HTMLParser |
---|
114 | except ImportError: |
---|
115 | HAVE_MODULE_HTMLPARSER = False |
---|
116 | else: |
---|
117 | HAVE_MODULE_HTMLPARSER = True |
---|
118 | |
---|
try:
    import warnings
except ImportError:
    # No warnings module on this interpreter: deprecation() becomes a no-op.
    warnings = None

def deprecation(message):
    """Emit message as a DeprecationWarning attributed to our caller.

    Silently does nothing when the warnings module is unavailable.
    """
    if warnings is None:
        return
    warnings.warn(message, DeprecationWarning, stacklevel=2)
---|
127 | |
---|
128 | VERSION = "0.2.7" |
---|
129 | |
---|
130 | CHUNK = 1024 # size of chunks fed to parser, in bytes |
---|
131 | |
---|
132 | DEFAULT_ENCODING = "latin-1" |
---|
133 | |
---|
# Empty marker class; it carries no attributes or behaviour of its own.
# NOTE(review): presumably used as a "no value supplied" sentinel by code
# outside this section -- confirm against its users.
class Missing: pass
---|
135 | |
---|
_compress_re = re.compile(r"\s+")

def compress_text(text):
    """Strip text and collapse every internal whitespace run to one space."""
    trimmed = text.strip()
    return _compress_re.sub(" ", trimmed)
---|
138 | |
---|
def normalize_line_endings(text):
    """Return text with every bare LF or bare CR replaced by CRLF.

    Existing CRLF pairs are left untouched.
    """
    # Either an LF not preceded by CR, or a CR not followed by LF.
    lone_break = r"(?:(?<!\r)\n)|(?:\r(?!\n))"
    return re.sub(lone_break, "\r\n", text)
---|
141 | |
---|
142 | |
---|
143 | # This version of urlencode is from my Python 1.5.2 back-port of the |
---|
144 | # Python 2.1 CVS maintenance branch of urllib. It will accept a sequence |
---|
145 | # of pairs instead of a mapping -- the 2.0 version only accepts a mapping. |
---|
def urlencode(query,doseq=False,):
    """Encode a sequence of two-element tuples or dictionary into a URL query \
string.

    If any values in the query arg are sequences and doseq is true, each
    sequence element is converted to a separate parameter.

    If the query arg is a sequence of two-element tuples, the order of the
    parameters in the output will match the order of parameters in the
    input.

    Raises TypeError if query is neither a mapping nor a (possibly empty)
    sequence whose first element is a tuple; in particular, non-empty
    strings are rejected.
    """

    if hasattr(query,"items"):
        # mapping objects
        query = query.items()
    else:
        # it's a bother at times that strings and string-like objects are
        # sequences...
        try:
            # non-sequence items fail len(); non-empty strings fail the
            # tuple check.  Zero-length sequences of all types pass, which
            # matches the treatment of empty dicts.
            if len(query) and not isinstance(query[0], tuple):
                raise TypeError()
        except TypeError:
            # Raise with the message only: passing the traceback object as
            # an extra constructor argument would put it into e.args and
            # into the printed message.
            raise TypeError("not a valid non-string sequence or mapping "
                            "object")

    pairs = []
    for k, v in query:
        k = urllib.quote_plus(str(k))
        if not doseq:
            # preserve old behavior: every value is stringified whole
            pairs.append(k + '=' + urllib.quote_plus(str(v)))
        elif isinstance(v, types.StringType):
            pairs.append(k + '=' + urllib.quote_plus(v))
        elif isinstance(v, types.UnicodeType):
            # is there a reasonable way to convert to ASCII?
            # encode generates a string, but "replace" or "ignore"
            # lose information and "strict" can raise UnicodeError
            pairs.append(
                k + '=' + urllib.quote_plus(v.encode("ASCII","replace")))
        else:
            try:
                # is this a sufficient test for sequence-ness?
                len(v)
            except TypeError:
                # not a sequence
                pairs.append(k + '=' + urllib.quote_plus(str(v)))
            else:
                # one parameter per sequence element
                for elt in v:
                    pairs.append(k + '=' + urllib.quote_plus(str(elt)))
    return '&'.join(pairs)
---|
211 | |
---|
def unescape(data, entities, encoding=DEFAULT_ENCODING):
    """Replace entity and character references in data.

    entities maps full entity strings such as "&amp;" to their replacement.
    Replacements whose type differs from that of a plain string literal are
    encoded with encoding.  Numeric references ("&#...;") are handed to
    unescape_charref().  Unknown or unencodable entities are left as they
    appear in data.  Returns data unchanged when it is None or has no "&".
    """
    if data is None or "&" not in data:
        return data

    def substitute(match, entities=entities, encoding=encoding):
        token = match.group()
        if token[1] == "#":
            return unescape_charref(token[2:-1], encoding)

        replacement = entities.get(token)
        if replacement is None:
            # unknown entity: keep the original text
            return token
        if type(replacement) == type(""):
            return replacement
        try:
            return replacement.encode(encoding)
        except UnicodeError:
            # not representable in the target encoding: keep the entity
            return token

    return re.sub(r"&#?[A-Za-z0-9]+?;", substitute, data)
---|
234 | |
---|
def unescape_charref(data, encoding):
    """Decode the body of a numeric character reference.

    data is the text between "&#" and ";": decimal digits, or "x" followed
    by hexadecimal digits.  With encoding None the unicode character is
    returned; otherwise the character encoded with encoding, or the
    original "&#...;" reference if it cannot be encoded.
    """
    if data.startswith("x"):
        codepoint = int(data[1:], 16)
    else:
        codepoint = int(data, 10)
    char = unichr(codepoint)
    if encoding is None:
        return char
    try:
        return char.encode(encoding)
    except UnicodeError:
        return "&#%s;" % data
---|
248 | |
---|
def get_entitydefs():
    """Build a dict mapping "&name;" to the corresponding unicode character.

    Uses htmlentitydefs.name2codepoint when that attribute exists;
    otherwise falls back to decoding htmlentitydefs.entitydefs, whose
    values are either latin-1 characters or "&#NNN;" references.
    """
    import htmlentitydefs
    from codecs import latin_1_decode

    table = {}
    if hasattr(htmlentitydefs, "name2codepoint"):
        for name, codepoint in htmlentitydefs.name2codepoint.items():
            table["&%s;" % name] = unichr(codepoint)
        return table

    # Fallback for htmlentitydefs modules lacking name2codepoint.
    for name, char in htmlentitydefs.entitydefs.items():
        uc = latin_1_decode(char)[0]
        if uc.startswith("&#") and uc.endswith(";"):
            uc = unescape_charref(uc[2:-1], None)
        table["&%s;" % name] = uc
    return table
---|
266 | |
---|
267 | |
---|
def issequence(x):
    """Return True if x supports integer indexing (x[0]).

    An IndexError (e.g. an empty sequence) still counts as a sequence;
    TypeError or KeyError means it is not one.
    """
    try:
        x[0]
    except IndexError:
        # indexable, just empty
        return True
    except (TypeError, KeyError):
        return False
    else:
        return True
---|
276 | |
---|
def isstringlike(x):
    """Return True if x can be concatenated with an empty string."""
    try:
        x + ""
    except:
        # any failure at all means "not string-like"
        return False
    return True
---|
281 | |
---|
282 | |
---|
def choose_boundary():
    """Return a string usable as a multipart boundary."""
    # follow IE and firefox: 27 dashes, then three random decimal numbers
    digits = []
    for _round in (0, 1, 2):
        digits.append(str(random.randint(0, sys.maxint-1)))
    return "-"*27 + "".join(digits)
---|
288 | |
---|
289 | # This cut-n-pasted MimeWriter from standard library is here so can add |
---|
290 | # to HTTP headers rather than message body when appropriate. It also uses |
---|
291 | # \r\n in place of \n. This is a bit nasty. |
---|
class MimeWriter:

    """Generic MIME writer.

    Writes a MIME message sequentially to the file object given to the
    constructor, using "\\r\\n" line endings.  Headers added with
    addheader() are buffered until flushheaders() or one of the start*body()
    methods writes them out; nothing else is buffered, so parts must be
    written in the order they appear in the output.

    Headers may instead be diverted to the http_hdrs list passed to the
    constructor (as (key, value) tuples) by passing add_to_http_hdrs=1.

    Typical use:

    w = MimeWriter(f)
    ...call w.addheader(key, value) 0 or more times...

    then either

    f = w.startbody(content_type)
    ...call f.write(data) for body data...

    or

    w.startmultipartbody(subtype)
    for each part:
        subwriter = w.nextpart()
        ...use the subwriter like a toplevel MimeWriter...
    w.lastpart()

    Notes:

    - Don't forget to call lastpart(); calls made in the wrong order are
      not detected.

    - startbody() and startmultipartbody() both return the underlying file.

    - The 'prefix' flag selects where a header goes in the buffer: 0 appends,
      1 inserts at the start.  addheader() defaults to appending, the
      start*body() methods default to inserting their Content-type header
      first.

    """

    def __init__(self, fp, http_hdrs=None):
        self._fp = fp
        self._http_hdrs = http_hdrs
        self._headers = []        # buffered, fully formatted header lines
        self._boundary = []       # stack of open multipart boundaries
        self._first_part = True

    def addheader(self, key, value, prefix=0,
                  add_to_http_hdrs=0):
        """Buffer a header, or append it to the HTTP header list.

        prefix is ignored if add_to_http_hdrs is true.
        """
        pieces = value.split("\r\n")
        # drop empty lines at both ends
        while pieces and not pieces[-1]:
            pieces.pop()
        while pieces and not pieces[0]:
            pieces.pop(0)

        if add_to_http_hdrs:
            # HTTP headers are kept unfolded, on a single line
            self._http_hdrs.append((key, "".join(pieces)))
            return

        # continuation lines are stripped and re-indented
        folded = pieces[:1]
        for continuation in pieces[1:]:
            folded.append(" " + continuation.strip())
        header = key + ": " + "\r\n".join(folded) + "\r\n"
        if prefix:
            self._headers.insert(0, header)
        else:
            self._headers.append(header)

    def flushheaders(self):
        """Write out and forget all buffered headers."""
        self._fp.writelines(self._headers)
        self._headers = []

    def startbody(self, ctype=None, plist=[], prefix=1,
                  add_to_http_hdrs=0, content_type=1):
        """Emit the headers and return the file to write the body to.

        A Content-type header built from ctype and the (name, value) pairs
        in plist is added first, unless ctype or content_type is false.

        prefix is ignored if add_to_http_hdrs is true.
        """
        if content_type and ctype:
            params = [';\r\n %s=%s' % (name, value) for name, value in plist]
            self.addheader("Content-type", ctype + "".join(params),
                           prefix=prefix,
                           add_to_http_hdrs=add_to_http_hdrs)
        self.flushheaders()
        if not add_to_http_hdrs:
            # blank line separating headers from body
            self._fp.write("\r\n")
        self._first_part = True
        return self._fp

    def startmultipartbody(self, subtype, boundary=None, plist=[], prefix=1,
                           add_to_http_hdrs=0, content_type=1):
        """Start a multipart/<subtype> body; return the underlying file.

        A boundary is generated with choose_boundary() when none is given.
        """
        if not boundary:
            boundary = choose_boundary()
        self._boundary.append(boundary)
        params = [("boundary", boundary)] + plist
        return self.startbody("multipart/" + subtype,
                              params,
                              prefix=prefix,
                              add_to_http_hdrs=add_to_http_hdrs,
                              content_type=content_type)

    def nextpart(self):
        """Write the delimiter for a new part and return a writer for it."""
        boundary = self._boundary[-1]
        if not self._first_part:
            # terminate the previous part's body
            self._fp.write("\r\n")
        self._first_part = False
        self._fp.write("--" + boundary + "\r\n")
        return self.__class__(self._fp)

    def lastpart(self):
        """Write the closing delimiter of the innermost multipart body."""
        if self._first_part:
            # no part was written: emit one opening delimiter first
            self.nextpart()
        boundary = self._boundary.pop()
        self._fp.write("\r\n--" + boundary + "--\r\n")
---|
433 | |
---|
434 | |
---|
class LocateError(ValueError):
    """Base class for the lookup errors defined below."""

class AmbiguityError(LocateError):
    """LocateError subclass; by its name, raised for ambiguous lookups."""

class ControlNotFoundError(LocateError):
    """LocateError subclass; by its name, raised when no control matches."""

class ItemNotFoundError(LocateError):
    """LocateError subclass; by its name, raised when no item matches."""


class ItemCountError(ValueError):
    """ValueError subclass; by its name, raised for a wrong number of items."""
---|
441 | |
---|
# for backwards compatibility, ParseError derives from exceptions that were
# raised by versions of ClientForm <= 0.2.5
#
# SGMLLIB_PARSEERROR names the exception class that the sgmllib-based
# parser's feed() catches and re-raises as ParseError.
if HAVE_MODULE_HTMLPARSER:
    # Both parser modules are importable: ParseError inherits from both of
    # their error classes.
    SGMLLIB_PARSEERROR = sgmllib.SGMLParseError
    class ParseError(sgmllib.SGMLParseError,
                     HTMLParser.HTMLParseError,
                     ):
        pass
else:
    if hasattr(sgmllib, "SGMLParseError"):
        SGMLLIB_PARSEERROR = sgmllib.SGMLParseError
        class ParseError(sgmllib.SGMLParseError):
            pass
    else:
        # This sgmllib has no SGMLParseError attribute; RuntimeError is
        # used in its place.
        SGMLLIB_PARSEERROR = RuntimeError
        class ParseError(RuntimeError):
            pass
---|
459 | |
---|
460 | |
---|
class _AbstractFormParser:
    """Collects form and label data from parser callbacks.

    This class implements the start_*/end_*/do_* tag handlers and the data
    handlers; it is mixed into a concrete parser class that drives them.

    On completion:

    forms -- list of (form_attrs, html_attrs, controls) tuples, where
        form_attrs is (name, action, method, enctype), html_attrs is a dict
        of the FORM tag's attributes and controls is a list of
        (type, name, attrs_dict) tuples.  forms[0] is the "global form"
        holding all controls found outside of any FORM element.
    labels -- list of attribute dicts for LABEL elements that have a
        non-empty "for" attribute; the label text is under "__text".
    base -- value of the last BASE element's href attribute, or None.
    """
    # thanks to Moshe Zadka for an example of sgmllib/htmllib usage
    def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING):
        if entitydefs is None:
            entitydefs = get_entitydefs()
        self._entitydefs = entitydefs
        self._encoding = encoding

        self.base = None
        self.forms = []
        self.labels = []
        self._current_label = None
        self._current_form = None
        self._select = None
        self._optgroup = None
        self._option = None
        self._textarea = None

        # forms[0] will contain all controls that are outside of any form
        # self._global_form is an alias for self.forms[0]
        # (it must still be None while the two calls below create it)
        self._global_form = None
        self.start_form([])
        self.end_form()
        self._current_form = self._global_form = self.forms[0]

    def do_base(self, attrs):
        """Record the href of a BASE element in self.base."""
        debug("%s", attrs)
        for key, value in attrs:
            if key == "href":
                self.base = value

    def end_body(self):
        """Close any LABEL or FORM still open at the end of BODY."""
        debug("")
        if self._current_label is not None:
            self.end_label()
        if self._current_form is not self._global_form:
            self.end_form()

    def start_form(self, attrs):
        """Begin a new form; raises ParseError if one is already open."""
        debug("%s", attrs)
        if self._current_form is not self._global_form:
            raise ParseError("nested FORMs")
        name = None
        action = None
        enctype = "application/x-www-form-urlencoded"
        method = "GET"
        html_attrs = {}
        for key, value in attrs:
            if key == "name":
                name = value
            elif key == "action":
                action = value
            elif key == "method":
                method = value.upper()
            elif key == "enctype":
                enctype = value.lower()
            html_attrs[key] = value
        controls = []
        self._current_form = (name, action, method, enctype), html_attrs, controls

    def end_form(self):
        """Finish the open form and append it to self.forms.

        Raises ParseError if no form (other than the global one) is open.
        """
        debug("")
        if self._current_label is not None:
            self.end_label()
        if self._current_form is self._global_form:
            raise ParseError("end of FORM before start")
        self.forms.append(self._current_form)
        self._current_form = self._global_form

    def start_select(self, attrs):
        """Begin a SELECT and record a control entry for the element itself."""
        debug("%s", attrs)
        if self._select is not None:
            raise ParseError("nested SELECTs")
        if self._textarea is not None:
            raise ParseError("SELECT inside TEXTAREA")
        select_attrs = dict(attrs)

        self._select = select_attrs
        self._add_label(select_attrs)

        self._append_select_control({"__select": select_attrs})

    def end_select(self):
        """Finish the open SELECT, flushing a pending OPTION first.

        A stray end tag outside of any form is ignored; inside a form it
        raises ParseError.
        """
        debug("")
        if self._select is None:
            if self._current_form is self._global_form:
                # stray </select> outside any form: tolerate it
                return
            raise ParseError("end of SELECT before start")

        # A SELECT opened outside of any form must be closed too, otherwise
        # its last OPTION is lost and every later SELECT looks "nested".
        if self._option is not None:
            self._end_option()

        self._select = None

    def start_optgroup(self, attrs):
        """Remember the attributes of the enclosing OPTGROUP."""
        debug("%s", attrs)
        if self._select is None:
            raise ParseError("OPTGROUP outside of SELECT")
        self._optgroup = dict(attrs)

    def end_optgroup(self):
        debug("")
        if self._optgroup is None:
            raise ParseError("end of OPTGROUP before start")
        self._optgroup = None

    def _start_option(self, attrs):
        """Begin an OPTION, implicitly ending any previous one."""
        debug("%s", attrs)
        if self._select is None:
            raise ParseError("OPTION outside of SELECT")
        if self._option is not None:
            self._end_option()

        self._option = dict(attrs)
        # options inherit "disabled" from their OPTGROUP
        if (self._optgroup and "disabled" in self._optgroup and
            "disabled" not in self._option):
            self._option["disabled"] = None

    def _end_option(self):
        """Finish the open OPTION and record it as a select control entry."""
        debug("")
        if self._option is None:
            raise ParseError("end of OPTION before start")

        contents = self._option.get("contents", "").strip()
        self._option["contents"] = contents
        # value and label default to the element's text
        if "value" not in self._option:
            self._option["value"] = contents
        if "label" not in self._option:
            self._option["label"] = contents
        # stuff dict of SELECT HTML attrs into a special private key
        #  (gets deleted again later)
        self._option["__select"] = self._select
        self._append_select_control(self._option)
        self._option = None

    def _append_select_control(self, attrs):
        """Append a ("select", name, attrs) entry to the current form."""
        debug("%s", attrs)
        controls = self._current_form[2]
        name = self._select.get("name")
        controls.append(("select", name, attrs))

    def start_textarea(self, attrs):
        """Begin a TEXTAREA; its text is collected by handle_data()."""
        debug("%s", attrs)
        if self._textarea is not None:
            raise ParseError("nested TEXTAREAs")
        if self._select is not None:
            raise ParseError("TEXTAREA inside SELECT")
        textarea_attrs = dict(attrs)
        self._add_label(textarea_attrs)

        self._textarea = textarea_attrs

    def end_textarea(self):
        """Finish the open TEXTAREA and record it as a control.

        A stray end tag outside of any form is ignored; inside a form it
        raises ParseError.
        """
        debug("")
        if self._textarea is None:
            if self._current_form is self._global_form:
                # stray </textarea> outside any form: tolerate it
                return
            raise ParseError("end of TEXTAREA before start")
        # A TEXTAREA opened outside of any form is recorded in the global
        # form and closed like any other.
        controls = self._current_form[2]
        name = self._textarea.get("name")
        controls.append(("textarea", name, self._textarea))
        self._textarea = None

    def start_label(self, attrs):
        """Begin a LABEL, implicitly ending any previous one.

        Labels with a non-empty "for" attribute are stored in self.labels;
        others are attached to the next control (see _add_label).
        """
        debug("%s", attrs)
        if self._current_label:
            self.end_label()
        label = dict(attrs)
        taken = bool(label.get("for"))  # empty id is invalid
        label["__text"] = ""
        label["__taken"] = taken
        if taken:
            self.labels.append(label)
        self._current_label = label

    def end_label(self):
        """Finish the open LABEL, if any."""
        debug("")
        label = self._current_label
        if label is None:
            # something is ugly in the HTML, but we're ignoring it
            return
        self._current_label = None
        # if it is staying around, it is True in all cases
        del label["__taken"]

    def _add_label(self, d):
        """Attach the open, not-yet-used LABEL to control attrs d."""
        #debug("%s", d)
        if self._current_label is not None:
            if self._current_label["__taken"]:
                self.end_label()  # be fuzzy
            else:
                self._current_label["__taken"] = True
                d["__label"] = self._current_label

    def handle_data(self, data):
        """Append text to the open OPTION, TEXTAREA or LABEL (in that order)."""
        debug("%s", data)

        # according to http://www.w3.org/TR/html4/appendix/notes.html#h-B.3.1
        # line break immediately after start tags or immediately before end
        # tags must be ignored, but real browsers only ignore a line break
        # after a start tag, so we'll do that.
        if data[0:2] == "\r\n":
            data = data[2:]
        if data[0:1] in ["\n", "\r"]:
            data = data[1:]

        if self._option is not None:
            # self._option is a dictionary of the OPTION element's HTML
            # attributes, but it has two special keys, one of which is the
            # special "contents" key contains text between OPTION tags (the
            # other is the "__select" key: see the end_option method)
            target = self._option
            key = "contents"
        elif self._textarea is not None:
            target = self._textarea
            key = "value"
            data = normalize_line_endings(data)
        # not if within option or textarea
        elif self._current_label is not None:
            target = self._current_label
            key = "__text"
        else:
            return

        if key in target:
            target[key] = target[key] + data
        else:
            target[key] = data

    def do_button(self, attrs):
        """Record a BUTTON element as a "<type>button" control."""
        debug("%s", attrs)
        d = {}
        d["type"] = "submit"  # default
        for key, val in attrs:
            d[key] = val
        controls = self._current_form[2]

        name = d.get("name")
        # we don't want to lose information, so use a type string that
        # doesn't clash with INPUT TYPE={SUBMIT,RESET,BUTTON}
        # e.g. type for BUTTON/RESET is "resetbutton"
        #     (type for INPUT/RESET is "reset")
        control_type = d["type"]+"button"
        self._add_label(d)
        controls.append((control_type, name, d))

    def do_input(self, attrs):
        """Record an INPUT element as a control of its TYPE (default text)."""
        debug("%s", attrs)
        d = {}
        d["type"] = "text"  # default
        for key, val in attrs:
            d[key] = val
        controls = self._current_form[2]

        control_type = d["type"]
        name = d.get("name")
        self._add_label(d)
        controls.append((control_type, name, d))

    def do_isindex(self, attrs):
        """Record an ISINDEX element as a nameless "isindex" control."""
        debug("%s", attrs)
        d = dict(attrs)
        controls = self._current_form[2]

        self._add_label(d)
        # isindex doesn't have type or name HTML attributes
        controls.append(("isindex", None, d))

    def handle_entityref(self, name):
        #debug("%s", name)
        self.handle_data(unescape(
            '&%s;' % name, self._entitydefs, self._encoding))

    def handle_charref(self, name):
        #debug("%s", name)
        self.handle_data(unescape_charref(name, self._encoding))

    def unescape_attr(self, name):
        """Return name with entity and character references replaced."""
        #debug("%s", name)
        return unescape(name, self._entitydefs, self._encoding)

    def unescape_attrs(self, attrs):
        """Return a copy of dict attrs with all values unescaped.

        Values that are themselves mappings are processed recursively.
        """
        #debug("%s", attrs)
        escaped_attrs = {}
        for key, val in attrs.items():
            try:
                val.items
            except AttributeError:
                escaped_attrs[key] = self.unescape_attr(val)
            else:
                # e.g. "__select" -- yuck!
                escaped_attrs[key] = self.unescape_attrs(val)
        return escaped_attrs

    def unknown_entityref(self, ref): self.handle_data("&%s;" % ref)
    def unknown_charref(self, ref): self.handle_data("&#%s;" % ref)
---|
777 | |
---|
778 | |
---|
779 | if not HAVE_MODULE_HTMLPARSER: |
---|
780 | class XHTMLCompatibleFormParser: |
---|
781 | def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING): |
---|
782 | raise ValueError("HTMLParser could not be imported") |
---|
783 | else: |
---|
784 | class XHTMLCompatibleFormParser(_AbstractFormParser, HTMLParser.HTMLParser): |
---|
785 | """Good for XHTML, bad for tolerance of incorrect HTML.""" |
---|
786 | # thanks to Michael Howitz for this! |
---|
787 | def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING): |
---|
788 | HTMLParser.HTMLParser.__init__(self) |
---|
789 | _AbstractFormParser.__init__(self, entitydefs, encoding) |
---|
790 | |
---|
791 | def feed(self, data): |
---|
792 | try: |
---|
793 | HTMLParser.HTMLParser.feed(self, data) |
---|
794 | except HTMLParser.HTMLParseError, exc: |
---|
795 | raise ParseError(exc) |
---|
796 | |
---|
797 | def start_option(self, attrs): |
---|
798 | _AbstractFormParser._start_option(self, attrs) |
---|
799 | |
---|
800 | def end_option(self): |
---|
801 | _AbstractFormParser._end_option(self) |
---|
802 | |
---|
803 | def handle_starttag(self, tag, attrs): |
---|
804 | try: |
---|
805 | method = getattr(self, "start_" + tag) |
---|
806 | except AttributeError: |
---|
807 | try: |
---|
808 | method = getattr(self, "do_" + tag) |
---|
809 | except AttributeError: |
---|
810 | pass # unknown tag |
---|
811 | else: |
---|
812 | method(attrs) |
---|
813 | else: |
---|
814 | method(attrs) |
---|
815 | |
---|
816 | def handle_endtag(self, tag): |
---|
817 | try: |
---|
818 | method = getattr(self, "end_" + tag) |
---|
819 | except AttributeError: |
---|
820 | pass # unknown tag |
---|
821 | else: |
---|
822 | method() |
---|
823 | |
---|
824 | def unescape(self, name): |
---|
825 | # Use the entitydefs passed into constructor, not |
---|
826 | # HTMLParser.HTMLParser's entitydefs. |
---|
827 | return self.unescape_attr(name) |
---|
828 | |
---|
829 | def unescape_attr_if_required(self, name): |
---|
830 | return name # HTMLParser.HTMLParser already did it |
---|
831 | def unescape_attrs_if_required(self, attrs): |
---|
832 | return attrs # ditto |
---|
833 | |
---|
834 | |
---|
class _AbstractSgmllibParser(_AbstractFormParser):
    """Shared behaviour for the sgmllib-style form parsers.

    Which methods are defined depends on the running Python version: from
    2.5 on, entity and character references are converted through the
    convert_* hooks below and attributes are passed through untouched; on
    older versions attributes are unescaped explicitly instead.

    """

    def do_option(self, attrs):
        _AbstractFormParser._start_option(self, attrs)

    if sys.version_info[:2] < (2, 5):
        def unescape_attr_if_required(self, name):
            return self.unescape_attr(name)

        def unescape_attrs_if_required(self, attrs):
            return self.unescape_attrs(attrs)
    else:
        # we override this attr to decode hex charrefs
        entity_or_charref = re.compile(
            '&(?:([a-zA-Z][-.a-zA-Z0-9]*)|#(x?[0-9a-fA-F]+))(;?)')

        def convert_entityref(self, name):
            # Resolve against the entitydefs and encoding held on the
            # instance.
            return unescape("&%s;" % name, self._entitydefs, self._encoding)

        def convert_charref(self, name):
            return unescape_charref("%s" % name, self._encoding)

        def unescape_attr_if_required(self, name):
            return name  # sgmllib already did it

        def unescape_attrs_if_required(self, attrs):
            return attrs  # ditto
---|
857 | |
---|
858 | |
---|
859 | class FormParser(_AbstractSgmllibParser, sgmllib.SGMLParser): |
---|
860 | """Good for tolerance of incorrect HTML, bad for XHTML.""" |
---|
861 | def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING): |
---|
862 | sgmllib.SGMLParser.__init__(self) |
---|
863 | _AbstractFormParser.__init__(self, entitydefs, encoding) |
---|
864 | |
---|
865 | def feed(self, data): |
---|
866 | try: |
---|
867 | sgmllib.SGMLParser.feed(self, data) |
---|
868 | except SGMLLIB_PARSEERROR, exc: |
---|
869 | raise ParseError(exc) |
---|
870 | |
---|
871 | |
---|
872 | |
---|
873 | # sigh, must support mechanize by allowing dynamic creation of classes based on |
---|
874 | # its bundled copy of BeautifulSoup (which was necessary because of dependency |
---|
875 | # problems) |
---|
876 | |
---|
877 | def _create_bs_classes(bs, |
---|
878 | icbinbs, |
---|
879 | ): |
---|
880 | class _AbstractBSFormParser(_AbstractSgmllibParser): |
---|
881 | bs_base_class = None |
---|
882 | def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING): |
---|
883 | _AbstractFormParser.__init__(self, entitydefs, encoding) |
---|
884 | self.bs_base_class.__init__(self) |
---|
885 | def handle_data(self, data): |
---|
886 | _AbstractFormParser.handle_data(self, data) |
---|
887 | self.bs_base_class.handle_data(self, data) |
---|
888 | def feed(self, data): |
---|
889 | try: |
---|
890 | self.bs_base_class.feed(self, data) |
---|
891 | except SGMLLIB_PARSEERROR, exc: |
---|
892 | raise ParseError(exc) |
---|
893 | |
---|
894 | |
---|
895 | class RobustFormParser(_AbstractBSFormParser, bs): |
---|
896 | """Tries to be highly tolerant of incorrect HTML.""" |
---|
897 | pass |
---|
898 | RobustFormParser.bs_base_class = bs |
---|
899 | class NestingRobustFormParser(_AbstractBSFormParser, icbinbs): |
---|
900 | """Tries to be highly tolerant of incorrect HTML. |
---|
901 | |
---|
902 | Different from RobustFormParser in that it more often guesses nesting |
---|
903 | above missing end tags (see BeautifulSoup docs). |
---|
904 | |
---|
905 | """ |
---|
906 | pass |
---|
907 | NestingRobustFormParser.bs_base_class = icbinbs |
---|
908 | |
---|
909 | return RobustFormParser, NestingRobustFormParser |
---|
910 | |
---|
# Optional dependency: RobustFormParser and NestingRobustFormParser are only
# defined at module level when BeautifulSoup can be imported.
try:
    if sys.version_info[:2] < (2, 2):
        raise ImportError  # BeautifulSoup uses generators
    import BeautifulSoup
except ImportError:
    # No BeautifulSoup (or Python too old): leave the two names undefined.
    pass
else:
    RobustFormParser, NestingRobustFormParser = _create_bs_classes(
        BeautifulSoup.BeautifulSoup, BeautifulSoup.ICantBelieveItsBeautifulSoup
        )
---|
921 | |
---|
922 | |
---|
923 | #FormParser = XHTMLCompatibleFormParser # testing hack |
---|
924 | #FormParser = RobustFormParser # testing hack |
---|
925 | |
---|
926 | |
---|
def ParseResponseEx(response,
                    select_default=False,
                    form_parser_class=FormParser,
                    request_class=urllib2.Request,
                    entitydefs=None,
                    encoding=DEFAULT_ENCODING,

                    # private
                    _urljoin=urlparse.urljoin,
                    _urlparse=urlparse.urlparse,
                    _urlunparse=urlparse.urlunparse,
                    ):
    """Like ParseResponse, with these differences:

    1. The returned list has one extra item at the front: a form holding
       all the controls that are not contained in any FORM element.

    2. There are no ignore_errors or backwards_compat arguments.

    3. Parsing is never done in backwards-compatibility mode
       (backwards_compat=True).

    The document URI is taken from response.geturl().

    """
    return _ParseFileEx(
        response, response.geturl(),
        select_default=select_default,
        ignore_errors=False,
        form_parser_class=form_parser_class,
        request_class=request_class,
        entitydefs=entitydefs,
        backwards_compat=False,
        encoding=encoding,
        _urljoin=_urljoin,
        _urlparse=_urlparse,
        _urlunparse=_urlunparse,
        )
---|
960 | |
---|
def ParseFileEx(file, base_uri,
                select_default=False,
                form_parser_class=FormParser,
                request_class=urllib2.Request,
                entitydefs=None,
                encoding=DEFAULT_ENCODING,

                # private
                _urljoin=urlparse.urljoin,
                _urlparse=urlparse.urlparse,
                _urlunparse=urlparse.urlunparse,
                ):
    """Like ParseFile, with these differences:

    1. The returned list has one extra item at the front: a form holding
       all the controls that are not contained in any FORM element.

    2. There are no ignore_errors or backwards_compat arguments.

    3. Parsing is never done in backwards-compatibility mode
       (backwards_compat=True).

    """
    return _ParseFileEx(
        file, base_uri,
        select_default=select_default,
        ignore_errors=False,
        form_parser_class=form_parser_class,
        request_class=request_class,
        entitydefs=entitydefs,
        backwards_compat=False,
        encoding=encoding,
        _urljoin=_urljoin,
        _urlparse=_urlparse,
        _urlunparse=_urlunparse,
        )
---|
994 | |
---|
def ParseResponse(response, *args, **kwds):
    """Parse an HTTP response and return a list of HTMLForm instances.

    The object returned by urllib2.urlopen can be passed directly as the
    response parameter.

    ClientForm.ParseError is raised on parse errors.

    response: file-like object (supporting read() method) that also has a
     geturl() method returning the URI of the HTTP response
    select_default: for multiple-selection SELECT controls and RADIO controls,
     pick the first item as the default if none are selected in the HTML
    form_parser_class: class to instantiate and use to parse the HTML
    request_class: class to return from .click() method (default is
     urllib2.Request)
    entitydefs: mapping like {"&amp;": "&", ...} containing HTML entity
     definitions (a sensible default is used)
    encoding: character encoding used for encoding numeric character references
     when matching link text.  ClientForm does not attempt to find the encoding
     in a META HTTP-EQUIV attribute in the document itself (mechanize, for
     example, does do that and will pass the correct value to ClientForm using
     this parameter).

    backwards_compat: boolean that determines whether the returned HTMLForm
     objects are backwards-compatible with old code.  If backwards_compat is
     true:

     - ClientForm 0.1 code will continue to work as before.

     - Label searches that do not specify a nr (number or count) will always
       get the first match, even if other controls match.  If
       backwards_compat is False, label searches that have ambiguous results
       will raise an AmbiguityError.

     - Item label matching is done by strict string comparison rather than
       substring matching.

     - De-selecting individual list items is allowed even if the Item is
       disabled.

    The backwards_compat argument will be deprecated in a future release.

    Pass a true value for select_default if you want the behaviour specified by
    RFC 1866 (the HTML 2.0 standard), which is to select the first item in a
    RADIO or multiple-selection SELECT control if none were selected in the
    HTML.  Most browsers (including Microsoft Internet Explorer (IE) and
    Netscape Navigator) instead leave all items unselected in these cases.  The
    W3C HTML 4.0 standard leaves this behaviour undefined in the case of
    multiple-selection SELECT controls, but insists that at least one RADIO
    button should be checked at all times, in contradiction to browser
    behaviour.

    There is a choice of parsers.  ClientForm.XHTMLCompatibleFormParser (uses
    HTMLParser.HTMLParser) works best for XHTML, ClientForm.FormParser (uses
    sgmllib.SGMLParser) (the default) works better for ordinary grubby HTML.
    Note that HTMLParser is only available in Python 2.2 and later.  You can
    pass your own class in here as a hack to work around bad HTML, but at your
    own risk: there is no well-defined interface.

    """
    all_forms = _ParseFileEx(response, response.geturl(), *args, **kwds)
    # The leading entry collects controls found outside any FORM element;
    # this function does not return it.
    return all_forms[1:]
---|
1056 | |
---|
def ParseFile(file, base_uri, *args, **kwds):
    """Parse HTML from a file-like object; return a list of HTMLForm instances.

    ClientForm.ParseError is raised on parse errors.

    file: file-like object (supporting read() method) containing HTML with zero
     or more forms to be parsed
    base_uri: the URI of the document (note that the base URI used to submit
     the form will be that given in the BASE element if present, not that of
     the document)

    For the other arguments and further details, see ParseResponse.__doc__.

    """
    all_forms = _ParseFileEx(file, base_uri, *args, **kwds)
    # The leading entry collects controls found outside any FORM element;
    # this function does not return it.
    return all_forms[1:]
---|
1072 | |
---|
1073 | def _ParseFileEx(file, base_uri, |
---|
1074 | select_default=False, |
---|
1075 | ignore_errors=False, |
---|
1076 | form_parser_class=FormParser, |
---|
1077 | request_class=urllib2.Request, |
---|
1078 | entitydefs=None, |
---|
1079 | backwards_compat=True, |
---|
1080 | encoding=DEFAULT_ENCODING, |
---|
1081 | _urljoin=urlparse.urljoin, |
---|
1082 | _urlparse=urlparse.urlparse, |
---|
1083 | _urlunparse=urlparse.urlunparse, |
---|
1084 | ): |
---|
1085 | if backwards_compat: |
---|
1086 | deprecation("operating in backwards-compatibility mode") |
---|
1087 | fp = form_parser_class(entitydefs, encoding) |
---|
1088 | |
---|
1089 | file.seek(0) |
---|
1090 | |
---|
1091 | while 1: |
---|
1092 | data = file.read(CHUNK) |
---|
1093 | try: |
---|
1094 | fp.feed(data) |
---|
1095 | except ParseError, e: |
---|
1096 | e.base_uri = base_uri |
---|
1097 | raise |
---|
1098 | if len(data) != CHUNK: break |
---|
1099 | if fp.base is not None: |
---|
1100 | # HTML BASE element takes precedence over document URI |
---|
1101 | base_uri = fp.base |
---|
1102 | labels = [] # Label(label) for label in fp.labels] |
---|
1103 | id_to_labels = {} |
---|
1104 | for l in fp.labels: |
---|
1105 | label = Label(l) |
---|
1106 | labels.append(label) |
---|
1107 | for_id = l["for"] |
---|
1108 | coll = id_to_labels.get(for_id) |
---|
1109 | if coll is None: |
---|
1110 | id_to_labels[for_id] = [label] |
---|
1111 | else: |
---|
1112 | coll.append(label) |
---|
1113 | forms = [] |
---|
1114 | for (name, action, method, enctype), attrs, controls in fp.forms: |
---|
1115 | if action is None: |
---|
1116 | action = base_uri |
---|
1117 | else: |
---|
1118 | action = _urljoin(base_uri, action) |
---|
1119 | action = fp.unescape_attr_if_required(action) |
---|
1120 | name = fp.unescape_attr_if_required(name) |
---|
1121 | attrs = fp.unescape_attrs_if_required(attrs) |
---|
1122 | # would be nice to make HTMLForm class (form builder) pluggable |
---|
1123 | form = HTMLForm( |
---|
1124 | action, method, enctype, name, attrs, request_class, |
---|
1125 | forms, labels, id_to_labels, backwards_compat) |
---|
1126 | form._urlparse = _urlparse |
---|
1127 | form._urlunparse = _urlunparse |
---|
1128 | for ii in range(len(controls)): |
---|
1129 | type, name, attrs = controls[ii] |
---|
1130 | attrs = fp.unescape_attrs_if_required(attrs) |
---|
1131 | name = fp.unescape_attr_if_required(name) |
---|
1132 | # index=ii*10 allows ImageControl to return multiple ordered pairs |
---|
1133 | form.new_control(type, name, attrs, select_default=select_default, |
---|
1134 | index=ii*10) |
---|
1135 | forms.append(form) |
---|
1136 | for form in forms: |
---|
1137 | form.fixup() |
---|
1138 | return forms |
---|
1139 | |
---|
1140 | |
---|
class Label:
    """Text label attached to a control.

    attrs is a mapping read with .get(); it must contain "__text" (the
    label's text) and may contain "for" (stored as the id attribute).

    The read-only text attribute gives the stripped label text when
    _backwards_compat is true, and the compress_text()-processed form of
    it otherwise.

    """
    def __init__(self, attrs):
        stripped = attrs.get("__text").strip()
        self.id = attrs.get("for")
        self._text = stripped
        self._ctext = compress_text(stripped)
        self.attrs = attrs
        self._backwards_compat = False  # maintained by HTMLForm

    def __getattr__(self, name):
        # Only reached for names that normal attribute lookup did not find.
        if name != "text":
            return getattr(Label, name)
        if self._backwards_compat:
            return self._text
        return self._ctext

    def __setattr__(self, name, value):
        if name == "text":
            # don't see any need for this, so make it read-only
            raise AttributeError("text attribute is read-only")
        self.__dict__[name] = value

    def __str__(self):
        details = (self.id, self.text)
        return "<Label(id=%r, text=%r)>" % details
---|
1165 | |
---|
1166 | |
---|
1167 | def _get_label(attrs): |
---|
1168 | text = attrs.get("__label") |
---|
1169 | if text is not None: |
---|
1170 | return Label(text) |
---|
1171 | else: |
---|
1172 | return None |
---|
1173 | |
---|
class Control:
    """An HTML form control.

    An HTMLForm holds a sequence of Controls, which are reached through the
    HTMLForm.find_control method or the HTMLForm.controls attribute.

    Control instances are normally created by the ParseFile / ParseResponse
    functions; if you use those, you can skip the rest of this paragraph.  A
    Control is only properly initialised once its fixup method has been
    called.  Strictly speaking that only matters for ListControl instances:
    ListControls are assembled from ListControls that each contain a single
    item, so their initial value(s) can only be known after the whole
    sequence has been seen.

    Which types and values may be assigned to the value attribute is
    defined by subclasses.

    A true disabled attribute represents the state browsers usually show by
    'greying out' a control.  While disabled is true, the Control raises
    AttributeError on any attempt to change its value, and it is not
    considered 'successful' as defined by the W3C HTML 4 standard -- ie. it
    contributes no data to the return value of the HTMLForm.click* methods.
    To enable a control, set the disabled attribute to a false value.

    While the readonly attribute is true, the Control raises AttributeError
    on any attempt to change its value.  To make a control writable, set
    the readonly attribute to a false value.

    All controls have the disabled and readonly attributes, not only those
    that may have the HTML attributes of the same names.

    Assigning to the value attribute can raise TypeError, AttributeError (if
    the value attribute should not be assigned to, because the control is
    disabled, for example) and ValueError.

    A control is not successful if its name or value attribute is None, if
    its value is an empty list, or if it is disabled.

    Public attributes:

    type: string describing type of control (see the keys of the
     HTMLForm.type2class dictionary for the allowable values) (readonly)
    name: name of control (readonly)
    value: current value of control (subclasses may allow a single value, a
     sequence of values, or either)
    disabled: disabled state
    readonly: readonly state
    id: value of id HTML attribute

    """
    def __init__(self, type, name, attrs, index=None):
        """
        type: string describing type of control (see the keys of the
         HTMLForm.type2class dictionary for the allowable values)
        name: control name
        attrs: HTML attributes of control's HTML element

        """
        raise NotImplementedError()

    def add_to_form(self, form):
        """Remember form as this control's owner and append the control to
        form.controls."""
        self._form = form
        form.controls.append(self)

    def fixup(self):
        """Hook for completing initialisation; does nothing here."""
        pass

    def is_of_kind(self, kind):
        raise NotImplementedError()

    def clear(self):
        raise NotImplementedError()

    def __getattr__(self, name):
        raise NotImplementedError()

    def __setattr__(self, name, value):
        raise NotImplementedError()

    def pairs(self):
        """Return list of (key, value) pairs suitable for passing to urlencode.
        """
        result = []
        for _index, key, value in self._totally_ordered_pairs():
            result.append((key, value))
        return result

    def _totally_ordered_pairs(self):
        """Return list of (index, key, value) tuples.

        Like pairs, but each entry also carries an index, which allows
        correct ordering to be preserved even where several controls are
        involved.

        """
        raise NotImplementedError()

    def _write_mime_data(self, mw, name, value):
        """Write data for a subitem of this control to a MimeWriter."""
        # called by HTMLForm
        part = mw.nextpart()
        disposition = 'form-data; name="%s"' % name
        part.addheader("Content-disposition", disposition, 1)
        body = part.startbody(prefix=0)
        body.write(value)

    def __str__(self):
        raise NotImplementedError()

    def get_labels(self):
        """Return all labels (Label instances) for this control.

        If the control was surrounded by a <label> tag, that will be the first
        label; all other labels, connected by 'for' and 'id', are in the order
        that appear in the HTML.

        """
        found = []
        if self._label:
            found.append(self._label)
        if self.id:
            # Labels tied to this control's id, looked up on the owning form.
            by_id = self._form._id_to_labels.get(self.id, ())
            found.extend(by_id)
        return found
---|
1292 | |
---|
1293 | |
---|
1294 | #--------------------------------------------------- |
---|
class ScalarControl(Control):
    """Control whose value is not restricted to one of a prescribed set.

    Some ScalarControls accept no value attribute at all.  The others take
    a single value, which must be string-like.

    Additional read-only public attribute:

    attrs: dictionary mapping the names of original HTML attributes of the
     control to their values

    """
    def __init__(self, type, name, attrs, index=None):
        self._index = index
        self._label = _get_label(attrs)
        # type and name are read-only through __setattr__, so they are
        # written straight into the instance dictionary.
        self.__dict__["type"] = type.lower()
        self.__dict__["name"] = name
        self._value = attrs.get("value")
        # Presence of the HTML attribute is what counts, not its value.
        self.disabled = "disabled" in attrs
        self.readonly = "readonly" in attrs
        self.id = attrs.get("id")

        self.attrs = attrs.copy()

        self._clicked = False

        self._urlparse = urlparse.urlparse
        self._urlunparse = urlparse.urlunparse

    def __getattr__(self, name):
        # Only reached for names that normal attribute lookup did not find;
        # "value" is served from the privately stored _value.
        if name != "value":
            raise AttributeError("%s instance has no attribute '%s'" %
                                 (self.__class__.__name__, name))
        return self.__dict__["_value"]

    def __setattr__(self, name, value):
        if name in ("name", "type"):
            raise AttributeError("%s attribute is readonly" % name)
        if name != "value":
            self.__dict__[name] = value
            return
        # Assignment to value: validate, then store under _value.
        if not isstringlike(value):
            raise TypeError("must assign a string")
        if self.readonly:
            raise AttributeError("control '%s' is readonly" % self.name)
        if self.disabled:
            raise AttributeError("control '%s' is disabled" % self.name)
        self.__dict__["_value"] = value

    def _totally_ordered_pairs(self):
        name = self.name
        value = self.value
        if name is not None and value is not None and not self.disabled:
            return [(self._index, name, value)]
        return []

    def clear(self):
        """Reset the value to None; raises AttributeError if readonly."""
        if self.readonly:
            raise AttributeError("control '%s' is readonly" % self.name)
        self.__dict__["_value"] = None

    def __str__(self):
        shown_name = self.name
        if shown_name is None:
            shown_name = "<None>"
        shown_value = self.value
        if shown_value is None:
            shown_value = "<None>"

        flags = []
        if self.disabled:
            flags.append("disabled")
        if self.readonly:
            flags.append("readonly")
        suffix = ", ".join(flags)
        if suffix:
            suffix = " (%s)" % suffix

        return "<%s(%s=%s)%s>" % (
            self.__class__.__name__, shown_name, shown_value, suffix)
---|
1370 | |
---|
1371 | |
---|
1372 | #--------------------------------------------------- |
---|
class TextControl(ScalarControl):
    """Textual input control.

    Covers:

    INPUT/TEXT
    INPUT/PASSWORD
    INPUT/HIDDEN
    TEXTAREA

    """
    def __init__(self, type, name, attrs, index=None):
        ScalarControl.__init__(self, type, name, attrs, index)
        # Hidden controls are always made read-only.
        if self.type == "hidden":
            self.readonly = True
        # A missing value attribute is treated as the empty string.
        if self._value is None:
            self._value = ""

    def is_of_kind(self, kind):
        return kind == "text"
---|
1391 | |
---|
1392 | #--------------------------------------------------- |
---|
class FileControl(ScalarControl):
    """File upload with INPUT TYPE=FILE.

    The value attribute of a FileControl is always None.  Use add_file instead.

    Additional public method: add_file

    """

    def __init__(self, type, name, attrs, index=None):
        ScalarControl.__init__(self, type, name, attrs, index)
        self._value = None
        # List of (file_object, content_type, filename) tuples, in the
        # order they were added.
        self._upload_data = []

    def is_of_kind(self, kind):
        return kind == "file"

    def clear(self):
        """Forget all added files; raises AttributeError if readonly."""
        if self.readonly:
            raise AttributeError("control '%s' is readonly" % self.name)
        self._upload_data = []

    def __setattr__(self, name, value):
        if name not in ("value", "name", "type"):
            self.__dict__[name] = value
            return
        raise AttributeError("%s attribute is readonly" % name)

    def add_file(self, file_object, content_type=None, filename=None):
        """Queue a file for upload.

        file_object: object with a read method
        content_type: None or string-like; None means
         "application/octet-stream"
        filename: None or string-like

        Raises TypeError if an argument fails those requirements.

        """
        if not hasattr(file_object, "read"):
            raise TypeError("file-like object must have read method")
        if content_type is not None and not isstringlike(content_type):
            raise TypeError("content type must be None or string-like")
        if filename is not None and not isstringlike(filename):
            raise TypeError("filename must be None or string-like")
        if content_type is None:
            content_type = "application/octet-stream"
        entry = (file_object, content_type, filename)
        self._upload_data.append(entry)

    def _totally_ordered_pairs(self):
        # XXX should it be successful even if unnamed?
        if self.name is not None and not self.disabled:
            return [(self._index, self.name, "")]
        return []

    def _write_mime_data(self, mw, _name, _value):
        # called by HTMLForm
        # assert _name == self.name and _value == ''
        uploads = self._upload_data
        if len(uploads) == 1:
            # single file
            file_object, content_type, filename = uploads[0]
            part = mw.nextpart()
            if filename:
                fn_part = '; filename="%s"' % filename
            else:
                fn_part = ""
            disp = 'form-data; name="%s"%s' % (self.name, fn_part)
            part.addheader("Content-disposition", disp, prefix=1)
            body = part.startbody(content_type, prefix=0)
            body.write(file_object.read())
        elif len(uploads) != 0:
            # multiple files: one multipart/mixed part with a sub-part
            # per file
            part = mw.nextpart()
            disp = 'form-data; name="%s"' % self.name
            part.addheader("Content-disposition", disp, prefix=1)
            part.startmultipartbody("mixed", prefix=0)
            for file_object, content_type, filename in uploads:
                subpart = part.nextpart()
                if filename:
                    fn_part = '; filename="%s"' % filename
                else:
                    fn_part = ""
                disp = "file%s" % fn_part
                subpart.addheader("Content-disposition", disp, prefix=1)
                body = subpart.startbody(content_type, prefix=0)
                body.write(file_object.read())
            part.lastpart()

    def __str__(self):
        shown_name = self.name
        if shown_name is None:
            shown_name = "<None>"

        if self._upload_data:
            filenames = []
            for _fobj, _ctype, filename in self._upload_data:
                if filename is None:
                    filename = "<Unnamed file>"
                filenames.append(filename)
            shown_value = ", ".join(filenames)
        else:
            shown_value = "<No files added>"

        flags = []
        if self.disabled:
            flags.append("disabled")
        if self.readonly:
            flags.append("readonly")
        suffix = ", ".join(flags)
        if suffix:
            suffix = " (%s)" % suffix

        return "<%s(%s=%s)%s>" % (
            self.__class__.__name__, shown_name, shown_value, suffix)
---|
1486 | |
---|
1487 | |
---|
1488 | #--------------------------------------------------- |
---|
class IsindexControl(ScalarControl):
    """ISINDEX control.

    ISINDEX is the odd-one-out of HTML form controls.  In fact, it isn't
    really part of regular HTML forms at all, and predates them.  Only one
    ISINDEX is allowed per HTML document.  ISINDEX and regular form
    submission are mutually exclusive -- either submit a form, or the
    ISINDEX.

    Having said this, since ISINDEX controls may appear in forms (which is
    probably bad HTML), ParseFile / ParseResponse will include them in the
    HTMLForm instances it returns.  You can set the ISINDEX's value, as with
    any other control (but note that ISINDEX controls have no name, so you'll
    need to use the type argument of set_value!).  When you submit the form,
    the ISINDEX will not be successful (ie., no data will get returned to the
    server as a result of its presence), unless you click on the ISINDEX
    control, in which case the ISINDEX gets submitted instead of the form:

    form.set_value("my isindex value", type="isindex")
    urllib2.urlopen(form.click(type="isindex"))

    ISINDEX elements outside of FORMs are ignored.  If you want to submit one
    by hand, do it like so:

    url = urlparse.urljoin(page_uri, "?"+urllib.quote_plus("my isindex value"))
    result = urllib2.urlopen(url)

    """
    def __init__(self, type, name, attrs, index=None):
        ScalarControl.__init__(self, type, name, attrs, index)
        # A missing value attribute is treated as the empty string.
        if self._value is None:
            self._value = ""

    def is_of_kind(self, kind):
        return kind in ("text", "clickable")

    def _totally_ordered_pairs(self):
        # Never contributes data to an ordinary form submission.
        return []

    def _click(self, form, coord, return_type, request_class=urllib2.Request):
        # Relative URL for ISINDEX submission: instead of "foo=bar+baz",
        # want "bar+baz".
        # This doesn't seem to be specified in HTML 4.01 spec. (ISINDEX is
        # deprecated in 4.01, but it should still say how to submit it).
        # Submission of ISINDEX is explained in the HTML 3.2 spec, though.
        parts = self._urlparse(form.action)
        # The form action's own query and fragment are discarded ...
        rest = parts[:-2]
        _query, _frag = parts[-2:]
        # ... and replaced by the quoted ISINDEX value and no fragment.
        url = self._urlunparse(rest + (urllib.quote_plus(self.value), None))

        if return_type == "pairs":
            return []
        if return_type == "request_data":
            return url, None, []
        return request_class(url)

    def __str__(self):
        shown_value = self.value
        if shown_value is None:
            shown_value = "<None>"

        flags = []
        if self.disabled:
            flags.append("disabled")
        if self.readonly:
            flags.append("readonly")
        suffix = ", ".join(flags)
        if suffix:
            suffix = " (%s)" % suffix

        return "<%s(%s)%s>" % (self.__class__.__name__, shown_value, suffix)
---|
1556 | |
---|
1557 | |
---|
1558 | #--------------------------------------------------- |
---|
class IgnoreControl(ScalarControl):
    """Control that we're not interested in.

    Covers:

    INPUT/RESET
    BUTTON/RESET
    INPUT/BUTTON
    BUTTON/BUTTON

    These controls are always unsuccessful, in the terminology of HTML 4 (ie.
    they never require any information to be returned to the server).

    BUTTON/BUTTON is used to generate events for script embedded in HTML.

    The value attribute of IgnoreControl is always None.

    """
    def __init__(self, type, name, attrs, index=None):
        """Initialise via ScalarControl, then force the value to None."""
        ScalarControl.__init__(self, type, name, attrs, index)
        self._value = None

    def is_of_kind(self, kind):
        """Return False: an ignored control belongs to no kind."""
        return False

    def __setattr__(self, name, value):
        """Reject writes to value, name and type; store anything else."""
        if name == "value":
            message = "control '%s' is ignored, hence read-only" % self.name
            raise AttributeError(message)
        if name in ("name", "type"):
            raise AttributeError("%s attribute is readonly" % name)
        self.__dict__[name] = value
---|
1591 | |
---|
1592 | |
---|
1593 | #--------------------------------------------------- |
---|
1594 | # ListControls |
---|
1595 | |
---|
1596 | # helpers and subsidiary classes |
---|
1597 | |
---|
class Item:
    """A single list item belonging to a list control.

    Instances register themselves in control.items on construction.
    Only the "selected" and "disabled" attributes may be assigned after
    construction; assigning anything else raises AttributeError.

    """
    def __init__(self, control, attrs, index=None):
        """Record the item's state and append it to control.items.

        control: the owning control (must have an "items" list)
        attrs: mapping of HTML attributes; must contain "value"
        index: stored as _index

        """
        label = _get_label(attrs)
        name = attrs["value"]
        if label:
            labels = [label]
        else:
            labels = []
        # Written straight into __dict__ because __setattr__ refuses
        # most attribute names.
        state = {
            "name": name,
            "_labels": labels,
            "attrs": attrs,
            "_control": control,
            "disabled": attrs.has_key("disabled"),
            "_selected": False,
            "id": attrs.get("id"),
            "_index": index,
            }
        self.__dict__.update(state)
        control.items.append(self)

    def get_labels(self):
        """Return all labels (Label instances) for this item.

        For items that represent radio buttons or checkboxes, if the item was
        surrounded by a <label> tag, that will be the first label; all other
        labels, connected by 'for' and 'id', are in the order that appear in
        the HTML.

        For items that represent select options, if the option had a label
        attribute, that will be the first label.  If the option has contents
        (text within the option tags) and it is not the same as the label
        attribute (if any), that will be a label.  There is nothing in the
        spec to my knowledge that makes an option with an id unable to be the
        target of a label's for attribute, so those are included, if any, for
        the sake of consistency and completeness.

        """
        labels = list(self._labels)
        if self.id:
            by_id = self._control._form._id_to_labels
            labels.extend(by_id.get(self.id, ()))
        return labels

    def __getattr__(self, name):
        """Expose the private _selected flag as the "selected" attribute."""
        if name != "selected":
            raise AttributeError(name)
        return self._selected

    def __setattr__(self, name, value):
        """Allow assignment to "selected" and "disabled" only.

        "selected" is delegated to the owning control; "disabled" is
        stored as a bool.

        """
        if name == "selected":
            self._control._set_selected_state(self, value)
            return
        if name == "disabled":
            self.__dict__["disabled"] = bool(value)
            return
        raise AttributeError(name)

    def __str__(self):
        """Return the name, "*"-prefixed if selected, in parens if disabled."""
        text = self.name
        if self.selected:
            text = "*" + text
        if self.disabled:
            text = "(%s)" % text
        return text

    def __repr__(self):
        """Return "<ClassName name=... id=... attr=...>"."""
        pairs = [("name", self.name), ("id", self.id)]+self.attrs.items()
        rendered = ["%s=%r" % (k, v) for k, v in pairs]
        return "<%s %s>" % (self.__class__.__name__, " ".join(rendered))
---|
1663 | |
---|
def disambiguate(items, nr, **kwds):
    """Pick one element of items, using nr to resolve ambiguity.

    items: sequence of candidates
    nr: index into items, or None to require a unique candidate
    kwds: the query that produced items; only used to build the error
     message ("key=value" pairs joined by spaces)

    Raises ItemNotFoundError if items is empty or nr is beyond the end of
    items, and AmbiguityError if nr is None and there are several items.

    """
    msg = " ".join(["%s=%r" % (key, value) for key, value in kwds.items()])
    if not items:
        raise ItemNotFoundError(msg)
    if nr is None:
        if len(items) > 1:
            raise AmbiguityError(msg)
        return items[0]
    if nr >= len(items):
        raise ItemNotFoundError(msg)
    return items[nr]
---|
1678 | |
---|
class ListControl(Control):
    """Control representing a sequence of items.

    The value attribute of a ListControl represents the successful list items
    in the control.  The successful list items are those that are selected and
    not disabled.

    ListControl implements both list controls that take a length-1 value
    (single-selection) and those that take length >1 values
    (multiple-selection).

    ListControls accept sequence values only.  Some controls only accept
    sequences of length 0 or 1 (RADIO, and single-selection SELECT).
    In those cases, ItemCountError is raised if len(sequence) > 1.  CHECKBOXes
    and multiple-selection SELECTs (those having the "multiple" HTML attribute)
    accept sequences of any length.

    Note the following mistake:

        control.value = some_value
        assert control.value == some_value    # not necessarily true

    The reason for this is that the value attribute always gives the list items
    in the order they were listed in the HTML.

    ListControl items can also be referred to by their labels instead of names.
    Use the label argument to .get(), and the .set_value_by_label(),
    .get_value_by_label() methods.

    Note that, rather confusingly, though SELECT controls are represented in
    HTML by SELECT elements (which contain OPTION elements, representing
    individual list items), CHECKBOXes and RADIOs are not represented by *any*
    element.  Instead, those controls are represented by a collection of INPUT
    elements.  For example, this is a SELECT control, named "control1":

        <select name="control1">
         <option>foo</option>
         <option value="1">bar</option>
        </select>

    and this is a CHECKBOX control, named "control2":

        <input type="checkbox" name="control2" value="foo" id="cbe1">
        <input type="checkbox" name="control2" value="bar" id="cbe2">

    The id attribute of a CHECKBOX or RADIO ListControl is always that of its
    first element (for example, "cbe1" above).


    Additional read-only public attribute: multiple.

    """

    # ListControls are built up by the parser from their component items by
    # creating one ListControl per item, consolidating them into a single
    # master ListControl held by the HTMLForm:

    # -User calls form.new_control(...)
    # -Form creates Control, and calls control.add_to_form(self).
    # -Control looks for a Control with the same name and type in the form,
    #  and if it finds one, merges itself with that control by calling
    #  control.merge_control(self).  The first Control added to the form, of
    #  a particular name and type, is the only one that survives in the
    #  form.
    # -Form calls control.fixup for all its controls.  ListControls in the
    #  form know they can now safely pick their default values.

    # To create a ListControl without an HTMLForm, use:

    # control.merge_control(new_control)

    # (actually, it's much easier just to use ParseFile)

    _label = None

    def __init__(self, type, name, attrs={}, select_default=False,
                 called_as_base_class=False, index=None):
        """
        select_default: for RADIO and multiple-selection SELECT controls, pick
         the first item as the default if no 'selected' HTML attribute is
         present

        called_as_base_class: must be true; ListControl itself is not meant
         to be instantiated directly, and NotImplementedError is raised
         otherwise

        """
        if not called_as_base_class:
            raise NotImplementedError()

        # name and type are read-only through __setattr__, so they are
        # written directly into the instance dictionary.
        self.__dict__["type"] = type.lower()
        self.__dict__["name"] = name
        # attrs is only read here, never mutated.
        self._value = attrs.get("value")
        self.disabled = False
        self.readonly = False
        self.id = attrs.get("id")

        # As Controls are merged in with .merge_control(), self.attrs will
        # refer to each Control in turn -- always the most recently merged
        # control.  Each merged-in Control instance corresponds to a single
        # list item: see ListControl.__doc__.
        self.items = []
        self._form = None

        self._select_default = select_default
        self._clicked = False

    def clear(self):
        """Deselect everything by setting the value to an empty list."""
        self.value = []

    def is_of_kind(self, kind):
        """Return whether this control is of the given kind.

        "list" is always true; "multilist" and "singlelist" depend on the
        multiple attribute; every other kind is false.

        """
        if kind == "list":
            return True
        elif kind == "multilist":
            return bool(self.multiple)
        elif kind == "singlelist":
            return not self.multiple
        else:
            return False

    def get_items(self, name=None, label=None, id=None,
                  exclude_disabled=False):
        """Return matching items by name or label.

        For argument docs, see the docstring for .get()

        Items are returned in the order they appear in self.items.  When
        the form's backwards_compat attribute is true, a label must equal
        the label text exactly; otherwise a substring match is enough.

        Raises TypeError if name, label or id is given but not string-like.

        """
        if name is not None and not isstringlike(name):
            raise TypeError("item name must be string-like")
        if label is not None and not isstringlike(label):
            raise TypeError("item label must be string-like")
        if id is not None and not isstringlike(id):
            raise TypeError("item id must be string-like")
        items = []  # order is important
        compat = self._form.backwards_compat
        for o in self.items:
            if exclude_disabled and o.disabled:
                continue
            if name is not None and o.name != name:
                continue
            if label is not None:
                for l in o.get_labels():
                    if ((compat and l.text == label) or
                        (not compat and l.text.find(label) > -1)):
                        break
                else:
                    # no label of this item matched
                    continue
            if id is not None and o.id != id:
                continue
            items.append(o)
        return items

    def get(self, name=None, label=None, id=None, nr=None,
            exclude_disabled=False):
        """Return item by name or label, disambiguating if necessary with nr.

        All arguments must be passed by name, with the exception of 'name',
        which may be used as a positional argument.

        If name is specified, then the item must have the indicated name.

        If label is specified, then the item must have a label whose
        whitespace-compressed, stripped, text substring-matches the indicated
        label string (eg. label="please choose" will match
        "  Do  please  choose an item ").

        If id is specified, then the item must have the indicated id.

        nr is an optional 0-based index of the items matching the query.

        If nr is the default None value and more than one item is found,
        raises AmbiguityError (unless the HTMLForm instance's backwards_compat
        attribute is true).

        If no item is found, or if items are found but nr is specified and not
        found, raises ItemNotFoundError.

        Optionally excludes disabled items.

        """
        if nr is None and self._form.backwards_compat:
            nr = 0  # :-/
        items = self.get_items(name, label, id, exclude_disabled)
        return disambiguate(items, nr, name=name, label=label, id=id)

    def _get(self, name, by_label=False, nr=None, exclude_disabled=False):
        """Look up an item for the deprecated name/by_label/nr interface.

        name is treated as a label if by_label is true, as an item name
        otherwise.

        """
        # strictly for use by deprecated methods
        if by_label:
            name, label = None, name
        else:
            name, label = name, None
        # nr and exclude_disabled must go by keyword: get()'s third
        # positional parameter is id, not nr.
        return self.get(name, label, nr=nr,
                        exclude_disabled=exclude_disabled)

    def toggle(self, name, by_label=False, nr=None):
        """Deprecated: given a name or label and optional disambiguating index
        nr, toggle the matching item's selection.

        Selecting items follows the behavior described in the docstring of the
        'get' method.

        if the item is disabled, or this control is disabled or readonly,
        raise AttributeError.

        """
        deprecation(
            "item = control.get(...); item.selected = not item.selected")
        o = self._get(name, by_label, nr)
        self._set_selected_state(o, not o.selected)

    def set(self, selected, name, by_label=False, nr=None):
        """Deprecated: given a name or label and optional disambiguating index
        nr, set the matching item's selection to the bool value of selected.

        Selecting items follows the behavior described in the docstring of the
        'get' method.

        if the item is disabled, or this control is disabled or readonly,
        raise AttributeError.

        """
        deprecation(
            "control.get(...).selected = <boolean>")
        self._set_selected_state(self._get(name, by_label, nr), selected)

    def _set_selected_state(self, item, action):
        """Select (action true) or deselect (action false) item.

        Raises AttributeError if the control is disabled or readonly, or
        if the item is disabled (in backwards_compat mode, a disabled
        item may still be deselected).  In a single-selection control,
        selecting an item deselects all the others.

        """
        # action:
        # bool False: off
        # bool True: on
        if self.disabled:
            raise AttributeError("control '%s' is disabled" % self.name)
        if self.readonly:
            raise AttributeError("control '%s' is readonly" % self.name)
        # normalise, so that only real booleans are stored on the item
        action = bool(action)
        compat = self._form.backwards_compat
        if not compat and item.disabled:
            raise AttributeError("item is disabled")
        else:
            if compat and item.disabled and action:
                raise AttributeError("item is disabled")
            if self.multiple:
                item.__dict__["_selected"] = action
            else:
                if not action:
                    item.__dict__["_selected"] = False
                else:
                    for o in self.items:
                        o.__dict__["_selected"] = False
                    item.__dict__["_selected"] = True

    def toggle_single(self, by_label=None):
        """Deprecated: toggle the selection of the single item in this control.

        Raises ItemCountError if the control does not contain only one item.

        by_label argument is ignored, and included only for backwards
        compatibility.

        """
        deprecation(
            "control.items[0].selected = not control.items[0].selected")
        if len(self.items) != 1:
            raise ItemCountError(
                "'%s' is not a single-item control" % self.name)
        item = self.items[0]
        self._set_selected_state(item, not item.selected)

    def set_single(self, selected, by_label=None):
        """Deprecated: set the selection of the single item in this control.

        Raises ItemCountError if the control does not contain only one item.

        by_label argument is ignored, and included only for backwards
        compatibility.

        """
        deprecation(
            "control.items[0].selected = <boolean>")
        if len(self.items) != 1:
            raise ItemCountError(
                "'%s' is not a single-item control" % self.name)
        self._set_selected_state(self.items[0], selected)

    def get_item_disabled(self, name, by_label=False, nr=None):
        """Get disabled state of named list item in a ListControl."""
        deprecation(
            "control.get(...).disabled")
        return self._get(name, by_label, nr).disabled

    def set_item_disabled(self, disabled, name, by_label=False, nr=None):
        """Set disabled state of named list item in a ListControl.

        disabled: boolean disabled state

        """
        deprecation(
            "control.get(...).disabled = <boolean>")
        self._get(name, by_label, nr).disabled = disabled

    def set_all_items_disabled(self, disabled):
        """Set disabled state of all list items in a ListControl.

        disabled: boolean disabled state

        """
        for o in self.items:
            o.disabled = disabled

    def get_item_attrs(self, name, by_label=False, nr=None):
        """Return dictionary of HTML attributes for a single ListControl item.

        The HTML element types that describe list items are: OPTION for SELECT
        controls, INPUT for the rest.  These elements have HTML attributes that
        you may occasionally want to know about -- for example, the "alt" HTML
        attribute gives a text string describing the item (graphical browsers
        usually display this as a tooltip).

        The returned dictionary maps HTML attribute names to values.  The names
        and values are taken from the original HTML.

        """
        deprecation(
            "control.get(...).attrs")
        return self._get(name, by_label, nr).attrs

    def add_to_form(self, form):
        """Add this control to form, merging with a same-named control.

        A nameless control is always added as a separate control.
        Otherwise, if the form already has a control of the same name and
        type, this control is merged into it; if not (or if the lookup is
        ambiguous), this control is added as a new one.

        """
        assert self._form is None or form == self._form, (
            "can't add control to more than one form")
        self._form = form
        if self.name is None:
            # always count nameless elements as separate controls
            Control.add_to_form(self, form)
        else:
            try:
                control = form.find_control(self.name, self.type)
            except (ControlNotFoundError, AmbiguityError):
                Control.add_to_form(self, form)
            else:
                control.merge_control(self)

    def merge_control(self, control):
        """Take over the items of control, which must agree on multiple."""
        assert bool(control.multiple) == bool(self.multiple)
        # usually, isinstance(control, self.__class__)
        self.items.extend(control.items)

    def fixup(self):
        """
        ListControls are built up from component list items (which are also
        ListControls) during parsing.  This method should be called after all
        items have been added.  See ListControl.__doc__ for the reason this is
        required.

        """
        # Need to set default selection where no item was indicated as being
        # selected by the HTML:

        # CHECKBOX:
        #  Nothing should be selected.
        # SELECT/single, SELECT/multiple and RADIO:
        #  RFC 1866 (HTML 2.0): says first item should be selected.
        #  W3C HTML 4.01 Specification: says that client behaviour is
        #   undefined in this case.  For RADIO, exactly one must be selected,
        #   though which one is undefined.
        #  Both Netscape and Microsoft Internet Explorer (IE) choose first
        #   item for SELECT/single.  However, both IE5 and Mozilla (both 1.0
        #   and Firebird 0.6) leave all items unselected for RADIO and
        #   SELECT/multiple.

        # Since both Netscape and IE all choose the first item for
        # SELECT/single, we do the same.  OTOH, both Netscape and IE
        # leave SELECT/multiple with nothing selected, in violation of RFC 1866
        # (but not in violation of the W3C HTML 4 standard); the same is true
        # of RADIO (which *is* in violation of the HTML 4 standard).  We follow
        # RFC 1866 if the _select_default attribute is set, and Netscape and IE
        # otherwise.  RFC 1866 and HTML 4 are always violated insofar as you
        # can deselect all items in a RadioControl.

        for o in self.items:
            # set items' controls to self, now that we've merged
            o.__dict__["_control"] = self

    def __getattr__(self, name):
        """Compute the value attribute; any other missing name is an error.

        value is the list of names of the selected items, in item order.
        Disabled items are left out unless the form's backwards_compat
        attribute is true.  A nameless control always has value [].

        """
        if name == "value":
            compat = self._form.backwards_compat
            if self.name is None:
                return []
            return [o.name for o in self.items if o.selected and
                    (not o.disabled or compat)]
        else:
            raise AttributeError("%s instance has no attribute '%s'" %
                                 (self.__class__.__name__, name))

    def __setattr__(self, name, value):
        """Set value through _set_value; protect read-only attributes.

        Assigning value raises AttributeError if the control is disabled
        or readonly.  name, type and multiple can never be assigned.

        """
        if name == "value":
            if self.disabled:
                raise AttributeError("control '%s' is disabled" % self.name)
            if self.readonly:
                raise AttributeError("control '%s' is readonly" % self.name)
            self._set_value(value)
        elif name in ("name", "type", "multiple"):
            raise AttributeError("%s attribute is readonly" % name)
        else:
            self.__dict__[name] = value

    def _set_value(self, value):
        """Select exactly the items named in the sequence value.

        Raises TypeError if value is None or string-like, and
        ItemCountError if more than one name is given to a
        single-selection control.

        """
        if value is None or isstringlike(value):
            raise TypeError("ListControl, must set a sequence")
        if not value:
            # empty sequence: deselect everything we are allowed to touch
            compat = self._form.backwards_compat
            for o in self.items:
                if not o.disabled or compat:
                    o.selected = False
        elif self.multiple:
            self._multiple_set_value(value)
        elif len(value) > 1:
            raise ItemCountError(
                "single selection list, must set sequence of "
                "length 0 or 1")
        else:
            self._single_set_value(value)

    def _get_items(self, name, target=1):
        """Return (on, off): the non-disabled items called name, split by
        whether they are currently selected.

        target is the number of non-disabled items required.  Raises
        ItemNotFoundError if fewer than target items have that name at
        all, and AttributeError if enough exist but too many of them are
        disabled.

        """
        all_items = self.get_items(name)
        items = [o for o in all_items if not o.disabled]
        if len(items) < target:
            if len(all_items) < target:
                raise ItemNotFoundError(
                    "insufficient items with name %r" % name)
            else:
                raise AttributeError(
                    "insufficient non-disabled items with name %s" % name)
        on = []
        off = []
        for o in items:
            if o.selected:
                on.append(o)
            else:
                off.append(o)
        return on, off

    def _single_set_value(self, value):
        """Select the item named by the length-1 sequence value."""
        assert len(value) == 1
        on, off = self._get_items(value[0])
        assert len(on) <= 1
        if not on:
            off[0].selected = True

    def _multiple_set_value(self, value):
        """Make the selection match the names in the sequence value.

        A name that occurs k times in value selects k items of that name.
        All lookups are done before any item is changed, so a failed
        lookup leaves the selection untouched.

        """
        compat = self._form.backwards_compat
        turn_on = []  # transactional-ish
        turn_off = [item for item in self.items if
                    item.selected and (not item.disabled or compat)]
        # count how many times each name was requested
        names = {}
        for nn in value:
            names[nn] = names.get(nn, 0) + 1
        for name, count in names.items():
            on, off = self._get_items(name, count)
            for i in range(count):
                if on:
                    # already selected: just make sure it stays that way
                    item = on[0]
                    del on[0]
                    del turn_off[turn_off.index(item)]
                else:
                    item = off[0]
                    del off[0]
                    turn_on.append(item)
        for item in turn_off:
            item.selected = False
        for item in turn_on:
            item.selected = True

    def set_value_by_label(self, value):
        """Set the value of control by item labels.

        value is expected to be an iterable of strings that are substrings of
        the item labels that should be selected.  Before substring matching is
        performed, the original label text is whitespace-compressed
        (consecutive whitespace characters are converted to a single space
        character) and leading and trailing whitespace is stripped.  Ambiguous
        labels are accepted without complaint if the form's backwards_compat is
        True; otherwise, it will not complain as long as all ambiguous labels
        share the same item name (e.g. OPTION value).

        """
        if isstringlike(value):
            raise TypeError(value)
        if not self.multiple and len(value) > 1:
            raise ItemCountError(
                "single selection list, must set sequence of "
                "length 0 or 1")
        items = []
        for nn in value:
            found = self.get_items(label=nn)
            if len(found) > 1:
                if not self._form.backwards_compat:
                    # ambiguous labels are fine as long as item names (e.g.
                    # OPTION values) are same
                    opt_name = found[0].name
                    if [o for o in found[1:] if o.name != opt_name]:
                        raise AmbiguityError(nn)
                else:
                    # OK, we'll guess :-(  Assume first available item.
                    found = found[:1]
            for o in found:
                # For the multiple-item case, we could try to be smarter,
                # saving them up and trying to resolve, but that's too much.
                if self._form.backwards_compat or o not in items:
                    items.append(o)
                    break
            else:  # all of them are used
                raise ItemNotFoundError(nn)
        # now we have all the items that should be on
        # let's just turn everything off and then back on.
        self.value = []
        for o in items:
            o.selected = True

    def get_value_by_label(self):
        """Return the value of the control as given by normalized labels."""
        res = []
        compat = self._form.backwards_compat
        for o in self.items:
            if (not o.disabled or compat) and o.selected:
                # use the first non-empty label, or None if there is none
                for l in o.get_labels():
                    if l.text:
                        res.append(l.text)
                        break
                else:
                    res.append(None)
        return res

    def possible_items(self, by_label=False):
        """Deprecated: return the names or labels of all possible items.

        Includes disabled items, which may be misleading for some use cases.

        """
        deprecation(
            "[item.name for item in self.items]")
        if by_label:
            res = []
            for o in self.items:
                # use the first non-empty label, or None if there is none
                for l in o.get_labels():
                    if l.text:
                        res.append(l.text)
                        break
                else:
                    res.append(None)
            return res
        return [o.name for o in self.items]

    def _totally_ordered_pairs(self):
        """Return (index, control name, item name) for each successful item.

        A disabled or nameless control yields no pairs; otherwise one
        triple is produced per item that is selected and not disabled.

        """
        if self.disabled or self.name is None:
            return []
        else:
            return [(o._index, self.name, o.name) for o in self.items
                    if o.selected and not o.disabled]

    def __str__(self):
        """Return "<ClassName(name=[item, ...])>" plus disabled/readonly."""
        name = self.name
        if name is None:
            name = "<None>"

        display = [str(o) for o in self.items]

        infos = []
        if self.disabled:
            infos.append("disabled")
        if self.readonly:
            infos.append("readonly")
        info = ", ".join(infos)
        if info:
            info = " (%s)" % info

        return "<%s(%s=[%s])%s>" % (self.__class__.__name__,
                                    name, ", ".join(display), info)
---|
2249 | |
---|
2250 | |
---|
class RadioControl(ListControl):
    """
    Covers:

    INPUT/RADIO

    """
    def __init__(self, type, name, attrs, select_default=False, index=None):
        """Create a radio control holding the single item described by attrs.

        A missing "value" attribute defaults to "on" (attrs is updated in
        place).  The item starts selected if attrs has a "checked" key.

        """
        attrs.setdefault("value", "on")
        ListControl.__init__(self, type, name, attrs, select_default,
                             called_as_base_class=True, index=index)
        # multiple is read-only through __setattr__
        self.__dict__["multiple"] = False
        item = Item(self, attrs, index)
        is_checked = attrs.has_key("checked")
        item.__dict__["_selected"] = is_checked

    def fixup(self):
        """Settle the default selection once all items have been merged.

        If several enabled items are selected, only the last stays
        selected.  If none is, the first enabled item is selected, but
        only when _select_default is set.

        """
        ListControl.fixup(self)
        chosen = [item for item in self.items
                  if item.selected and not item.disabled]
        if chosen:
            # Ensure only one item selected.  Choose the last one,
            # following IE and Firefox.
            for extra in chosen[:-1]:
                extra.selected = False
            return
        if not self._select_default:
            return
        for item in self.items:
            if not item.disabled:
                item.selected = True
                break

    def get_labels(self):
        """Return an empty list."""
        return []
---|
2283 | |
---|
class CheckboxControl(ListControl):
    """
    Covers:

    INPUT/CHECKBOX

    """
    def __init__(self, type, name, attrs, select_default=False, index=None):
        """Create a checkbox control holding the single item from attrs.

        A missing "value" attribute defaults to "on" (attrs is updated in
        place).  The item starts selected if attrs has a "checked" key.

        """
        attrs.setdefault("value", "on")
        ListControl.__init__(self, type, name, attrs, select_default,
                             called_as_base_class=True, index=index)
        # multiple is read-only through __setattr__
        self.__dict__["multiple"] = True
        item = Item(self, attrs, index)
        is_checked = attrs.has_key("checked")
        item.__dict__["_selected"] = is_checked

    def get_labels(self):
        """Return an empty list."""
        return []
---|
2301 | |
---|
2302 | |
---|
class SelectControl(ListControl):
    """
    Covers:

    SELECT (and OPTION)


    OPTION 'values', in HTML parlance, are Item 'names' in ClientForm parlance.

    SELECT control values and labels are subject to some messy defaulting
    rules.  For example, if the HTML representation of the control is:

    <SELECT name=year>
      <OPTION value=0 label="2002">current year</OPTION>
      <OPTION value=1>2001</OPTION>
      <OPTION>2000</OPTION>
    </SELECT>

    The items, in order, have labels "2002", "2001" and "2000", whereas their
    names (the OPTION values) are "0", "1" and "2000" respectively.  Note that
    the value of the last OPTION in this example defaults to its contents, as
    specified by RFC 1866, as do the labels of the second and third OPTIONs.

    The OPTION labels are sometimes more meaningful than the OPTION values,
    which can make for more maintainable code.

    Additional read-only public attribute: attrs

    The attrs attribute is a dictionary of the original HTML attributes of the
    SELECT element.  Other ListControls do not have this attribute, because in
    other cases the control as a whole does not correspond to any single HTML
    element.  control.get(...).attrs may be used as usual to get at the HTML
    attributes of the HTML elements corresponding to individual list items (for
    SELECT controls, these are OPTION elements).

    Another special case is that the Item.attrs dictionaries have a special key
    "contents" which does not correspond to any real HTML attribute, but rather
    contains the contents of the OPTION element:

    <OPTION>this bit</OPTION>

    """
    # HTML attributes here are treated slightly differently from other list
    # controls:
    # -The SELECT HTML attributes dictionary is stuffed into the OPTION
    #  HTML attributes dictionary under the "__select" key.
    # -The content of each OPTION element is stored under the special
    #  "contents" key of the dictionary.
    # After all this, the dictionary is passed to the SelectControl constructor
    # as the attrs argument, as usual.  However:
    # -The first SelectControl constructed when building up a SELECT control
    #  has a constructor attrs argument containing only the __select key -- so
    #  this SelectControl represents an empty SELECT control.
    # -Subsequent SelectControls have both OPTION HTML-attribute in attrs and
    #  the __select dictionary containing the SELECT HTML-attributes.

    def __init__(self, type, name, attrs, select_default=False, index=None):
        """Build a SELECT control holding zero or one OPTION item.

        attrs is the OPTION attribute dictionary, which must carry the SELECT
        element's own attributes under the "__select" key (see the comment
        block above).  The caller's dictionary is not modified.

        """
        # fish out the SELECT HTML attributes from the OPTION HTML attributes
        # dictionary
        self.attrs = attrs["__select"].copy()
        self.__dict__["_label"] = _get_label(self.attrs)
        self.__dict__["id"] = self.attrs.get("id")
        self.__dict__["multiple"] = self.attrs.has_key("multiple")
        # the majority of the contents, label, and value dance already happened
        contents = attrs.get("contents")
        # work on a copy so that removing "__select" does not affect the
        # dictionary the caller passed in
        attrs = attrs.copy()
        del attrs["__select"]

        ListControl.__init__(self, type, name, self.attrs, select_default,
                             called_as_base_class=True, index=index)
        self.disabled = self.attrs.has_key("disabled")
        self.readonly = self.attrs.has_key("readonly")
        if attrs.has_key("value"):
            # otherwise it is a marker 'select started' token
            o = Item(self, attrs, index)
            o.__dict__["_selected"] = attrs.has_key("selected")
            # add 'label' label and contents label, if different.  If both are
            # provided, the 'label' label is used for display in HTML
            # 4.0-compliant browsers (and any lower spec? not sure) while the
            # contents are used for display in older or less-compliant
            # browsers.  We make label objects for both, if the values are
            # different.
            label = attrs.get("label")
            if label:
                o._labels.append(Label({"__text": label}))
                if contents and contents != label:
                    o._labels.append(Label({"__text": contents}))
            elif contents:
                o._labels.append(Label({"__text": contents}))

    def fixup(self):
        """Normalise the selection once all OPTION items have been added.

        If nothing is selected, the first enabled item is selected for
        single-selection controls, and for multiple-selection controls only
        when select_default was requested.  If a single-selection control has
        several items selected, only the last one stays selected.

        """
        ListControl.fixup(self)
        # Firefox doesn't exclude disabled items from those considered here
        # (i.e. from 'found', for both branches of the if below).  Note that
        # IE6 doesn't support the disabled attribute on OPTIONs at all.
        found = [o for o in self.items if o.selected]
        if not found:
            if not self.multiple or self._select_default:
                for o in self.items:
                    if not o.disabled:
                        # Temporarily enable the control while the default is
                        # applied (presumably because selecting an item of a
                        # disabled control is refused), then restore the
                        # control's own flag.  The saved value belongs to the
                        # control, so it must be written back to self and
                        # not to the item.
                        was_disabled = self.disabled
                        self.disabled = False
                        try:
                            o.selected = True
                        finally:
                            self.disabled = was_disabled
                        break
        elif not self.multiple:
            # Ensure only one item selected.  Choose the last one,
            # following IE and Firefox.
            for o in found[:-1]:
                o.selected = False
---|
2415 | |
---|
2416 | |
---|
2417 | #--------------------------------------------------- |
---|
class SubmitControl(ScalarControl):
    """
    Covers:

    INPUT/SUBMIT
    BUTTON/SUBMIT

    The control only contributes data to a submission while it is being
    clicked (see _click and _totally_ordered_pairs).

    """
    def __init__(self, type, name, attrs, index=None):
        ScalarControl.__init__(self, type, name, attrs, index)
        # IE5 defaults SUBMIT value to "Submit Query"; Firebird 0.6 leaves it
        # blank, Konqueror 3.1 defaults to "Submit".  HTML spec. doesn't seem
        # to define this.
        if self.value is None: self.value = ""
        self.readonly = True

    def get_labels(self):
        """Return the control's labels, led by one built from its value.

        A non-empty value yields a Label first; the labels reported by
        ScalarControl.get_labels follow.

        """
        res = []
        if self.value:
            res.append(Label({"__text": self.value}))
        res.extend(ScalarControl.get_labels(self))
        return res

    def is_of_kind(self, kind): return kind == "clickable"

    def _click(self, form, coord, return_type, request_class=urllib2.Request):
        """Mark the control as clicked at coord and delegate to the form.

        Returns whatever form._switch_click(return_type, request_class)
        returns.  The clicked marker is cleared again even if that call
        raises, so that a failed click cannot leave the control looking
        clicked for later submissions.

        """
        self._clicked = coord
        try:
            return form._switch_click(return_type, request_class)
        finally:
            self._clicked = False

    def _totally_ordered_pairs(self):
        """Return the control's pairs, or [] unless it is being clicked."""
        if not self._clicked:
            return []
        return ScalarControl._totally_ordered_pairs(self)
---|
2453 | |
---|
2454 | |
---|
2455 | #--------------------------------------------------- |
---|
class ImageControl(SubmitControl):
    """
    Covers:

    INPUT/IMAGE

    Coordinates are specified using one of the HTMLForm.click* methods.

    """
    def __init__(self, type, name, attrs, index=None):
        SubmitControl.__init__(self, type, name, attrs, index)
        # SubmitControl marks itself readonly; image controls are not.
        self.readonly = False

    def _totally_ordered_pairs(self):
        """Return (index, key, value) triples for a clicked image control.

        Nothing is returned when the control is disabled, has not been
        clicked, or has no name.  Otherwise "<name>.x" and "<name>.y" carry
        the click coordinates, followed by the control's own value under its
        name when that value is non-empty.

        """
        coord = self._clicked
        if self.disabled or not coord:
            return []
        name = self.name
        if name is None:
            return []
        # consecutive index values keep the three entries in this order
        triples = []
        triples.append((self._index, "%s.x" % name, str(coord[0])))
        triples.append((self._index+1, "%s.y" % name, str(coord[1])))
        value = self._value
        if value:
            triples.append((self._index+2, name, value))
        return triples

    # Skip SubmitControl.get_labels (which adds a label built from the
    # value) and use the ScalarControl implementation directly.
    get_labels = ScalarControl.get_labels
---|
2485 | |
---|
# Aliases: these subclasses add nothing of their own.  They exist just to
# make str(control) and str(form) clearer; HTMLForm.type2class maps the
# "password", "hidden", "textarea" and "submitbutton" types to them.
class PasswordControl(TextControl): pass
class HiddenControl(TextControl): pass
class TextareaControl(TextControl): pass
class SubmitButtonControl(SubmitControl): pass
---|
2491 | |
---|
2492 | |
---|
def is_listcontrol(control):
    """Return the result of asking control whether it is of kind "list"."""
    return control.is_of_kind("list")
---|
2494 | |
---|
2495 | |
---|
2496 | class HTMLForm: |
---|
2497 | """Represents a single HTML <form> ... </form> element. |
---|
2498 | |
---|
2499 | A form consists of a sequence of controls that usually have names, and |
---|
2500 | which can take on various values. The values of the various types of |
---|
2501 | controls represent variously: text, zero-or-one-of-many or many-of-many |
---|
2502 | choices, and files to be uploaded. Some controls can be clicked on to |
---|
2503 | submit the form, and clickable controls' values sometimes include the |
---|
2504 | coordinates of the click. |
---|
2505 | |
---|
2506 | Forms can be filled in with data to be returned to the server, and then |
---|
2507 | submitted, using the click method to generate a request object suitable for |
---|
2508 | passing to urllib2.urlopen (or the click_request_data or click_pairs |
---|
2509 | methods if you're not using urllib2). |
---|
2510 | |
---|
2511 | import ClientForm |
---|
2512 | forms = ClientForm.ParseFile(html, base_uri) |
---|
2513 | form = forms[0] |
---|
2514 | |
---|
2515 | form["query"] = "Python" |
---|
2516 | form.find_control("nr_results").get("lots").selected = True |
---|
2517 | |
---|
2518 | response = urllib2.urlopen(form.click()) |
---|
2519 | |
---|
2520 | Usually, HTMLForm instances are not created directly. Instead, the |
---|
2521 | ParseFile or ParseResponse factory functions are used. If you do construct |
---|
2522 | HTMLForm objects yourself, however, note that an HTMLForm instance is only |
---|
2523 | properly initialised after the fixup method has been called (ParseFile and |
---|
2524 | ParseResponse do this for you). See ListControl.__doc__ for the reason |
---|
2525 | this is required. |
---|
2526 | |
---|
2527 | Indexing a form (form["control_name"]) returns the named Control's value |
---|
2528 | attribute. Assignment to a form index (form["control_name"] = something) |
---|
2529 | is equivalent to assignment to the named Control's value attribute. If you |
---|
2530 | need to be more specific than just supplying the control's name, use the |
---|
2531 | set_value and get_value methods. |
---|
2532 | |
---|
2533 | ListControl values are lists of item names (specifically, the names of the |
---|
2534 | items that are selected and not disabled, and hence are "successful" -- ie. |
---|
2535 | cause data to be returned to the server). The list item's name is the |
---|
2536 | value of the corresponding HTML element's "value" attribute. |
---|
2537 | |
---|
2538 | Example: |
---|
2539 | |
---|
2540 | <INPUT type="CHECKBOX" name="cheeses" value="leicester"></INPUT> |
---|
2541 | <INPUT type="CHECKBOX" name="cheeses" value="cheddar"></INPUT> |
---|
2542 | |
---|
2543 | defines a CHECKBOX control with name "cheeses" which has two items, named |
---|
2544 | "leicester" and "cheddar". |
---|
2545 | |
---|
2546 | Another example: |
---|
2547 | |
---|
2548 | <SELECT name="more_cheeses"> |
---|
2549 | <OPTION>1</OPTION> |
---|
2550 | <OPTION value="2" label="CHEDDAR">cheddar</OPTION> |
---|
2551 | </SELECT> |
---|
2552 | |
---|
2553 | defines a SELECT control with name "more_cheeses" which has two items, |
---|
2554 | named "1" and "2" (because the OPTION element's value HTML attribute |
---|
2555 | defaults to the element contents -- see SelectControl.__doc__ for more on |
---|
2556 | these defaulting rules). |
---|
2557 | |
---|
2558 | To select, deselect or otherwise manipulate individual list items, use the |
---|
2559 | HTMLForm.find_control() and ListControl.get() methods. To set the whole |
---|
2560 | value, do as for any other control: use indexing or the set_/get_value |
---|
2561 | methods. |
---|
2562 | |
---|
2563 | Example: |
---|
2564 | |
---|
2565 | # select *only* the item named "cheddar" |
---|
2566 | form["cheeses"] = ["cheddar"] |
---|
2567 | # select "cheddar", leave other items unaffected |
---|
2568 | form.find_control("cheeses").get("cheddar").selected = True |
---|
2569 | |
---|
2570 | Some controls (RADIO and SELECT without the multiple attribute) can only |
---|
2571 | have zero or one items selected at a time. Some controls (CHECKBOX and |
---|
2572 | SELECT with the multiple attribute) can have multiple items selected at a |
---|
2573 | time. To set the whole value of a ListControl, assign a sequence to a form |
---|
2574 | index: |
---|
2575 | |
---|
2576 | form["cheeses"] = ["cheddar", "leicester"] |
---|
2577 | |
---|
2578 | If the ListControl is not multiple-selection, the assigned list must be of |
---|
2579 | length one. |
---|
2580 | |
---|
2581 | To check if a control has an item, if an item is selected, or if an item is |
---|
2582 | successful (selected and not disabled), respectively: |
---|
2583 | |
---|
2584 | "cheddar" in [item.name for item in form.find_control("cheeses").items] |
---|
2585 | "cheddar" in [item.name for item in form.find_control("cheeses").items if |
---|
2586 | item.selected] |
---|
2587 | "cheddar" in form["cheeses"] # (or "cheddar" in form.get_value("cheeses")) |
---|
2588 | |
---|
2589 | Note that some list items may be disabled (see below). |
---|
2590 | |
---|
2591 | Note the following mistake: |
---|
2592 | |
---|
2593 | form[control_name] = control_value |
---|
2594 | assert form[control_name] == control_value # not necessarily true |
---|
2595 | |
---|
2596 | The reason for this is that form[control_name] always gives the list items |
---|
2597 | in the order they were listed in the HTML. |
---|
2598 | |
---|
2599 | List items (hence list values, too) can be referred to in terms of list |
---|
2600 | item labels rather than list item names using the appropriate label |
---|
2601 | arguments. Note that each item may have several labels. |
---|
2602 | |
---|
2603 | The question of default values of OPTION contents, labels and values is |
---|
2604 | somewhat complicated: see SelectControl.__doc__ and |
---|
2605 | ListControl.get_item_attrs.__doc__ if you think you need to know. |
---|
2606 | |
---|
2607 | Controls can be disabled or readonly. In either case, the control's value |
---|
2608 | cannot be changed until you clear those flags (see example below). |
---|
2609 | Disabled is the state typically represented by browsers by 'greying out' a |
---|
2610 | control. Disabled controls are not 'successful' -- they don't cause data |
---|
2611 | to get returned to the server. Readonly controls usually appear in |
---|
2612 | browsers as read-only text boxes. Readonly controls are successful. List |
---|
2613 | items can also be disabled. Attempts to select or deselect disabled items |
---|
2614 | fail with AttributeError. |
---|
2615 | |
---|
2616 | If a lot of controls are readonly, it can be useful to do this: |
---|
2617 | |
---|
2618 | form.set_all_readonly(False) |
---|
2619 | |
---|
2620 | To clear a control's value attribute, so that it is not successful (until a |
---|
2621 | value is subsequently set): |
---|
2622 | |
---|
2623 | form.clear("cheeses") |
---|
2624 | |
---|
2625 | More examples: |
---|
2626 | |
---|
2627 | control = form.find_control("cheeses") |
---|
2628 | control.disabled = False |
---|
2629 | control.readonly = False |
---|
2630 | control.get("gruyere").disabled = True |
---|
2631 | control.items[0].selected = True |
---|
2632 | |
---|
2633 | See the various Control classes for further documentation. Many methods |
---|
2634 | take name, type, kind, id, label and nr arguments to specify the control to |
---|
2635 | be operated on: see HTMLForm.find_control.__doc__. |
---|
2636 | |
---|
2637 | ControlNotFoundError (subclass of ValueError) is raised if the specified |
---|
2638 | control can't be found. This includes occasions where a non-ListControl |
---|
2639 | is found, but the method (set, for example) requires a ListControl. |
---|
2640 | ItemNotFoundError (subclass of ValueError) is raised if a list item can't |
---|
2641 | be found. ItemCountError (subclass of ValueError) is raised if an attempt |
---|
2642 | is made to select more than one item and the control doesn't allow that, or |
---|
2643 | set/get_single are called and the control contains more than one item. |
---|
2644 | AttributeError is raised if a control or item is readonly or disabled and |
---|
2645 | an attempt is made to alter its value. |
---|
2646 | |
---|
2647 | Security note: Remember that any passwords you store in HTMLForm instances |
---|
2648 | will be saved to disk in the clear if you pickle them (directly or |
---|
2649 | indirectly). The simplest solution to this is to avoid pickling HTMLForm |
---|
2650 | objects. You could also pickle before filling in any password, or just set |
---|
2651 | the password to "" before pickling. |
---|
2652 | |
---|
2653 | |
---|
2654 | Public attributes: |
---|
2655 | |
---|
2656 | action: full (absolute URI) form action |
---|
2657 | method: "GET" or "POST" |
---|
2658 | enctype: form transfer encoding MIME type |
---|
2659 | name: name of form (None if no name was specified) |
---|
2660 | attrs: dictionary mapping original HTML form attributes to their values |
---|
2661 | |
---|
2662 | controls: list of Control instances; do not alter this list |
---|
2663 | (instead, call form.new_control to make a Control and add it to the |
---|
2664 | form, or control.add_to_form if you already have a Control instance) |
---|
2665 | |
---|
2666 | |
---|
2667 | |
---|
2668 | Methods for form filling: |
---|
2669 | ------------------------- |
---|
2670 | |
---|
2671 | Most of these methods have very similar arguments. See |
---|
2672 | HTMLForm.find_control.__doc__ for details of the name, type, kind, label |
---|
2673 | and nr arguments. |
---|
2674 | |
---|
2675 | def find_control(self, |
---|
2676 | name=None, type=None, kind=None, id=None, predicate=None, |
---|
2677 | nr=None, label=None) |
---|
2678 | |
---|
2679 | get_value(name=None, type=None, kind=None, id=None, nr=None, |
---|
2680 | by_label=False, # by_label is deprecated |
---|
2681 | label=None) |
---|
2682 | set_value(value, |
---|
2683 | name=None, type=None, kind=None, id=None, nr=None, |
---|
2684 | by_label=False, # by_label is deprecated |
---|
2685 | label=None) |
---|
2686 | |
---|
2687 | clear_all() |
---|
2688 | clear(name=None, type=None, kind=None, id=None, nr=None, label=None) |
---|
2689 | |
---|
2690 | set_all_readonly(readonly) |
---|
2691 | |
---|
2692 | |
---|
2693 | Method applying only to FileControls: |
---|
2694 | |
---|
2695 | add_file(file_object, |
---|
2696 | content_type="application/octet-stream", filename=None, |
---|
2697 | name=None, id=None, nr=None, label=None) |
---|
2698 | |
---|
2699 | |
---|
2700 | Methods applying only to clickable controls: |
---|
2701 | |
---|
2702 | click(name=None, type=None, id=None, nr=0, coord=(1,1), label=None) |
---|
2703 | click_request_data(name=None, type=None, id=None, nr=0, coord=(1,1), |
---|
2704 | label=None) |
---|
2705 | click_pairs(name=None, type=None, id=None, nr=0, coord=(1,1), label=None) |
---|
2706 | |
---|
2707 | """ |
---|
2708 | |
---|
# Maps a lower-cased control type name to the Control class that
# new_control instantiates for it.  Types missing from this table are
# handled in new_control: IgnoreControl when ignore_unknown is true,
# TextControl otherwise.
type2class = {
    # text-like controls
    "text": TextControl,
    "password": PasswordControl,
    "hidden": HiddenControl,
    "textarea": TextareaControl,

    "isindex": IsindexControl,

    "file": FileControl,

    # plain and reset buttons map to IgnoreControl
    "button": IgnoreControl,
    "buttonbutton": IgnoreControl,
    "reset": IgnoreControl,
    "resetbutton": IgnoreControl,

    # submit-style controls
    "submit": SubmitControl,
    "submitbutton": SubmitButtonControl,
    "image": ImageControl,

    # list controls (new_control also passes these select_default)
    "radio": RadioControl,
    "checkbox": CheckboxControl,
    "select": SelectControl,
    }
---|
2732 | |
---|
2733 | #--------------------------------------------------- |
---|
2734 | # Initialisation. Use ParseResponse / ParseFile instead. |
---|
2735 | |
---|
def __init__(self, action, method="GET",
             enctype="application/x-www-form-urlencoded",
             name=None, attrs=None,
             request_class=urllib2.Request,
             forms=None, labels=None, id_to_labels=None,
             backwards_compat=True):
    """
    In the usual case, use ParseResponse (or ParseFile) to create new
    HTMLForm objects.

    action: full (absolute URI) form action
    method: "GET" or "POST"
    enctype: form transfer encoding MIME type
    name: name of form
    attrs: dictionary mapping original HTML form attributes to their values
     (copied; an empty dictionary is used when None is passed)
    request_class: stored as self._request_class
    backwards_compat: stored as a bool (see __setattr__)

    """
    self.action = action
    self.method = method
    self.enctype = enctype
    self.name = name
    if attrs is None:
        self.attrs = {}
    else:
        self.attrs = attrs.copy()
    # must exist before backwards_compat is assigned below, because
    # __setattr__ walks self.controls for that attribute
    self.controls = []
    self._request_class = request_class

    # these attributes are used by zope.testbrowser
    self._forms = forms  # this is a semi-public API!
    self._labels = labels  # this is a semi-public API!
    self._id_to_labels = id_to_labels  # this is a semi-public API!

    self.backwards_compat = backwards_compat  # note __setattr__

    # URL helpers; new_control copies them onto each control it creates
    self._urlunparse = urlparse.urlunparse
    self._urlparse = urlparse.urlparse
---|
2773 | |
---|
def __getattr__(self, name):
    """Fallback attribute lookup.

    "backwards_compat" is served from self._backwards_compat; any other
    name is looked up on the HTMLForm class itself.

    """
    if name != "backwards_compat":
        return getattr(HTMLForm, name)
    return self._backwards_compat
---|
2778 | |
---|
def __setattr__(self, name, value):
    """Store an attribute, with special handling for backwards_compat.

    Assigning backwards_compat stores bool(value) as _backwards_compat and
    copies that flag onto every label of every item of every control that
    has an items attribute.  All other names are stored unchanged.

    """
    # yuck
    if name == "backwards_compat":
        name = "_backwards_compat"
        value = bool(value)
        for control in self.controls:
            try:
                control_items = control.items
            except AttributeError:
                # no items attribute, so no item labels to update
                continue
            for item in control_items:
                for item_label in item.get_labels():
                    item_label._backwards_compat = value
    self.__dict__[name] = value
---|
2794 | |
---|
def new_control(self, type, name, attrs,
                ignore_unknown=False, select_default=False, index=None):
    """Adds a new control to the form.

    This is usually called by ParseFile and ParseResponse.  Don't call it
    yourself unless you're building your own Control instances.

    Note that controls representing lists of items are built up from
    controls holding only a single list item.  See ListControl.__doc__ for
    further information.

    type: type of control (see Control.__doc__ for a list)
    attrs: HTML attributes of control (a copy is handed to the control)
    ignore_unknown: if true, use a dummy Control instance for controls of
     unknown type; otherwise, use a TextControl
    select_default: for RADIO and multiple-selection SELECT controls, pick
     the first item as the default if no 'selected' HTML attribute is
     present (this defaulting happens when the HTMLForm.fixup method is
     called)
    index: index of corresponding element in HTML (see
     MoreFormTests.test_interspersed_controls for motivation)

    """
    type = type.lower()
    control_class = self.type2class.get(type)
    if control_class is None:
        # unknown type: pick the fallback class
        if ignore_unknown:
            control_class = IgnoreControl
        else:
            control_class = TextControl

    # ListControl constructors take select_default before index; all other
    # constructors take index only.
    ctor_args = [type, name, attrs.copy()]
    if issubclass(control_class, ListControl):
        ctor_args.append(select_default)
    ctor_args.append(index)
    control = control_class(*ctor_args)

    control.add_to_form(self)
    control._urlparse = self._urlparse
    control._urlunparse = self._urlunparse
---|
2834 | |
---|
def fixup(self):
    """Normalise form after all controls have been added.

    This is usually called by ParseFile and ParseResponse.  Don't call it
    yourself unless you're building your own Control instances.

    This method should only be called once, after all controls have been
    added to the form.

    """
    for member in self.controls:
        member.fixup()
    # Re-assign the flag so that __setattr__ pushes it onto the labels of
    # the controls that now exist.
    self.backwards_compat = self._backwards_compat
---|
2848 | |
---|
2849 | #--------------------------------------------------- |
---|
def __str__(self):
    """Return a multi-line summary: a header line, then one line per control.

    The header holds the form name (when there is one), method, action and
    enctype; the whole text is wrapped in angle brackets.

    """
    if self.name:
        prefix = self.name+" "
    else:
        prefix = ""
    header = "%s%s %s %s" % (prefix, self.method, self.action, self.enctype)
    lines = [header] + [" %s" % str(control) for control in self.controls]
    return "<%s>" % "\n".join(lines)
---|
2858 | |
---|
2859 | #--------------------------------------------------- |
---|
2860 | # Form-filling methods. |
---|
2861 | |
---|
def __getitem__(self, name):
    """Return the value attribute of the control found by name."""
    control = self.find_control(name)
    return control.value
---|
def __contains__(self, name):
    """Return whether the form has a control matching name.

    find_control signals a missing control by raising
    ControlNotFoundError; a membership test has to answer False in that
    case rather than let the exception escape.  Other errors raised by
    find_control still propagate.

    """
    try:
        control = self.find_control(name)
    except ControlNotFoundError:
        return False
    return bool(control)
---|
2866 | def __setitem__(self, name, value): |
---|
2867 | control = self.find_control(name) |
---|
2868 | try: |
---|
2869 | control.value = value |
---|
2870 | except AttributeError, e: |
---|
2871 | raise ValueError(str(e)) |
---|
2872 | |
---|
def get_value(self,
              name=None, type=None, kind=None, id=None, nr=None,
              by_label=False,  # by_label is deprecated
              label=None):
    """Return value of control.

    If only the name argument is supplied, equivalent to

    form[name]

    With the deprecated by_label flag set, the control's
    get_value_by_label method is used instead; NotImplementedError is
    raised if the control has no such method.

    """
    if by_label:
        deprecation("form.get_value_by_label(...)")
    control = self.find_control(name, type, kind, id, label=label, nr=nr)
    if not by_label:
        return control.value
    try:
        getter = control.get_value_by_label
    except AttributeError:
        raise NotImplementedError(
            "control '%s' does not yet support by_label" % control.name)
    return getter()
---|
def set_value(self, value,
              name=None, type=None, kind=None, id=None, nr=None,
              by_label=False,  # by_label is deprecated
              label=None):
    """Set value of control.

    If only name and value arguments are supplied, equivalent to

    form[name] = value

    With the deprecated by_label flag set, the control's
    set_value_by_label method is used instead; NotImplementedError is
    raised if the control has no such method.

    """
    if by_label:
        # Point callers at the setter replacement (the message used to name
        # the getter, copied from get_value).
        deprecation("form.set_value_by_label(...)")
    c = self.find_control(name, type, kind, id, label=label, nr=nr)
    if by_label:
        try:
            meth = c.set_value_by_label
        except AttributeError:
            raise NotImplementedError(
                "control '%s' does not yet support by_label" % c.name)
        else:
            meth(value)
    else:
        c.value = value
---|
def get_value_by_label(
    self, name=None, type=None, kind=None, id=None, label=None, nr=None):
    """Return the result of get_value_by_label() on the matching control.

    All arguments should be passed by name.

    """
    return self.find_control(
        name, type, kind, id, label=label, nr=nr).get_value_by_label()
---|
2930 | |
---|
def set_value_by_label(
    self, value,
    name=None, type=None, kind=None, id=None, label=None, nr=None):
    """Call set_value_by_label(value) on the matching control.

    All arguments should be passed by name.

    """
    control = self.find_control(name, type, kind, id, label=label, nr=nr)
    control.set_value_by_label(value)
---|
2941 | |
---|
def set_all_readonly(self, readonly):
    """Set the readonly attribute of every control to bool(readonly)."""
    flag = bool(readonly)
    for member in self.controls:
        member.readonly = flag
---|
2945 | |
---|
def clear_all(self):
    """Clear the value attributes of all controls in the form.

    Calls clear() on each control in turn.  See HTMLForm.clear.__doc__.

    """
    for member in self.controls:
        member.clear()
---|
2954 | |
---|
def clear(self,
          name=None, type=None, kind=None, id=None, nr=None, label=None):
    """Clear the value attribute of a control.

    As a result, the affected control will not be successful until a value
    is subsequently set.  AttributeError is raised on readonly controls.

    """
    self.find_control(name, type, kind, id, label=label, nr=nr).clear()
---|
2965 | |
---|
2966 | |
---|
2967 | #--------------------------------------------------- |
---|
2968 | # Form-filling methods applying only to ListControls. |
---|
2969 | |
---|
def possible_items(self,  # deprecated
                   name=None, type=None, kind=None, id=None,
                   nr=None, by_label=False, label=None):
    """Return a list of all values that the specified control can take."""
    list_control = self._find_list_control(name, type, kind, id, label, nr)
    return list_control.possible_items(by_label)
---|
2976 | |
---|
def set(self, selected, item_name,  # deprecated
        name=None, type=None, kind=None, id=None, nr=None,
        by_label=False, label=None):
    """Select / deselect named list item.

    selected: boolean selected state

    """
    list_control = self._find_list_control(name, type, kind, id, label, nr)
    list_control.set(selected, item_name, by_label)
---|
def toggle(self, item_name,  # deprecated
           name=None, type=None, kind=None, id=None, nr=None,
           by_label=False, label=None):
    """Toggle selected state of named list item."""
    list_control = self._find_list_control(name, type, kind, id, label, nr)
    list_control.toggle(item_name, by_label)
---|
2993 | |
---|
def set_single(self, selected,  # deprecated
               name=None, type=None, kind=None, id=None,
               nr=None, by_label=None, label=None):
    """Select / deselect list item in a control having only one item.

    If the control has multiple list items, ItemCountError is raised.

    This is just a convenience method, so you don't need to know the item's
    name -- the item name in these single-item controls is usually
    something meaningless like "1" or "on".

    For example, if a checkbox has a single item named "on", the following
    two calls are equivalent:

    control.toggle("on")
    control.toggle_single()

    """  # by_label ignored and deprecated
    list_control = self._find_list_control(name, type, kind, id, label, nr)
    list_control.set_single(selected)
---|
def toggle_single(self, name=None, type=None, kind=None, id=None,
                  nr=None, by_label=None, label=None):  # deprecated
    """Toggle selected state of list item in control having only one item.

    The rest is as for HTMLForm.set_single.__doc__.

    """  # by_label ignored and deprecated
    list_control = self._find_list_control(name, type, kind, id, label, nr)
    list_control.toggle_single()
---|
3022 | |
---|
3023 | #--------------------------------------------------- |
---|
3024 | # Form-filling method applying only to FileControls. |
---|
3025 | |
---|
def add_file(self, file_object, content_type=None, filename=None,
             name=None, id=None, nr=None, label=None):
    """Add a file to be uploaded.

    file_object: file-like object (with read method) from which to read
     data to upload
    content_type: MIME content type of data to upload
    filename: filename to pass to server

    If filename is None, no filename is sent to the server.

    If content_type is None, the content type is guessed based on the
    filename and the data read from the file object.

    XXX
    At the moment, guessed content type is always application/octet-stream.
    Use sndhdr, imghdr modules.  Should also try to guess HTML, XML, and
    plain text.

    Note the following useful HTML attributes of file upload controls (see
    HTML 4.01 spec, section 17):

    accept: comma-separated list of content types that the server will
     handle correctly; you can use this to filter out non-conforming files
    size: XXX IIRC, this is indicative of whether form wants multiple or
     single files
    maxlength: XXX hint of max content length in bytes?

    The control of type "file" is located with find_control from the name,
    id, label and nr arguments.  Nothing is returned.

    """
    file_control = self.find_control(name, "file", id=id, label=label, nr=nr)
    file_control.add_file(file_object, content_type, filename)
---|
3057 | |
---|
3058 | #--------------------------------------------------- |
---|
3059 | # Form submission methods, applying only to clickable controls. |
---|
3060 | |
---|
def click(self, name=None, type=None, id=None, nr=0, coord=(1,1),
          request_class=urllib2.Request,
          label=None):
    """Return request that would result from clicking on a control.

    The request object is created by the form's own request class
    (self._request_class); with urllib2.Request it can be passed to
    urllib2.urlopen (or ClientCookie.urlopen).  NOTE: the request_class
    argument is accepted but is not consulted by this method.

    Only some control types (INPUT/SUBMIT & BUTTON/SUBMIT buttons and
    IMAGEs) can be clicked.

    Will click on the first clickable control, subject to the name, type
    and nr arguments (as for find_control).  If no name, type, id or number
    is specified and there are no clickable controls, a request will be
    returned for the form in its current, un-clicked, state.

    ControlNotFoundError is raised if any of name, type, id or nr is
    specified but no matching control is found.  ValueError is raised if
    the HTMLForm has an enctype attribute that is not recognised.

    You can optionally specify a coordinate to click at, which only makes a
    difference if you clicked on an image.

    """
    return_type = "request"
    return self._click(name, type, id, label, nr, coord, return_type,
                       self._request_class)
---|
3087 | |
---|
def click_request_data(self,
                       name=None, type=None, id=None,
                       nr=0, coord=(1,1),
                       request_class=urllib2.Request,
                       label=None):
    """As for click method, but return a tuple (url, data, headers).

    You can use this data to send a request to the server.  This is useful
    if you're using httplib or urllib rather than urllib2.  Otherwise, use
    the click method.

    NOTE: as in click, the request_class argument is accepted but
    self._request_class is what gets passed on.

    # Untested.  Have to subclass to add headers, I think -- so use urllib2
    # instead!
    import urllib
    url, data, hdrs = form.click_request_data()
    r = urllib.urlopen(url, data)

    # Untested.  I don't know of any reason to use httplib -- you can get
    # just as much control with urllib2.
    import httplib, urlparse
    url, data, hdrs = form.click_request_data()
    tup = urlparse.urlparse(url)
    host, path = tup[1], urlparse.urlunparse((None, None)+tup[2:])
    conn = httplib.HTTPConnection(host)
    if data:
        conn.request("POST", path, data, hdrs)
    else:
        conn.request("GET", path, headers=hdrs)
    r = conn.getresponse()

    """
    return_type = "request_data"
    return self._click(name, type, id, label, nr, coord, return_type,
                       self._request_class)
---|
3121 | |
---|
def click_pairs(self, name=None, type=None, id=None,
                nr=0, coord=(1,1),
                label=None):
    """As for click_request_data, but returns a list of (key, value) pairs.

    You can use this list as an argument to ClientForm.urlencode.  This is
    usually only useful if you're using httplib or urllib rather than
    urllib2 or ClientCookie.  It may also be useful if you want to manually
    tweak the keys and/or values, but this should not be necessary.
    Otherwise, use the click method.

    Note that this method is only useful for forms of MIME type
    x-www-form-urlencoded.  In particular, it does not return the
    information required for file upload.  If you need file upload and are
    not using urllib2, use click_request_data.

    Also note that Python 2.0's urllib.urlencode is slightly broken: it
    only accepts a mapping, not a sequence of pairs, as an argument.  This
    messes up any ordering in the argument.  Use ClientForm.urlencode
    instead.

    """
    return_type = "pairs"
    return self._click(name, type, id, label, nr, coord, return_type,
                       self._request_class)
---|
3146 | |
---|
3147 | #--------------------------------------------------- |
---|
3148 | |
---|
def find_control(self,
                 name=None, type=None, kind=None, id=None,
                 predicate=None, nr=None,
                 label=None):
    """Locate and return some specific control within the form.

    At least one of the name, type, kind, id, label, predicate and nr
    arguments must be supplied, otherwise ValueError is raised.  If no
    matching control is found, ControlNotFoundError is raised.

    If name is specified, then the control must have the indicated name.

    If type is specified then the control must have the specified type (in
    addition to the types possible for <input> HTML tags: "text",
    "password", "hidden", "submit", "image", "button", "radio", "checkbox",
    "file" we also have "reset", "buttonbutton", "submitbutton",
    "resetbutton", "textarea", "select" and "isindex").

    If kind is specified, then the control must fall into the specified
    group, each of which satisfies a particular interface.  The types are
    "text", "list", "multilist", "singlelist", "clickable" and "file".

    If id is specified, then the control must have the indicated id.

    If predicate is specified, then the control must match that function.
    The predicate function is passed the control as its single argument,
    and should return a boolean value indicating whether the control
    matched.

    nr, if supplied, is the sequence number of the control (where 0 is the
    first).  Note that control 0 is the first control matching all the
    other arguments (if supplied); it is not necessarily the first control
    in the form.  If no nr is supplied, AmbiguityError is raised if
    multiple controls match the other arguments (unless the
    .backwards_compat attribute is true).

    If label is specified, then the control must have this label.  Note
    that radio controls and checkboxes never have labels: their items do.

    """
    # Identity tests against None, so falsy values such as "" or 0 still
    # count as a supplied criterion.
    for criterion in (name, type, kind, id, label, predicate, nr):
        if criterion is not None:
            break
    else:
        raise ValueError(
            "at least one argument must be supplied to specify control")
    return self._find_control(name, type, kind, id, label, predicate, nr)
---|
3195 | |
---|
3196 | #--------------------------------------------------- |
---|
3197 | # Private methods. |
---|
3198 | |
---|
def _find_list_control(self,
                       name=None, type=None, kind=None, id=None,
                       label=None, nr=None):
    """Find a control as _find_control does, using is_listcontrol as the
    predicate.

    ValueError is raised when every one of name, type, kind, id, label and
    nr is None.

    """
    supplied = [criterion
                for criterion in (name, type, kind, id, label, nr)
                if criterion is not None]
    if not supplied:
        raise ValueError(
            "at least one argument must be supplied to specify control")

    return self._find_control(name, type, kind, id, label,
                              is_listcontrol, nr)
---|
3209 | |
---|
def _find_control(self, name, type, kind, id, label, predicate, nr):
    """Return the control in self.controls that satisfies every criterion.

    name, type, kind, id, label: string-like values or None (None means
     "do not filter on this").  name may also be Missing, in which case
     controls whose name is None match.  label matches when it occurs as
     a substring of the text of any of the control's labels.
    predicate: callable taking a control and returning true for a match,
     or None
    nr: zero-based position among the matching controls, or None.  If nr
     is None and self.backwards_compat is true, the first match is used.

    TypeError is raised for a criterion of the wrong type and ValueError
    for a negative nr.  AmbiguityError is raised if nr is None and more
    than one control matches; ControlNotFoundError is raised if nothing
    matches.

    """
    if ((name is not None) and (name is not Missing) and
        not isstringlike(name)):
        raise TypeError("control name must be string-like")
    if (type is not None) and not isstringlike(type):
        raise TypeError("control type must be string-like")
    if (kind is not None) and not isstringlike(kind):
        raise TypeError("control kind must be string-like")
    if (id is not None) and not isstringlike(id):
        raise TypeError("control id must be string-like")
    if (label is not None) and not isstringlike(label):
        raise TypeError("control label must be string-like")
    if (predicate is not None) and not callable(predicate):
        raise TypeError("control predicate must be callable")
    if (nr is not None) and nr < 0:
        raise ValueError("control number must be a positive integer")

    # nr is counted down below; keep the caller's value for error messages.
    orig_nr = nr
    found = None
    ambiguous = False
    if nr is None and self.backwards_compat:
        nr = 0

    for control in self.controls:
        if ((name is not None and name != control.name) and
            (name is not Missing or control.name is not None)):
            continue
        if type is not None and type != control.type:
            continue
        if kind is not None and not control.is_of_kind(kind):
            continue
        if id is not None and id != control.id:
            continue
        if predicate and not predicate(control):
            continue
        if label:
            for control_label in control.get_labels():
                if control_label.text.find(label) > -1:
                    break
            else:
                continue
        if nr is not None:
            if nr == 0:
                return control  # early exit: unambiguous due to nr
            nr -= 1
            continue
        # Compare with None rather than relying on the control's truth
        # value, so that a control which happens to be falsy is still
        # recognised as a match (and as one half of an ambiguity).
        if found is not None:
            ambiguous = True
            break
        found = control

    if found is not None and not ambiguous:
        return found

    description = []
    if name is not None: description.append("name %s" % repr(name))
    if type is not None: description.append("type '%s'" % type)
    if kind is not None: description.append("kind '%s'" % kind)
    if id is not None: description.append("id '%s'" % id)
    if label is not None: description.append("label '%s'" % label)
    if predicate is not None:
        description.append("predicate %s" % predicate)
    # "is not None" so that an explicit nr=0 is reported as well.
    if orig_nr is not None: description.append("nr %d" % orig_nr)
    description = ", ".join(description)

    if ambiguous:
        raise AmbiguityError("more than one control matching "+description)
    raise ControlNotFoundError("no control matching "+description)
---|
3280 | |
---|
def _click(self, name, type, id, label, nr, coord, return_type,
           request_class=urllib2.Request):
    """Click the matching clickable control and return the result.

    name, type, id, label, nr: criteria passed to _find_control, together
     with the kind "clickable"
    coord: coordinate passed to the control's _click method
    return_type: passed on unchanged; selects what is returned
    request_class: passed on unchanged

    If no clickable control matches and the caller did not ask for a
    particular one (name, type, id and label all None, and nr equal to 0),
    the result for the form in its current, un-clicked, state is returned
    via _switch_click.  Otherwise ControlNotFoundError propagates.

    """
    try:
        control = self._find_control(
            name, type, "clickable", id, label, None, nr)
    except ControlNotFoundError:
        # label counts as an explicit request just like name, type and id:
        # failing to find a requested label must not silently submit the
        # form un-clicked.
        if ((name is not None) or (type is not None) or (id is not None) or
            (label is not None) or (nr != 0)):
            raise
        # no clickable controls, but no control was explicitly requested,
        # so return state without clicking any control
        return self._switch_click(return_type, request_class)
    else:
        return control._click(self, coord, return_type, request_class)
---|
3295 | |
---|
def _pairs(self):
    """Return sequence of (key, value) pairs suitable for urlencoding.

    The pairs are taken, in order, from the four-item entries produced by
    _pairs_and_controls; the index and control index are dropped.

    """
    pairs = []
    for entry in self._pairs_and_controls():
        index, key, value, control_index = entry
        pairs.append((key, value))
    return pairs
---|
3299 | |
---|
3300 | |
---|
def _pairs_and_controls(self):
    """Return sequence of (index, key, value, control_index)
    of totally ordered pairs suitable for urlencoding.

    control_index is the index of the control in self.controls

    The result is ordered by index alone; entries that share an index keep
    the order in which the controls yielded them.
    """
    decorated = []
    sequence = 0
    for control_index in range(len(self.controls)):
        control = self.controls[control_index]
        for ii, key, val in control._totally_ordered_pairs():
            # The running sequence number sits directly after the index, so
            # a tie on the index is settled by arrival order and the sort
            # never goes on to compare keys, values or control positions.
            decorated.append((ii, sequence, key, val, control_index))
            sequence = sequence + 1

    # stable sort by ONLY first item in tuple
    decorated.sort()

    return [(ii, key, val, control_index)
            for (ii, sequence, key, val, control_index) in decorated]
---|
3317 | |
---|
def _request_data(self):
    """Return a tuple (url, data, headers).

    For a GET form the url carries the urlencoded pairs as its query, data
    is None and headers is an empty list.  For a POST form the action's own
    query is kept in the url, and data holds either the urlencoded pairs
    or a multipart/form-data body, depending on self.enctype.  In both
    cases the last component of the parsed action (the fragment) is
    replaced by None.

    ValueError is raised for a method other than GET or POST, and for an
    enctype that the method does not support.

    """
    urlencoded_type = "application/x-www-form-urlencoded"
    method = self.method.upper()
    parts = self._urlparse(self.action)
    rest = parts[:-2]
    query, frag = parts[-2:]

    if method == "GET":
        if self.enctype != urlencoded_type:
            raise ValueError(
                "unknown GET form encoding type '%s'" % self.enctype)
        get_parts = rest + (urlencode(self._pairs()), None)
        return self._urlunparse(get_parts), None, []

    if method != "POST":
        raise ValueError("Unknown method '%s'" % method)

    uri = self._urlunparse(rest + (query, None))
    if self.enctype == urlencoded_type:
        content_type_hdr = ("Content-type", self.enctype)
        return (uri, urlencode(self._pairs()), [content_type_hdr])
    if self.enctype != "multipart/form-data":
        raise ValueError(
            "unknown POST form encoding type '%s'" % self.enctype)

    body = StringIO()
    http_hdrs = []
    writer = MimeWriter(body, http_hdrs)
    writer.startmultipartbody("form-data", add_to_http_hdrs=True, prefix=0)
    for ii, key, value, control_index in self._pairs_and_controls():
        # each control writes its own part
        self.controls[control_index]._write_mime_data(writer, key, value)
    writer.lastpart()
    return uri, body.getvalue(), http_hdrs
---|
3353 | |
---|
def _switch_click(self, return_type, request_class=urllib2.Request):
    """Return the form's submission data in the shape named by return_type.

    "pairs" gives the result of _pairs and "request_data" the result of
    _request_data.  Any other value gives a request_class instance built
    from the url and data of _request_data, with the headers added to it.

    """
    # This is called by HTMLForm and clickable Controls to hide switching
    # on return_type.
    if return_type == "pairs":
        return self._pairs()
    if return_type == "request_data":
        return self._request_data()

    url, data, headers = self._request_data()[:3]
    req = request_class(url, data)
    for header_name, header_value in headers:
        add_hdr = req.add_header
        if header_name.lower() == "content-type":
            # Prefer add_unredirected_header for the content type; request
            # objects lacking it (pre-2.4 and not using ClientCookie) fall
            # back to add_header.
            add_hdr = getattr(req, "add_unredirected_header", add_hdr)
        add_hdr(header_name, header_value)
    return req
---|