root/galaxy-central/eggs/twill-0.9-py2.6.egg/twill/utils.py @ 3

リビジョン 3, 13.8 KB (コミッタ: kohda, 14 年 前)

Install Unix tools  http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

行番号 
1"""
2Various ugly utility functions for twill.
3
4Apart from various simple utility functions, twill's robust parsing
5code is implemented in the ConfigurableParsingFactory class.
6"""
7
8from cStringIO import StringIO
9import os
10import base64
11
12import subprocess
13
14import _mechanize_dist as mechanize
15from _mechanize_dist import ClientForm
16from _mechanize_dist._util import time
17from _mechanize_dist._http import HTTPRefreshProcessor
18from _mechanize_dist import BrowserStateError
19
class ResultWrapper:
    """
    Uniform view of a fetched page, whatever library produced it
    (mechanize, urllib2, ...).  'journey'-wrapped functions hand these
    back to their callers.
    """
    def __init__(self, http_code, url, page):
        # A missing status code is recorded as 200; anything else is
        # coerced to an int.
        self.http_code = 200 if http_code is None else int(http_code)
        self.url = url
        self.page = page

    def get_url(self):
        """Return the URL stored at construction time."""
        return self.url

    def get_http_code(self):
        """Return the HTTP status code as an int."""
        return self.http_code

    def get_page(self):
        """Return the page content stored at construction time."""
        return self.page
41
def trunc(s, length):
    """
    Shorten the string s so that it is at most 'length' characters long.

    A string that already fits is returned unchanged.  A longer one is
    cut to its first (length-4) characters followed by ' ...'.  When
    'length' leaves no room for the ' ...' marker (length < 4) the string
    is simply cut to 'length' characters, so the result never exceeds the
    requested width.

    Any false value of s (None, '') yields ''.
    """
    if not s:
        return ''

    if len(s) <= length:
        return s

    marker = ' ...'
    if length < len(marker):
        # a negative slice bound here would return *more* text than
        # allowed, so fall back to a plain cut.
        return s[:max(length, 0)]

    return s[:length - len(marker)] + marker
54
def print_form(n, f, OUT):
    """
    Pretty-print the given form, assigned # n, onto the stream OUT.

    n is the zero-based position of the form (it is displayed one-based).
    f is the form object; its 'name' and 'controls' attributes are read.
    One row is written per control: its one-based position, its index
    among the clickable controls (blank for the others), then its name,
    type, id and value, each truncated to its column width.

    All output, including the closing blank line, goes to OUT.
    """
    if f.name:
        print>>OUT, '\nForm name=%s (#%d)' % (f.name, n + 1)
    else:
        print>>OUT, '\nForm #%d' % (n + 1,)

    if f.controls:
        print>>OUT, "## ## __Name__________________ __Type___ __ID________ __Value__________________"

    # number the clickable controls 1, 2, ... in the order they appear.
    submit_indices = {}
    next_submit = 1
    for control in f.controls:
        if control.is_of_kind('clickable'):
            submit_indices[control] = next_submit
            next_submit += 1

    for position, field in enumerate(f.controls):
        if hasattr(field, 'items'):
            # list-like control: show the current value and all choices.
            items = [ i.name for i in field.items ]
            value_displayed = "%s of %s" % (field.value, items)
        else:
            value_displayed = "%s" % (field.value,)

        if field.is_of_kind('clickable'):
            submit_index = "%-2s" % (submit_indices[field],)
        else:
            submit_index = "  "

        strings = ("%-2s" % (position + 1,),
                   submit_index,
                   "%-24s %-9s" % (trunc(str(field.name), 24),
                                   trunc(field.type, 9)),
                   "%-12s" % (trunc(field.id or "(None)", 12),),
                   trunc(value_displayed, 40),
                   )
        for s in strings:
            print>>OUT, s,
        print>>OUT, ''

    # closing blank line: written to OUT like the rest of the listing,
    # rather than unconditionally to standard output.
    print>>OUT, ''
101
def make_boolean(value):
    """
    Convert the input value into a boolean like so:

    >> make_boolean('true')
    True
    >> make_boolean('false')
    False
    >> make_boolean('1')
    True
    >> make_boolean('0')
    False
    >> make_boolean('+')
    True
    >> make_boolean('-')
    False

    'on' and 'off' are accepted as well.  The value is passed through
    str() first, and case and surrounding whitespace are ignored.  Any
    integer spelling is accepted: zero is False, everything else True.

    Raises TwillException when the value matches none of these forms.
    """
    value = str(value).lower().strip()

    # true/false, +/-, on/off
    if value in ('true', '+', 'on'):
        return True
    if value in ('false', '-', 'off'):
        return False

    # 0/nonzero
    try:
        return bool(int(value))
    except ValueError:
        pass

    # TwillException is not imported at module level, so without this the
    # failure path would die with a NameError instead.
    # NOTE(review): assumes the exception class lives in twill.errors --
    # confirm against the package layout.
    from twill.errors import TwillException
    raise TwillException("unable to convert '%s' into true/false" % (value,))
148
def set_form_control_value(control, val):
    """
    Assign val to a form control, coping with checkboxes and list-style
    controls as well as plain value fields.

    - A checkbox control holding a single box is switched on or off
      according to make_boolean(val).
    - A list control (including a checkbox control with several boxes)
      has one item selected or deselected.  val names the item, by value
      first and by label second; a leading '-' deselects it, a leading
      '+' (or no prefix) selects it.
    - Any other control simply receives val as its value.

    Raises ClientForm.ItemNotFoundError when no item, or more than one
    label, matches val in a list control.
    """
    if isinstance(control, ClientForm.CheckboxControl):
        try:
            box = control.get()
            box.selected = make_boolean(val)
        except ClientForm.AmbiguityError:
            # several boxes share this control: treat it as a list
            # control below.
            pass
        else:
            return

    if not isinstance(control, ClientForm.ListControl):
        control.value = val
        return

    # Decide whether the item is to be selected or deselected, then drop
    # the optional +/- prefix from the item name.
    select = not val.startswith('-')
    if val.startswith(('-', '+')):
        val = val[1:]

    # Look the item up by value, falling back to its label.
    try:
        item = control.get(name=val)
    except ClientForm.ItemNotFoundError:
        try:
            item = control.get(label=val)
        except ClientForm.AmbiguityError:
            raise ClientForm.ItemNotFoundError('multiple matches to value/label "%s" in list control' % (val,))
        except ClientForm.ItemNotFoundError:
            raise ClientForm.ItemNotFoundError('cannot find value/label "%s" in list control' % (val,))

    item.selected = int(select)
198
199def _all_the_same_submit(matches):
200    """
201    Utility function to check to see if a list of controls all really
202    belong to the same control: for use with checkboxes, hidden, and
203    submit buttons.
204    """
205    name = None
206    value = None
207    for match in matches:
208        if match.type not in ['submit', 'hidden']:
209            return False
210        if name is None:
211            name = match.name
212            value = match.value
213        else:
214            if match.name != name or match.value!= value:
215                return False
216    return True
217
218def _all_the_same_checkbox(matches):
219    """
220    Check whether all these controls are actually the the same
221    checkbox.
222
223    Hidden controls can combine with checkboxes, to allow form
224    processors to ensure a False value is returned even if user
225    does not check the checkbox. Without the hidden control, no
226    value would be returned.
227    """
228    name = None
229    for match in matches:
230        if match.type not in ['checkbox', 'hidden']:
231            return False
232        if name is None:
233            name = match.name
234        else:
235            if match.name != name:
236                return False
237    return True
238
def unique_match(matches):
    """
    Tell whether the matched controls can be treated as a single control:
    either there is exactly one of them, or they all belong to the same
    checkbox, or they all belong to the same submit control.
    """
    if len(matches) == 1:
        return True
    return _all_the_same_checkbox(matches) or _all_the_same_submit(matches)
243
244#
245# stuff to run 'tidy'...
246#
247
_tidy_cmd = ["tidy", "-q", "-ashtml"]
_tidy_exists = True

def run_tidy(html):
    """
    Run the 'tidy' command-line program on the given HTML string.

    Return a 2-tuple (output, errors) holding what tidy wrote to its
    standard output and standard error.  (None, None) is returned when
    the 'tidy' program cannot be started; that outcome is remembered in
    _tidy_exists so later calls do not try to launch it again.

    Raises TwillException if tidy could not be run and the 'require_tidy'
    option is set.
    """
    global _tidy_exists

    from twill.commands import _options
    require_tidy = _options.get('require_tidy')

    clean_html = errors = None

    #
    # run the command, if we think it exists
    #

    if _tidy_exists:
        try:
            process = subprocess.Popen(_tidy_cmd, stdin=subprocess.PIPE,
                                       stdout=subprocess.PIPE,
                                       stderr=subprocess.PIPE, bufsize=0,
                                       shell=False)

            (clean_html, errors) = process.communicate(html)
        except OSError:
            # could not launch tidy; don't bother trying again.
            _tidy_exists = False

    if require_tidy and clean_html is None:
        # TwillException is not imported at module level.
        # NOTE(review): assumes the exception class lives in twill.errors
        # -- confirm against the package layout.
        from twill.errors import TwillException
        raise TwillException("tidy does not exist and require_tidy is set")

    return (clean_html, errors)
292
class ConfigurableParsingFactory(mechanize.Factory):
    """
    A parsing factory driven by twill's configuration options.

    For every response it is given, it consults the options to decide
    whether the HTML is first cleaned up with tidy ('use_tidy') and
    whether parsing is delegated to the BeautifulSoup-based factory or to
    the regular one ('use_BeautifulSoup').
    """

    def __init__(self):
        # one delegate per parsing strategy; set_response() picks between
        # them each time a page arrives.
        self.basic_factory = mechanize.DefaultFactory()
        self.soup_factory = mechanize.RobustFactory()

        self.set_response(None)

    def set_request_class(self, request_class):
        """Pass the request class on to both delegate factories."""
        for delegate in (self.basic_factory, self.soup_factory):
            delegate.set_request_class(request_class)

    def set_response(self, response):
        """
        Make 'response' the current page, or forget the current page
        when 'response' is false.
        """
        if response:
            if self.use_BS():
                self.factory = self.soup_factory
            else:
                self.factory = self.basic_factory
            self.factory.set_response(self._cleanup_html(response))
        else:
            self.factory = None
            self._orig_html = None
            self._html = None
            self._url = None

    def links(self):
        """Return the links reported by the active delegate factory."""
        return self.factory.links()

    def forms(self):
        """Return the forms reported by the active delegate factory."""
        return self.factory.forms()

    def get_global_form(self):
        return self.factory.global_form
    global_form = property(get_global_form)

    def _get_title(self):
        return self.factory.title
    title = property(_get_title)

    def _get_encoding(self):
        return self.factory.encoding
    encoding = property(_get_encoding)

    def _get_is_html(self):
        return self.factory.is_html
    is_html = property(_get_is_html)

    def _cleanup_html(self, response):
        """
        Record the response's body and URL, optionally run the body
        through tidy, and return a new response built around the
        resulting HTML.
        """
        response.seek(0)
        raw_html = response.read()
        self._orig_html = raw_html
        self._url = response.geturl()
        response.seek(0)          # leave the original response rewound

        self._html = raw_html

        from twill.commands import _options
        if _options.get('use_tidy'):
            (tidied, _tidy_errors) = run_tidy(self._html)
            # keep the untouched HTML when tidy produced nothing.
            if tidied:
                self._html = tidied

        return mechanize.make_response(self._html, response._headers.items(),
                                       response._url, response.code,
                                       response.msg)

    def use_BS(self):
        """Return the current setting of the 'use_BeautifulSoup' option."""
        from twill.commands import _options
        return _options.get('use_BeautifulSoup')
373
374###
375
class FixedHTTPBasicAuthHandler(mechanize.HTTPBasicAuthHandler):
    """
    Fix a bug that exists through Python 2.4 (but NOT in 2.5!)
    """
    def retry_http_basic_auth(self, host, req, realm):
        """
        Retry 'req' with a Basic Authorization header for 'realm'.

        Returns the response of the retried request, or None when no
        password is known for the realm or when the request already
        carried exactly these credentials.
        """
        # The credentials are looked up by the request's full URL; the
        # 'host' argument is not used (this is the spot flagged by CTB in
        # the original code).
        user, pw = self.passwd.find_user_password(realm, req.get_full_url())
        if pw is None:
            return None

        raw = "%s:%s" % (user, pw)
        # b64encode never inserts line breaks, so long credentials cannot
        # put a newline in the middle of the header value.
        auth = 'Basic %s' % base64.b64encode(raw).strip()
        if req.headers.get(self.auth_header, None) == auth:
            # same credentials were already sent; don't retry again.
            return None

        req.add_header(self.auth_header, auth)
        return self.parent.open(req)
392   
393
394###
395
_debug_print_refresh = False
class FunctioningHTTPRefreshProcessor(HTTPRefreshProcessor):
    """
    Fix an issue where the 'content' component of the http-equiv=refresh
    tag may not contain 'url='.  CTB hack.

    A refresh value has the form '<delay>; <target>', where <target> is
    either 'url=<address>' or just '<address>'.
    """
    def http_response(self, request, response):
        """
        Follow a 'refresh' header on a 200 response when the
        'acknowledge_equiv_refresh' option is set; otherwise return the
        response unchanged.  A refresh value with no ';' or with a delay
        that is not a number is ignored.
        """
        from twill.commands import OUT, _options
        do_refresh = _options.get('acknowledge_equiv_refresh')

        code, msg, hdrs = response.code, response.msg, response.info()

        if not (code == 200 and hdrs.has_key("refresh") and do_refresh):
            return response

        refresh = hdrs.getheaders("refresh")[0]

        if _debug_print_refresh:
            print>>OUT, "equiv-refresh DEBUG: code 200, hdrs has 'refresh'"
            print>>OUT, "equiv-refresh DEBUG: refresh header is", refresh

        i = refresh.find(";")
        if i == -1:
            return response

        pause, newurl_spec = refresh[:i], refresh[i+1:]
        try:
            # fractional delays are accepted too.
            pause = float(pause)
        except ValueError:
            # malformed delay: leave the response alone.
            return response

        if _debug_print_refresh:
            print>>OUT, "equiv-refresh DEBUG: pause:", pause
            print>>OUT, "equiv-refresh DEBUG: new url:", newurl_spec

        # Strip a leading 'url=' only when it is really there; a bare
        # address may itself contain '=' (e.g. in its query string) and
        # must be kept whole.
        key, sep, rest = newurl_spec.partition("=")
        if sep and key.strip().lower() == "url":
            newurl = rest
        else:
            newurl = newurl_spec
        newurl = newurl.strip()

        if _debug_print_refresh:
            print>>OUT, "equiv-refresh DEBUG: final url:", newurl

        print>>OUT, "Following HTTP-EQUIV=REFRESH to %s" % (newurl,)

        if (self.max_time is None) or (pause <= self.max_time):
            # CTB hack: the requested delay is deliberately not honored;
            # the refresh is followed immediately.
            hdrs["location"] = newurl
            # hardcoded http is NOT a bug
            response = self.parent.error(
                "http", request, response,
                "refresh", msg, hdrs)

        return response

    https_response = http_response
447
448####
449
class HistoryStack(mechanize._mechanize.History):
    """
    History that can additionally be measured with len() and indexed,
    both served straight from the underlying '_history' sequence.
    """
    def __len__(self):
        entries = self._history
        return len(entries)

    def __getitem__(self, i):
        entries = self._history
        return entries[i]
455   
456####
457
458def _is_valid_filename(f):
459    return not (f.endswith('~') or f.endswith('.bak') or f.endswith('.old'))
460
def gather_filenames(arglist):
    """
    Expand a list of paths into a flat list of script files.

    Each directory in arglist is walked recursively and replaced by the
    sorted list of the files found beneath it, skipping names rejected
    by _is_valid_filename.  Every other entry is passed through as is,
    in its original position.
    """
    collected = []

    for entry in arglist:
        if not os.path.isdir(entry):
            collected.append(entry)
            continue

        collected.extend(sorted(
            os.path.join(dirpath, name)
            for (dirpath, _dirnames, names) in os.walk(entry)
            for name in names
            if _is_valid_filename(name)))

    return collected
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。