root/galaxy-central/eggs/twill-0.9-py2.6.egg/twill/extensions/check_links.py

Revision 3, 5.4 KB (committer: kohda, 14 years ago)

Install Unix tools  http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

1"""
2Extension functions to check all of the links on a page.
3
4Usage:
5
6   check_links [ <pattern> ]
7
8Make sure that all of the HTTP links on the current page can be visited
9successfully.  If 'pattern' is given, check only URLs that match that
10regular expression.
11
12If option 'check_links.only_collect_bad_links' is on, then all bad
13links are silently collected across all calls to check_links.  The
14function 'report_bad_links' can then be used to report all of the links,
15together with their referring pages.
16"""

__all__ = ['check_links', 'report_bad_links']

DEBUG = True

import re
from twill import commands
from twill.errors import TwillAssertionError

### first, set up config options & persistent 'bad links' memory...

if commands._options.get('check_links.only_collect_bad_links') is None:
    commands._options['check_links.only_collect_bad_links'] = False

bad_links_dict = {}
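
# Note (added for illustration): the option above can also be toggled from
# a twill script, e.g. with the 'config' command if your twill version
# provides it:
#
#   config check_links.only_collect_bad_links 1
#
# With the option on, check_links records failures in bad_links_dict
# (keyed by bad URL, valued by the list of referring pages) instead of
# raising immediately; report_bad_links then prints and flushes them.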

#
# main function: 'check_links'
#

def check_links(pattern='', visited={}):
    """
    >> check_links [ <pattern> ]

    Make sure that all of the HTTP links on the current page can be visited
    with an HTTP response 200 (success).  If 'pattern' is given, interpret
    it as a regular expression that link URLs must contain in order to be
    tested, e.g.

        check_links http://.*\.google\.com

    would check only links to google URLs.  Note that because 'follow'
    is used to visit the pages, the referrer URL is properly set on the
    visit.
    """
    from twill import commands

    if DEBUG:
        print 'in check_links'

    OUT = commands.OUT
    browser = commands.browser

    #
    # compile the regexp
    #

    regexp = None
    if pattern:
        regexp = re.compile(pattern)

    #
    # iterate over all links, collecting those that match.
    #
    # note that in the case of duplicate URLs, only one of the
    # links is actually followed!
    #

    collected_urls = {}

    links = list(browser._browser.links())
    if not links:
        if DEBUG:
            print>>OUT, "no links to check!?"
        return

    for link in links:
        url = link.absolute_url
        url = url.split('#', 1)[0]      # get rid of subpage pointers

        if not (url.startswith('http://') or url.startswith('https://')):
            if DEBUG:
                print>>OUT, "url '%s' is not an HTTP link; ignoring" % (url,)
            continue

        if regexp:
            if regexp.search(url):
                collected_urls[url] = link
                if DEBUG:
                    print>>OUT, "Gathered URL %s -- matched regexp" % (url,)
            elif DEBUG:
                print>>OUT, "URL %s doesn't match regexp" % (url,)
        else:
            collected_urls[url] = link
            if DEBUG:
                print>>OUT, "Gathered URL %s." % (url,)

    #
    # now, for each unique URL, follow the link. Trap ALL exceptions
    # as failures.
    #

    failed = []
    for link in collected_urls.values():
        went = False
        try:
            if DEBUG:
                print>>OUT, "Trying %s" % (link.absolute_url,),

            if not visited.has_key(link.absolute_url):
                went = True
                browser.follow_link(link)

                code = browser.get_code()
                assert code == 200

                visited[link.absolute_url] = 1

                if DEBUG:
                    print>>OUT, '...success!'
            else:
                if DEBUG:
                    print>>OUT, ' (already visited successfully)'
        except:
            failed.append(link.absolute_url)
            if DEBUG:
                print>>OUT, '...failure ;('

        if went:
            browser.back()

    if failed:
        if commands._options['check_links.only_collect_bad_links']:
            for l in failed:
                referring_pages = bad_links_dict.get(l, [])
                print '***', browser.get_url()
                referring_pages.append(browser.get_url())
                bad_links_dict[l] = referring_pages
        else:
            print>>OUT, '\nCould not follow %d links' % (len(failed),)
            print>>OUT, '\t%s\n' % '\n\t'.join(failed)
            raise TwillAssertionError("broken links on page")

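# A minimal Python-level sketch (an addition, not in the original source):
# check_links can also be called directly once twill has a current page.
# The URL below is a placeholder.
#
#   from twill import commands
#   commands.go('http://www.example.com/')
#   check_links()                  # check every HTTP(S) link on the page
#   check_links(r'https://')      # or only links whose URL matches a pattern
#
# Because 'visited' is a mutable default argument, URLs that were visited
# successfully are remembered across calls within the same process.
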
def report_bad_links(fail_if_exist='+', flush_bad_links='+'):
    """
    >> report_bad_links [<fail-if-exist> [<flush-bad-links>]]

    Report all of the links collected across check_links runs (collected
    if and only if the config option check_links.only_collect_bad_links
    is set).

    If <fail-if-exist> is true (the default) then the command will
    fail after reporting any bad links.

    If <flush-bad-links> is false (true by default) then the list of
    bad links will be retained across the function call.
    """
    global bad_links_dict

    from twill import utils
    fail_if_exist = utils.make_boolean(fail_if_exist)
    flush_bad_links = utils.make_boolean(flush_bad_links)

    from twill import commands
    OUT = commands.OUT

    if not bad_links_dict:
        print>>OUT, '\nNo bad links to report.\n'
    else:
        print>>OUT, '\nCould not follow %d links' % (len(bad_links_dict),)
        for page, referers in bad_links_dict.items():
            err_msg = "\t link '%s' (occurs on: " % (page,) \
                      + ",".join(referers) + ')'
            print>>OUT, err_msg

        if flush_bad_links:
            bad_links_dict = {}

        if fail_if_exist:
            raise TwillAssertionError("broken links encountered")
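

# The demo below is an illustrative addition (not part of the original
# twill 0.9 module): a small self-contained run showing the
# collect-and-report workflow.  The start URL is a placeholder.
if __name__ == '__main__':
    from twill import commands

    # Collect bad links quietly instead of failing on the first bad page.
    commands._options['check_links.only_collect_bad_links'] = True

    commands.go('http://www.example.com/')
    check_links()                 # gather any broken links on this page

    # '+' (true) and '-' (false) are among the spellings accepted by
    # twill's make_boolean; report_bad_links('+', '+') reports, raises
    # if any links were bad, and flushes the collected list.
    report_bad_links('+', '+')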