root/galaxy-central/eggs/twill-0.9-py2.6.egg/twill/extensions/check_links.py @ 3

Revision 3, 5.4 KB (committer: kohda, 14 years ago)

Install Unix tools  http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

1"""
2Extension functions to check all of the links on a page.
3
4Usage:
5
6   check_links [ <pattern> ]
7
8Make sure that all of the HTTP links on the current page can be visited
9successfully.  If 'pattern' is given, check only URLs that match that
10regular expression.
11
12If option 'check_links.only_collect_bad_links' is on, then all bad
13links are silently collected across all calls to check_links.  The
14function 'report_bad_links' can then be used to report all of the links,
15together with their referring pages.
16"""
17
18__all__ = ['check_links', 'report_bad_links']
19
20DEBUG=True
21
22import re
23from twill import commands
24from twill.errors import TwillAssertionError
25
26### first, set up config options & persistent 'bad links' memory...
27
28if commands._options.get('check_links.only_collection_bad_links') is None:
29    commands._options['check_links.only_collect_bad_links'] = False
30
31bad_links_dict = {}
32
33#
34# main function: 'check_links'
35#
36
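# NOTE: the mutable default argument 'visited' below is shared across calls
# and acts as a cache of URLs that have already been checked successfully.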
def check_links(pattern = '', visited={}):
    """
    >> check_links [ <pattern> ]

    Make sure that all of the HTTP links on the current page can be visited
    with an HTTP response 200 (success).  If 'pattern' is given, interpret
    it as a regular expression that link URLs must contain in order to be
    tested, e.g.

        check_links http://.*\.google\.com

    would check only links to google URLs.  Note that because 'follow'
    is used to visit the pages, the referrer URL is properly set on the
    visit.
    """
    from twill import commands

    if DEBUG:
        print 'in check_links'

    OUT = commands.OUT
    browser = commands.browser

    #
    # compile the regexp
    #

    regexp = None
    if pattern:
        regexp = re.compile(pattern)

    #
    # iterate over all links, collecting those that match.
    #
    # note that in the case of duplicate URLs, only one of the
    # links is actually followed!
    #

    collected_urls = {}

    links = list(browser._browser.links())
    if not links:
        if DEBUG:
            print>>OUT, "no links to check!?"
        return

    for link in links:
        url = link.absolute_url
        url = url.split('#', 1)[0]      # get rid of subpage pointers

        if not (url.startswith('http://') or url.startswith('https://')):
            if DEBUG:
                print>>OUT, "url '%s' is not an HTTP link; ignoring" % (url,)
            continue

        if regexp:
            if regexp.search(url):
                collected_urls[url] = link
                if DEBUG:
                    print>>OUT, "Gathered URL %s -- matched regexp" % (url,)
            elif DEBUG:
                print>>OUT, "URL %s doesn't match regexp" % (url,)
        else:
            collected_urls[url] = link
            if DEBUG:
                print>>OUT, "Gathered URL %s." % (url,)

    #
    # now, for each unique URL, follow the link. Trap ALL exceptions
    # as failures.
    #

    failed = []
    for link in collected_urls.values():
        went = False
        try:
            if DEBUG:
                print>>OUT, "Trying %s" % (link.absolute_url,),

            if not visited.has_key(link.absolute_url):
                went = True
                browser.follow_link(link)

                code = browser.get_code()
                assert code == 200

                visited[link.absolute_url] = 1

                if DEBUG:
                    print>>OUT, '...success!'
            else:
                if DEBUG:
                    print>>OUT, ' (already visited successfully)'
        except:
            failed.append(link.absolute_url)
            if DEBUG:
                print>>OUT, '...failure ;('

        if went:
            browser.back()

    if failed:
        if commands._options['check_links.only_collect_bad_links']:
            for l in failed:
                refering_pages = bad_links_dict.get(l, [])
                print '***', browser.get_url()
                refering_pages.append(browser.get_url())
                bad_links_dict[l] = refering_pages
        else:
            print>>OUT, '\nCould not follow %d links' % (len(failed),)
            print>>OUT, '\t%s\n' % '\n\t'.join(failed)
            raise TwillAssertionError("broken links on page")

def report_bad_links(fail_if_exist='+', flush_bad_links='+'):
    """
    >> report_bad_links [<fail-if-exist> [<flush-bad-links>]]

    Report all of the links collected across check_links runs (collected
    if and only if the config option check_links.only_collect_bad_links
    is set).

    If <fail-if-exist> is true (the default) then the command will
    fail after reporting any bad links.

    If <flush-bad-links> is false (true by default) then the list of
    bad links will be retained across the function call.
    """
    global bad_links_dict

    from twill import utils
    fail_if_exist = utils.make_boolean(fail_if_exist)
    flush_bad_links = utils.make_boolean(flush_bad_links)

    from twill import commands
    OUT = commands.OUT

    if not bad_links_dict:
        print>>OUT, '\nNo bad links to report.\n'
    else:
        print>>OUT, '\nCould not follow %d links' % (len(bad_links_dict),)
        for page, referers in bad_links_dict.items():
            err_msg = "\t link '%s' (occurs on: " % (page,)\
                      + ",".join(referers) + ')'
            print>>OUT, err_msg

        if flush_bad_links:
            bad_links_dict = {}

        if fail_if_exist:
            raise TwillAssertionError("broken links encountered")
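
For reference, below is a minimal sketch of how this extension might be driven directly from Python rather than from a twill script, illustrating the usage described in the module docstring above. It assumes the twill.extensions package is importable as laid out in the file path; the URLs are placeholders, and writing to commands._options simply mirrors what the module itself does rather than a documented twill configuration API.

# Hypothetical usage sketch (placeholder URLs; not part of check_links.py).
from twill import commands
from twill.extensions.check_links import check_links, report_bad_links

# Collect bad links quietly instead of raising on the first broken page;
# this mirrors the module's own handling of commands._options.
commands._options['check_links.only_collect_bad_links'] = True

commands.go('http://example.com/')
check_links()                            # check every HTTP link on the page

commands.go('http://example.com/docs/')
check_links(r'http://example\.com/')     # only check links matching this pattern

# Report everything collected above; raises TwillAssertionError if any bad
# links were found, then flushes the collected list (both are the defaults).
report_bad_links()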