Context Navigation

thread.py

リビジョン 3, 3.9 KB (コミッタ: kohda, 15 年前)
Install Unix tools http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

行番号
1	"""
2	Tools for "threading" out specific species from alignments (removing other
3	species and fixing alignment text).
4	"""
5
6	import sys
7	from itertools import *
8	from copy import deepcopy
9
10	def thread( mafs, species ):
11	"""
12	Restrict an list of alignments to a given list of species by:
13
14	1) Removing components for any other species
15	2) Remove any columns containing all gaps
16
17	Example:
18
19	>>> import bx.align.maf
20
21	>>> block1 = bx.align.maf.from_string( '''
22	... a score=4964.0
23	... s hg18.chr10 52686 44 + 135374737 GTGCTAACTTACTGCTCCACAGAAAACATCAATTCTGCTCATGC
24	... s rheMac2.chr20 58163346 43 - 88221753 ATATTATCTTAACATTAAAGA-AGAACAGTAATTCTGGTCATAA
25	... s panTro1.chrUn_random 208115356 44 - 240967748 GTGCTAACTGACTGCTCCAGAGAAAACATCAATTCTGTTCATGT
26	... s oryCun1.scaffold_175207 85970 22 + 212797 ----------------------AAAATATTAGTTATCACCATAT
27	... s bosTau2.chr23 23894492 43 + 41602928 AAACTACCTTAATGTCACAGG-AAACAATGTATgctgctgctgc
28	... ''' )
29
30	>>> block2 = bx.align.maf.from_string( '''
31	... a score=9151.0
32	... s hg18.chr10 52730 69 + 135374737 GCAGGTACAATTCATCAAGAAAG-GAATTACAACTTCAGAAATGTGTTCAAAATATATCCATACTT-TGAC
33	... s oryCun1.scaffold_175207 85992 71 + 212797 TCTAGTGCTCTCCAATAATATAATAGATTATAACTTCATATAATTATGTGAAATATAAGATTATTTATCAG
34	... s panTro1.chrUn_random 208115400 69 - 240967748 GCAGCTACTATTCATCAAGAAAG-GGATTACAACTTCAGAAATGTGTTCAAAGTGTATCCATACTT-TGAT
35	... s rheMac2.chr20 58163389 69 - 88221753 ACACATATTATTTCTTAACATGGAGGATTATATCTT-AAACATGTGTGCaaaatataaatatatat-tcaa
36	... ''' )
37
38	>>> mafs = [ block1, block2 ]
39
40	>>> threaded = [ t for t in thread( mafs, [ "hg18", "panTro1" ] ) ]
41
42	>>> len( threaded )
43	2
44
45	>>> print threaded[0]
46	a score=0.0
47	s hg18.chr10 52686 44 + 135374737 GTGCTAACTTACTGCTCCACAGAAAACATCAATTCTGCTCATGC
48	s panTro1.chrUn_random 208115356 44 - 240967748 GTGCTAACTGACTGCTCCAGAGAAAACATCAATTCTGTTCATGT
49	<BLANKLINE>
50
51	>>> print threaded[1]
52	a score=0.0
53	s hg18.chr10 52730 69 + 135374737 GCAGGTACAATTCATCAAGAAAGGAATTACAACTTCAGAAATGTGTTCAAAATATATCCATACTTTGAC
54	s panTro1.chrUn_random 208115400 69 - 240967748 GCAGCTACTATTCATCAAGAAAGGGATTACAACTTCAGAAATGTGTTCAAAGTGTATCCATACTTTGAT
55	<BLANKLINE>
56
57	"""
58	for m in mafs:
59	new_maf = deepcopy( m )
60	new_components = get_components_for_species( new_maf, species )
61	if new_components:
62	remove_all_gap_columns( new_components )
63	new_maf.components = new_components
64	new_maf.score = 0.0
65	new_maf.text_size = len(new_components[0].text)
66	yield new_maf
67
68	def get_components_for_species( alignment, species ):
69	"""Return the component for each species in the list `species` or None"""
70	# If the number of components in the alignment is less that the requested number
71	# of species we can immediately fail
72	if len( alignment.components ) < len( species ): return None
73	# Otherwise, build an index of components by species, then lookup
74	index = dict( [ ( c.src.split( '.' )[0], c ) for c in alignment.components ] )
75	try: return [ index[s] for s in species ]
76	except: return None
77
78	def remove_all_gap_columns( components ):
79	"""
80	Remove any columns containing only gaps from a set of alignment components,
81	text of components is modified IN PLACE.
82
83	TODO: Optimize this with Pyrex.
84	"""
85	seqs = [ list( c.text ) for c in components ]
86	i = 0
87	text_size = len( seqs[0] )
88	while i < text_size:
89	all_gap = True
90	for seq in seqs:
91	if seq[i] != '-': all_gap = False
92	if all_gap:
93	for seq in seqs: del seq[i]
94	text_size -= 1
95	else:
96	i += 1
97	for i in range( len( components ) ):
98	components[i].text = ''.join( seqs[i] )

Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。

Context Navigation

root/galaxy-central/eggs/bx_python-0.5.0_dev_f74aec067563-py2.6-macosx-10.6-universal-ucs2.egg/bx/align/tools/thread.py

異なるフォーマットでダウンロード: