Context Navigation

maf.py @ 3

リビジョン 3, 8.2 KB (コミッタ: kohda, 14 年前)
Install Unix tools http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

行番号
1	"""
2	Support for the `MAF`_ multiple sequence alignment format used by `multiz`_.
3
4	.. _MAF: http://genome.ucsc.edu/FAQ/FAQformat.html#format5
5	.. _multiz: http://www.bx.psu.edu/miller_lab/
6	"""
7
8	from bx.align import *
9
10	from StringIO import StringIO
11	import os
12
13	import itertools
14	from bx import interval_index_file
15
16	from bx.misc.seekbzip2 import SeekableBzip2File
17
18	MAF_INVERSE_STATUS = 'V'
19	MAF_INSERT_STATUS = 'I'
20	MAF_CONTIG_STATUS = 'C'
21	MAF_CONTIG_NESTED_STATUS = 'c'
22	MAF_NEW_STATUS = 'N'
23	MAF_NEW_NESTED_STATUS = 'n'
24	MAF_MAYBE_NEW_STATUS = 'S'
25	MAF_MAYBE_NEW_NESTED_STATUS = 's'
26	MAF_MISSING_STATUS = 'M'
27
28	class MAFIndexedAccess( interval_index_file.AbstractIndexedAccess ):
29	"""
30	Indexed access to a MAF file.
31	"""
32	def read_at_current_offset( self, file, **kwargs ):
33	"""
34	Read the MAF block at the current position in `file` and return an
35	instance of `Alignment`.
36	"""
37	return read_next_maf( file, **kwargs )
38
39	class MAFMultiIndexedAccess( interval_index_file.AbstractMultiIndexedAccess ):
40	"""
41	Indexed access to multiple MAF files.
42	"""
43	indexed_access_class = MAFIndexedAccess
44
45	Indexed = MAFIndexedAccess
46	"""Deprecated: `MAFIndexedAccess` is also available under the name `Indexed`."""
47
48	MultiIndexed = MAFMultiIndexedAccess
49	"""Deprecated: `MAFMultiIndexedAccess` is also available under the name `MultiIndexed`."""
50
51	class Reader( object ):
52	"""
53	Iterate over all maf blocks in a file in order
54	"""
55	def __init__( self, file, **kwargs ):
56	self.file = file
57	self.maf_kwargs = kwargs
58	# Read and verify maf header, store any attributes
59	fields = self.file.readline().split()
60	if fields[0] != '##maf': raise "File does not have MAF header"
61	self.attributes = parse_attributes( fields[1:] )
62
63	def next( self ):
64	return read_next_maf( self.file, **self.maf_kwargs )
65
66	def __iter__( self ):
67	return ReaderIter( self )
68
69	def close( self ):
70	self.file.close()
71
72	class ReaderIter( object ):
73	"""
74	Adapts a `Reader` to the iterator protocol.
75	"""
76	def __init__( self, reader ):
77	self.reader = reader
78	def __iter__( self ):
79	return self
80	def next( self ):
81	v = self.reader.next()
82	if not v: raise StopIteration
83	return v
84
85	class Writer( object ):
86
87	def __init__( self, file, attributes={} ):
88	self.file = file
89	# Write header, Webb's maf code wants version first, we accomodate
90	if not attributes.has_key('version'): attributes['version'] = 1
91	self.file.write( "##maf version=%s" % attributes['version'] )
92	for key in attributes:
93	if key == 'version': continue
94	self.file.writelines( " %s=%s" % ( key, attributes[key] ) )
95	self.file.write( "\n" )
96
97	def write( self, alignment ):
98	self.file.write( "a score=" + str( alignment.score ) )
99	for key in alignment.attributes:
100	self.file.write( " %s=%s" % ( key, alignment.attributes[key] ) )
101	self.file.write( "\n" )
102	# Components
103	rows = []
104	for c in alignment.components:
105	# "Empty component" generates an 'e' row
106	if c.empty:
107	rows.append( ( "e", c.src, str( c.start ), str( c.size ), c.strand, str( c.src_size ), c.synteny_empty ) )
108	continue
109	# Regular component
110	rows.append( ( "s", c.src, str( c.start ), str( c.size ), c.strand, str( c.src_size ), c.text ) )
111	# If component has quality, write a q row
112	if c.quality is not None:
113	rows.append( ( "q", c.src, "", "", "", "", c.quality ) )
114	# If component has synteny follow up with an 'i' row
115	if c.synteny_left and c.synteny_right:
116	rows.append( ( "i", c.src, "", "", "", "", " ".join( map( str, c.synteny_left + c.synteny_right ) ) ) )
117	self.file.write( format_tabular( rows, "llrrrrl" ) )
118	self.file.write( "\n" )
119
120	def close( self ):
121	self.file.close()
122
123	# ---- Helper methods -------------------------------------------------------
124
125	def from_string( string, **kwargs ):
126	return read_next_maf( StringIO( string ), **kwargs )
127
128	def read_next_maf( file, species_to_lengths=None, parse_e_rows=False ):
129	"""
130	Read the next MAF block from `file` and return as an `Alignment`
131	instance. If `parse_i_rows` is true, empty components will be created
132	when e rows are encountered.
133	"""
134	alignment = Alignment(species_to_lengths=species_to_lengths)
135	# Attributes line
136	line = readline( file, skip_blank=True )
137	if not line: return None
138	fields = line.split()
139	if fields[0] != 'a': raise "Expected 'a ...' line"
140	alignment.attributes = parse_attributes( fields[1:] )
141	if 'score' in alignment.attributes:
142	alignment.score = alignment.attributes['score']
143	del alignment.attributes['score']
144	else:
145	alignment.score = 0
146	# Sequence lines
147	last_component = None
148	while 1:
149	line = readline( file )
150	# EOF or Blank line terminates alignment components
151	if not line or line.isspace(): break
152	if line.isspace(): break
153	# Parse row
154	fields = line.split()
155	if fields[0] == 's':
156	# An 's' row contains sequence for a component
157	component = Component()
158	component.src = fields[1]
159	component.start = int( fields[2] )
160	component.size = int( fields[3] )
161	component.strand = fields[4]
162	component.src_size = int( fields[5] )
163	if len(fields) > 6: component.text = fields[6].strip()
164	# Add to set
165	alignment.add_component( component )
166	last_component = component
167	elif fields[0] == 'e':
168	# An 'e' row, when no bases align for a given species this tells
169	# us something about the synteny
170	if parse_e_rows:
171	component = Component()
172	component.empty = True
173	component.src = fields[1]
174	component.start = int( fields[2] )
175	component.size = int( fields[3] )
176	component.strand = fields[4]
177	component.src_size = int( fields[5] )
178	component.text = None
179	synteny = fields[6].strip()
180	assert len( synteny ) == 1, \
181	"Synteny status in 'e' rows should be denoted with a single character code"
182	component.synteny_empty = synteny
183	alignment.add_component( component )
184	last_component = component
185	elif fields[0] == 'i':
186	# An 'i' row, indicates left and right synteny status for the
187	# previous component, we hope ;)
188	assert fields[1] == last_component.src, "'i' row does not follow matching 's' row"
189	last_component.synteny_left = ( fields[2], int( fields[3] ) )
190	last_component.synteny_right = ( fields[4], int( fields[5] ) )
191	elif fields[0] == 'q':
192	assert fields[1] == last_component.src, "'q' row does not follow matching 's' row"
193	# TODO: Should convert this to an integer array?
194	last_component.quality = fields[2]
195
196	return alignment
197
198	def readline( file, skip_blank=False ):
199	"""Read a line from provided file, skipping any blank or comment lines"""
200	while 1:
201	line = file.readline()
202	#print "every line: %r" % line
203	if not line: return None
204	if line[0] != '#' and not ( skip_blank and line.isspace() ):
205	return line
206
207	def parse_attributes( fields ):
208	"""Parse list of key=value strings into a dict"""
209	attributes = {}
210	for field in fields:
211	pair = field.split( '=' )
212	attributes[ pair[0] ] = pair[1]
213	return attributes
214
215	def format_tabular( rows, align=None ):
216	if len( rows ) == 0: return ""
217	lengths = [ len( col ) for col in rows[ 0 ] ]
218	for row in rows[1:]:
219	for i in range( 0, len( row ) ):
220	lengths[ i ] = max( lengths[ i ], len( row[ i ] ) )
221	rval = ""
222	for row in rows:
223	for i in range( 0, len( row ) ):
224	if align and align[ i ] == "l":
225	rval += row[ i ].ljust( lengths[ i ] )
226	else:
227	rval += row[ i ].rjust( lengths[ i ] )
228	rval += " "
229	rval += "\n"
230	return rval
231

Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。

Context Navigation

root/galaxy-central/eggs/bx_python-0.5.0_dev_f74aec067563-py2.6-macosx-10.6-universal-ucs2.egg/bx/align/maf.py @ 3

異なるフォーマットでダウンロード: