Context Navigation

axt.py @ 3

リビジョン 3, 7.2 KB (コミッタ: kohda, 14 年前)
Install Unix tools http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

行番号
1	"""
2	Support for reading and writing the `AXT`_ format used for pairwise
3	alignments.
4
5	.. _AXT: http://genome.ucsc.edu/goldenPath/help/axt.html
6	"""
7
8	from bx.align import *
9
10	import itertools
11	from bx import interval_index_file
12
13	# Tools for dealing with pairwise alignments in AXT format
14
15	class MultiIndexed( object ):
16	"""Similar to 'indexed' but wraps more than one axt_file"""
17	def __init__( self, axt_filenames, keep_open=False ):
18	self.indexes = [ Indexed( axt_file, axt_file + ".index" ) for axt_file in axt_filenames ]
19	def get( self, src, start, end ):
20	blocks = []
21	for index in self.indexes: blocks += index.get( src, start, end )
22	return blocks
23
24
25	class Indexed( object ):
26	"""Indexed access to a axt using overlap queries, requires an index file"""
27
28	def __init__( self, axt_filename, index_filename=None, keep_open=False, species1 = None, species2=None, species_to_lengths=None, support_ids=False ):
29	if index_filename is None: index_filename = axt_filename + ".index"
30	self.indexes = interval_index_file.Indexes( filename=index_filename )
31	self.axt_filename = axt_filename
32	# nota bene: (self.species1 = species1 or "species1") is incorrect if species1=""
33	self.species1 = species1
34	if (self.species1 == None): self.species1 = "species1"
35	self.species2 = species2
36	if (self.species2 == None): self.species2 = "species2"
37	self.species_to_lengths = species_to_lengths
38	self.support_ids = support_ids # for extra text at end of axt header lines
39	if keep_open:
40	self.f = open( axt_filename )
41	else:
42	self.f = None
43
44	def get( self, src, start, end ):
45	intersections = self.indexes.find( src, start, end )
46	return itertools.imap( self.get_axt_at_offset, [ val for start, end, val in intersections ] )
47
48	def get_axt_at_offset( self, offset ):
49	if self.f:
50	self.f.seek( offset )
51	return read_next_axt( self.f, self.species1, self.species2, self.species_to_lengths, self.support_ids )
52	else:
53	f = open( self.axt_filename )
54	try:
55	f.seek( offset )
56	return read_next_axt( f, self.species1, self.species2, self.species_to_lengths, self.support_ids )
57	finally:
58	f.close()
59
60	class Reader( object ):
61	"""Iterate over all axt blocks in a file in order"""
62
63	def __init__( self, file, species1 = None, species2=None, species_to_lengths=None, support_ids=False ):
64	self.file = file
65	# nota bene: (self.species1 = species1 or "species1") is incorrect if species1=""
66	self.species1 = species1
67	if (self.species1 == None): self.species1 = "species1"
68	self.species2 = species2
69	if (self.species2 == None): self.species2 = "species2"
70	self.species_to_lengths = species_to_lengths
71	self.support_ids = support_ids # for extra text at end of axt header lines
72	self.attributes = {}
73
74	def next( self ):
75	return read_next_axt( self.file, self.species1, self.species2, self.species_to_lengths, self.support_ids )
76
77	def __iter__( self ):
78	return ReaderIter( self )
79
80	def close( self ):
81	self.file.close()
82
83	class ReaderIter( object ):
84	def __init__( self, reader ):
85	self.reader = reader
86	def __iter__( self ):
87	return self
88	def next( self ):
89	v = self.reader.next()
90	if not v: raise StopIteration
91	return v
92
93	class Writer( object ):
94
95	def __init__( self, file, attributes={} ):
96	self.file = file
97	self.block = 0
98	self.src_split = True
99	if ("src_split" in attributes):
100	self.src_split = attributes["src_split"]
101
102	def write( self, alignment ):
103	if (len(alignment.components) != 2):
104	raise "%d-component alignment is not compatible with axt" % \
105	len(alignment.components)
106	c1 = alignment.components[0]
107	c2 = alignment.components[1]
108
109	if c1.strand != "+":
110	c1 = c1.reverse_complement()
111	c2 = c2.reverse_complement()
112
113	if (self.src_split):
114	spec1,chr1 = src_split( c1.src )
115	spec2,chr2 = src_split( c2.src )
116	else:
117	chr1,chr2 = c1.src,c2.src
118
119	self.file.write( "%d %s %d %d %s %d %d %s %s\n" % \
120	(self.block,chr1,c1.start+1,c1.start+c1.size,
121	chr2,c2.start+1,c2.start+c2.size,c2.strand,
122	alignment.score))
123	self.file.write( "%s\n" % c1.text )
124	self.file.write( "%s\n" % c2.text )
125	self.file.write( "\n" )
126	self.block += 1
127
128	def close( self ):
129	self.file.close()
130
131	# ---- Helper methods ---------------------------------------------------------
132
133	# typical axt block:
134	# 0 chr19 3001012 3001075 chr11 70568380 70568443 - 3500 [optional text]
135	# TCAGCTCATAAATCACCTCCTGCCACAAGCCTGGCCTGGTCCCAGGAGAGTGTCCAGGCTCAGA
136	# TCTGTTCATAAACCACCTGCCATGACAAGCCTGGCCTGTTCCCAAGACAATGTCCAGGCTCAGA
137	# start and stop are origin-1, inclusive
138	# first species is always on plus strand
139	# when second species is on minus strand, start and stop are counted from sequence end
140
141	def read_next_axt( file, species1, species2, species_to_lengths=None, support_ids=False ):
142	line = readline( file, skip_blank=True )
143	if not line: return
144	fields = line.split()
145	if (len(fields) < 9) or ((not support_ids) and (len(fields) > 9)):
146	raise "bad axt-block header: %s" % line
147	attributes = {}
148	if (len(fields) > 9):
149	attributes["id"] = "_".join(fields[9:])
150	seq1 = readline( file )
151	if not line or line.isspace(): raise "incomplete axt-block; header: %s" % line
152	seq2 = readline( file )
153	if not line or line.isspace(): raise "incomplete axt-block; header: %s" % line
154	# Build 2 component alignment
155	alignment = Alignment(attributes=attributes,species_to_lengths=species_to_lengths)
156	# Build component for species 1
157	component = Component()
158	component.src = fields[1]
159	if (species1 != ""): component.src = species1 + "." + component.src
160	component.start = int( fields[2] ) - 1 # (axt intervals are origin-1
161	end = int( fields[3] ) # and inclusive on both ends)
162	component.size = end - component.start
163	component.strand = "+"
164	component.text = seq1.strip()
165	alignment.add_component( component )
166	# Build component for species 2
167	component = Component()
168	component.src = fields[4]
169	if (species2 != ""): component.src = species2 + "." + component.src
170	component.start = int( fields[5] ) - 1
171	end = int( fields[6] )
172	component.size = end - component.start
173	component.strand = fields[7]
174	component.text = seq2.strip()
175	alignment.add_component( component )
176	# add score
177	try:
178	alignment.score = int( fields[8] )
179	except:
180	try:
181	alignment.score = float( fields[8] )
182	except:
183	alignment.score = fields[8]
184	return alignment
185
186	def readline( file, skip_blank=False ):
187	"""Read a line from provided file, skipping any blank or comment lines"""
188	while 1:
189	line = file.readline()
190	if not line: return None
191	if line[0] != '#' and not ( skip_blank and line.isspace() ):
192	return line
193

Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。

Context Navigation

root/galaxy-central/eggs/bx_python-0.5.0_dev_f74aec067563-py2.6-macosx-10.6-universal-ucs2.egg/bx/align/axt.py @ 3

異なるフォーマットでダウンロード: