Context Navigation

bitset_builders.py

リビジョン 3, 6.0 KB (コミッタ: kohda, 15 年前)
Install Unix tools http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

行番号
1	"""
2	Support for creating dictionaries of `Bitset`s / `BinnedBitset`s from text
3	files containing sets of "covered" intervals in sequences (e.g. `BED`_ files).
4
5	.. _BED: http://genome.ucsc.edu/FAQ/FAQformat.html#format1
6	"""
7
8	from warnings import warn
9	from bx.bitset import *
10	import re
11
def binned_bitsets_from_file( f, chrom_col=0, start_col=1, end_col=2, strand_col=5, upstream_pad=0, downstream_pad=0, lens={} ):
    """
    Read interval lines from `f` into a dictionary of bitsets keyed by
    chromosome name.

    - 'f' should be a file like object (or any iterable containing strings)
    - 'chrom_col', 'start_col', and 'end_col' must exist in each data line.
    - 'strand_col' is accepted for interface compatibility; the strand is
      not used when building the bitsets.
    - 'upstream_pad' / 'downstream_pad', when non-zero, move the start down
      (clamped at 0) and the end up (clamped at the size of the interval's
      own chromosome).
    - if 'lens' is provided bitset sizes will be looked up from it, otherwise
      chromosomes will be assumed to be the maximum size. 'lens' is only
      read here, never modified.

    Lines starting with "#" and empty or whitespace-only lines are skipped.
    If an interval's start ends up after its end a warning is issued, and
    `set_range` is still called with the (negative) length.

    Returns a dict mapping each chromosome name to its `BinnedBitSet`.
    """
    last_chrom = None
    last_bitset = None
    bitsets = dict()
    for line in f:
        # `not line.strip()` also rejects "" (as produced by
        # text.split("\n")), which `isspace()` alone would let through to
        # the field lookups below.
        if line.startswith("#") or not line.strip():
            continue
        fields = line.split()
        chrom = fields[chrom_col]
        if chrom != last_chrom:
            # Refresh `size` on every chromosome switch, not only when the
            # bitset is first created; otherwise returning to an earlier
            # chromosome would clamp with the previous chromosome's size.
            if chrom in lens:
                size = lens[chrom]
            else:
                size = MAX
            if chrom not in bitsets:
                bitsets[chrom] = BinnedBitSet( size )
            last_chrom = chrom
            last_bitset = bitsets[chrom]
        start, end = int( fields[start_col] ), int( fields[end_col] )
        if upstream_pad:
            start = max( 0, start - upstream_pad )
        if downstream_pad:
            end = min( size, end + downstream_pad )
        if start > end:
            warn( "Interval start after end!" )
        last_bitset.set_range( start, end - start )
    return bitsets
48
def binned_bitsets_from_bed_file( f, chrom_col=0, start_col=1, end_col=2, strand_col=5, upstream_pad=0, downstream_pad=0, lens={} ):
    """
    Read BED-style interval lines from `f` into a dictionary of bitsets
    keyed by chromosome name.

    - 'f' should be a file like object (or any iterable containing strings)
    - 'chrom_col', 'start_col', and 'end_col' must exist in each data line.
    - 'strand_col' is accepted for interface compatibility; the strand is
      not used when building the bitsets (coordinates are taken as given).
    - 'upstream_pad' / 'downstream_pad', when non-zero, move the start down
      (clamped at 0) and the end up (clamped at the size of the interval's
      own chromosome).
    - if 'lens' is provided bitset sizes will be looked up from it, otherwise
      chromosomes will be assumed to be the maximum size. 'lens' is only
      read here, never modified.

    Lines starting with "#" or "browser" and empty or whitespace-only lines
    are skipped. A line starting with "track" is never treated as data; if
    it contains `offset=<digits>`, that value is added to the start and end
    of every following interval until another track line changes it.

    If an interval's start ends up after its end a warning is issued, and
    `set_range` is still called with the (negative) length.

    Returns a dict mapping each chromosome name to its `BinnedBitSet`.
    """
    # Raw string so that `\d` reaches the regex engine without relying on
    # Python passing an unknown string escape through unchanged.
    offset_pattern = re.compile( r"offset=(\d+)" )
    last_chrom = None
    last_bitset = None
    bitsets = dict()
    offset = 0
    for line in f:
        # `not line.strip()` also rejects "" (as produced by
        # text.split("\n")), which `isspace()` alone would let through to
        # the field lookups below.
        if line.startswith("#") or not line.strip():
            continue
        # Ignore browser lines completely
        if line.startswith( "browser" ):
            continue
        # Need to check track lines due to the offset
        if line.startswith( "track" ):
            m = offset_pattern.search( line )
            if m:
                offset = int( m.group( 1 ) )
            continue
        fields = line.split()
        chrom = fields[chrom_col]
        if chrom != last_chrom:
            # Refresh `size` on every chromosome switch, not only when the
            # bitset is first created; otherwise returning to an earlier
            # chromosome would clamp with the previous chromosome's size.
            if chrom in lens:
                size = lens[chrom]
            else:
                size = MAX
            if chrom not in bitsets:
                bitsets[chrom] = BinnedBitSet( size )
            last_chrom = chrom
            last_bitset = bitsets[chrom]
        start = int( fields[start_col] ) + offset
        end = int( fields[end_col] ) + offset
        if upstream_pad:
            start = max( 0, start - upstream_pad )
        if downstream_pad:
            end = min( size, end + downstream_pad )
        if start > end:
            warn( "Interval start after end!" )
        last_bitset.set_range( start, end - start )
    return bitsets
99
def binned_bitsets_proximity( f, chrom_col=0, start_col=1, end_col=2, strand_col=5, upstream=0, downstream=0 ):
    """
    Read a file into a dictionary of bitsets, extending every interval by
    strand-aware upstream / downstream amounts.

    - 'f' should be a file like object (or any iterable containing strings)
    - 'chrom_col', 'start_col', and 'end_col' must exist in each data line.
    - 'strand_col' is optional; a line is treated as '-' only when that
      column exists and equals "-", otherwise it is treated as '+'.
    - on the '+' strand 'upstream' lowers the start and 'downstream' raises
      the end; on the '-' strand 'upstream' raises the end and 'downstream'
      lowers the start. Starts are clamped at 0 and ends at MAX.

    Lines starting with "#" and empty or whitespace-only lines are skipped.
    Intervals whose end is not greater than their start after extension are
    silently ignored.

    Returns a dict mapping each chromosome name to a `BinnedBitSet` created
    with size MAX.
    """
    last_chrom = None
    last_bitset = None
    bitsets = dict()
    for line in f:
        # Skip blank/empty lines as well as comments; a blank line would
        # otherwise split into no fields and fail on the column lookups.
        if line.startswith("#") or not line.strip():
            continue
        fields = line.split()
        strand = "+"
        if len(fields) > strand_col and fields[strand_col] == "-":
            strand = "-"
        chrom = fields[chrom_col]
        if chrom != last_chrom:
            if chrom not in bitsets:
                bitsets[chrom] = BinnedBitSet( MAX )
            last_chrom = chrom
            last_bitset = bitsets[chrom]
        start, end = int( fields[start_col] ), int( fields[end_col] )
        if strand == "+":
            if upstream:
                start = max( 0, start - upstream )
            if downstream:
                end = min( MAX, end + downstream )
        else:
            # '-' strand: upstream lies past the end coordinate.
            if upstream:
                end = min( MAX, end + upstream )
            if downstream:
                start = max( 0, start - downstream )
        if end - start > 0:
            last_bitset.set_range( start, end - start )
    return bitsets
129
def binned_bitsets_from_list( list=[] ):
    """
    Build a dictionary of bitsets from an iterable of interval records.

    Each record is indexed positionally: item 0 is the chromosome name and
    items 1 and 2 are the start and end, each converted with `int`. For
    every record `set_range( start, end - start )` is called on the bitset
    of its chromosome; a bitset of size MAX is created the first time a
    chromosome name is seen.

    Returns a dict mapping chromosome name to its `BinnedBitSet`.
    """
    by_chrom = {}
    current_name = None
    current_bits = None
    for record in list:
        name = record[0]
        # Only touch the dictionary when the chromosome changes between
        # consecutive records.
        if name != current_name:
            try:
                current_bits = by_chrom[name]
            except KeyError:
                current_bits = by_chrom[name] = BinnedBitSet( MAX )
            current_name = name
        begin = int( record[1] )
        finish = int( record[2] )
        current_bits.set_range( begin, finish - begin )
    return by_chrom
145
def binned_bitsets_by_chrom( f, chrom, chrom_col=0, start_col=1, end_col=2):
    """
    Read the intervals of a single chromosome from a file into one bitset.

    - 'f' should be a file like object (or any iterable containing strings)
    - 'chrom' is the chromosome name to keep; lines whose 'chrom_col' field
      differs are ignored.
    - 'chrom_col' must exist in every data line; 'start_col' and 'end_col'
      must exist in every line that matches 'chrom'.

    Lines starting with "#" and empty or whitespace-only lines are skipped.

    Returns a single `BinnedBitSet` of size MAX (empty if nothing matched).
    """
    bitset = BinnedBitSet( MAX )
    for line in f:
        # Skip blank/empty lines as well as comments; a blank line would
        # otherwise split into no fields and fail on the column lookup.
        if line.startswith("#") or not line.strip():
            continue
        fields = line.split()
        if fields[chrom_col] == chrom:
            start, end = int( fields[start_col] ), int( fields[end_col] )
            bitset.set_range( start, end - start )
    return bitset

Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。

Context Navigation

root/galaxy-central/eggs/bx_python-0.5.0_dev_f74aec067563-py2.6-macosx-10.6-universal-ucs2.egg/bx/bitset_builders.py

異なるフォーマットでダウンロード: