Context Navigation

rgRegion.py

リビジョン 2, 2.8 KB (コミッタ: hatakeyama, 15 年前)
import galaxy-central

行番号
"""
released under the terms of the LGPL
copyright ross lazarus August 2007
for the rgenetics project

Special galaxy tool for the camp2007 data
Allows grabbing arbitrary columns from an arbitrary region

Needs a results file passed in as the first parameter - this file must
have a very specific structure: a whitespace delimited header row followed by
whitespace delimited data rows of the form
rs chrom offset float1...floatn

called as
<command interpreter="python">
rgRegion.py $infile '$cols' $r $tag $out_file1
</command>

cols is a comma delimited list of the chosen (1-based) column numbers for the subset
r is a ucsc location region pasted into the tool, e.g. chr8:10,000-100,000
tag is accepted so the command line stays stable, but is not used by this script

All progress and error messages go to stdout prefixed with '##' ('##!' for
problems); the exit status is 1 when the run could not be completed.
"""

import sys

MYNAME = 'rgRegion'


def _say(msg):
    """Write one message line to stdout, where this tool reports everything."""
    sys.stdout.write('%s\n' % msg)


def _parse_columns(spec):
    """Turn a comma delimited string of 1-based column numbers into 0-based ints.

    Empty items are dropped, so an empty selection yields an empty list.
    Raises ValueError if any remaining item is not an integer.
    """
    return [int(tok) - 1 for tok in spec.split(',') if tok.strip()]


def _parse_region(region):
    """Split a 'chr8:10,000-100,000' style region into (chrom, start, end).

    The 'chr' prefix and thousands separators are removed.
    Raises ValueError when the text does not have that shape.
    """
    chrom, span = region.split(':')
    start, end = span.replace(',', '').split('-')
    return chrom.replace('chr', ''), int(start), int(end)


def _select_rows(lines, ncols, wanted, chrom, start, end):
    """Return the wanted columns of every data row inside the region.

    lines  - iterable of data lines (header already consumed)
    ncols  - number of fields in the header; rows in the region must match it
    wanted - 0-based column indexes to keep
    chrom  - lower case chromosome name without the 'chr' prefix
    start, end - inclusive offset range

    Blank lines are skipped silently. Rows that cannot be interpreted are
    reported on stdout and skipped rather than aborting the whole run.
    """
    rows = []
    for line in lines:
        fields = line.split()
        if not fields:
            continue
        if len(fields) < 3:  # no chrom/offset available to test against the region
            _say('##! looking for %d fields - found %d in ll=%s' % (ncols, len(fields), str(fields)))
            continue
        try:
            pos = int(fields[2])
        except ValueError:
            _say('##! non-integer offset %r in ll=%s' % (fields[2], str(fields)))
            continue
        # the requested region is lower cased, so compare chromosomes case
        # insensitively - otherwise X, Y and MT could never match
        if fields[1].lower() != chrom or pos < start or pos > end:
            continue
        if len(fields) != ncols:
            _say('##! looking for %d fields - found %d in ll=%s' % (ncols, len(fields), str(fields)))
            continue
        rows.append([fields[i] for i in wanted])
    return rows


def main(argv):
    """Run the tool with a sys.argv style list and return the exit status.

    argv is [script, infile, cols, region, tag, outfile]. Returns 0 after the
    subset has been written to outfile, or 1 after reporting a problem.
    """
    _say('##rgRegion.py started')
    if len(argv) != 6:
        _say('##!expected params in sys.argv, got %d - %s' % (len(argv), argv))
        return 1
    _say('##got %d - %s' % (len(argv), argv))
    # quick and dirty for galaxy - we always get something for each parameter
    fname = argv[1]
    region = argv[3].lower()
    ofname = argv[5]  # argv[4] (tag) is intentionally unused
    try:
        wewant = _parse_columns(argv[2])
    except ValueError:
        _say('##!%s: columns must be comma delimited column numbers - got %s' % (MYNAME, argv[2]))
        return 1
    if not wewant:  # no columns selected?
        _say('##!%s: no columns selected - cannot run' % MYNAME)
        return 1
    try:
        f = open(fname, 'r')
    except (IOError, OSError):  # bad input file name?
        _say('##!%s unable to open file %s' % (MYNAME, fname))
        return 1
    with f:
        try:
            c, spos, epos = _parse_region(region)
        except ValueError:
            _say('##!%s unable to parse region %s - MUST look like "chr8:10,000-100,000"' % (MYNAME, region))
            return 1
        _say('##%s parsing chrom %s from %d to %d' % (MYNAME, c, spos, epos))
        cnames = next(f, '').split()  # column titles for output
        if not cnames:
            _say('##!%s input file %s has no header row' % (MYNAME, fname))
            return 1
        linelen = len(cnames)
        # reject 0 (which would wrap around to the last column) and anything
        # past the header before scanning the whole file
        if min(wewant) < 0 or max(wewant) >= linelen:
            _say('##!%s: column numbers must be between 1 and %d - got %s' % (MYNAME, linelen, argv[2]))
            return 1
        res = _select_rows(f, linelen, wewant, c, spos, epos)
    _say('##%s selected and returning %d data rows' % (MYNAME, len(res)))
    head = [cnames[x] for x in wewant]  # needed column names
    with open(ofname, 'w') as o:
        o.write('%s\n' % '\t'.join(head))  # header row for output
        o.writelines('%s\n' % '\t'.join(row) for row in res)  # tab delimited rows
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))
79
80

Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。

Context Navigation

root/galaxy-central/tools/rgenetics/rgRegion.py

異なるフォーマットでダウンロード: