Context Navigation

rgRegion.py @ 2

リビジョン 2, 2.8 KB (コミッタ: hatakeyama, 14 年前)
import galaxy-central

Rev	行番号
[2]	1	"""
	2	released under the terms of the LGPL
	3	copyright ross lazarus August 2007
	4	for the rgenetics project
	5
	6	Special galaxy tool for the camp2007 data
	7	Allows grabbing arbitrary columns from an arbitrary region
	8
	9	Needs a mongo results file in the location hardwired below or could be passed in as
	10	a library parameter - but this file must have a very specific structure
	11	rs chrom offset float1...floatn
	12
	13	called as
	14	<command interpreter="python">
	15	rgRegion.py $infile '$cols' $r $tag $out_file1
	16	</command>
	17
	18	cols is a delimited list of chosen column names for the subset
	19	r is a ucsc location region pasted into the tool
	20
	21	"""
	22
	23
	24	import sys,string
	25
	26	trantab = string.maketrans(string.punctuation,'_'*len(string.punctuation))
	27	print >> sys.stdout, '##rgRegion.py started'
	28	if len(sys.argv) <> 6:
	29	print >> sys.stdout, '##!expected params in sys.argv, got %d - %s' % (len(sys.argv),sys.argv)
	30	sys.exit(1)
	31	print '##got %d - %s' % (len(sys.argv),sys.argv)
	32	# quick and dirty for galaxy - we always get something for each parameter
	33	fname = sys.argv[1]
	34	wewant = sys.argv[2].split(',')
	35	region = sys.argv[3].lower()
	36	tag = sys.argv[4].translate(trantab)
	37	ofname = sys.argv[5]
	38	myname = 'rgRegion'
	39	if len(wewant) == 0: # no columns selected?
	40	print >> sys.stdout, '##!%s: no columns selected - cannot run' % myname
	41	sys.exit(1)
	42	try:
	43	f = open(fname,'r')
	44	except: # bad input file name?
	45	print >> sys.stdout, '##!%s unable to open file %s' % (myname, fname)
	46	sys.exit(1)
	47	try: # TODO make a regexp?
	48	c,rest = region.split(':')
	49	c = c.replace('chr','') # leave although will break strict genome graphs
	50	rest = rest.replace(',','') # remove commas
	51	spos,epos = rest.split('-')
	52	spos = int(spos)
	53	epos = int(epos)
	54	except:
	55	print >> sys.stdout, '##!%s unable to parse region %s - MUST look like "chr8:10,000-100,000' % (myname,region)
	56	sys.exit(1)
	57	print >> sys.stdout, '##%s parsing chrom %s from %d to %d' % (myname, c,spos,epos)
	58	res = []
	59	cnames = f.next().strip().split() # column titles for output
	60	linelen = len(cnames)
	61	wewant = [int(x) - 1 for x in wewant] # need col numbers base 0
	62	for n,l in enumerate(f):
	63	ll = l.strip().split()
	64	thisc = ll[1]
	65	thispos = int(ll[2])
	66	if (thisc == c) and (thispos >= spos) and (thispos <= epos):
	67	if len(ll) == linelen:
	68	res.append([ll[x] for x in wewant]) # subset of columns!
	69	else:
	70	print >> sys.stdout, '##! looking for %d fields - found %d in ll=%s' % (linelen,len(ll),str(ll))
	71	o = file(ofname,'w')
	72	res = ['%s\n' % '\t'.join(x) for x in res] # turn into tab delim string
	73	print >> sys.stdout, '##%s selected and returning %d data rows' % (myname,len(res))
	74	head = [cnames[x] for x in wewant] # ah, list comprehensions - list of needed column names
	75	o.write('%s\n' % '\t'.join(head)) # header row for output
	76	o.write(''.join(res))
	77	o.close()
	78	f.close()
	79
	80

Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。

Context Navigation

root/galaxy-central/tools/rgenetics/rgRegion.py @ 2

異なるフォーマットでダウンロード: