# Rgenetics Galaxy tool: convert linkage pedigree (lped) data to FBAT format.
# Letter-coded alleles are recoded to the numeric FBAT coding.
# Recoding is much slower than passing rows through unchanged, so it is best
# to always use numeric alleles internally.
|
---|
5 |
|
---|
6 | import sys,os,time
|
---|
7 |
|
---|
8 |
|
---|
# Name of this script as invoked, used in log messages and in the HTML output.
prog = os.path.basename(sys.argv[0])
myversion = 'Oct 10 2009'

# Opening boilerplate for the Galaxy HTML report; the single %s placeholder
# receives the generating program's name.
galhtmlprefix = """<?xml version="1.0" encoding="utf-8" ?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" />
<title></title>
<link rel="stylesheet" href="/static/style/base.css" type="text/css" />
</head>
<body>
<div class="document">
"""
|
---|
24 |
|
---|
def timenow():
    """Return the current local time as a 'dd/mm/YYYY HH:MM:SS' string."""
    stamp_format = '%d/%m/%Y %H:%M:%S'
    # With no time tuple given, strftime formats the current local time.
    return time.strftime(stamp_format)
|
---|
29 |
|
---|
30 |
|
---|
def rgConv(inpedfilepath,outhtmlname,outfilepath):
    """Convert a linkage ped/map pair to an FBAT ped file.

    Reads ``<inpedfilepath>.map`` and ``<inpedfilepath>.ped`` and writes
    ``<basename>.ped`` into the directory ``outfilepath``. The output starts
    with one header line holding the marker names (second column of the map
    file) separated by spaces, followed by one line per pedigree row.

    The first pedigree row is probed: if any of its leading genotype columns
    is not an integer, every row has its alleles recoded through the
    A/C/G/T -> 1/2/3/4 table (anything unrecognised becomes '0'). Otherwise
    rows are copied through unchanged.

    inpedfilepath -- path prefix of the input files, without extension
    outhtmlname   -- not used here; kept so existing callers keep working
    outfilepath   -- directory to write to; created if it does not exist

    Writes a message to stderr and exits with status 1 if the map file
    cannot be opened or parsed.
    """
    recode={'A':'1','C':'2','G':'3','T':'4','N':'0','0':'0','1':'1','2':'2','3':'3','4':'4'}
    basename = os.path.split(inpedfilepath)[-1] # get basename
    inmap = '%s.map' % inpedfilepath
    inped = '%s.ped' % inpedfilepath
    outf = '%s.ped' % basename # note the fbat exe insists that this is the extension for the ped data
    outfpath = os.path.join(outfilepath,outf) # where to write the fbat format file to
    try:
        mf = open(inmap,'r')
    except IOError:
        sys.stderr.write('%s cannot open inmap file %s - do you have permission?\n' % (prog,inmap))
        sys.exit(1)
    with mf:
        try:
            # the marker name is the second whitespace-separated map column
            rsl = [mapline.split()[1] for mapline in mf]
        except (IndexError, ValueError, IOError):
            sys.stderr.write('## cannot parse %s\n' % inmap)
            sys.exit(1)
    try:
        os.makedirs(outfilepath)
    except OSError:
        # Usually means it already exists; a genuinely unusable directory
        # is reported when the output file is opened below.
        pass
    head = ' '.join(rsl) # list of rs numbers
    # TODO add anno to rs but fbat will prolly barf?
    # Must be initialised: it is only switched on when the probe below fails.
    dorecode = False
    with open(inped,'r') as pedf, open(outfpath,'w',2**20) as o:
        o.write(head)
        o.write('\n')
        for i,row in enumerate(pedf):
            if i == 0:
                # Genotypes start at column 6 (after the six pedigree
                # fields); look for non numeric codes in the first row.
                try:
                    [int(allele) for allele in row.split()[6:50]]
                except ValueError:
                    dorecode = True
            if dorecode:
                lrow = row.strip().split()
                p = lrow[:6]
                g = lrow[6:]
                gc = [recode.get(allele,'0') for allele in g]
                row = '%s\n' % ' '.join(p+gc)
            o.write(row)
|
---|
75 |
|
---|
76 |
|
---|
def main():
    """Command line entry point: convert an lped dataset and write the html index.

    need to work with rgenetics composite datatypes
    so in and out are html files with data in extrafiles path
    <command interpreter="python">rg_convert_lped_fped.py '$input1/$input1.metadata.base_name'
    '$output1' '$output1.extra_files_path'
    </command>

    Expects three command line parameters: the input ped/map path prefix,
    the output html file name and the output directory. Exits with status 1
    and a message on stderr if fewer are supplied. After the conversion it
    writes an html page to the output html file listing every file found in
    the output directory, and prints a one-line summary to stdout.
    """
    nparm = 3
    # sys.argv[0] is the program name, so nparm parameters need nparm+1 entries
    if len(sys.argv) < nparm + 1:
        sys.stderr.write('## %s called with %s - needs %d parameters \n' % (prog,sys.argv,nparm))
        sys.exit(1)
    inpedfilepath = sys.argv[1]
    outhtmlname = sys.argv[2]
    outfilepath = sys.argv[3]
    try:
        os.makedirs(outfilepath)
    except OSError:
        pass # usually already exists; real problems surface when files are written
    rgConv(inpedfilepath,outhtmlname,outfilepath)
    with open(outhtmlname,'w') as f:
        f.write(galhtmlprefix % prog)
        flist = sorted(os.listdir(outfilepath)) # sorted for a stable listing
        banner = '## Rgenetics: http://rgenetics.org Galaxy Tools %s %s' % (prog,timenow())
        # write() instead of the print statement so this runs on python 2 and 3
        sys.stdout.write('%s\n' % banner) # becomes info
        f.write('<div>%s\n<ol>' % banner)
        for data in flist:
            fname = os.path.split(data)[-1]
            f.write('<li><a href="%s">%s</a></li>\n' % (fname,fname))
        # close the list, this section's div and the document div opened by galhtmlprefix
        f.write("</ol></div></div></body></html>")
|
---|
106 |
|
---|
107 |
|
---|
108 |
|
---|
# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
---|