# for rgenetics - lped to pbed
# where to stop with converters
# pbed might be central
# eg lped/eigen/fbat/snpmatrix all to pbed
# and pbed to lped/eigen/fbat/snpmatrix ?
# that's a lot of converters
|
---|
import os
import shlex
import subprocess
import sys
import time
|
---|
8 |
|
---|
9 |
|
---|
# Name this script was invoked under (directory part removed).
prog = os.path.basename(sys.argv[0])
myversion = 'Oct 10 2009'

# Opening XHTML boilerplate for the generated report page; the single %s
# placeholder is filled with the name of the generating tool.
galhtmlprefix = """<?xml version="1.0" encoding="utf-8" ?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" />
<title></title>
<link rel="stylesheet" href="/static/style/base.css" type="text/css" />
</head>
<body>
<div class="document">
"""
|
---|
25 |
|
---|
def timenow():
    """Return the current local time as a 'dd/mm/YYYY HH:MM:SS' string."""
    stamp_format = '%d/%m/%Y %H:%M:%S'
    # With no time tuple supplied, strftime formats the current local time.
    return time.strftime(stamp_format)
|
---|
30 |
|
---|
def getMissval(inped=''):
    """
    Guess the missing-genotype code used in a ped file (ugly hack).

    Every line is split on whitespace, the six leading pedigree columns are
    skipped, and the remaining tokens are scanned for one of the usual
    missing-value codes: N, 0, n, 9, - or .

    inped -- path of the ped file to inspect.

    Returns the first code found, 'N' when the whole file contains none of
    them, or None when the file cannot be opened.
    """
    commonmissvals = ('N', '0', 'n', '9', '-', '.')
    try:
        pedfile = open(inped, 'r')
    except (IOError, OSError):
        return None  # signal no in file
    with pedfile:
        # Iterating the file ends at EOF. A readline() loop does not:
        # readline() keeps returning '' once the file is exhausted, so a
        # file without any missing code would be scanned forever.
        for line in pedfile:
            for token in line.split()[6:]:  # ignore pedigree stuff
                if token in commonmissvals:
                    return token
    return 'N'  # punt - nothing in the file looks like a missing value
|
---|
57 |
|
---|
def rgConv(inpedfilepath, outhtmlname, outfilepath, plink):
    """
    Run plink with --make-bed to convert an lped dataset to pbed.

    inpedfilepath -- input dataset path without extension; '<path>.ped' is
                     inspected for the missing-genotype code and the bare
                     path is handed to plink as --file.
    outhtmlname   -- not used here; kept so existing callers keep working.
    outfilepath   -- directory plink is run in; --out is this directory
                     joined with the basename of inpedfilepath.
    plink         -- plink command line prefix (normally just the path of
                     the executable).

    Returns plink's exit status, or 127 when plink could not be started.
    """
    pedf = '%s.ped' % inpedfilepath
    basename = os.path.split(inpedfilepath)[-1]  # get basename
    outroot = os.path.join(outfilepath, basename)
    missval = getMissval(inped=pedf)
    if not missval:
        sys.stdout.write('### lped_to_pbed_converter.py cannot identify missing value in %s\n' % pedf)
        missval = '0'
    # Build an argument list instead of a shell string so that paths holding
    # spaces or shell metacharacters reach plink intact and cannot inject
    # commands. Only the plink prefix itself is split shell-style, which
    # keeps a "plink --flag" style value working as it did before.
    cl = shlex.split(plink) + [
        '--noweb',
        '--file', inpedfilepath,
        '--make-bed',
        '--out', outroot,
        '--missing-genotype', missval,
    ]
    try:
        proc = subprocess.Popen(cl, cwd=outfilepath)
    except OSError as err:
        sys.stderr.write('### lped_to_pbed_converter.py cannot run %s: %s\n' % (plink, err))
        return 127  # the status a shell reports for a command it cannot run
    return proc.wait()  # run plink
|
---|
71 |
|
---|
72 |
|
---|
73 |
|
---|
74 |
|
---|
def main():
    """
    need to work with rgenetics composite datatypes
    so in and out are html files with data in extrafiles path
    <command interpreter="python">lped_to_pbed_converter.py '$input1/$input1.metadata.base_name'
    '$output1' '$output1.extra_files_path' '${GALAXY_DATA_INDEX_DIR}/rg/bin/plink'
    </command>

    Command line parameters, in order: input dataset path (no extension),
    output html file name, output directory, plink command.
    Exits with status 1 when fewer than four parameters are supplied.
    """
    nparm = 4
    # sys.argv[0] is the script itself, so nparm parameters means
    # nparm + 1 entries; the old test let a call with three through.
    if len(sys.argv) < nparm + 1:
        sys.stderr.write('## %s called with %s - needs %d parameters \n' % (prog, sys.argv, nparm))
        sys.exit(1)
    inpedfilepath, outhtmlname, outfilepath, plink = sys.argv[1:nparm + 1]
    try:
        os.makedirs(outfilepath)
    except OSError:
        # An already existing directory is fine; anything else would only
        # fail later and less clearly, so let it propagate.
        if not os.path.isdir(outfilepath):
            raise
    rgConv(inpedfilepath, outhtmlname, outfilepath, plink)
    with open(outhtmlname, 'w') as f:
        f.write(galhtmlprefix % prog)
        flist = sorted(os.listdir(outfilepath))  # sorted for a stable listing
        s = '## Rgenetics: http://rgenetics.org Galaxy Tools %s %s' % (prog, timenow())  # becomes info
        sys.stdout.write(s + '\n')
        f.write('<div>%s\n<ol>' % (s))
        for data in flist:
            fname = os.path.split(data)[-1]
            f.write('<li><a href="%s">%s</a></li>\n' % (fname, fname))
        # Close the list, this div and the "document" div opened by the prefix.
        f.write("</ol></div></div></body></html>")
|
---|
106 |
|
---|
107 |
|
---|
108 |
|
---|
# Run the conversion only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|
---|