# for rgenetics - convert linkage ped (lped) to fbat format
# alleles are recoded to the numeric fbat version;
# recoding is much slower, so it is best to always
# use numeric alleles internally
| 5 |
|
---|
| 6 | import sys,os,time
|
---|
| 7 |
|
---|
| 8 |
|
---|
# Name this script was invoked as (directory part removed); used to label
# error messages and the generated HTML report.
prog = os.path.basename(sys.argv[0])

# Date stamp identifying this revision of the tool.
myversion = 'Oct 10 2009'

# Opening boilerplate of the HTML report. The single %s placeholder is filled
# with the program name; the <body> and <div class="document"> opened here
# are left for the caller to close.
galhtmlprefix = """<?xml version="1.0" encoding="utf-8" ?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" />
<title></title>
<link rel="stylesheet" href="/static/style/base.css" type="text/css" />
</head>
<body>
<div class="document">
"""
| 24 |
|
---|
def timenow():
    """Return the current local time as a 'dd/mm/YYYY HH:MM:SS' string."""
    stamp_format = '%d/%m/%Y %H:%M:%S'
    local_now = time.localtime(time.time())
    return time.strftime(stamp_format, local_now)
| 29 |
|
---|
| 30 |
|
---|
def rgConv(inpedfilepath,outhtmlname,outfilepath):
    """Convert a linkage ped/map pair to an fbat format ped file.

    inpedfilepath -- path of the input dataset without extension; the files
                     '<inpedfilepath>.map' and '<inpedfilepath>.ped' are read
    outhtmlname   -- not used here; kept so the call signature is unchanged
    outfilepath   -- directory that receives '<basename>.ped', created if missing

    The output starts with one line holding the marker names (second column
    of the map file) separated by spaces, followed by one line per ped row.
    If the first ped row holds a non-integer allele among its fields 10..49,
    every row has its genotype fields (from field 6 on) recoded with
    A,C,G,T -> 1,2,3,4 and anything unrecognised -> 0; otherwise rows are
    copied through unchanged.

    Writes a message to stderr and exits with status 1 if the map file cannot
    be opened or parsed.
    """
    recode = {'A':'1','C':'2','G':'3','T':'4','N':'0','0':'0','1':'1','2':'2','3':'3','4':'4'}
    basename = os.path.split(inpedfilepath)[-1] # get basename
    inmap = '%s.map' % inpedfilepath
    inped = '%s.ped' % inpedfilepath
    outf = '%s.ped' % basename # note the fbat exe insists that this is the extension for the ped data
    outfpath = os.path.join(outfilepath,outf) # where to write the fbat format file to
    try:
        mf = open(inmap,'r')
    except IOError:
        sys.stderr.write('%s cannot open inmap file %s - do you have permission?\n' % (prog,inmap))
        sys.exit(1)
    try:
        rsl = [mapline.split()[1] for mapline in mf]
    except IndexError:
        # a map line with fewer than two columns
        sys.stderr.write('## cannot parse %s' % inmap)
        sys.exit(1)
    finally:
        mf.close()
    try:
        os.makedirs(outfilepath)
    except OSError:
        # only an already existing directory is acceptable here
        if not os.path.isdir(outfilepath):
            raise
    head = ' '.join(rsl) # list of rs numbers
    # TODO add anno to rs but fbat will prolly barf?
    # must start out false: a numeric first row never sets it, and leaving it
    # undefined made the test below fail with a NameError
    dorecode = False
    with open(inped,'r') as pedf:
        with open(outfpath,'w',2**20) as o:
            o.write(head)
            o.write('\n')
            for i,row in enumerate(pedf):
                if i == 0:
                    # sniff the first row only: look for non numeric codes
                    try:
                        for allele in row.split()[10:50]:
                            int(allele)
                    except ValueError:
                        dorecode = True
                if dorecode:
                    lrow = row.strip().split()
                    p = lrow[:6] # pedigree columns are passed through as is
                    g = lrow[6:] # genotype columns
                    gc = [recode.get(allele,'0') for allele in g]
                    row = '%s\n' % ' '.join(p+gc)
                o.write(row)
| 75 |
|
---|
| 76 |
|
---|
def main():
    """Run the lped to fbat conversion and write the HTML index page.

    need to work with rgenetics composite datatypes
    so in and out are html files with data in extrafiles path
    <command interpreter="python">rg_convert_lped_fped.py '$input1/$input1.metadata.base_name'
    '$output1' '$output1.extra_files_path'
    </command>

    sys.argv[1] is the input ped/map path without extension, sys.argv[2] the
    HTML file to write and sys.argv[3] the directory for the converted data.
    Exits with status 1 if fewer than three parameters are supplied.
    """
    nparm = 3
    # sys.argv[0] is the program name, so nparm parameters need nparm + 1 entries
    if len(sys.argv) < nparm + 1:
        sys.stderr.write('## %s called with %s - needs %d parameters \n' % (prog,sys.argv,nparm))
        sys.exit(1)
    inpedfilepath = sys.argv[1]
    outhtmlname = sys.argv[2]
    outfilepath = sys.argv[3]
    try:
        os.makedirs(outfilepath)
    except OSError:
        # only an already existing directory is acceptable here
        if not os.path.isdir(outfilepath):
            raise
    rgConv(inpedfilepath,outhtmlname,outfilepath)
    with open(outhtmlname,'w') as f:
        f.write(galhtmlprefix % prog)
        flist = sorted(os.listdir(outfilepath)) # sorted so the report is reproducible
        banner = '## Rgenetics: http://rgenetics.org Galaxy Tools %s %s' % (prog,timenow())
        # stdout becomes info; written this way so it behaves the same under
        # python 2 and python 3
        sys.stdout.write('%s\n' % banner)
        f.write('<div>%s\n<ol>' % banner)
        for data in flist:
            fname = os.path.split(data)[-1]
            f.write('<li><a href="%s">%s</a></li>\n' % (fname,fname))
        # close the list, this div and the document div opened by galhtmlprefix
        f.write("</ol></div></div></body></html>")
| 106 |
|
---|
| 107 |
|
---|
| 108 |
|
---|
# Script entry point: run the conversion only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()