# for rgenetics - lped to pbed
# where to stop with converters
# pbed might be central
# eg lped/eigen/fbat/snpmatrix all to pbed
# and pbed to lped/eigen/fbat/snpmatrix ?
# that's a lot of converters
|
---|
| 7 | import sys,os,time,subprocess
|
---|
| 8 |
|
---|
| 9 |
|
---|
# Name this script was invoked under; used in the usage error and in the
# generated HTML page.
prog = os.path.basename(sys.argv[0])
# Date stamp identifying this revision of the converter.
myversion = 'Oct 10 2009'

# Opening boilerplate of the HTML page written for Galaxy. The single %s
# slot (in the "generator" meta tag) is filled with the program name.
galhtmlprefix = """<?xml version="1.0" encoding="utf-8" ?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="generator" content="Galaxy %s tool output - see http://g2.trac.bx.psu.edu/" />
<title></title>
<link rel="stylesheet" href="/static/style/base.css" type="text/css" />
</head>
<body>
<div class="document">
"""
|
---|
| 25 |
|
---|
def timenow():
    """Return the current local time as a 'dd/mm/YYYY HH:MM:SS' string."""
    stamp_format = '%d/%m/%Y %H:%M:%S'
    # localtime() with no argument converts the current time.
    right_now = time.localtime()
    return time.strftime(stamp_format, right_now)
|
---|
| 30 |
|
---|
def getMissval(inped=''):
    """Guess the missing-genotype code used in a whitespace-delimited ped file.

    Ugly hack: scan the file line by line, skip the first six columns of
    each line (the pedigree fields) and return the first remaining token
    that is one of the usual missing-value codes: N, 0, n, 9, - or .

    inped -- path of the ped file to inspect.

    Returns:
      None      if the file cannot be opened (signals "no input file");
      the code  first matching token found after the pedigree columns;
      'N'       as a fallback when the whole file holds no such token or
                its content cannot be read.
    """
    commonmissvals = frozenset(('N', '0', 'n', '9', '-', '.'))
    try:
        f = open(inped, 'r')
    except (IOError, OSError):
        return None  # signal no in file
    try:
        with f:
            # Iterating the file stops at EOF; readline() in a loop does not
            # raise there, it returns '' forever.
            for line in f:
                for candidate in line.split()[6:]:  # ignore pedigree stuff
                    if candidate in commonmissvals:
                        return candidate
    except (IOError, OSError, ValueError):
        # Unreadable or undecodable content: fall through to the fallback.
        pass
    return 'N'  # punt
|
---|
| 57 |
|
---|
def rgConv(inpedfilepath,outhtmlname,outfilepath,plink):
    """Run plink to convert a linkage-format ped dataset to binary ped.

    inpedfilepath -- input path without extension; '<inpedfilepath>.ped' is
                     inspected for its missing-genotype code and the same
                     root is passed to plink as --file.
    outhtmlname   -- accepted for interface compatibility; not used here.
    outfilepath   -- existing directory used as plink's working directory
                     and as the location of the --out root.
    plink         -- path (or name) of the plink executable.

    Returns plink's exit status as reported by Popen.wait().
    """
    pedf = '%s.ped' % inpedfilepath
    basename = os.path.split(inpedfilepath)[-1] # get basename
    outroot = os.path.join(outfilepath,basename)
    missval = getMissval(inped = pedf)
    if not missval:
        # Parenthesised single-argument print behaves the same on Python 2 and 3.
        print('### lped_to_pbed_converter.py cannot identify missing value in %s' % pedf)
        missval = '0'
    # Argument list without a shell: paths containing spaces or shell
    # metacharacters are passed to plink verbatim instead of being
    # re-parsed by /bin/sh.
    cl = [plink, '--noweb', '--file', inpedfilepath, '--make-bed',
          '--out', outroot, '--missing-genotype', missval]
    p = subprocess.Popen(cl, cwd=outfilepath)
    retval = p.wait() # run plink
    return retval
|
---|
| 71 |
|
---|
| 72 |
|
---|
| 73 |
|
---|
| 74 |
|
---|
def main():
    """Command-line entry point: convert lped to pbed and write an HTML index.

    need to work with rgenetics composite datatypes
    so in and out are html files with data in extrafiles path
    <command interpreter="python">lped_to_pbed_converter.py '$input1/$input1.metadata.base_name'
    '$output1' '$output1.extra_files_path' '${GALAXY_DATA_INDEX_DIR}/rg/bin/plink'
    </command>

    Positional parameters (sys.argv[1..4]):
      1. input ped/map path root (no extension)
      2. output html file name
      3. output directory for the converted files (created if missing)
      4. plink executable

    Exits with status 1 after a message on stderr when fewer than four
    parameters are supplied.
    """
    nparm = 4
    # sys.argv[0] is the script itself, so nparm parameters need nparm + 1 entries.
    if len(sys.argv) < nparm + 1:
        sys.stderr.write('## %s called with %s - needs %d parameters \n' % (prog,sys.argv,nparm))
        sys.exit(1)
    inpedfilepath, outhtmlname, outfilepath, plink = sys.argv[1:nparm + 1]
    try:
        os.makedirs(outfilepath)
    except OSError:
        # An already existing directory is fine; anything else is a real error.
        if not os.path.isdir(outfilepath):
            raise
    rgConv(inpedfilepath,outhtmlname,outfilepath,plink)
    with open(outhtmlname,'w') as f:
        f.write(galhtmlprefix % prog)
        flist = sorted(os.listdir(outfilepath))  # sorted for a stable listing
        s = '## Rgenetics: http://rgenetics.org Galaxy Tools %s %s' % (prog,timenow()) # becomes info
        print(s)
        f.write('<div>%s\n<ol>' % (s))
        for data in flist:
            fname = os.path.split(data)[-1]
            f.write('<li><a href="%s">%s</a></li>\n' % (fname,fname))
        # Close the list, this section's div and the "document" div opened
        # by galhtmlprefix so the page is well formed.
        f.write("</ol></div></div></body></html>")


if __name__ == "__main__":
    main()
|
---|