root/galaxy-central/tools/filters/joinWrapper.py @ 2

リビジョン 2, 2.4 KB (コミッタ: hatakeyama, 14 年 前)

import galaxy-central

行番号 
#!/usr/bin/env python
# Guruprasad Ananda
"""
This tool provides the UNIX "join" functionality.
"""
import os
import subprocess
import sys
import tempfile
7
def stop_err(msg):
    """Write *msg* to standard error and terminate the process.

    Args:
        msg: Text to emit on stderr (written as-is, no newline is appended).

    Raises:
        SystemExit: always, with exit status 1.
    """
    sys.stderr.write(msg)
    # A bare sys.exit() reports status 0 (success) even though we are on the
    # error path; exit non-zero so callers can detect the failure.
    sys.exit(1)
11
def _output_format(sorted_path):
    """Return a join ``-o`` spec ("1.1,1.2,...") naming every column of file 1.

    The column count is taken from the first non-blank line of *sorted_path*.
    An empty string is returned when the file has no non-blank line.
    """
    # Binary mode: only tab characters are counted, so no decoding is needed.
    with open(sorted_path, "rb") as handle:
        for raw in handle:
            # Strip only the line terminator; strip() would also remove
            # leading/trailing tabs and therefore drop empty edge fields.
            raw = raw.rstrip(b"\r\n")
            if raw.strip():
                ncols = raw.count(b"\t") + 1
                return ",".join("1.%d" % col for col in range(1, ncols + 1))
    return ""


def _join_order_flags():
    """Return ``["--nocheck-order"]`` for GNU join >= 7, otherwise ``[]``.

    BSD join does not support ``--version`` and exits non-zero, while GNU
    join exits with 0. From version 7 on, GNU join raises an error on
    duplicated items unless order checking is disabled.
    """
    try:
        proc = subprocess.Popen(
            ["join", "--version"],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            universal_newlines=True,
        )
    except OSError:
        return []
    out, _ = proc.communicate()
    if proc.returncode != 0:
        return []
    tokens = out.split("\n")[0].split()
    if not tokens:
        return []
    # Only the major component matters; versions may have any number of parts.
    major = tokens[-1].split(".")[0]
    if major.isdigit() and int(major) >= 7:
        return ["--nocheck-order"]
    return []


def main():
    """Join two tab-separated files on the given fields using UNIX sort/join.

    Command-line arguments (``sys.argv``):
        1: path of the first input file
        2: path of the second input file
        3: 1-based join field in the first file
        4: 1-based join field in the second file
        5: mode; "V" writes the unpairable lines of file 1 (``join -v 1``),
           any other value writes the paired lines
        6: path of the output file

    Only the columns of the first file are written to the output. Failures
    of the external commands are reported through ``stop_err``.
    """
    infile1 = sys.argv[1]
    infile2 = sys.argv[2]
    field1 = int(sys.argv[3])
    field2 = int(sys.argv[4])
    mode = sys.argv[5]
    outfile = sys.argv[6]

    tmpfile1 = tempfile.NamedTemporaryFile()
    tmpfile2 = tempfile.NamedTemporaryFile()

    try:
        try:
            # Sort the two files based on the specified fields. Argument
            # lists (no shell) keep paths with spaces or metacharacters
            # intact, and check_call raises when sort fails.
            for field, src, tmp in ((field1, infile1, tmpfile1),
                                    (field2, infile2, tmpfile2)):
                subprocess.check_call(
                    ["sort", "-t", "\t", "-k", "%d,%d" % (field, field),
                     "-o", tmp.name, src]
                )
        except (OSError, subprocess.CalledProcessError) as exc:
            stop_err('Initialization error -> %s' % str(exc))

        option = _output_format(tmpfile1.name)

        cmd = ["join"] + _join_order_flags() + ["-t", "\t"]
        if mode == "V":
            cmd += ["-v", "1"]
        cmd += ["-o", option, "-1", str(field1), "-2", str(field2),
                tmpfile1.name, tmpfile2.name]

        try:
            with open(outfile, "wb") as out:
                # With no columns in file 1 there is nothing to join; the
                # output file is still created (empty).
                if option:
                    subprocess.check_call(cmd, stdout=out)
        except (OSError, IOError, subprocess.CalledProcessError) as exj:
            stop_err('Error joining the two datasets -> %s' % str(exj))
    finally:
        # Closing removes the temporary files.
        tmpfile1.close()
        tmpfile2.close()
69       
# Run the join only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。