root/galaxy-central/eggs/bx_python-0.5.0_dev_f74aec067563-py2.6-macosx-10.6-universal-ucs2.egg/EGG-INFO/scripts/axt_to_maf.py

リビジョン 3, 3.6 KB (コミッタ: kohda, 14 年 前)

Install Unix tools  http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

行番号 
1#!/usr/bin/python2.6
2
3"""
4Application to convert AXT file to MAF file. Reads an AXT file from standard
5input and writes a MAF file to standard out;  some statistics are written to
6standard error.
7
8axt_to_maf primary:lengths_file secondary:lengths_file < axt_file > maf_file
9  --silent: prevents stats report
10 
11  Lengths files provide the length of each chromosome (maf format needs this
12  information but axt file does not contain it).  The format is a series of
13  lines of the form:
14 
15    <chromosome name> <length>
16 
17  The chromosome field in each axt block must match some <chromosome name> in
18  the lengths file.
19"""
20
21__author__ = "Bob Harris (rsharris@bx.psu.edu)"
22
23import sys
24import copy
25import bx.align.axt
26import bx.align.maf
27
28def usage(s=None):
29        message = __doc__
30        if (s == None): sys.exit (message)
31        else:           sys.exit ("%s\n%s" % (s,message))
32
33
34def main():
35        global debug
36
37        ##########
38        # parse the command line
39        ##########
40
41        primary   = None
42        secondary = None
43        silent    = False
44
45        # pick off options
46
47        args = sys.argv[1:]
48        while (len(args) > 0):
49                arg = args.pop(0)
50                val = None
51                fields = arg.split("=",1)
52                if (len(fields) == 2):
53                        arg = fields[0]
54                        val = fields[1]
55                        if (val == ""):
56                                usage("missing a value in %s=" % arg)
57
58                if (arg == "--silent") and (val == None):
59                        silent = True
60                elif (primary == None) and (val == None):
61                        primary = arg
62                elif (secondary == None) and (val == None):
63                        secondary = arg
64                else:
65                        usage("unknown argument: %s" % arg)
66
67        if (primary == None):
68                usage("missing primary species")
69
70        if (secondary == None):
71                usage("missing secondary species")
72
73        fields = primary.split(":")
74        if (len(fields) != 2):
75                usage("bad primary species (must be species:lengths_file")
76        primary = fields[0]
77        primaryLengths = fields[1]
78
79        fields = secondary.split(":")
80        if (len(fields) != 2):
81                usage("bad secondary species (must be species:lengths_file")
82        secondary = fields[0]
83        secondaryLengths = fields[1]
84
85        ##########
86        # read the lengths
87        ##########
88
89        speciesToLengths = {}
90        speciesToLengths[primary]   = read_lengths (primaryLengths)
91        speciesToLengths[secondary] = read_lengths (secondaryLengths)
92
93        ##########
94        # read the alignments
95        ##########
96
97        out = bx.align.maf.Writer(sys.stdout)
98
99        axtsRead = 0
100        axtsWritten = 0
101        for axtBlock in bx.align.axt.Reader(sys.stdin,\
102                        species_to_lengths = speciesToLengths,
103                        species1           = primary,
104                        species2           = secondary):
105                axtsRead += 1
106
107                p = axtBlock.get_component_by_src_start(primary)
108                if (p == None): continue
109                s = axtBlock.get_component_by_src_start(secondary)
110                if (s == None): continue
111
112                mafBlock = bx.align.Alignment (axtBlock.score, axtBlock.attributes)
113                mafBlock.add_component (clone_component(p))
114                mafBlock.add_component (clone_component(s))
115
116                out.write (mafBlock)
117                axtsWritten += 1
118
119        if (not silent):
120                sys.stderr.write ("%d blocks read, %d written\n" % (axtsRead,axtsWritten))
121
122
123def clone_component(c):
124        return bx.align.Component (c.src, c.start, c.size, c.strand, c.src_size, \
125                                   copy.copy(c.text))
126
127
128def read_lengths (fileName):
129
130        chromToLength = {}
131
132        f = file (fileName, "r")
133
134        for lineNumber,line in enumerate(f):
135                line = line.strip()
136                if (line == ""): continue
137                if (line.startswith("#")): continue
138
139                fields = line.split ()
140                if (len(fields) != 2):
141                        raise "bad lengths line (%s:%d): %s" % (fileName,lineNumber,line)
142
143                chrom = fields[0]
144                try:
145                        length = int(fields[1])
146                except:
147                        raise "bad lengths line (%s:%d): %s" % (fileName,lineNumber,line)
148
149                if (chrom in chromToLength):
150                        raise "%s appears more than once (%s:%d): %s" \
151                            % (chrom,fileName,lineNumber)
152
153                chromToLength[chrom] = length
154
155        f.close ()
156
157        return chromToLength
158
159
160if __name__ == "__main__": main()
161
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。