root/galaxy-central/eggs/bx_python-0.5.0_dev_f74aec067563-py2.6-macosx-10.6-universal-ucs2.egg/EGG-INFO/scripts/axt_to_lav.py @ 3

リビジョン 3, 3.9 KB (コミッタ: kohda, 14 年 前)

Install Unix tools  http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

行番号 
1#!/usr/bin/python2.6
2
3"""
4Application to convert AXT file to LAV file. Reads an AXT file from standard
5input and writes a LAV file to standard out; some statistics are written to
6standard error.
7
8usage: %prog primary_spec secondary_spec [--silent] < axt_file > lav_file
9
10Each spec is of the form seq_file[:species_name]:lengths_file.
11
12- seq_file should be a format string for the file names for the individual
13  sequences, with %s to be replaced by the alignment's src field.  For
14  example, "hg18/%s.nib" would prescribe files named "hg18/chr1.nib",
15  "hg18/chr2.nib", etc.
16
17- species_name is optional.  If present, it is prepended to the alignment's
18  src field.
19
20- Lengths files provide the length of each chromosome (lav format needs this
21  information but axt file does not contain it).  The format is a series of
22  lines of the form:
23
24  <chromosome name> <length>
25
26  The chromosome field in each axt block must match some <chromosome name> in
27  the lengths file.
28"""
29
30__author__ = "Bob Harris (rsharris@bx.psu.edu)"
31
32import sys
33import copy
34import bx.align.axt
35import bx.align.lav
36
37
38def usage(s=None):
39        message = __doc__
40        if (s == None): sys.exit (message)
41        else:           sys.exit ("%s\n%s" % (s,message))
42
43
44def main():
45        global debug
46
47        # parse the command line
48
49        primary   = None
50        secondary = None
51        silent    = False
52
53        # pick off options
54
55        args = sys.argv[1:]
56        while (len(args) > 0):
57                arg = args.pop(0)
58                val = None
59                fields = arg.split("=",1)
60                if (len(fields) == 2):
61                        arg = fields[0]
62                        val = fields[1]
63                        if (val == ""):
64                                usage("missing a value in %s=" % arg)
65
66                if (arg == "--silent") and (val == None):
67                        silent = True
68                elif (primary == None) and (val == None):
69                        primary = arg
70                elif (secondary == None) and (val == None):
71                        secondary = arg
72                else:
73                        usage("unknown argument: %s" % arg)
74
75        if (primary == None):
76                usage("missing primary file name and length")
77
78        if (secondary == None):
79                usage("missing secondary file name and length")
80
81        try:
82                (primaryFile,primary,primaryLengths) = parse_spec(primary)
83        except:
84                usage("bad primary spec (must be seq_file[:species_name]:lengths_file")
85
86        try:
87                (secondaryFile,secondary,secondaryLengths) = parse_spec(secondary)
88        except:
89                usage("bad secondary spec (must be seq_file[:species_name]:lengths_file")
90
91        # read the lengths
92
93        speciesToLengths = {}
94        speciesToLengths[primary]   = read_lengths (primaryLengths)
95        speciesToLengths[secondary] = read_lengths (secondaryLengths)
96
97        # read the alignments
98
99        out = bx.align.lav.Writer(sys.stdout, \
100                        attributes = { "name_format_1" : primaryFile,
101                                       "name_format_2" : secondaryFile })
102
103        axtsRead = 0
104        axtsWritten = 0
105        for axtBlock in bx.align.axt.Reader(sys.stdin, \
106                        species_to_lengths = speciesToLengths,
107                        species1           = primary,
108                        species2           = secondary,
109                        support_ids        = True):
110                axtsRead += 1
111                out.write (axtBlock)
112                axtsWritten += 1
113
114        out.close()
115
116        if (not silent):
117                sys.stderr.write ("%d blocks read, %d written\n" % (axtsRead,axtsWritten))
118
119def parse_spec(spec): # returns (seq_file,species_name,lengths_file)
120        fields = spec.split(":")
121        if   (len(fields) == 2): return (fields[0],"",fields[1])
122        elif (len(fields) == 3): return (fields[0],fields[1],fields[2])
123        else:                    raise ValueError
124
125def read_lengths (fileName):
126
127        chromToLength = {}
128
129        f = file (fileName, "r")
130
131        for lineNumber,line in enumerate(f):
132                line = line.strip()
133                if (line == ""): continue
134                if (line.startswith("#")): continue
135
136                fields = line.split ()
137                if (len(fields) != 2):
138                        raise "bad lengths line (%s:%d): %s" % (fileName,lineNumber,line)
139
140                chrom = fields[0]
141                try:
142                        length = int(fields[1])
143                except:
144                        raise "bad lengths line (%s:%d): %s" % (fileName,lineNumber,line)
145
146                if (chrom in chromToLength):
147                        raise "%s appears more than once (%s:%d): %s" \
148                            % (chrom,fileName,lineNumber)
149
150                chromToLength[chrom] = length
151
152        f.close ()
153
154        return chromToLength
155
156
157if __name__ == "__main__": main()
158
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。