root/galaxy-central/cron/build_chrom_db.py @ 3

リビジョン 2, 2.2 KB (コミッタ: hatakeyama, 14 年 前)

import galaxy-central

行番号 
1#!/usr/bin/env python
2
3"""
4Connects to a UCSC table browser and scrapes chrominfo for every build
5specified by an input file (such as one output by parse_builds.py).
6If no input file is specified, it will connect using parse_builds.py to
7retrieve a list of available builds.
8
9All chromInfo is placed in a path with the convention
10{dbpath}/buildname.len
11
12Usage:
13python build_chrom_db.py dbpath/ [builds_file]
14"""
15
16import sys
17import parse_builds
18import urllib
19import fileinput
20
def getchrominfo(url, db):
    """
    Yield rows of the chromInfo table for one build from a UCSC table browser.

    url -- base URL of the hgTables CGI that the query string is appended to
           (e.g. "http://genome-test.cse.ucsc.edu/cgi-bin/hgTables?").
           Earlier versions ignored this argument and always used the
           genome-test server; that server is still the fallback when url
           is empty or None.
    db  -- build name; it is sent as both the "db" and "hgta_track"
           request parameters.

    Yields a two-element list [fields[0], fields[1]] for every response line
    that does not start with "#" and has at least two tab-separated fields
    (for chromInfo these are presumably the chromosome name and its length).

    This is a generator: the HTTP request is only issued once iteration
    starts, and the response is closed when iteration ends or is abandoned.
    """
    default_url = "http://genome-test.cse.ucsc.edu/cgi-bin/hgTables?"
    base_url = url or default_url
    # The encoded query is appended verbatim, so make sure the base URL ends
    # with a separator that keeps the result a well-formed URL.
    if "?" not in base_url:
        base_url += "?"
    elif base_url[-1] not in "?&":
        base_url += "&"
    query = urllib.urlencode({
        "clade" : "",
        "org" : "",
        "db" : db,
        "hgta_outputType": "primaryTable",
        "hgta_group" : "allTables",
        "hgta_table" : "chromInfo",
        "hgta_track" : db,
        "hgta_regionType":"",
        "position":"",
        "hgta_doTopSubmit" : "get info"})
    page = urllib.urlopen(base_url + query)
    try:
        for line in page:
            line = line.rstrip("\r\n")
            # Lines starting with "#" are header/comment lines, not data.
            if line.startswith("#"):
                continue
            fields = line.split("\t")
            # Skip anything that is not a tab-separated data row.
            if len(fields) > 1:
                yield [fields[0], fields[1]]
    finally:
        # Release the connection even if the consumer stops early or
        # reading the response fails part-way through.
        page.close()
41
def _read_build_names(builds_file):
    """
    Return the build names listed in builds_file.

    The build name is the first tab-separated field of each line. Lines
    starting with "#" are skipped, as are lines whose first field is empty
    after stripping whitespace (e.g. blank lines), so that a line without
    any tab does not produce a name carrying a trailing newline.
    """
    names = []
    buildfile = fileinput.FileInput(builds_file)
    try:
        for line in buildfile:
            if line.startswith("#"):
                continue
            name = line.split("\t")[0].strip()
            if name:
                names.append(name)
    finally:
        buildfile.close()
    return names


def main(argv):
    """
    Write a {dbpath}/buildname.len file for every requested build.

    argv -- command line in sys.argv form: argv[1] is the output directory
            and the optional argv[2] is a builds file. Without a builds
            file the list of builds is obtained from parse_builds.getbuilds.

    Returns the process exit status: 1 when the output path is missing,
    the builds file cannot be read, or the build list cannot be retrieved;
    0 otherwise.
    """
    # Local import: "os" is not among this file's top-level imports.
    import os

    if len(argv) < 2:
        print("Path to place chromInfo tables must be specified.")
        return 1
    dbpath = argv[1]

    if len(argv) > 2:
        try:
            builds = _read_build_names(argv[2])
        except Exception:
            # "except Exception" rather than a bare except, so Ctrl-C and
            # SystemExit are not reported as a bad input file.
            print("Bad input file.")
            return 1
    else:
        try:
            builds = [build[0] for build in
                      parse_builds.getbuilds("http://genome-test.cse.ucsc.edu/cgi-bin/das/dsn")]
        except Exception:
            print("Unable to retrieve builds.")
            return 1

    for build in builds:
        if build == "?":
            continue  # no lengths for unspecified chrom
        # os.path.join works whether or not dbpath ends with a separator.
        outfile = open(os.path.join(dbpath, build + ".len"), "w")
        try:
            print("Retrieving " + build)
            for chrominfo in getchrominfo("http://genome-test.cse.ucsc.edu/cgi-bin/hgTables?", build):
                outfile.write("\t".join(chrominfo) + "\n")
        finally:
            # Close the file even when the retrieval raises.
            outfile.close()
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。