#!/usr/bin/env python

"""
Connects to a UCSC table browser and scrapes chrominfo for every build
specified by an input file (such as one output by parse_builds.py).
If no input file is specified, it will connect using parse_builds.py to
retrieve a list of available builds.

All chromInfo is placed in a path with the convention
{dbpath}/buildname.len

Usage:
python build_chrom_db.py dbpath/ [builds_file]
"""

import sys
import parse_builds
import urllib
import fileinput

def getchrominfo(url, db):
    """
    Scrape the chromInfo table of build `db` from a UCSC table browser.

    url -- base URL of the hgTables CGI, ending in "?", to which the query
           string is appended. A false value (None, "") falls back to the
           genome-test server.
    db  -- name of the build (database) to query.

    This is a generator. For every response line that is not a "#" comment
    and has at least two tab-separated fields, it yields a two-element list
    holding the first two fields as strings. The HTTP response is closed
    when the generator is exhausted or discarded.
    """
    # Honor the caller-supplied endpoint; previously the argument was ignored
    # in favor of this hard-coded default.
    tableURL = url or "http://genome-test.cse.ucsc.edu/cgi-bin/hgTables?"
    URL = tableURL + urllib.urlencode({
        "clade" : "",
        "org" : "",
        "db" : db,
        "hgta_outputType": "primaryTable",
        "hgta_group" : "allTables",
        "hgta_table" : "chromInfo",
        "hgta_track" : db,
        "hgta_regionType":"",
        "position":"",
        "hgta_doTopSubmit" : "get info"})
    page = urllib.urlopen(URL)
    try:
        for line in page:
            line = line.rstrip("\r\n")
            if line.startswith("#"):
                continue
            fields = line.split("\t")
            # Lines with a single field (blank lines, stray text) carry no
            # usable pair of columns.
            if len(fields) > 1:
                yield [fields[0], fields[1]]
    finally:
        # Release the connection even if the consumer stops early or fails.
        page.close()

if __name__ == "__main__":
    if len(sys.argv) == 1:
        print "Path to place chromInfo tables must be specified."
        sys.exit(1)
    dbpath = sys.argv[1]
    builds = []
    if len(sys.argv) > 2:
        try:
            buildfile = fileinput.FileInput(sys.argv[2])
            for line in buildfile:
                if line.startswith("#"): continue
                builds.append(line.split("\t")[0])
        except:
            print "Bad input file."
            sys.exit(1)
    else:
        try:
            for build in parse_builds.getbuilds("http://genome-test.cse.ucsc.edu/cgi-bin/das/dsn"):
                builds.append(build[0])
        except:
            print "Unable to retrieve builds."
            sys.exit(1)
    for build in builds:
        if build == "?":continue # no lengths for unspecified chrom
        outfile = open(dbpath + build + ".len", "w")
        print "Retrieving "+build
        for chrominfo in getchrominfo("http://genome-test.cse.ucsc.edu/cgi-bin/hgTables?",build):
            print >> outfile,"\t".join(chrominfo)
        outfile.close()