[2] | 1 | #!/usr/bin/env python |
---|
| 2 | """ |
---|
| 3 | Generate indices for track browsing of an interval file. |
---|
| 4 | |
---|
| 5 | usage: %prog bed_file out_directory |
---|
| 6 | -1, --cols1=N,N,N,N: Columns for chrom, start, end, strand in interval file |
---|
| 7 | """ |
---|
| 8 | import sys |
---|
| 9 | from galaxy import eggs |
---|
| 10 | import pkg_resources; pkg_resources.require( "bx-python" ) |
---|
| 11 | from bx.intervals import io |
---|
| 12 | from bx.cookbook import doc_optparse |
---|
| 13 | import psyco_full |
---|
| 14 | import commands |
---|
| 15 | import os |
---|
| 16 | from os import environ |
---|
| 17 | import tempfile |
---|
| 18 | from bisect import bisect |
---|
| 19 | |
---|
| 20 | def divide( intervals, out_path ): |
---|
| 21 | manifest = {} |
---|
| 22 | current_file = None |
---|
| 23 | lastchrom = "" |
---|
| 24 | for line in intervals: |
---|
| 25 | try: |
---|
| 26 | chrom = line.chrom |
---|
| 27 | except AttributeError, e: |
---|
| 28 | continue |
---|
| 29 | manifest[chrom] = max(manifest.get(chrom,0),line.end) |
---|
| 30 | if not lastchrom == chrom: |
---|
| 31 | if current_file: |
---|
| 32 | current_file.close() |
---|
| 33 | current_file = open( os.path.join( out_path, "%s" % chrom), "a" ) |
---|
| 34 | print >> current_file, "\t".join(line) |
---|
| 35 | lastchrom = chrom |
---|
| 36 | if current_file: |
---|
| 37 | current_file.close() |
---|
| 38 | return manifest |
---|
| 39 | |
---|
| 40 | if __name__ == "__main__": |
---|
| 41 | options, args = doc_optparse.parse( __doc__ ) |
---|
| 42 | try: |
---|
| 43 | chr_col_1, start_col_1, end_col_1, strand_col_1 = [int(x)-1 for x in options.cols1.split(',')] |
---|
| 44 | in_fname, out_path = args |
---|
| 45 | except: |
---|
| 46 | doc_optparse.exception() |
---|
| 47 | |
---|
| 48 | # Sort through a tempfile first |
---|
| 49 | temp_file = tempfile.NamedTemporaryFile(mode="r") |
---|
| 50 | environ['LC_ALL'] = 'POSIX' |
---|
| 51 | commandline = "sort -f -n -k %d -k %d -k %d -o %s %s" % (chr_col_1+1,start_col_1+1,end_col_1+1, temp_file.name, in_fname) |
---|
| 52 | errorcode, stdout = commands.getstatusoutput(commandline) |
---|
| 53 | |
---|
| 54 | temp_file.seek(0) |
---|
| 55 | interval = io.NiceReaderWrapper( temp_file, |
---|
| 56 | chrom_col=chr_col_1, |
---|
| 57 | start_col=start_col_1, |
---|
| 58 | end_col=end_col_1, |
---|
| 59 | strand_col=strand_col_1, |
---|
| 60 | fix_strand=True ) |
---|
| 61 | manifest = divide( interval, out_path ) |
---|
| 62 | manifest_file = open( os.path.join( out_path, "manifest.tab" ),"w" ) |
---|
| 63 | for key, value in manifest.items(): |
---|
| 64 | print >> manifest_file, "%s\t%s" % (key, value) |
---|
| 65 | manifest_file.close() |
---|
| 66 | temp_file.close() |
---|