[2] | 1 | #!/usr/bin/env python |
---|
| 2 | """ |
---|
| 3 | Build a UCSC genome browser custom track file |
---|
| 4 | """ |
---|
| 5 | |
---|
| 6 | import sys, os |
---|
| 7 | |
---|
| 8 | assert sys.version_info[:2] >= ( 2, 4 ) |
---|
| 9 | |
---|
def stop_err( msg ):
    """
    Report a fatal error and terminate the script.

    msg is written to standard error exactly as given (no newline is
    appended), then SystemExit is raised with exit status 1.
    """
    sys.stderr.write( msg )
    # A bare sys.exit() terminates with status 0, which tells the calling
    # process that the run succeeded; use a non-zero status for failures.
    sys.exit( 1 )
| 13 | |
---|
# Number of command line values that describe one dataset:
# in_fname, type, colspec, name, description, color, visibility
_TRACK_ARG_COUNT = 7


def _parse_colspec( colspec ):
    """
    Convert a comma separated, 1-based column spec into 0-based indexes.

    colspec is "chrom,start,end,strand" or "chrom,start,end". Returns the
    tuple ( chrom, start, end, strand ); strand is -1 when the spec does not
    hold exactly four integers, in which case only the first three values are
    used. Calls stop_err() when the first three values are not integers.
    """
    columns = colspec.split( "," )
    try:
        c, s, e, st = [ int( x ) - 1 for x in columns ]
    except ValueError:
        # Raised both for a non-numeric value and for a count other than four.
        try:
            c, s, e = [ int( x ) - 1 for x in columns[:3] ]
        except ValueError:
            stop_err( "Columns in interval file invalid for UCSC custom track." )
        st = -1  # strand column is absent
    return c, s, e, st


def _write_track_header( out, name, description, color, visibility, wiggle=False ):
    """Write the "track" line that introduces one subtrack to out."""
    prefix = "track"
    if wiggle:
        prefix = "track type=wiggle_0"
    out.write( '%s name="%s" description="%s" color=%s visibility=%s\n'
               % ( prefix, name, description, color, visibility ) )


def _copy_dataset( in_fname, out ):
    """Copy every line of in_fname to out unchanged."""
    in_file = open( in_fname )
    try:
        for line in in_file:
            out.write( line )
    finally:
        in_file.close()


def _write_interval_dataset( in_fname, out, c, s, e, st ):
    """
    Write the interval dataset in_fname to out as tab separated lines.

    c, s, e and st are 0-based column indexes for chrom, start, end and
    strand. With st >= 0 each line is written as
    chrom, start, end, <0-based input line index>, 0, strand; with a negative
    st only chrom, start and end are written.

    Empty lines and lines starting with '#' are ignored. Lines that lack one
    of the requested columns are skipped and counted.

    Returns ( skipped_count, first_skipped_line ); first_skipped_line is the
    1-based number of the first skipped line, or 0 if none was skipped.
    """
    skipped = 0
    first_skipped = 0
    in_file = open( in_fname )
    try:
        for i, line in enumerate( in_file ):
            line = line.rstrip( '\r\n' )
            if not line or line.startswith( '#' ):
                continue
            fields = line.split( "\t" )
            try:
                # -1 is the "no strand column" marker, so index 0 is a valid
                # strand column and must be accepted here.
                if st >= 0:
                    bed = "%s\t%s\t%s\t%d\t0\t%s" % ( fields[c], fields[s], fields[e], i, fields[st] )
                else:
                    bed = "%s\t%s\t%s" % ( fields[c], fields[s], fields[e] )
            except IndexError:
                skipped += 1
                if not first_skipped:
                    first_skipped = i + 1
                continue
            out.write( bed + "\n" )
    finally:
        in_file.close()
    return skipped, first_skipped


def main( args ):
    """
    Build the custom track file described by args.

    args is the command line without the program name: the output file name
    followed by seven values per dataset (input file, type, column spec,
    name, description, color, visibility). Every '-' in color is replaced by
    ','. Datasets of type "wig" and "bed" are copied verbatim below their
    track line; any other type is treated as interval data and converted
    using the column spec. Each subtrack is terminated by an empty line.

    A summary (number of subtracks, and skipped interval lines if any) is
    written to standard output.
    """
    args = list( args )
    if not args or ( len( args ) - 1 ) % _TRACK_ARG_COUNT:
        stop_err( "Expected an output file name followed by %d arguments per dataset.\n"
                  % _TRACK_ARG_COUNT )

    num_tracks = 0
    skipped_lines = 0
    first_invalid_line = 0

    out = open( args[0], "w" )
    try:
        for start in range( 1, len( args ), _TRACK_ARG_COUNT ):
            # Take one dataset worth of arguments
            ( in_fname, track_type, colspec, name,
              description, color, visibility ) = args[start:start + _TRACK_ARG_COUNT]
            color = color.replace( '-', ',' )
            if track_type == "wig":
                _write_track_header( out, name, description, color, visibility, wiggle=True )
                _copy_dataset( in_fname, out )
            elif track_type == "bed":
                _write_track_header( out, name, description, color, visibility )
                _copy_dataset( in_fname, out )
            else:
                # Assume type is interval (don't pass this script anything else!)
                # The column spec is validated before the header is written so
                # that a bad spec does not leave a dangling track line behind.
                c, s, e, st = _parse_colspec( colspec )
                _write_track_header( out, name, description, color, visibility )
                skipped, first_skipped = _write_interval_dataset( in_fname, out, c, s, e, st )
                skipped_lines += skipped
                if not first_invalid_line:
                    first_invalid_line = first_skipped
            out.write( "\n" )
            num_tracks += 1
    finally:
        out.close()

    # write() instead of the print statement keeps the script runnable on
    # both Python 2 and Python 3; the emitted text is unchanged.
    sys.stdout.write( "Generated a custom track containing %d subtracks.\n" % num_tracks )
    if skipped_lines:
        sys.stdout.write( "Skipped %d invalid lines starting at #%d\n"
                          % ( skipped_lines, first_invalid_line ) )


if __name__ == "__main__":
    main( sys.argv[1:] )
| 85 | |
---|
| 86 | |
---|
| 87 | |
---|