#!/usr/bin/env python
# Dan Blankenberg
|
---|
| 3 |
|
---|
| 4 | import sys
|
---|
| 5 | from galaxy import eggs
|
---|
| 6 | import pkg_resources; pkg_resources.require( "bx-python" )
|
---|
| 7 | import bx.intervals.io
|
---|
| 8 |
|
---|
# Refuse to run on interpreters older than Python 2.4.
assert ( 2, 4 ) <= sys.version_info[:2]
|
---|
| 10 |
|
---|
def stop_err( msg ):
    """Write msg to stderr and terminate the process with a failure status.

    msg is written as-is (no newline is appended). The function never
    returns: it raises SystemExit with exit code 1 so that callers and
    job runners that inspect the exit status see the failure, not only
    those that inspect stderr.
    """
    sys.stderr.write( msg )
    # A bare sys.exit() would report success (status 0) after an error.
    sys.exit( 1 )
|
---|
| 14 |
|
---|
def _zero_based_column( argv, position ):
    """Return argv[position] as an integer minus one, or None if it is missing or not an integer."""
    try:
        return int( argv[position] ) - 1
    except ( IndexError, ValueError ):
        return None


def _required_column( argv, position, label ):
    """Return the zero-based column at argv[position]; call stop_err() if it cannot be parsed."""
    column = _zero_based_column( argv, position )
    if column is None:
        # Report the offending value; use an empty string when the argument is absent
        # so that building the message cannot itself raise IndexError.
        if position < len( argv ):
            raw_value = argv[position]
        else:
            raw_value = ""
        stop_err( "'%s' is an invalid %s column, correct the column settings before attempting to convert the data format." % ( str( raw_value ), label ) )
    return column


def _optional_column( argv, position ):
    """Return the zero-based column at argv[position], or -1 if it is missing or not an integer."""
    column = _zero_based_column( argv, position )
    if column is None:
        return -1
    return column


def __main__():
    """Convert an interval file to six-column, tab-separated BED output.

    Command-line arguments (sys.argv):
        1  output file name (opened for writing)
        2  input file name
        3  chrom column   (required, integer; one is subtracted before use)
        4  start column   (required, integer; one is subtracted before use)
        5  end column     (required, integer; one is subtracted before use)
        6  strand column  (optional; falls back to -1 when missing or invalid)
        7  name column    (optional; falls back to -1 when missing or invalid)

    Each region yielded by bx.intervals.io.NiceReaderWrapper is written as
    chrom, start, end, name, 0, strand. When no usable name column exists
    for a region, the name "region_<index>" is used, where <index> is the
    zero-based position of the region among those yielded by the reader.

    An invalid required column aborts through stop_err(). A summary of
    converted and skipped regions is printed to stdout. The number in the
    "starting with line #" message is the one-based ordinal of the first
    region that could not be written.
    """
    output_name = sys.argv[1]
    input_name = sys.argv[2]
    chromCol = _required_column( sys.argv, 3, "chrom" )
    startCol = _required_column( sys.argv, 4, "start" )
    endCol = _required_column( sys.argv, 5, "end" )
    strandCol = _optional_column( sys.argv, 6 )
    nameCol = _optional_column( sys.argv, 7 )

    skipped_lines = 0
    first_skipped_line = 0
    # Tracked separately from the loop index so that an empty input reports
    # zero regions instead of one.
    total_regions = 0

    # try/finally (not "with") keeps the script within the Python 2.4 floor
    # that this file asserts, while still closing both files on any error.
    out = open( output_name, 'w' )
    try:
        in_file = open( input_name, 'r' )
        try:
            reader = bx.intervals.io.NiceReaderWrapper( in_file, chrom_col=chromCol, start_col=startCol, end_col=endCol, strand_col=strandCol, fix_strand=True, return_header=False, return_comments=False )
            for count, region in enumerate( reader ):
                total_regions = count + 1
                name = "region_%i" % count
                if nameCol >= 0:
                    try:
                        name = region.fields[nameCol]
                    except IndexError:
                        # Row is shorter than the name column; keep the generated name.
                        pass
                try:
                    out.write( "%s\t%i\t%i\t%s\t%i\t%s\n" % ( region.chrom, region.start, region.end, name, 0, region.strand ) )
                except Exception:
                    # Deliberate best-effort: a region that cannot be formatted
                    # or written is counted as skipped, not treated as fatal.
                    skipped_lines += 1
                    if not first_skipped_line:
                        first_skipped_line = count + 1
        finally:
            in_file.close()
    finally:
        out.close()

    print( "%i regions converted to BED." % ( total_regions - skipped_lines ) )
    if skipped_lines > 0:
        print( "Skipped %d blank or invalid lines starting with line # %d." % ( skipped_lines, first_skipped_line ) )
|
---|
| 61 |
|
---|
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    __main__()
|
---|