[2] | 1 | """ |
---|
| 2 | Provides utilities for working with GFF files. |
---|
| 3 | """ |
---|
| 4 | |
---|
| 5 | from bx.intervals.io import NiceReaderWrapper, GenomicInterval |
---|
| 6 | |
---|
class GFFReaderWrapper(NiceReaderWrapper):
    """
    Reader wrapper that hands out GFF rows as 'traditional'/BED-style intervals.

    GFF starting and ending coordinates are 1-based and closed, while the traditional/BED
    interval format is 0-based and half-open. Converting on read is useful when GFF files
    are used as inputs to tools that expect the traditional interval format.
    """

    def parse_row(self, line):
        """
        Builds a GenomicInterval from a tab-separated line, using the column settings stored
        on this reader, and returns it after converting its coordinates from GFF to BED.
        """
        fields = line.split("\t")
        gff_interval = GenomicInterval(self, fields, self.chrom_col, self.start_col,
                                       self.end_col, self.strand_col, self.default_strand,
                                       fix_strand=self.fix_strand)
        return convert_gff_coords_to_bed(gff_interval)
| 18 | |
---|
def convert_bed_coords_to_gff(interval):
    """
    Converts an interval object's coordinates from BED format (0-based, half-open) to GFF
    format (1-based, closed). Only the start changes: it is incremented by one.

    Accepted object types include list (where the first element in the list is the interval's
    start, and the second element is the interval's end) and GenomicInterval, as well as
    subclasses of either. The object is modified in place and is also returned. Objects of
    any other type are returned unchanged.
    """
    # isinstance rather than an exact type() comparison: a subclass instance must be
    # converted too, not silently returned with its coordinates untouched.
    if isinstance(interval, list):
        interval[0] += 1
    elif isinstance(interval, GenomicInterval):
        interval.start += 1
    return interval
| 30 | |
---|
def convert_gff_coords_to_bed(interval):
    """
    Converts an interval object's coordinates from GFF format (1-based, closed) to BED
    format (0-based, half-open). Only the start changes: it is decremented by one.

    Accepted object types include list (where the first element in the list is the interval's
    start, and the second element is the interval's end) and GenomicInterval, as well as
    subclasses of either. The object is modified in place and is also returned. Objects of
    any other type are returned unchanged.
    """
    # isinstance rather than an exact type() comparison: a subclass instance must be
    # converted too, not silently returned with its coordinates untouched.
    if isinstance(interval, list):
        interval[0] -= 1
    elif isinstance(interval, GenomicInterval):
        interval.start -= 1
    return interval
| 42 | |
---|
def parse_gff_attributes(attr_str):
    """
    Parses a GFF attribute string and returns a dictionary of name-value pairs.
    The general format for a GFF attribute string is name1 "value1" ; name2 "value2"

    Each ';'-separated segment is split into a name and a value at the first run of
    whitespace; surrounding spaces and double quotes are stripped from the value. Segments
    that do not contain both a name and a value (for example the empty segment after a
    trailing ';') are skipped. If a name occurs more than once, the last value wins.
    """
    attributes = {}
    for name_value_pair in attr_str.split(";"):
        # Split only once, on any run of whitespace: this keeps quoted values that contain
        # spaces intact and tolerates more than one space between name and value.
        pair = name_value_pair.strip().split(None, 1)
        if len(pair) < 2:
            # Empty segment or a bare name without a value -- nothing to record.
            continue
        name, raw_value = pair
        # Need to strip double quotes from values
        attributes[name] = raw_value.strip(" \"")
    return attributes