1 | """ |
---|
2 | Provides utilities for working with GFF files. |
---|
3 | """ |
---|
4 | |
---|
5 | from bx.intervals.io import NiceReaderWrapper, GenomicInterval |
---|
6 | |
---|
class GFFReaderWrapper(NiceReaderWrapper):
    """
    Reader wrapper that yields GFF rows as 'traditional'/BED-style intervals.

    GFF start and end coordinates are 1-based and closed, whereas the
    traditional interval format is 0-based and half-open. Wrapping a GFF
    file with this reader lets it be fed to tools that expect the
    traditional interval format.
    """

    def parse_row(self, line):
        """
        Build a GenomicInterval from one tab-separated line and return it
        after passing it through convert_gff_coords_to_bed.
        """
        fields = line.split("\t")
        gff_interval = GenomicInterval(
            self,
            fields,
            self.chrom_col,
            self.start_col,
            self.end_col,
            self.strand_col,
            self.default_strand,
            fix_strand=self.fix_strand,
        )
        return convert_gff_coords_to_bed(gff_interval)
---|
18 | |
---|
def convert_bed_coords_to_gff(interval):
    """
    Converts an interval object's coordinates from BED format to GFF format.

    Accepted object types are list (where the first element is the
    interval's start and the second element is the interval's end) and
    GenomicInterval, including subclasses of either. Only the start is
    changed (incremented by one); the end is left as is.

    The object is modified in place and also returned. Objects of any other
    type are returned unchanged.
    """
    # isinstance (rather than an exact type comparison) so that subclasses
    # are converted too instead of silently passing through unconverted.
    if isinstance(interval, list):
        interval[0] += 1
    elif isinstance(interval, GenomicInterval):
        interval.start += 1
    return interval
---|
30 | |
---|
def convert_gff_coords_to_bed(interval):
    """
    Converts an interval object's coordinates from GFF format to BED format.

    Accepted object types are list (where the first element is the
    interval's start and the second element is the interval's end) and
    GenomicInterval, including subclasses of either. Only the start is
    changed (decremented by one); the end is left as is.

    The object is modified in place and also returned. Objects of any other
    type are returned unchanged.
    """
    # isinstance (rather than an exact type comparison) so that subclasses
    # are converted too instead of silently passing through unconverted.
    if isinstance(interval, list):
        interval[0] -= 1
    elif isinstance(interval, GenomicInterval):
        interval.start -= 1
    return interval
---|
42 | |
---|
def parse_gff_attributes(attr_str):
    """
    Parses a GFF attribute string and returns a dictionary of name-value pairs.

    The general format for a GFF attribute string is
    name1 "value1" ; name2 "value2"

    Pairs are separated by ';'. Within a pair, the name is everything up to
    the first run of whitespace and the value is the remainder, with
    surrounding spaces and double quotes removed. Empty segments (such as
    the one after a trailing ';') and names that have no value are skipped.
    If a name occurs more than once, the last value wins.

    NOTE: the string is split on every ';', so a ';' inside a quoted value
    is not supported.
    """
    attributes = {}
    for name_value_pair in attr_str.split(";"):
        # Split on the first run of whitespace only: this keeps quoted values
        # that contain spaces intact and tolerates repeated spaces between
        # the name and the value.
        pair = name_value_pair.strip().split(None, 1)
        if len(pair) < 2:
            # Nothing to record: empty segment or a name without a value.
            continue
        name, value = pair
        # Need to strip double quote from values
        attributes[name] = value.strip(" \"")
    return attributes
---|