root/galaxy-central/lib/galaxy/tools/util/gff_util.py

リビジョン 2, 2.3 KB (コミッタ: hatakeyama, 14 年 前)

import galaxy-central

行番号 
"""
Provides utilities for working with GFF files.
"""
4
5from bx.intervals.io import NiceReaderWrapper, GenomicInterval
6
class GFFReaderWrapper( NiceReaderWrapper ):
    """
    Reader wrapper that converts GFF format--starting and ending coordinates
    are 1-based, closed--to the 'traditional'/BED interval format--0-based,
    half-open. This is useful when using GFF files as inputs to tools that
    expect traditional interval format.
    """
    def parse_row( self, line ):
        """
        Builds a GenomicInterval from one tab-separated line and returns it
        after passing it through convert_gff_coords_to_bed.

        The column indexes, the default strand and the fix_strand flag are
        read from this reader's own attributes (self.chrom_col,
        self.start_col, self.end_col, self.strand_col, self.default_strand,
        self.fix_strand).
        """
        fields = line.split( "\t" )
        interval = GenomicInterval( self, fields, self.chrom_col, self.start_col, self.end_col,
                                    self.strand_col, self.default_strand, fix_strand=self.fix_strand )
        return convert_gff_coords_to_bed( interval )
18       
def convert_bed_coords_to_gff( interval ):
    """
    Converts an interval object's coordinates from BED format to GFF format.

    Accepted object types include GenomicInterval and list (where the first
    element in the list is the interval's start, and the second element is
    the interval's end); subclasses of either type are accepted as well.

    Only the start is changed (it is incremented by one); the end is left
    untouched. The object is modified in place and is also returned. An
    object of any other type is returned unchanged.
    """
    # isinstance (rather than an exact type comparison) so that subclasses are
    # converted too instead of being silently returned unmodified.
    if isinstance( interval, list ):
        interval[ 0 ] += 1
    elif isinstance( interval, GenomicInterval ):
        interval.start += 1
    return interval
30   
def convert_gff_coords_to_bed( interval ):
    """
    Converts an interval object's coordinates from GFF format to BED format.

    Accepted object types include GenomicInterval and list (where the first
    element in the list is the interval's start, and the second element is
    the interval's end); subclasses of either type are accepted as well.

    Only the start is changed (it is decremented by one); the end is left
    untouched. The object is modified in place and is also returned. An
    object of any other type is returned unchanged.
    """
    # isinstance (rather than an exact type comparison) so that subclasses are
    # converted too instead of being silently returned unmodified.
    if isinstance( interval, list ):
        interval[ 0 ] -= 1
    elif isinstance( interval, GenomicInterval ):
        interval.start -= 1
    return interval
42   
def parse_gff_attributes( attr_str ):
    """
    Parses a GFF attribute string and returns a dictionary of name-value pairs.
    The general format for a GFF attribute string is name1 "value1" ; name2 "value2"

    Within each ';'-separated pair the name is everything up to the first run
    of whitespace; the remainder, with surrounding spaces and double quotes
    removed, is the value, so a quoted value may contain spaces. Pairs that
    are empty or that carry a name without a value are skipped. When a name
    occurs more than once, the last value wins.

    Note that the string is split on every ';', so a semicolon inside a
    quoted value is not supported.
    """
    attributes = {}
    for name_value_pair in attr_str.split( ";" ):
        # Split only once, on any run of whitespace: this keeps multi-word
        # values intact and tolerates tabs or repeated spaces after the name.
        pair = name_value_pair.strip().split( None, 1 )
        if len( pair ) < 2:
            # Empty segment (e.g. trailing ';') or a name with no value.
            continue
        name, raw_value = pair
        # Need to strip double quote from values
        attributes[ name ] = raw_value.strip( " \"" )
    return attributes
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。