1 | """ |
---|
2 | Reading and writing delimited data files (with headers and comments). |
---|
3 | """ |
---|
4 | |
---|
5 | import sys |
---|
6 | from itertools import * |
---|
7 | from UserDict import DictMixin |
---|
8 | |
---|
class ParseError(Exception):
    """
    Error raised when a line of tabular input cannot be parsed.

    Takes the usual positional exception arguments plus an optional
    `linenum` keyword. When `linenum` is set to a truthy value it is
    appended to the message produced by `str()`.
    """

    def __init__(self, *args, **kwargs):
        Exception.__init__(self, *args)
        # Only `linenum` is read from the keyword arguments; any others
        # are ignored.
        self.linenum = kwargs.get("linenum", None)

    def __str__(self):
        message = Exception.__str__(self)
        # No (or falsy) line number: report the plain message.
        if not self.linenum:
            return message
        return message + " on line " + str(self.linenum)
---|
18 | |
---|
class TableRow(object):
    """
    A row of a table.

    Stores the field values of one line together with the reader that
    produced it, so that fields can be looked up by position or, when the
    reader has a header, by column name.
    """

    def __init__(self, reader, fields):
        self.reader = reader
        self.fields = fields

    def __getitem__(self, key):
        """
        Return one field, selected by integer position or by column name.

        Raises TypeError when `key` is neither an integer nor a string, or
        when a column name is used but the reader has no header. A name
        that is missing from the header's `field_to_column` mapping
        propagates whatever that lookup raises.
        """
        # isinstance (rather than an exact type comparison) so that
        # subclasses of int and str are accepted as well.
        if isinstance(key, int):
            return self.fields[key]
        if isinstance(key, str):
            header = self.reader.header
            if not header:
                raise TypeError("column names only supported for files with headers")
            return self.fields[header.field_to_column[key]]
        raise TypeError("field indices must be integers or strings")

    @property
    def fieldnames(self):
        """Column names taken from the reader's header."""
        return self.reader.header.fields

    def __str__(self):
        """Return the fields joined with tab characters."""
        return "\t".join(self.fields)
---|
41 | |
---|
class Header(object):
    """
    Header of a table -- contains column names and a mapping from them
    to column indexes
    """

    def __init__(self, fields):
        self.set_fields(fields)

    def set_fields(self, fields):
        """Store `fields` and rebuild the name -> column index mapping."""
        self.fields = fields
        # If a name occurs more than once, the last occurrence wins.
        self.field_to_column = {name: index for index, name in enumerate(fields)}

    def __getitem__(self, key):
        """
        Return the column name selected by `key`.

        An integer is treated as a position into `fields`; a string is
        returned unchanged if it is a known column name.

        Raises KeyError for a string that is not a column name and
        TypeError for a key that is neither an integer nor a string.
        """
        if isinstance(key, int):
            return self.fields[key]
        if isinstance(key, str):
            # Previously an unknown name fell through and returned None
            # silently; report it explicitly instead.
            if key not in self.field_to_column:
                raise KeyError(key)
            return key
        raise TypeError("field indices must be integers or strings")

    def __str__(self):
        """Return the header line: '#' followed by the tab-joined names."""
        return "#" + "\t".join(self.fields)
---|
62 | |
---|
class Comment(object):
    """
    A comment line of a table.

    Keeps the line as given; `str()` yields it with a leading '#',
    adding one only when the line does not already start with '#'.
    """

    def __init__(self, line):
        self.line = line

    def __str__(self):
        text = self.line
        return text if text.startswith("#") else "#" + text
---|
70 | |
---|
class TableReader(object):
    """
    Reader for iterating tabular data

    Iterating yields one object per input line: the header (when the first
    line is a comment line and no header was forced), comments, and rows.
    Blank lines are reported as comments.

    Parameters:
        input: iterable of lines.
        return_header: when false, a header parsed from the first line is
            stored on `self.header` but not yielded.
        return_comments: when false, comment and blank lines are skipped.
        force_header: stored as `self.header`; when it is not None, no
            header is parsed from the input.
        comment_lines_startswith: prefixes that mark a line as a comment.
    """

    def __init__(self, input, return_header=True, return_comments=True, force_header=None, comment_lines_startswith = ["#"]):
        self.input = input
        self.return_comments = return_comments
        self.return_header = return_header
        self.input_iter = iter(input)
        self.linenum = 0
        self.header = force_header
        # Copy so the shared default list (or a caller's list) is never
        # aliased by this instance.
        self.comment_lines_startswith = list(comment_lines_startswith)

    def __iter__(self):
        return self

    def __next__(self):
        """
        Return the next header, comment or row.

        Raises StopIteration when the input is exhausted. A ParseError
        raised by `parse_row` is re-raised with its `linenum` set to the
        current line number.
        """
        # Loop, rather than recurse, past lines that are not returned, so a
        # long run of skipped lines cannot exhaust the recursion limit.
        while True:
            line = next(self.input_iter)
            self.linenum += 1
            line = line.rstrip("\r\n")
            # Blank lines are reported as comments, so printing one back
            # out adds a '#' at the beginning of the line.
            if line == "":
                if self.return_comments:
                    return Comment(line)
                continue
            # Is it a comment line?
            if line.startswith(tuple(self.comment_lines_startswith)):
                # A comment on the very first line is assumed to be the
                # header, unless a header was supplied up front.
                if self.header is None and self.linenum == 1:
                    self.header = self.parse_header(line)
                    if self.return_header:
                        return self.header
                    continue
                if self.return_comments:
                    return self.parse_comment(line)
                continue
            # Not a comment, must be a data row.
            try:
                return self.parse_row(line)
            except ParseError as e:
                e.linenum = self.linenum
                # Bare raise keeps the original traceback.
                raise

    def next(self):
        """Python 2 spelling of the iterator protocol; see `__next__`."""
        return self.__next__()

    def parse_header(self, line):
        """Build a Header from `line`, dropping one leading '#' if present."""
        if line.startswith("#"):
            fields = line[1:].split("\t")
        else:
            fields = line.split("\t")
        return Header(fields)

    def parse_comment(self, line):
        """Wrap a comment line in a Comment object."""
        return Comment(line)

    def parse_row(self, line):
        """Split `line` on tabs and wrap the fields in a TableRow."""
        return TableRow(self, line.split("\t"))
---|
127 | |
---|
128 | |
---|
129 | |
---|
130 | |
---|
131 | |
---|
132 | |
---|
133 | |
---|
134 | |
---|
135 | |
---|
136 | |
---|
137 | |
---|