[3] | 1 | """ |
---|
| 2 | Reading and writing delimited data files (with headers and comments). |
---|
| 3 | """ |
---|
| 4 | |
---|
| 5 | import sys |
---|
| 6 | from itertools import * |
---|
| 7 | from UserDict import DictMixin |
---|
| 8 | |
---|
class ParseError(Exception):
    """
    Error raised when a line of input cannot be parsed.

    Positional arguments are forwarded to ``Exception``. The optional
    ``linenum`` keyword argument records the line on which the problem
    occurred; any other keyword arguments are ignored.
    """

    def __init__(self, *args, **kwargs):
        super(ParseError, self).__init__(*args)
        # Stays None when the caller does not know the line number.
        self.linenum = kwargs.get("linenum")

    def __str__(self):
        """Return the base message, suffixed with the line number if set."""
        message = Exception.__str__(self)
        if not self.linenum:
            return message
        return message + " on line " + str(self.linenum)
---|
| 18 | |
---|
class TableRow(object):
    """
    A single row of a table.

    Holds the list of field values together with the reader that produced
    the row; the reader's ``header`` attribute is consulted to resolve
    column names.
    """

    def __init__(self, reader, fields):
        self.reader = reader
        self.fields = fields

    def __getitem__(self, key):
        """
        Return a field by integer position or by column name.

        Raises TypeError when ``key`` is neither an ``int`` nor a ``str``,
        or when a name is used but the reader has no header.
        """
        key_type = type(key)
        if key_type is int:
            return self.fields[key]
        if key_type is not str:
            raise TypeError("field indices must be integers or strings")
        header = self.reader.header
        if not header:
            raise TypeError("column names only supported for files with headers")
        column = header.field_to_column[key]
        return self.fields[column]

    @property
    def fieldnames(self):
        """Column names taken from the reader's header."""
        header = self.reader.header
        return header.fields

    def __str__(self):
        """Return the fields joined by tabs."""
        separator = "\t"
        return separator.join(self.fields)
---|
| 41 | |
---|
class Header(object):
    """
    Header of a table -- holds the column names and a mapping from each
    name to its column index.
    """

    def __init__(self, fields):
        self.set_fields(fields)

    def set_fields(self, fields):
        """Store the column names and rebuild the name -> index mapping."""
        self.fields = fields
        # For a repeated name the last occurrence wins.
        self.field_to_column = dict(
            (name, index) for index, name in enumerate(fields)
        )

    def __getitem__(self, key):
        """
        Return the column name for ``key``.

        An ``int`` key is looked up by position. A ``str`` key is returned
        unchanged when it is a known column name, otherwise the result is
        None. Any other key type raises TypeError.
        """
        key_type = type(key)
        if key_type is int:
            return self.fields[key]
        if key_type is not str:
            raise TypeError("field indices must be integers or strings")
        if key in self.field_to_column:
            return key
        # Unknown names produce None rather than an error.
        return None

    def __str__(self):
        """Return the header line: '#' followed by the tab-joined names."""
        joined = "\t".join(self.fields)
        return "#" + joined
---|
| 62 | |
---|
class Comment(object):
    """
    A comment line from a table.

    The text is stored as given; converting to ``str`` guarantees the
    result begins with '#'.
    """

    def __init__(self, line):
        self.line = line

    def __str__(self):
        """Return the line, prefixing '#' only when it is not already there."""
        text = self.line
        return text if text.startswith("#") else "#" + text
---|
| 70 | |
---|
class TableReader(object):
    """
    Reader for iterating tabular (tab-delimited) data.

    Iterating yields one object per input line: a ``Header`` for the header
    line, a ``Comment`` for comment and blank lines, and a ``TableRow`` for
    everything else.

    Parameters:
        input: any iterable of lines (for example an open file).
        return_header: when false, the header line is parsed and stored on
            ``self.header`` but not yielded.
        return_comments: when false, comment lines and blank lines are
            skipped instead of being yielded as ``Comment`` objects.
        force_header: header object to use; when it is not None the first
            line is never interpreted as a header.
        comment_lines_startswith: iterable of prefixes that mark a comment
            line. Defaults to ``["#"]``.
    """

    def __init__(self, input, return_header=True, return_comments=True,
                 force_header=None, comment_lines_startswith=None):
        # A fresh list per instance: a list literal as the default value
        # would be shared (and mutable) across every reader.
        if comment_lines_startswith is None:
            comment_lines_startswith = ["#"]
        self.input = input
        self.return_comments = return_comments
        self.return_header = return_header
        self.input_iter = iter(input)
        self.linenum = 0
        self.header = force_header
        self.comment_lines_startswith = comment_lines_startswith

    def __iter__(self):
        return self

    def next(self):
        """
        Return the next item from the input.

        Lines that are not to be returned (suppressed comments, blank lines
        or the header) are skipped in a loop rather than by recursion, so
        arbitrarily long runs of skipped lines cannot exhaust the stack.

        Raises StopIteration at end of input. A ParseError raised while
        parsing a row is re-raised with ``linenum`` set to the current line.
        """
        while True:
            line = next(self.input_iter)
            self.linenum += 1
            line = line.rstrip("\r\n")

            # Blank lines are reported as empty comments, so they are
            # written back out as a lone '#'.
            if line == "":
                if self.return_comments:
                    return Comment(line)
                continue

            # Is it a comment line?
            is_comment = any(
                line.startswith(prefix)
                for prefix in self.comment_lines_startswith
            )
            if is_comment:
                # A comment on the very first line is taken to be the
                # header, unless a header was already supplied.
                if self.header is None and self.linenum == 1:
                    self.header = self.parse_header(line)
                    if self.return_header:
                        return self.header
                elif self.return_comments:
                    return self.parse_comment(line)
                continue

            # Not a comment, must be a data row.
            try:
                return self.parse_row(line)
            except ParseError as e:
                e.linenum = self.linenum
                raise

    def __next__(self):
        """Python 3 iterator protocol; delegates to ``next``."""
        return self.next()

    def parse_header(self, line):
        """Build a ``Header`` from a line, dropping one leading '#' if present."""
        if line.startswith("#"):
            fields = line[1:].split("\t")
        else:
            fields = line.split("\t")
        return Header(fields)

    def parse_comment(self, line):
        """Wrap a comment line in a ``Comment``."""
        return Comment(line)

    def parse_row(self, line):
        """Split a line on tabs and wrap the fields in a ``TableRow``."""
        return TableRow(self, line.split("\t"))
---|
| 127 | |
---|
| 128 | |
---|
| 129 | |
---|
| 130 | |
---|
| 131 | |
---|
| 132 | |
---|
| 133 | |
---|
| 134 | |
---|
| 135 | |
---|
| 136 | |
---|
| 137 | |
---|