"""
Read sequence lengths from a file. Each line is of the form <name> <length>
where <name> is typically a chromosome name (e.g. chr12) and <length> is the
number of bases in the sequence.
"""
---|
| 6 | |
---|
def read_lengths_file(name):
    """
    Return a dict mapping sequence name to length, read from the file `name`.

    Each data line must hold exactly two whitespace-separated fields:
    a sequence name and an integer length. Blank lines and lines whose
    first non-blank character is '#' are skipped. A name may appear more
    than once as long as every occurrence gives the same length.

    Raises ValueError if a line does not have exactly two fields, if the
    length field is not an integer, or if a name is given two different
    lengths.
    """
    chrom_to_length = {}
    # The context manager closes the file even when a bad line raises.
    with open(name, "rt") as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith("#"):
                continue
            fields = line.split()
            if len(fields) != 2:
                raise ValueError("bad length file line: %s" % line)
            chrom = fields[0]
            try:
                length = int(fields[1])
            except ValueError:
                raise ValueError("bad length file line: %s" % line)
            # Repeating a name is allowed only when the length agrees.
            if chrom_to_length.get(chrom, length) != length:
                raise ValueError("%s has more than one length!" % chrom)
            chrom_to_length[chrom] = length
    return chrom_to_length
---|
| 29 | |
---|