| 1 | """ |
|---|
| 2 | Read sequence lengths from a file. Each line is of the form <name> <length> |
|---|
| 3 | where <name> is typically a chromosome name (e.g. chr12) and <length> is the |
|---|
| 4 | number of bases in the sequence. |
|---|
| 5 | """ |
|---|
| 6 | |
|---|
def read_lengths_file( name ):
    """
    Returns a hash from sequence name to length.

    `name` is the path of a text file in which each meaningful line has
    exactly two whitespace-separated fields: a sequence name and an integer
    length.  Blank lines and lines whose first non-blank character is '#'
    are skipped.  A name may appear more than once as long as every
    occurrence gives the same length.

    Raises ValueError if a line does not have exactly two fields, if the
    length field is not an integer, or if a name is listed with two
    different lengths.
    """
    chrom_to_length = {}
    # The with-block guarantees the file is closed even when a bad line
    # makes us raise part way through.
    with open( name, "rt" ) as f:
        for line in f:
            line = line.strip()
            if line == '' or line[0] == '#':
                continue
            fields = line.split()
            if len( fields ) != 2:
                raise ValueError( "bad length file line: %s" % line )
            chrom = fields[0]
            try:
                length = int( fields[1] )
            except ValueError:
                # Re-raise with the offending line so the caller can see
                # which record is malformed.
                raise ValueError( "bad length file line: %s" % line )
            # Exact duplicates are tolerated; conflicting lengths are not.
            if chrom in chrom_to_length and length != chrom_to_length[chrom]:
                raise ValueError( "%s has more than one length!" % chrom )
            chrom_to_length[chrom] = length
    return chrom_to_length
|---|
| 29 | |
|---|