1 | """ |
---|
2 | Read sequence lengths from a file. Each line is of the form <name> <length> |
---|
3 | where <name> is typically a chromosome name (e.g. chr12) and <length> is the |
---|
4 | number of bases in the sequence. |
---|
5 | """ |
---|
6 | |
---|
def read_lengths_file( name ):
    """
    Returns a hash from sequence name to length.

    `name` is the path of a text file. Each meaningful line must contain
    exactly two whitespace-separated fields: a sequence name followed by
    an integer length. Blank lines and lines whose first non-blank
    character is '#' are skipped.

    A sequence name may appear more than once as long as every occurrence
    gives the same length.

    Raises ValueError if a line does not have exactly two fields, if the
    length field is not an integer, or if a sequence name is listed with
    two different lengths. Errors from opening or reading the file are
    not caught here and propagate to the caller.
    """
    chrom_to_length = {}
    # The `with` block closes the file even when a bad line raises.
    with open( name, "rt" ) as f:
        for line in f:
            line = line.strip()
            if line == '' or line[0] == '#':
                continue
            fields = line.split()
            if len( fields ) != 2:
                raise ValueError( "bad length file line: %s" % line )
            chrom = fields[0]
            try:
                length = int( fields[1] )
            except ValueError:
                # Re-raise with the offending line so the caller can see
                # where the file is malformed.
                raise ValueError( "bad length file line: %s" % line )
            # Identical repeats are tolerated; only conflicts are errors.
            if chrom in chrom_to_length and length != chrom_to_length[chrom]:
                raise ValueError( "%s has more than one length!" % chrom )
            chrom_to_length[chrom] = length
    return chrom_to_length
---|
29 | |
---|