root/galaxy-central/tools/metag_tools/blat_mapping.py

リビジョン 2, 2.6 KB (コミッタ: hatakeyama, 14 年 前)

import galaxy-central

行番号 
1#!/usr/bin/env python
2
3import os, sys
4
5assert sys.version_info[:2] >= ( 2, 4 )
6
7def reverse_complement(s):
8    complement_dna = {"A":"T", "T":"A", "C":"G", "G":"C", "a":"t", "t":"a", "c":"g", "g":"c", "N":"N", "n":"n" , ".":"."}
9    reversed_s = []
10    for i in s:
11        reversed_s.append(complement_dna[i])
12    reversed_s.reverse()
13    return "".join(reversed_s)
14   
15def __main__():
16    nuc_index = {'a':0,'t':1,'c':2,'g':3,'n':4}
17    coverage = {}        # key = (chrom, index)
18    invalid_lines = 0
19    invalid_chrom = 0
20    infile = sys.argv[1]
21    outfile = sys.argv[2]
22
23    for i, line in enumerate( open( infile ) ):
24        line = line.rstrip('\r\n')
25        if not line or line.startswith('#'):
26            continue
27        fields = line.split()
28        if len(fields) < 21:                # standard number of pslx columns
29            invalid_lines += 1
30            continue
31        if not fields[0].isdigit():
32            invalid_lines += 1
33            continue
34        chrom = fields[13]
35        if not chrom.startswith( 'chr' ):
36            invalid_lines += 1
37            invalid_chrom += 1
38            continue
39        try:
40            block_count = int(fields[17])
41        except:
42            invalid_lines += 1
43            continue
44        block_size = fields[18].split(',')
45        chrom_start = fields[20].split(',')
46
47        for j in range( block_count ):
48            try:
49                this_block_size = int(block_size[j])
50                this_chrom_start = int(chrom_start[j])
51            except:
52                invalid_lines += 1
53                break
54            # brut force coverage               
55            for k in range( this_block_size ):
56                cur_index = this_chrom_start + k
57                if coverage.has_key( ( chrom, cur_index ) ):
58                    coverage[(chrom, cur_index)] += 1
59                else:
60                    coverage[(chrom, cur_index)] = 1
61               
62    # generate a index file
63    outputfh = open(outfile, 'w')
64    keys = coverage.keys()
65    keys.sort()
66    previous_chrom = ''
67    for i in keys:
68        (chrom, location) = i
69        sum = coverage[(i)]
70        if chrom != previous_chrom:
71            outputfh.write( 'variableStep chrom=%s\n' % ( chrom ) )
72            previous_chrom = chrom
73        outputfh.write( "%s\t%s\n" % ( location, sum ) )
74    outputfh.close()
75   
76    if invalid_lines:
77        invalid_msg = "Skipped %d invalid lines" % invalid_lines
78        if invalid_chrom:
79            invalid_msg += ", including %d lines with chrom id errors which must begin with 'chr' to map correctly to the UCSC Genome Browser. "
80       
81if __name__ == '__main__': __main__()
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。