root/galaxy-central/eggs/bx_python-0.5.0_dev_f74aec067563-py2.6-macosx-10.6-universal-ucs2.egg/EGG-INFO/scripts/maf_mapping_word_frequency.py @ 3

リビジョン 3, 1.7 KB (コミッタ: kohda, 14 年 前)

Install Unix tools  http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

行番号 
1#!/usr/bin/python2.6
2
3"""
4
5Reads a maf file from stdin and applies the mapping file specified by
6`mapping_file` to produce a sequence of integers. Then for each possible word
7of length `motif_len` in this integer alphabet print the number of times
8that word occurs in the block.
9
10usage: %prog motif_len mapping_file < maf_file > counts
11"""
12
13from __future__ import division
14
15import psyco_full
16
17import bx.align.maf
18from bx import seqmapping
19import string
20import sys
21from Numeric import *
22
23def main():
24
25    word_length = int( sys.argv[1] )
26    align_count, alpha_map = seqmapping.alignment_mapping_from_file( file( sys.argv[2] ) )
27
28    for maf in bx.align.maf.Reader( sys.stdin ):
29        assert len( maf.components ) == align_count
30        # Translate alignment to ints
31        ints = seqmapping.DNA.translate_list( [ c.text for c in maf.components ] )
32        # Apply mapping
33        ints = alpha_map.translate( ints )
34        # Count words
35        radix = alpha_map.get_out_size()
36        counts = zeros( radix ** word_length, Int )
37        total = 0
38        for i in range( word_length, len( ints ) ):
39            index = 0
40            factor = 1
41            skip = False
42            for j in range( word_length ):
43                assert 0 < i-j < len( ints )
44                letter = ints[i-j]
45                if letter < 0:
46                    skip = True
47                    break
48                index += letter * factor
49                factor *= radix
50            if skip:
51                continue       
52            else:
53                counts[ index ] += 1
54                total += 1
55        # Write ints separated by tabs
56        print '\t'.join( [ str( total ) ] + map( str, counts ) )
57
58if __name__ == "__main__": main()
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。