root/galaxy-central/eggs/bx_python-0.5.0_dev_f74aec067563-py2.6-macosx-10.6-universal-ucs2.egg/EGG-INFO/scripts/maf_col_counts_all.py

リビジョン 3, 1.7 KB (コミッタ: kohda, 14 年 前)

Install Unix tools  http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

行番号 
1#!/usr/bin/python2.6
2
3"""
4For every column that occurs in a multiple alignment print the column
5and the number of times it occurs (one column/count per line, tab
6separated), sorted by count descending.
7
8This version allows special handling of the 'wildcard' symbol in alignments.
9
10Note: all blocks must have exactly the same number of species.
11
12usage: %prog [options] < maf > column_counts
13    -w, --wildcard: include wildcards
14    -m, --maxwildcards=N: only allow N missing species
15"""
16
17import bx.align.maf
18import sys
19from bx.cookbook import doc_optparse, cross_lists
20
21from itertools import *
22
23counts = {}
24
25nspecies = None
26
27for block in bx.align.maf.Reader( sys.stdin ):
28    # Ensure all blocks have the same number of rows
29    if nspecies: assert len( block.components ) == nspecies
30    else: nspecies = len( block.components )
31    # Increment count for each column
32    for col in izip( * [ iter( comp.text.upper() ) for comp in block.components ] ):
33        col = ''.join( col )
34        try: counts[ col ] += 1
35        except: counts[ col ] = 1
36
37# counts = [ ( value, key ) for key, value in counts.iteritems() ]
38# counts.sort()
39# counts.reverse()
40
41## for count, col in counts:
42##     print "".join(col), count
43
44options, args = doc_optparse.parse( __doc__ )
45
46wildcard = False
47if options.wildcard:
48    wildcard = True
49    max_wildcard = nspecies - 1
50if options.maxwildcards:
51    wildcard = True
52    max_wildcard = int( options.maxwildcards )
53
54nucs = "ACGT-"
55if wildcard:
56    nucs += "*"
57
58for col in cross_lists( *( [ nucs ] * nspecies ) ):
59    col = ''.join( col )
60    if wildcard and col.count( "*" ) > max_wildcard:
61        continue
62    if col.count( "-" ) == nspecies:
63        continue
64    print col, counts.get( col, 0 )
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。