root/galaxy-central/eggs/bx_python-0.5.0_dev_f74aec067563-py2.6-macosx-10.6-universal-ucs2.egg/bx/intervals/operations/concat.py

リビジョン 3, 2.6 KB (コミッタ: kohda, 14 年 前)

Install Unix tools  http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

行番号 
1"""
2Concatenate sets of intervals.
3
4Preserves format of the first input -- it is possible to concat two files that
5have different column orders. Of course, the meta-data of the second will be
6lost (and filled with a "."). If all of the files (GenomicIntervalReaders) are
7the same format, sameformat=True will preserve all columns of the first input,
8cuts extra columns on subsequent input, and pads missing columns. If
9sameformat=False then extra columns are filled with ".".
10"""
11
12import psyco_full
13
14import traceback
15import fileinput
16from warnings import warn
17
18from bx.intervals.io import *
19from bx.intervals.operations import *
20
def concat(readers, comments=True, header=True, sameformat=True):
    """
    Yield the items of each reader in `readers`, one reader after another.

    The column layout (chrom/start/end/strand column indexes) is taken from
    the first reader, and the number of fields is taken from the first
    interval encountered.

    readers    -- sequence of iterables; the first one must expose
                  `chrom_col`, `start_col`, `end_col` and `strand_col`.
    comments   -- when true, Comment items are passed through.
    header     -- when true, Header items are passed through.
    sameformat -- when true, every interval keeps its own columns and is
                  trimmed or padded with "." to the reference width.  When
                  false, intervals from later inputs are rebuilt as a row of
                  "." holding only chrom, start, end and (if it fits) strand
                  in the first input's column positions.

    Items that are not a GenomicInterval, Header or Comment are dropped.
    """
    # Save columns from the first input
    chrom_col = readers[0].chrom_col
    start_col = readers[0].start_col
    end_col = readers[0].end_col
    strand_col = readers[0].strand_col
    nfields = None
    firstdataset = True
    output = False
    for intervals in readers:
        for interval in intervals:
            if type( interval ) is GenomicInterval:
                if not nfields: nfields = interval.nfields
                out_interval = interval.copy()
                if sameformat or firstdataset:
                    # everything except the first input has to be
                    # trimmed or padded to match the first input
                    fields = out_interval.fields
                    if len( fields ) > nfields:
                        out_interval.fields = fields[0:nfields]
                    elif len( fields ) < nfields:
                        # Padding must not depend on the trim branch: a row
                        # that is too short was never too long.
                        fields.extend( ["."] * ( nfields - len( fields ) ) )
                    output = True
                    yield out_interval
                else:
                    chrom = out_interval.chrom
                    start = out_interval.start
                    end = out_interval.end
                    strand = out_interval.strand
                    out_interval.fields = ["."] * nfields
                    out_interval.fields[chrom_col] = chrom
                    out_interval.fields[start_col] = str(start)
                    out_interval.fields[end_col] = str(end)
                    # Strand is optional, might not exist in output
                    if strand_col is not None and strand_col < len( out_interval.fields ):
                        out_interval.fields[strand_col] = strand
                    yield out_interval
            elif type( interval ) is Header and header:
                yield interval
            elif type( interval ) is Comment and comments:
                yield interval
        # Only leave "first dataset" mode once an interval has actually been
        # emitted, so an empty leading input does not define the format.
        if output and firstdataset: firstdataset = False
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。