root/galaxy-central/tools/regVariation/delete_overlapping_indels.pl

リビジョン 2, 3.5 KB (コミッタ: hatakeyama, 14 年 前)

import galaxy-central

行番号 
1#!/usr/bin/perl -w
2
3# This program detects overlapping indels in a chromosome and keeps all non-overlapping indels. As for overlapping indels,
4# the first encountered one is kept and all others are removed. It requires three inputs:
5# The first input is a TABULAR format file containing coordinates of indels in blocks extracted from multi-alignment.
6# The second input is an integer number representing the number of the column where indel start coordinates are stored in the input file.
7# The third input is an integer number representing the number of the column where indel end coordinates are stored in the input file.
8# The output is a TABULAR format file containing all non-overlapping indels in the input file, and the first encountered indel of overlapping ones.
9# Note: The number of the first column is 1.
10 
11use strict;
12use warnings;
13
14#varaibles to handle information related to indels
15my $indel1 = "";
16my $indel2 = "";
17my @indelArray1 = ();
18my @indelArray2 = ();
19my $lineCounter1 = 0;
20my $lineCounter2 = 0;
21my $totalNumberofNonOverlappingIndels = 0;
22
23# check to make sure having correct files
24my $usage = "usage: delete_overlapping_indels.pl [TABULAR.in] [indelStartColumn] [indelEndColumn] [TABULAR.out]\n";
25die $usage unless @ARGV == 4;
26
27my $inputFile = $ARGV[0];
28my $indelStartColumn = $ARGV[1] - 1;
29my $indelEndColumn = $ARGV[2] - 1;
30my $outputFile = $ARGV[3];
31
32#verifie column numbers
33if ($indelStartColumn < 0 ){
34        die ("The indel start column number is invalid \n");
35}
36if ($indelEndColumn < 0 ){
37        die ("The indel end column number is invalid \n");
38}
39
40#open the input and output files
41open (INPUT, "<", $inputFile) || die ("Could not open file $inputFile \n");
42open (OUTPUT, ">", $outputFile) || die ("Could not open file $outputFile \n");
43
44#store the input file in the array @rawData
45my @indelsRawData = <INPUT>;
46
47#iterated through the indels of the input file
48INDEL1:
49foreach $indel1 (@indelsRawData){
50        chomp ($indel1);
51        $lineCounter1++;
52       
53        #get the first indel
54        @indelArray1 = split(/\t/, $indel1);
55         
56        #our purpose is to detect overlapping indels and to store one copy of them only in the output file
57        #all other non-overlapping indels will stored in the output file also
58                         
59        $lineCounter2 = 0;
60                 
61        #iterated through the indels of the input file
62        INDEL2:
63        foreach $indel2 (@indelsRawData){
64                chomp ($indel2);
65                $lineCounter2++;
66                               
67                if ($lineCounter2 > $lineCounter1){
68                        #get the second indel
69                        @indelArray2 = split(/\t/, $indel2);
70                                               
71                        #check if the two indels are overlapping
72                        if (($indelArray2[$indelEndColumn] >= $indelArray1[$indelStartColumn] && $indelArray2[$indelEndColumn] <= $indelArray1[$indelEndColumn]) || ($indelArray2[$indelStartColumn] >= $indelArray1[$indelStartColumn] && $indelArray2[$indelStartColumn] <= $indelArray1[$indelEndColumn])){
73                                #print ("There is an overlap between" . "\n" . $indel1 . "\n" . $indel2 . "\n");
74                                #print("The two overlapping indels are located at the lines: " . $lineCounter1 . " " . $lineCounter2 . "\n\n");
75                               
76                                #break out of the loop and go back to the outerloop
77                                next INDEL1;
78                        }
79                        else{
80                                #print("The two non-overlaapping indels are located at the lines: " . $lineCounter1 . " " . $lineCounter2 . "\n");
81                        }
82                }
83        }
84                 
85        print OUTPUT $indel1 . "\n";
86        $totalNumberofNonOverlappingIndels++;
87}
88
89#print("The total number of indels is: " . $lineCounter1 . "\n");
90#print("The total number of non-overlapping indels is: " . $totalNumberofNonOverlappingIndels . "\n");
91
92#close the input and output files
93close(OUTPUT);
94close(INPUT);
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。