root/galaxy-central/tools/regVariation/categorize_elements_satisfying_criteria.pl

リビジョン 2, 5.7 KB (コミッタ: hatakeyama, 14 年 前)

import galaxy-central

行番号 
#!/usr/bin/perl -w

# The program takes as input a set of categories, such that each category contains many elements.
# It also takes a table relating elements with criteria, such that each element is assigned a number
# representing the number of times the element satisfies a certain criterion.
# The first input is a TABULAR format file, such that the left column represents the names of categories
# and all other columns represent the names of elements.
# The second input is a TABULAR format file relating elements with criteria, such that the first line
# represents the names of criteria and the left column represents the names of elements.
# The output is a TABULAR format file relating categories with criteria, such that each category is
# assigned a number representing the total number of times its elements satisfy a certain criterion.
# Each category is assigned as many numbers as criteria.

use strict;
use warnings;

# read_categories($fh)
#
# Reads the categories table from the open filehandle $fh. Each line holds a
# category name followed by the tab-separated names of its elements.
#
# Returns two references:
#   - an array of category names, one entry per input line, in file order
#   - a hash mapping each element name to the name of its category
#
# An element listed under several categories is attributed to the last one
# read, because later lines overwrite earlier hash entries.
sub read_categories {
    my ($fh) = @_;

    my @category_names;
    my %category_of_element;

    while (my $line = <$fh>) {
        chomp $line;

        my ($category, @members) = split /\t/, $line;

        # A blank line names no category; emitting a row for it would only
        # produce a nameless line of zeros in the output.
        next unless defined $category;

        push @category_names, $category;
        $category_of_element{$_} = $category for @members;
    }

    return (\@category_names, \%category_of_element);
}

# tally_criteria($fh, $category_names, $category_of_element)
#
# Reads the element/criteria table from the open filehandle $fh and sums, per
# category, the positive values of its elements for every criterion.
#
# The first line is taken as the list of criterion names. Every following line
# is an element name followed by one value per criterion; values beyond the
# number of criteria are ignored, and missing or empty values count as 0.
#
# $category_names and $category_of_element are the references returned by
# read_categories().
#
# Returns two references:
#   - an array of criterion names
#   - an array with one row per category (same order as $category_names),
#     each row being an array of totals, one per criterion
sub tally_criteria {
    my ($fh, $category_names, $category_of_element) = @_;

    # Map every category name to its row once, instead of searching the list
    # of names for each value. When a name occurs on several lines, the first
    # row collects the totals.
    my %row_of_category;
    for my $row (0 .. $#{$category_names}) {
        my $name = $category_names->[$row];
        $row_of_category{$name} = $row unless exists $row_of_category{$name};
    }

    my @criteria;
    my $header = <$fh>;
    if (defined $header) {
        chomp $header;
        @criteria = split /\t/, $header;
    }

    my @counts = map { [ (0) x scalar(@criteria) ] } @{$category_names};

    while (my $line = <$fh>) {
        chomp $line;

        my ($element, @values) = split /\t/, $line;
        next unless defined $element;

        # An element that belongs to no category contributes to no total.
        # (Without this guard its values would be added to the first row.)
        my $category = $category_of_element->{$element};
        next unless defined $category;

        my $row = $row_of_category{$category};
        next unless defined $row;

        for my $col (0 .. $#criteria) {
            my $value = $values[$col];
            next unless defined $value && $value ne '';
            next unless $value > 0;

            $counts[$row][$col] += $value;
        }
    }

    return (\@criteria, \@counts);
}

# write_table($fh, $criteria, $category_names, $counts)
#
# Writes the result table to the open filehandle $fh: a header line with an
# empty first cell followed by the criterion names, then one line per category
# with its name and its total for each criterion. Cells are tab-separated.
sub write_table {
    my ($fh, $criteria, $category_names, $counts) = @_;

    # The header is always terminated, even when there are no criteria, so it
    # can never run into the first category line.
    print {$fh} "\t", join("\t", @{$criteria}), "\n";

    for my $row (0 .. $#{$category_names}) {
        print {$fh} join("\t", $category_names->[$row], @{ $counts->[$row] }), "\n";
    }

    return;
}

# main(@args)
#
# Entry point. Expects exactly three arguments: the categories file, the
# element/criteria file and the output file. Dies with a usage message on a
# wrong argument count, and with the system error text if a file cannot be
# opened or the output cannot be completely written.
sub main {
    my @args = @_;

    my $usage = "usage: categorize_elements_satisfying_criteria.pl [TABULAR.in] [TABULAR.in] [TABULAR.out] \n";
    die $usage unless @args == 3;

    my ($categories_file, $elements_file, $output_file) = @args;

    # Open all three files before doing any work so that a bad path is
    # reported immediately.
    open(my $categories_fh, '<', $categories_file)
        or die "Could not open file $categories_file: $!\n";
    open(my $elements_fh, '<', $elements_file)
        or die "Could not open file $elements_file: $!\n";
    open(my $output_fh, '>', $output_file)
        or die "Could not open file $output_file: $!\n";

    my ($category_names, $category_of_element) = read_categories($categories_fh);
    my ($criteria, $counts) = tally_criteria($elements_fh, $category_names, $category_of_element);

    write_table($output_fh, $criteria, $category_names, $counts);

    # Buffered write errors only surface when the handle is closed.
    close($output_fh) or die "Could not close file $output_file: $!\n";
    close($elements_fh);
    close($categories_fh);

    return;
}

# Run only when executed as a script, so the subroutines above can also be
# loaded from another file without triggering the command-line handling.
main(@ARGV) unless caller();

1;
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。