root/galaxy-central/tools/unix_tools/find_and_replace.pl

リビジョン 3, 5.2 KB (コミッタ: kohda, 14 年 前)

Install Unix tools  http://hannonlab.cshl.edu/galaxy_unix_tools/galaxy.html

行番号 
1#!/usr/bin/perl
2use strict;
3use warnings;
4use Getopt::Std;
5
6sub parse_command_line();
7sub build_regex_string();
8sub usage();
9
10my $input_file ;
11my $output_file;
12my $find_pattern ;
13my $replace_pattern ;
14my $find_complete_words ;
15my $find_pattern_is_regex ;
16my $find_in_specific_column ;
17my $find_case_insensitive ;
18my $replace_global ;
19my $skip_first_line ;
20
21
22##
23## Program Start
24##
25usage() if @ARGV<2;
26parse_command_line();
27my $regex_string = build_regex_string() ;
28
29# Allow first line to pass without filtering?
30if ( $skip_first_line ) {
31        my $line = <$input_file>;
32        print $output_file $line ;
33}
34
35
36##
37## Main loop
38##
39
40## I LOVE PERL (and hate it, at the same time...)
41##
42## So what's going on with the self-compiling perl code?
43##
44## 1. The program gets the find-pattern and the replace-pattern from the user (as strings).
45## 2. If both the find-pattern and replace-pattern are simple strings (not regex),
46##    it would be possible to pre-compile a regex (with qr//) and use it in a 's///'
47## 3. If the find-pattern is a regex but the replace-pattern is a simple text string (with out back-references)
48##    it is still possible to pre-compile the regex and use it in a 's///'
49## However,
50## 4. If the replace-pattern contains back-references, pre-compiling is not possible.
51##    (in perl, you can't precompile a substitute regex).
52##    See these examples:
53##    http://www.perlmonks.org/?node_id=84420
54##    http://stackoverflow.com/questions/125171/passing-a-regex-substitution-as-a-variable-in-perl
55##
56##    The solution:
57##    we build the regex string as valid perl code (in 'build_regex()', stored in $regex_string ),
58##    Then eval() a new perl code that contains the substitution regex as inlined code.
59##    Gotta love perl!
60
61my $perl_program ;
62if ( $find_in_specific_column ) {
63        # Find & replace in specific column
64
65        $perl_program = <<EOF;
66        while ( <STDIN> ) {
67                chomp ;
68                my \@columns = split ;
69
70                #not enough columns in this line - skip it
71                next if ( \@columns < $find_in_specific_column ) ;
72
73                \$columns [ $find_in_specific_column - 1 ] =~ $regex_string ;
74
75                print STDOUT join("\t", \@columns), "\n" ;
76        }
77EOF
78
79} else {
80        # Find & replace the entire line
81        $perl_program = <<EOF;
82                while ( <STDIN> ) {
83                        $regex_string ;
84                        print STDOUT;
85                }
86EOF
87}
88
89
90# The dynamic perl code reads from STDIN and writes to STDOUT,
91# so connect these handles (if the user didn't specifiy input / output
92# file names, these might be already be STDIN/OUT, so the whole could be a no-op).
93*STDIN = $input_file ;
94*STDOUT = $output_file ;
95eval $perl_program ;
96
97
98##
99## Program end
100##
101
102
103sub parse_command_line()
104{
105        my %opts ;
106        getopts('grsiwc:o:', \%opts) or die "$0: Invalid option specified\n";
107
108        die "$0: missing Find-Pattern argument\n" if (@ARGV==0);
109        $find_pattern = $ARGV[0];
110        die "$0: missing Replace-Pattern argument\n" if (@ARGV==1);
111        $replace_pattern = $ARGV[1];
112
113        $find_complete_words = ( exists $opts{w} ) ;
114        $find_case_insensitive = ( exists $opts{i} ) ;
115        $skip_first_line = ( exists $opts{s} ) ;
116        $find_pattern_is_regex = ( exists $opts{r} ) ;
117        $replace_global = ( exists $opts{g} ) ;
118
119        # Search in specific column ?
120        if ( defined $opts{c} ) {
121                $find_in_specific_column = $opts{c};
122
123                die "$0: invalid column number ($find_in_specific_column).\n"
124                        unless $find_in_specific_column =~ /^\d+$/ ;
125                       
126                die "$0: invalid column number ($find_in_specific_column).\n"
127                        if $find_in_specific_column <= 0;
128        }
129        else {
130                $find_in_specific_column = 0 ;
131        }
132
133        # Output File specified (instead of STDOUT) ?
134        if ( defined $opts{o} ) {
135                my $filename = $opts{o};
136                open $output_file, ">$filename" or die "$0: Failed to create output file '$filename': $!\n" ;
137        } else {
138                $output_file = *STDOUT ;
139        }
140
141
142        # Input file Specified (instead of STDIN) ?
143        if ( @ARGV>2 ) {
144                my $filename = $ARGV[2];
145                open $input_file, "<$filename" or die "$0: Failed to open input file '$filename': $!\n" ;
146        } else {
147                $input_file = *STDIN;
148        }
149}
150
151sub build_regex_string()
152{
153        my $find_string ;
154        my $replace_string ;
155
156        if ( $find_pattern_is_regex ) {
157                $find_string = $find_pattern ;
158                $replace_string = $replace_pattern ;
159        } else {
160                $find_string = quotemeta $find_pattern ;
161                $replace_string = quotemeta $replace_pattern;
162        }
163
164        if ( $find_complete_words ) {
165                $find_string = "\\b($find_string)\\b";
166        }
167
168        my $regex_string = "s/$find_string/$replace_string/";
169
170        $regex_string .= "i" if ( $find_case_insensitive );
171        $regex_string .= "g" if ( $replace_global ) ;
172       
173
174        return $regex_string;
175}
176
177sub usage()
178{
179print <<EOF;
180
181Find and Replace
182Copyright (C) 2009 - by A. Gordon ( gordon at cshl dot edu )
183
184Usage: $0 [-o OUTPUT] [-g] [-r] [-w] [-i] [-c N] [-l] FIND-PATTERN REPLACE-PATTERN [INPUT-FILE]
185
186   -g   - Global replace - replace all occurences in line/column.
187          Default - replace just the first instance.
188   -w   - search for complete words (not partial sub-strings).
189   -i   - case insensitive search.
190   -c N - check only column N, instead of entire line (line split by whitespace).
191   -l   - skip first line (don't replace anything in it)
192   -r   - FIND-PATTERN and REPLACE-PATTERN are perl regular expression,
193          usable inside a 's///' statement.
194          By default, they are used as verbatim text strings.
195   -o OUT - specify output file (default = STDOUT).
196   INPUT-FILE - (optional) read from file (default = from STDIN).
197
198
199EOF
200
201        exit;
202}
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。