root/galaxy-central/tools/next_gen_conversion/bwa_solid2fastq_modified.pl

リビジョン 2, 2.6 KB (コミッタ: hatakeyama, 14 年 前)

import galaxy-central

  • 属性 svn:executable の設定値 *
行番号 
1#!/usr/bin/perl -w
2
3# Author: lh3
4# Note: Ideally, this script should be written in C. It is a bit slow at present.
5
6use strict;
7use warnings;
8use Getopt::Std;
9
10my %opts;
11my $version = '0.1.3';
12my $usage = qq{
13Usage: solid2fastq.pl <paired> <outfile1> <outfile2> <F3.csfasta> <F3.qual> <R3.csfasta> <R3.qual>
14
15Note: <in.title> is the string showed in the `# Title:' line of a
16      ".csfasta" read file. Then <in.title>F3.csfasta is read sequence
17      file and <in.title>F3_QV.qual is the quality file. If
18      <in.title>R3.csfasta is present, this script assumes reads are
19      paired; otherwise reads will be regarded as single-end.
20
21      The read name will be <out.prefix>:panel_x_y/[12] with `1' for R3
22      tag and `2' for F3. Usually you may want to use short <out.prefix>
23      to save diskspace. Long <out.prefix> also causes troubles to maq.
24
25};
26
27getopts('', \%opts);
28die($usage) if (@ARGV != 7);
29my ($is_paired,$outfile1,$outfile2,$f3reads,$f3qual,$r3reads,$r3qual) = @ARGV;
30my (@fhr, @fhw);
31my $fn = '';
32my @fn_suff = ($f3reads,$f3qual,$r3reads,$r3qual);
33if ($is_paired eq "yes") { # paired end
34  for (0 .. 3) {
35        $fn = $fn_suff[$_];
36        $fn = "gzip -dc $fn.gz |" if (!-f $fn && -f "$fn.gz");
37        open($fhr[$_], $fn) || die("** Fail to open '$fn'.\n");
38  }
39  open($fhw[0], "|gzip >$outfile2") || die;
40  open($fhw[1], "|gzip >$outfile1") || die;
41  my (@df, @dr);
42  @df = &read1(1); @dr = &read1(2);
43  while (@df && @dr) {
44        if ($df[0] eq $dr[0]) { # mate pair
45          print {$fhw[0]} $df[1]; print {$fhw[1]} $dr[1];
46          @df = &read1(1); @dr = &read1(2);
47        }
48  }
49  close($fhr[$_]) for (0 .. $#fhr);
50  close($fhw[$_]) for (0 .. $#fhw);
51} else { # single end
52  for (0 .. 1) {
53        my $fn = "$fn_suff[$_]";
54        $fn = "gzip -dc $fn.gz |" if (!-f $fn && -f "$fn.gz");
55        open($fhr[$_], $fn) || die("** Fail to open '$fn'.\n");
56  }
57  open($fhw[2], "|gzip >$outfile1") || die;
58  my @df;
59  while (@df = &read1(1, $fhr[0], $fhr[1])) {
60        print {$fhw[2]} $df[1];
61  }
62  close($fhr[$_]) for (0 .. $#fhr);
63  close($fhw[2]);
64}
65
66sub read1 {
67  my $i = shift(@_);
68  my $j = ($i-1)<<1;
69  my ($key, $seq);
70  my ($fhs, $fhq) = ($fhr[$j], $fhr[$j|1]);
71  while (<$fhs>) {
72        my $t = <$fhq>;
73        if (/^>(\d+)_(\d+)_(\d+)_[FR]3/) {
74          $key = sprintf("%.4d_%.4d_%.4d", $1, $2, $3); # this line could be improved on 64-bit machines
75          #print $key;
76          die(qq/** unmatched read name: '$_' != '$t'\n/) unless ($_ eq $t);
77          my $name = "$1_$2_$3/$i";
78          $_ = substr(<$fhs>, 2);
79          tr/0123./ACGTN/;
80          my $s = $_;
81          $_ = <$fhq>;
82          s/^(\d+)\s*//;
83          s/(\d+)\s*/chr($1+33)/eg;
84          $seq = qq/\@$name\n$s+\n$_\n/;
85          last;
86        }
87  }
88  return defined($seq)? ($key, $seq) : ();
89}
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。