root/galaxy-central/tools/human_genome_variation/lped_to_geno.pl

リビジョン 2, 2.7 KB (コミッタ: hatakeyama, 14 年 前)

import galaxy-central

  • 属性 svn:executable の設定値 *
行番号 
1#!/usr/bin/env perl
2
3use strict;
4use warnings;
5
6#convert from a MAP and PED file to a genotype file
7#assumes not many SNPs but lots of individuals
8# transposed formats are used when lots of SNPs (TPED, TFAM)
9
10if (!@ARGV or scalar @ARGV ne 2) {
11   print "usage: lped_to_geno.pl infile.map infile.ped > outfile\n";
12   exit;
13}
14
15my $map = shift @ARGV;
16my $ped = shift @ARGV;
17
18my @snp; #array to hold SNPs from map file
19open(FH, $map) or die "Couldn't open $map, $!\n";
20while (<FH>) {
21   chomp;
22   my @f = split(/\s+/); #3 or 4 columns
23   #chrom ID [distance|morgans] position
24   if (!exists $f[3]) { $f[3] = $f[2]; } #only 3 columns
25   #have to leave in so know which to skip later
26   #if ($f[3] < 0) { next; } #way of excluding SNPs
27   #if ($f[0] eq '0') { next; } #unplaced SNP
28   $f[0] = "chr$f[0]";
29   push(@snp, "$f[0]:$f[3]:$f[1]");
30}
31close FH or die "Couldn't finish $map, $!\n";
32
33#rows are individuals, columns are SNPs (7 & up)
34#need to print row per SNP
35my @allele; #alleles to go with @snp
36my @pheno;  #marker for phenotype
37open(FH, $ped) or die "Couldn't open $ped, $!\n";
38while (<FH>) {
39   chomp;
40   my @f = split(/\s+/);
41   if (!defined $f[5]) { die "ERROR undefined phenotype $f[0] $f[1] $f[2] $f[3] $f[4]\n"; }
42   push(@pheno, $f[5]);
43   my $j = 0;
44   for(my $i = 6; $i< $#f; $i+=2) {
45      if (!$allele[$j]) { $allele[$j] = ''; }
46      #can be ACTG or 1234 (for haploview etc) or 0 for missing
47      if ($f[$i] eq '1') { $f[$i] = 'A'; }
48      elsif ($f[$i] eq '2') { $f[$i] = 'C'; }
49      elsif ($f[$i] eq '3') { $f[$i] = 'G'; }
50      elsif ($f[$i] eq '4') { $f[$i] = 'T'; }
51      if ($f[$i+1] eq '1') { $f[$i+1] = 'A'; }
52      elsif ($f[$i+1] eq '2') { $f[$i+1] = 'C'; }
53      elsif ($f[$i+1] eq '3') { $f[$i+1] = 'G'; }
54      elsif ($f[$i+1] eq '4') { $f[$i+1] = 'T'; }
55      $f[$i] = uc($f[$i]);
56      $f[$i+1] = uc($f[$i+1]);
57      $allele[$j] .= " $f[$i]$f[$i+1]";
58      $j++;
59   }
60}
61close FH or die "Couldn't close $ped, $!\n";
62
63print "ID Chr Pos";
64foreach (@pheno) { if ($_ > 0) { print " ", $_ - 1; }} #go from 1/2 to 0/1
65print "\n";
66for(my $i =0; $i <= $#snp; $i++) { #foreach snp
67   $allele[$i] =~ /(\w)/;
68   my $nt = $1;
69   my $j = 0;
70   my @t = split(/:/, $snp[$i]);
71   if ($t[0] eq 'chr0' or $t[1] < 0) { next; } #skip this SNP
72   if ($t[0] eq 'chrX') { $t[0] = 'chr23'; }
73   elsif ($t[0] eq 'chrY') { $t[0] = 'chr24'; }
74   elsif ($t[0] eq 'chrXY') { $t[0] = 'chr23'; }
75   elsif ($t[0] eq 'chrMT') { $t[0] = 'chr25'; }
76   print "$t[2] $t[0] $t[1]";
77   $allele[$i] =~ s/^\s+//;
78   foreach my $p (split(/ +/, $allele[$i])) {
79      if ($pheno[$j] > 0) { #pheno 0 or -9 skip
80          #change AA BB AB to 2 0 1
81          if ($p eq "$nt$nt") { print " 2"; }
82          elsif ($p =~ /$nt/) { print " 1"; }
83          else { print " 0"; }
84      }
85      $j++;
86   }
87   print "\n";
88}
89
90exit;
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。