#!/usr/bin/env perl use strict; use warnings; #convert from a MAP and PED file to a genotype file #assumes not many SNPs but lots of individuals # transposed formats are used when lots of SNPs (TPED, TFAM) if (!@ARGV or scalar @ARGV ne 2) { print "usage: lped_to_geno.pl infile.map infile.ped > outfile\n"; exit; } my $map = shift @ARGV; my $ped = shift @ARGV; my @snp; #array to hold SNPs from map file open(FH, $map) or die "Couldn't open $map, $!\n"; while () { chomp; my @f = split(/\s+/); #3 or 4 columns #chrom ID [distance|morgans] position if (!exists $f[3]) { $f[3] = $f[2]; } #only 3 columns #have to leave in so know which to skip later #if ($f[3] < 0) { next; } #way of excluding SNPs #if ($f[0] eq '0') { next; } #unplaced SNP $f[0] = "chr$f[0]"; push(@snp, "$f[0]:$f[3]:$f[1]"); } close FH or die "Couldn't finish $map, $!\n"; #rows are individuals, columns are SNPs (7 & up) #need to print row per SNP my @allele; #alleles to go with @snp my @pheno; #marker for phenotype open(FH, $ped) or die "Couldn't open $ped, $!\n"; while () { chomp; my @f = split(/\s+/); if (!defined $f[5]) { die "ERROR undefined phenotype $f[0] $f[1] $f[2] $f[3] $f[4]\n"; } push(@pheno, $f[5]); my $j = 0; for(my $i = 6; $i< $#f; $i+=2) { if (!$allele[$j]) { $allele[$j] = ''; } #can be ACTG or 1234 (for haploview etc) or 0 for missing if ($f[$i] eq '1') { $f[$i] = 'A'; } elsif ($f[$i] eq '2') { $f[$i] = 'C'; } elsif ($f[$i] eq '3') { $f[$i] = 'G'; } elsif ($f[$i] eq '4') { $f[$i] = 'T'; } if ($f[$i+1] eq '1') { $f[$i+1] = 'A'; } elsif ($f[$i+1] eq '2') { $f[$i+1] = 'C'; } elsif ($f[$i+1] eq '3') { $f[$i+1] = 'G'; } elsif ($f[$i+1] eq '4') { $f[$i+1] = 'T'; } $f[$i] = uc($f[$i]); $f[$i+1] = uc($f[$i+1]); $allele[$j] .= " $f[$i]$f[$i+1]"; $j++; } } close FH or die "Couldn't close $ped, $!\n"; print "ID Chr Pos"; foreach (@pheno) { if ($_ > 0) { print " ", $_ - 1; }} #go from 1/2 to 0/1 print "\n"; for(my $i =0; $i <= $#snp; $i++) { #foreach snp $allele[$i] =~ /(\w)/; my $nt = $1; my $j = 0; my @t = split(/:/, $snp[$i]); if ($t[0] eq 'chr0' or $t[1] < 0) { next; } #skip this SNP if ($t[0] eq 'chrX') { $t[0] = 'chr23'; } elsif ($t[0] eq 'chrY') { $t[0] = 'chr24'; } elsif ($t[0] eq 'chrXY') { $t[0] = 'chr23'; } elsif ($t[0] eq 'chrMT') { $t[0] = 'chr25'; } print "$t[2] $t[0] $t[1]"; $allele[$i] =~ s/^\s+//; foreach my $p (split(/ +/, $allele[$i])) { if ($pheno[$j] > 0) { #pheno 0 or -9 skip #change AA BB AB to 2 0 1 if ($p eq "$nt$nt") { print " 2"; } elsif ($p =~ /$nt/) { print " 1"; } else { print " 0"; } } $j++; } print "\n"; } exit;