#!/usr/bin/perl

# output:  chr, pos, mm9 allele, B6 allele, C3 allele, whether F1 is het

$fileB6 = "/ddn/gs1/project/mousemeth/DNAseq/analysis/jianying/SNV_parental_strains/BL6_parental_SNP.txt";
$fileC3 = "/ddn/gs1/project/mousemeth/DNAseq/analysis/jianying/SNV_parental_strains/C3H_parental_SNP.txt";

%alleleMM9 = ();
%alleleBL6 = ();
%alleleC3H = ();

open(B6, "$fileB6");
while (<B6>) {
  chomp $_; ($coordinate, $a1, $a2, $info, $mouse) = split/\t/, $_;
  next if ($coordinate eq "SNP_ID");
  ($txt, $chrN, $pos) = split/\_/, $coordinate;
  $chr = "chr$chrN";
  $alleleMM9{$chr}{$pos} = $a1;
  $alleleBL6{$chr}{$pos} = $a2;
}
close(B6);

open(C3, "$fileC3");
while (<C3>) {
  chomp $_; ($coordinate, $a1, $a2, $info, $mouse) = split/\t/, $_;
  next if ($coordinate eq "SNP_ID");
  ($txt, $chrN,	$pos) = split/\_/, $coordinate;
  $chr = "chr$chrN";
  $alleleMM9{$chr}{$pos} = $a1;
  $alleleC3H{$chr}{$pos} = $a2;
}
close(C3);

open(OUT, ">NISC_SNVs.mm9_canonAuto.txt.pre");
print OUT "#chr\tpos\tmm9\tB6\tC3\tF1het\n";
for ($i=1; $i<20; $i++) {
  $chr = "chr$i"; $prev = "";
  @poslist = sort {$a <=> $b} (keys %{$alleleMM9{$chr}});
  foreach $p (@poslist) {
    next if ($p == $prev);
    $z1 = $alleleBL6{$chr}{$p}; if ($z1 eq "") { $z1 = $alleleMM9{$chr}{$p}; }
    $z2	= $alleleC3H{$chr}{$p}; if ($z2	eq "") { $z2 = $alleleMM9{$chr}{$p}; }
    if ($z1 eq $z2) { $het = "homz"; } else { $het = "hetz"; }
    print OUT "$chr\t$p\t$alleleMM9{$chr}{$p}\t$z1\t$z2\t$het\n";
    $prev = $p;
  }
}
close(OUT);
