#!/usr/bin/perl

# Remove header, separate by strand, coordinate-sort, and convert to bigBed.

use warnings;

# Usage: <script> <input.bed> <output_root>
#
# Reads a tab-separated BED file (chrom, start, end, name, score, strand),
# skips '#' header lines, rewrites the name column as "chrom:start+1-end:strand",
# and splits the records by strand into <output_root>.pos.bed.tmp and
# <output_root>.neg.bed.tmp.  Each file is then ordered by mysort() and
# converted with bedToBigBed into <output_root>.{pos,neg}.bigBed.
# Exits non-zero if any step fails.

die "Usage: $0 <input.bed> <output_root>\n" unless @ARGV >= 2;
my ($bed_path, $root) = @ARGV[0, 1];
my $chromSizes = "/ddn/gs1/shared/dirib/reference_genomes/hg38/hg38.chromSizes";
my $b2bb = "/ddn/gs1/home/grimmsa/tools/ucsc/bedToBigBed";

# @chrlist stays a package global because mysort() reads it to decide the
# chromosome order of its output.
our @chrlist = ();
our %chrSize = ();
open(my $cs_fh, '<', $chromSizes)
  or die "ERROR: cannot open chrom sizes file '$chromSizes': $!\n";
while (my $cs_line = <$cs_fh>) {
  chomp $cs_line;
  # A blank line would otherwise register an empty chromosome name.
  next if $cs_line eq '';
  my ($c, $s) = split /\t/, $cs_line;
  push @chrlist, $c;
  $chrSize{$c} = $s;
}
close($cs_fh);

my $pos_tmp = "$root.pos.bed.tmp";
my $neg_tmp = "$root.neg.bed.tmp";
open(my $in_fh, '<', $bed_path) or die "ERROR: cannot open input '$bed_path': $!\n";
open(my $pos_fh, '>', $pos_tmp) or die "ERROR: cannot write '$pos_tmp': $!\n";
open(my $neg_fh, '>', $neg_tmp) or die "ERROR: cannot write '$neg_tmp': $!\n";
my %fh_for = ('+' => $pos_fh, '-' => $neg_fh);

while (my $line = <$in_fh>) {
  next if ($line =~ /^\#/);
  chomp $line;
  # The incoming name column (4th field) is discarded and regenerated below.
  my ($chr, $p0, $p2, undef, $score, $str) = split /\t/, $line;
  # Short lines have no strand field; report them as an empty strand.
  $str = '' unless defined $str;
  my $out_fh = $fh_for{$str}
    or die "ERROR: unexpected strand \'$str\'; exit\n";
  my $p1 = $p0 + 1;   # 1-based start used only in the generated name
  print {$out_fh} "$chr\t$p0\t$p2\t$chr:$p1-$p2:$str\t$score\t$str\n";
}
close($in_fh);

# Both output handles must be closed (and therefore flushed) before mysort()
# reads the files back; close also surfaces buffered write errors.
close($pos_fh) or die "ERROR: failed writing '$pos_tmp': $!\n";
close($neg_fh) or die "ERROR: failed writing '$neg_tmp': $!\n";

mysort($pos_tmp);
mysort($neg_tmp);
unlink($pos_tmp, $neg_tmp);

# Convert each strand; list-form system() bypasses the shell, so paths with
# spaces or metacharacters are passed through intact.  Both conversions are
# always attempted, and any failure is reflected in the exit status.
my $failed = 0;
for my $strand ('pos', 'neg') {
  my @cmd = ($b2bb, "$root.$strand.bed", $chromSizes, "$root.$strand.bigBed");
  if (system(@cmd) != 0) {
    warn "ERROR: command failed (status $?): @cmd\n";
    $failed = 1;
  }
}
exit($failed ? 1 : 0);

# mysort(TMP_PATH)
#
# Orders the tab-separated BED file TMP_PATH and writes the result to the same
# path with its trailing ".tmp" removed, replacing any existing file.
# Records are grouped by chromosome in the order the names appear in the
# package global @chrlist, and within a chromosome are sorted numerically by
# start (column 2) and then end (column 3).  Records whose chromosome is not
# in @chrlist are left out of the output, and a warning reports how many.
# Dies if TMP_PATH does not end in ".tmp", if a file cannot be opened or
# written, or if the external sort command fails.
sub mysort {
  # Pragmas are lexically scoped, so these apply to this sub only.
  use strict;
  use warnings;
  our @chrlist;

  my ($tmp_path) = @_;

  # Strip only a trailing ".tmp"; an unanchored match could alter a directory
  # component of the path instead.
  (my $sorted_path = $tmp_path) =~ s/\.tmp\z//;
  die "mysort: '$tmp_path' does not end in .tmp\n" if $sorted_path eq $tmp_path;

  # Rank of each chromosome = its position in @chrlist (first occurrence wins).
  my %rank_of;
  for my $i (0 .. $#chrlist) {
    my $chrom = $chrlist[$i];
    next unless defined $chrom;
    $rank_of{$chrom} = $i unless exists $rank_of{$chrom};
  }

  # Pass 1: prefix every line with its chromosome rank and stream it through a
  # single external sort (rank, start, end).  The chromosome is compared
  # exactly against column 1, and the list-form pipe open avoids the shell.
  my $ranked_path = "$sorted_path.rank.tmp";
  open(my $in_fh, '<', $tmp_path)
    or die "mysort: cannot read '$tmp_path': $!\n";
  open(my $sort_fh, '|-', 'sort', '-t', "\t",
       '-k1,1n', '-k3,3n', '-k4,4n', '-o', $ranked_path)
    or die "mysort: cannot start sort: $!\n";
  my $dropped = 0;
  while (my $line = <$in_fh>) {
    chomp $line;
    my ($chrom) = split /\t/, $line, 2;
    if (defined $chrom && exists $rank_of{$chrom}) {
      print {$sort_fh} "$rank_of{$chrom}\t$line\n";
    }
    else {
      $dropped++;
    }
  }
  close($in_fh);
  close($sort_fh)
    or die "mysort: sort failed for '$tmp_path' (status $?)\n";

  # Pass 2: strip the rank prefix to recover plain BED lines.
  open(my $ranked_fh, '<', $ranked_path)
    or die "mysort: cannot read '$ranked_path': $!\n";
  open(my $out_fh, '>', $sorted_path)
    or die "mysort: cannot write '$sorted_path': $!\n";
  while (my $line = <$ranked_fh>) {
    $line =~ s/^\d+\t//;
    print {$out_fh} $line;
  }
  close($ranked_fh);
  close($out_fh) or die "mysort: failed writing '$sorted_path': $!\n";
  unlink($ranked_path);

  warn "mysort: skipped $dropped line(s) in '$tmp_path' with a chromosome not in the chrom list\n"
    if $dropped;
  return;
}

