#!/usr/bin/perl

# Use category=1 for ">=3 tissues" and category=2 for "<3 tissues".

# Build a coordinate-sorted, bgzip-compressed, tabix-indexed BED file of the
# merged CTCF sites. Column 4 of the output is a tissue-support category:
#   1 = the site's tissue list (input column 4, comma-separated) has >= 3 entries
#   2 = the tissue list has < 3 entries (including a missing/empty list)
# Outputs (CTCF.merged.bed.gz plus its tabix index) go to the current directory.
# The script aborts on the first failed file operation or external command so
# that later steps never run on missing or stale intermediate files.

use strict;
use warnings;

my $dir = "/ddn/gs1/home/grimmsa/ruifang/integrate-DMR_DEG_HiC/CTCF_boundaries/ENCODE_CTCF";
my $bgzip = "/ddn/gs1/home/grimmsa/tools/tabix-0.2.6/bgzip";
my $tabix = "/ddn/gs1/home/grimmsa/tools/tabix-0.2.6/tabix";

my $input_bed  = "$dir/CTCF.mm10.merged.bed";
my $tmp_bed    = "CTCF.merged.tmp";
my $sorted_bed = "CTCF.merged.bed";

# Minimum number of tissues for a site to be placed in category 1.
my $min_tissues = 3;

# Run an external command (list form, no shell) and die if it cannot be
# started or exits with a non-zero status.
sub run_or_die {
    my @cmd = @_;
    my $status = system(@cmd);
    if ($status == -1) {
        die "Cannot execute '@cmd': $!\n";
    }
    if ($status != 0) {
        die "Command '@cmd' failed (wait status $?)\n";
    }
    return;
}

open(my $in, '<', $input_bed)
    or die "Cannot open $input_bed for reading: $!\n";
open(my $tmp, '>', $tmp_bed)
    or die "Cannot open $tmp_bed for writing: $!\n";

while (my $line = <$in>) {
    chomp $line;
    next if $line eq '';

    my @fields = split /\t/, $line;

    # A row without chrom/start/end cannot yield a valid BED record and
    # would break the downstream tabix indexing, so skip it loudly.
    if (@fields < 3) {
        warn "Skipping malformed line $. of $input_bed\n";
        next;
    }

    # A missing tissue column counts as zero tissues (category 2).
    my $tissue_list = defined $fields[3] ? $fields[3] : '';
    my @tissues     = split /\,/, $tissue_list;
    my $category    = (@tissues >= $min_tissues) ? 1 : 2;

    print {$tmp} join("\t", @fields[0 .. 2], $category), "\n"
        or die "Cannot write to $tmp_bed: $!\n";
}

close($in) or die "Error while reading $input_bed: $!\n";
# Buffered write errors only surface at close, so it must be checked.
close($tmp) or die "Error while closing $tmp_bed: $!\n";

# Sort by chromosome, then numerically by start and end; "-o" replaces the
# shell redirection so the command can run without a shell.
run_or_die('sort', '-k1,1', '-k2,2n', '-k3,3n', '-o', $sorted_bed, $tmp_bed);

unlink($tmp_bed) or warn "Could not remove $tmp_bed: $!\n";

# bgzip replaces CTCF.merged.bed with CTCF.merged.bed.gz, which tabix then
# indexes using its BED preset.
run_or_die($bgzip, $sorted_bed);
run_or_die($tabix, '-p', 'bed', "$sorted_bed.gz");

