#!/usr/bin/perl
#####################################
# Program: name_gtf.pl  -  Date: Thu Jan  8 12:13:00 EST 2015
# Author: Elisa Donnard
#
# License: GPL - http://www.gnu.org/licenses/gpl.html
#
#####################################

use strict;
use warnings;

# File-scoped working variables shared by the three stages of the script:
# loading the UCSC cross-reference table, renaming the GTF records, and
# printing the result.

# --- cross-reference table (first argument) ---
my $ucsc = "";          # UCSC id of the current table row (column 1)
my $transc = "";        # transcript name chosen for that UCSC id
my $gene = "";          # gene name for that UCSC id (column 5)
my @tmp = ();           # scratch list for tab-split row fields; declared so
                        # the script compiles under strict
my %reftable = ();      # UCSC id => "gene;transcript"

# --- GTF file (second argument) ---
my @line = ();          # current GTF line split around "gene_id"
my $gtfstart = "";      # text of the GTF line that precedes "gene_id"
my @ids = ();           # attribute text split on double quotes
my $ucscgtf = "";       # UCSC id found in the gene_id attribute
my @names = ();         # "gene;transcript" entry split on ';'
my $genegtf = "";       # gene name written into the new gene_id
my $transcgtf = "";     # transcript name written into the new transcript_id
my $linetoprint = "";   # rewritten GTF line, still missing its closing quote
my %find = ();          # transcript name => { rewritten line => UCSC id }
my %keepname = ();      # transcript name => UCSC id whose lines are printed
                        # without the "_<UCSC id>" suffix

# --- output ---
my $trn = "";           # loop cursor over transcript names
my $ln = "";            # loop cursor over rewritten lines

# Load the UCSC known-gene cross-reference table (first argument) into
# %reftable as: UCSC id => "gene;transcript".
#
# Columns read from each tab-separated row (1-based):
#   1 = UCSC id, 2 = alternative transcript name, 5 = gene name,
#   6 = preferred transcript name.
# The transcript name is column 6 when present; otherwise column 2 when it is
# present and differs from the gene name; otherwise the UCSC id itself.
defined $ARGV[0]
    or die "Usage: $0 <ucsc_xref_table> <ucsc_gtf>\n";
open(my $xref_fh, '<', $ARGV[0])
    or die "Cannot open cross-reference table '$ARGV[0]': $!\n";

while (my $row = <$xref_fh>) {
    chomp $row;

    # A blank row would otherwise register an entry under the empty id.
    next if $row eq "";

    my ($kg_id, $alt_name, undef, undef, $symbol, $preferred) =
        split(/\t/, $row);

    # Short rows leave the later columns undefined; treat them as empty.
    for my $field ($kg_id, $alt_name, $symbol, $preferred) {
        $field = "" unless defined $field;
    }

    $ucsc = $kg_id;
    $gene = $symbol;

    if ($preferred ne "") {
        $transc = $preferred;
    }
    elsif ($alt_name ne "" && $alt_name ne $symbol) {
        $transc = $alt_name;
    }
    else {
        $transc = $kg_id;
    }

    $reftable{$ucsc} = "$gene;$transc";
}
close($xref_fh);

# Read the GTF file (second argument) whose gene_id attribute holds a UCSC id
# and rewrite each record that has an entry in %reftable.
#
# For every such record:
#   %find{transcript}{rewritten line} = UCSC id
#   %keepname{transcript}             = UCSC id, unless the record sits on a
#                                       random / chrUn / hap sequence
# The rewritten line keeps everything before "gene_id" and replaces the
# attributes with gene_id "<gene>"; transcript_id "<transcript>. The closing
# quote is left off on purpose: the output stage appends either the quote or
# "_<UCSC id>" plus the quote.
defined $ARGV[1]
    or die "Usage: $0 <ucsc_xref_table> <ucsc_gtf>\n";
open(my $gtf_fh, '<', $ARGV[1])
    or die "Cannot open GTF file '$ARGV[1]': $!\n";

while (my $record = <$gtf_fh>) {
    chomp $record;

    @line = split(/gene_id/, $record);
    next unless defined $line[1];        # no gene_id attribute on this line

    $gtfstart = $line[0];
    @ids = split(/\"/, $line[1]);
    $ucscgtf = $ids[1];                  # first quoted value after gene_id
    next unless defined $ucscgtf && $reftable{$ucscgtf};

    @names = split(/;/, $reftable{$ucscgtf});
    $genegtf   = defined $names[0] ? $names[0] : "";
    $transcgtf = defined $names[1] ? $names[1] : "";

    $linetoprint = "$gtfstart"."gene_id \"$genegtf\"; transcript_id \"$transcgtf";
    $find{$transcgtf}{$linetoprint} = $ucscgtf;

    # Only the sequence name (first tab-separated column) is tested, so a gene
    # or transcript name that happens to contain one of these substrings can
    # no longer be mistaken for an alternate or unplaced sequence.
    my ($seqname) = split(/\t/, $gtfstart, 2);
    $seqname = "" unless defined $seqname;
    if ($seqname !~ /random|chrUn|hap/) {
        $keepname{$transcgtf} = $ucscgtf;
    }
}
close($gtf_fh);
# Print every rewritten GTF line collected in %find.
#
# Lines whose UCSC id is the one recorded in %keepname for the transcript are
# closed with a plain quote; all other lines get "_<UCSC id>" appended to the
# transcript name first, so that each copy stays distinguishable.
#
# Keys are sorted so the output is identical from run to run; iterating a hash
# directly yields a different order on every execution. The order is plain
# string order, not genomic coordinate order.
foreach my $transcript (sort keys %find) {
    my $kept_id = $keepname{$transcript};

    foreach my $record (sort keys %{ $find{$transcript} }) {
        my $ucsc_id = $find{$transcript}{$record};

        if ($kept_id && $ucsc_id eq $kept_id) {
            print "$record\";\n";                 # print normal
        }
        else {
            print "${record}_$ucsc_id\";\n";      # print with ucsc id added
        }
    }
}
