#!/usr/bin/perl
#####################################
# Program: name_refseq.pl  -  Date: Fri Jan  9 18:16:30 EST 2015
# Author: Elisa Donnard
#
# License: GPL - http://www.gnu.org/licenses/gpl.html
#
#####################################

use strict;
use warnings;

# Restore RefSeq transcript names in a GTF file whose transcript_id values
# were rewritten as "<transcript>_<chrom>:<txStart>_<txEnd>", and fill in the
# gene_id of every record from the refGene table.
#
# Usage: name_refseq.pl <refGene table> <GTF file> > output.gtf
#   ARGV[0]  UCSC refGene table (complete), tab-separated.  Columns 1, 2, 4,
#            5 and 12 (0-based) are read as transcript, chromosome, start,
#            end and gene name -- presumably the UCSC refGene layout; verify
#            against the table actually used.
#   ARGV[1]  GTF file (made from BED) with the altered transcript names.
#
# Output (STDOUT): every GTF record with everything from "gene_id" onwards
# replaced by
#   gene_id "<gene>"; transcript_id "<name>";
# where <name> is the original transcript name for the first occurrence of a
# transcript whose key does not match random|chrUn|hap, and the altered name
# otherwise.  Any attributes after transcript_id are dropped.  Lines without
# a gene_id/transcript_id attribute pair are printed unchanged.
#
# Dies if fewer than two arguments are given or a file cannot be opened.

die "Usage: $0 <refGene table> <GTF file>\n" if @ARGV < 2;
my ($refgene_path, $gtf_path) = @ARGV;

my %gene_of          = ();    # altered transcript name -> gene name
my %original_name_of = ();    # altered transcript name -> original name
my %seen             = ();    # transcripts whose original name is taken

open my $refgene_fh, '<', $refgene_path
    or die "Cannot open refGene table '$refgene_path': $!\n";
while (my $row = <$refgene_fh>) {
    chomp $row;

    # Limit -1 keeps trailing empty columns so the field count is reliable.
    my @fields = split /\t/, $row, -1;

    # Columns 1, 2, 4 and 5 are needed to build the lookup key.
    next if @fields < 6;

    my ($transc, $chrom, $tx_start, $tx_end) = @fields[1, 2, 4, 5];
    my $gene = defined $fields[12] ? $fields[12] : '';

    my $renamed = "${transc}_${chrom}:${tx_start}_${tx_end}";
    $gene_of{$renamed} = $gene;

    # Only the first occurrence whose key does not match random|chrUn|hap
    # gets its original name back; later duplicates keep the altered name.
    if ($renamed !~ /random|chrUn|hap/ && !$seen{$transc}) {
        $original_name_of{$renamed} = $transc;
        $seen{$transc} = 1;
    }
}
close $refgene_fh;

my $missing_genes = 0;

open my $gtf_fh, '<', $gtf_path
    or die "Cannot open GTF file '$gtf_path': $!\n";
while (my $record = <$gtf_fh>) {
    chomp $record;

    # Everything before "gene_id" is kept verbatim; the attributes after it
    # are rebuilt from the lookup tables.
    my ($prefix, $attributes) = split /gene_id/, $record;
    my $transcript;
    if (defined $attributes) {
        # Second ';'-separated attribute is expected to be transcript_id.
        $transcript = (split /;/, $attributes)[1];
    }

    # Comment, header, blank or otherwise unparseable line: pass it through
    # rather than appending empty gene_id/transcript_id attributes.
    if (!defined $transcript) {
        print "$record\n";
        next;
    }

    $transcript =~ s/ transcript_id //;
    $transcript =~ s/\"//g;

    my $gene = '';
    if (defined $gene_of{$transcript}) {
        $gene = $gene_of{$transcript};
    }
    else {
        $missing_genes++;
    }

    my $name = $original_name_of{$transcript}
        ? $original_name_of{$transcript}
        : $transcript;

    print $prefix, "gene_id \"$gene\"; transcript_id \"$name\";\n";
}
close $gtf_fh;

warn "$missing_genes GTF record(s) had no match in '$refgene_path'; "
    . "their gene_id was left empty\n"
    if $missing_genes;

