#!/usr/bin/env python
"""Report transcripts that appear on more than one chromosome in a GTF file.

Each record's first column (the sequence/chromosome name) is remembered per
transcript id.  Whenever a later record for the same transcript carries a
different first column, the previous and the new value are printed as one
tab-separated line.
"""

# Path of the GTF annotation file that is scanned when run as a script.
grab = "truechr_CHO-K1_refseq_2014_final_adj.gtf"


def extract_transcript_id(attributes):
    """Return the transcript id found in a GTF attribute column, or None.

    The id is located positionally: the column is split on single spaces and
    the fourth token (expected to look like ``"XM_0001.1";``) is used, with
    its first character and last two characters (the opening quote and the
    closing ``";``) removed.  None is returned when the column holds fewer
    than four tokens, so malformed records can be skipped by the caller.
    """
    tokens = attributes.split(" ")
    if len(tokens) < 4:
        return None
    return tokens[3][1:-2]


def find_chromosome_conflicts(lines):
    """Collect (previous, new) first-column pairs for relocated transcripts.

    ``lines`` is any iterable of text lines (an open file works).  Lines with
    fewer than nine tab-separated columns, or whose ninth column has no
    fourth token, are ignored.  For every other line the transcript id is
    mapped to the line's first column; if the transcript was already mapped
    to a different value, the pair ``(old_value, new_value)`` is recorded and
    the mapping is updated to the new value.

    Returns the list of recorded pairs in the order they were encountered.
    """
    chrom_by_transcript = {}
    conflicts = []
    for raw_line in lines:
        columns = raw_line.strip().split("\t")
        if len(columns) <= 8:
            continue
        transcript = extract_transcript_id(columns[8])
        if transcript is None:
            continue
        chrom = columns[0]
        previous = chrom_by_transcript.get(transcript)
        if previous is not None and previous != chrom:
            conflicts.append((previous, chrom))
        # Always remember the most recent value so that later comparisons are
        # made against the latest record, not the first one.
        chrom_by_transcript[transcript] = chrom
    return conflicts


def main(path=None):
    """Scan the GTF file at ``path`` (default: ``grab``) and print conflicts.

    The whole file is read to end-of-file rather than a fixed number of
    lines, and every conflict is printed, including the last one.
    """
    if path is None:
        path = grab
    # The context manager guarantees the file is closed even if parsing fails.
    with open(path) as gtf:
        conflicts = find_chromosome_conflicts(gtf)
    for previous, current in conflicts:
        # Single parenthesised argument: valid on both Python 2 and Python 3.
        print(previous + "\t" + current)


if __name__ == "__main__":
    main()
