#!/usr/bin/env bash
#################################################################################################################
#
#   Fix GTFs and build the RSEM reference (mm9, RefSeq)
#
#   Prerequisite: download from the UCSC tables the BED file for RefSeq genes and the
#   corresponding complete refGene table.
#
#   Files read from the current directory:
#     mm9_ucsc_refseq.bed        RefSeq genes in BED format (12 columns are used)
#     mm9_ucsc_refseq.complete   complete refGene table, consumed by name_refseq.pl
#     name_refseq.pl             helper script that rewrites the GTF
#
#   Files written to the current directory:
#     truechr_mm9_ucsc_refseq_final.gtf           GTF without random/chrUn/hap entries
#     truechr_mm9_refseq_gene_transcript_ref.rsem  gene<TAB>transcript map
#     (plus the intermediate *_renamed.bed, *_clusters_new*.bed and *.gtf files)
#
#################################################################################################################

# Environment setup for the tools below (contents of this file are not visible here).
source ~xz18w/ve

# Stop at the first failing command or pipeline stage instead of silently
# producing truncated/empty BED or GTF files for the later steps.
# Enabled only after sourcing so the options do not affect the environment
# script; `-u` is deliberately left off for the same reason.
set -eo pipefail

# 1. Make feature names unique by appending "_<chrom>:<start>_<end>" to column 4.
awk 'BEGIN { FS = OFS = "\t" }
     { print $1, $2, $3, $4 "_" $1 ":" $2 "_" $3, $5, $6, $7, $8, $9, $10, $11, $12 }' \
  mm9_ucsc_refseq.bed > mm9_ucsc_refseq_renamed.bed

# 2. Group the renamed records with the external helper
#    (presumably clusters overlapping transcripts -- confirm against group_bed_v2.py).
~xz18w/bin/group_bed_v2.py \
  -i mm9_ucsc_refseq_renamed.bed \
  -o mm9_ucsc_refseq_clusters_new.bed

# 3. Keep only lines tagged "_TR.1" and drop everything before the first " chr",
#    so each line starts at the chromosome column again.
#    NOTE(review): the '.' is an unescaped regex wildcard and the pattern is not
#    anchored, so it would also match names such as "_TR.10" -- confirm that the
#    helper's naming scheme makes this harmless.
grep "_TR.1" mm9_ucsc_refseq_clusters_new.bed \
  | awk -F " chr" '{ print "chr" $2 }' \
  > mm9_ucsc_refseq_clusters_new_tr1.bed

# 4. Truncate the score column (5) to an integer. Tabs are emitted directly by
#    awk rather than by a `sed "s/ /\t/g"` pass, which needs GNU sed.
awk 'BEGIN { FS = OFS = "\t" }
     { print $1, $2, $3, $4, int($5), $6, $7, $8, $9, $10, $11, $12 }' \
  mm9_ucsc_refseq_clusters_new_tr1.bed > mm9_ucsc_refseq_clusters_new_tr1_mod.bed

# 5. BED -> genePred -> GTF; replace the first "stdin" on each line (the source
#    label written when reading from stdin) with "Gene".
bedToGenePred mm9_ucsc_refseq_clusters_new_tr1_mod.bed stdout \
  | genePredToGtf file stdin stdout -honorCdsStat \
  | sed "s/stdin/Gene/" \
  > mm9_ucsc_refseq_clusters_new_tr1_mod.gtf

# 6. Rewrite the GTF using the complete refGene table.
perl name_refseq.pl mm9_ucsc_refseq.complete mm9_ucsc_refseq_clusters_new_tr1_mod.gtf \
  > mm9_ucsc_refseq_final.gtf

# 7. Drop lines mentioning random, chrUn or haplotype sequences.
#    `grep -E` replaces the deprecated `egrep`.
grep -E -v "random|chrUn|hap" mm9_ucsc_refseq_final.gtf \
  > truechr_mm9_ucsc_refseq_final.gtf

# 8. Build the gene<TAB>transcript map: take the text after "gene_id", strip
#    quotes, spaces and semicolons, and turn the first "transcript_id" into a tab.
awk -F "gene_id" '{
       map = $2
       gsub(/[" ;]/, "", map)
       sub(/transcript_id/, "\t", map)
       print map
     }' truechr_mm9_ucsc_refseq_final.gtf \
  | sort \
  | uniq \
  > truechr_mm9_refseq_gene_transcript_ref.rsem

# 9. Reference build (left disabled, as in the original notes):
#module load RSEM/1.2.11
#rsem-prepare-reference --gtf truechr_mm9_ucsc_refseq_final.gtf --transcript-to-gene-map truechr_mm9_refseq_gene_transcript_ref.rsem --bowtie2 --bowtie2-path /project/umw_garberlab/bin/bowtie2-2.1.0 ../../mouse/mm9/mm9.fa ./refseq_mm9_ref_rsem