heatmap_add_gene_name.pl

#!/usr/bin/perl -w use strict; use File::Basename qw(basename dirname); use Getopt::Long; my ($matrix,$samplelist,$genelist,$sample_sort,$gene_sort,$gene_anno);

#my $outdir=dirname($matrix); GetOptions( 'matrix=s' => \$matrix, 'samplelist=s' => \$samplelist, 'genelist=s' => \$genelist, 'sample_sort=s' =>\$sample_sort, 'gene_sort=s' =>\$gene_sort, 'gene_anno=s' =>\$gene_anno, ) || &help; $sample_sort ||="yes"; $gene_sort ||="yes";

&help unless ($matrix && $samplelist && $genelist); my $outdir=dirname($matrix); my @sample=split/,/,$samplelist; my %anno; my %gene; open GENE,"$genelist" or die "can not open the $genelist$!";

while(){

chomp;

my @line=split/\t/;

$gene{$line[0]}=1;

#}

open ANNO,"$gene_anno"; while(){ chomp; my @line=split/\t/; $anno{$line[0]}=$line[5];

} my %sample_site; open MATRIX,$matrix or die "can not open the matrix $matrix file $!"; my $head=; chomp($head); open OUT,">$outdir/matrix.heatmap.txt" or die "can not open the matrix.heatmap.txt!"; my @sample_site=split/\t/,$head; foreach my $sample(@sample){ print OUT "\t$sample"; for(my $i=0;$i<=$#sample_site;$i++){ if($sample eq $sample_site[$i]){ $sample_site{$sample}=$i; } } } print OUT "\n"; while(){ chomp; my @line=split/\t/; $gene{$line[0]}=""; for(my $i=0;$i<=$#sample;$i++){ $gene{$line[0]}= $gene{$line[0]}."\t".$line[$sample_site{$sample[$i]}];

}

if(exists $gene{$line[0]}){

print OUT "$line[0]";

for(my $i=0;$i<=$#sample;$i++){

print OUT "\t$line[$sample_site{$sample[$i]}]";

#}

print OUT "\n";

#}

else{

next;

#} } while(){ chomp; my @line=split/\t/; print OUT "$anno{$line[0]}|$line[0]"."$gene{$line[0]}\n";

$gene{$line[0]}=1;

} close OUT; open R,">$outdir/matrix.heatmap.R" or die "can not open the matrix.heatmap.R!"; if($sample_sort eq "yes" and $gene_sort eq "yes"){ print R <<str; library(cluster) library(gplots) library(Biobase) library(ctc) library(ape) data = read.table("$outdir/matrix.heatmap.txt", header=T, com='', sep="\t",check.names=F) rownames(data) = data[,1] # set rownames to gene identifiers data = data[,2:length(data[1,])] # remove the gene column since its now the rowname value data = as.matrix(data) # convert to matrix cr = cor(data, method='spearman') data = log10(data+1) centered_data = t(scale(t(data), scale=F)) # center rows, mean substracted gene_dist = dist(centered_data, method='euclidean') hc_genes = hclust(gene_dist, method='complete') hc_samples = hclust(as.dist(1-cr), method="complete") # cluster conditions myheatcol = greenred(75) gene_partition_assignments <- cutree(as.hclust(hc_genes), k=3); partition_colors = rainbow(length(unique(gene_partition_assignments)), start=0.4, end=0.95) gene_colors = partition_colors[gene_partition_assignments] save(list=ls(all=TRUE), file="diffExpr.P0.05_C1.matrix.R.all.RData") pdf("$outdir/matrix.heatmap.pdf") heatmap.2(centered_data, dendrogram='both', Rowv=as.dendrogram(hc_genes), Colv=as.dendrogram(hc_samples),srtCol=30, col=bluered(75), RowSideColors=gene_colors, scale="none", density.info="none", trace="none", key=TRUE, keysize=1.2, cexCol=1, lmat=rbind(c(5,0,4,0),c(3,1,2,0)), lhei=c(1.5,5),lwid=c(1.5,0.2,2.5,2.5), margins=c(5,2))

#try(heatmap.2(cr, col = cm.colors(256), scale='none', symm=TRUE, key=TRUE,density.info='none', trace='none', symkey=FALSE, Colv=TRUE,margins=c(10,10), cexCol=1, cexRow=1)) dev.off() GeneName=heatmap.2(centered_data, dendrogram='both', Rowv=as.dendrogram(hc_genes), Colv=as.dendrogram(hc_samples),col=myheatcol, RowSideColors=gene_colors, scale="none", density.info="none", trace="none", key=TRUE, keysize=1.2, cexCol=1, lmat=rbind(c(5,0,4,0),c(3,1,2,0)), lhei=c(1.5,5),lwid=c(1.5,0.2,2.5,2.5), margins=c(5,2)) GeneName_list=rownames(centered_data)[GeneName\$rowInd] GeneCol<-gene_colors[GeneName\$rowInd] out <- cbind(GeneName_list,GeneCol) write.table(file="$outdir/heatmap_Gene.xls",out,sep="\t",quote=F,row.names=F) dev.off() str } elsif($sample_sort eq "no" and $gene_sort eq "yes"){

print R <<str111111; library(cluster) library(gplots) library(Biobase) library(ctc) library(ape) data = read.table("$outdir/matrix.heatmap.txt", header=T, com='', sep="\t",check.names=F) rownames(data) = data[,1] # set rownames to gene identifiers data = data[,2:length(data[1,])] # remove the gene column since its now the rowname value data = as.matrix(data) # convert to matrix cr = cor(data, method='spearman') data = log10(data+1) centered_data = t(scale(t(data), scale=F)) # center rows, mean substracted gene_dist = dist(centered_data, method='euclidean') hc_genes = hclust(gene_dist, method='complete') hc_samples = hclust(as.dist(1-cr), method="complete") # cluster conditions myheatcol = greenred(75) gene_partition_assignments <- cutree(as.hclust(hc_genes), k=3); partition_colors = rainbow(length(unique(gene_partition_assignments)), start=0.4, end=0.95) gene_colors = partition_colors[gene_partition_assignments] save(list=ls(all=TRUE), file="diffExpr.P0.05_C1.matrix.R.all.RData") pdf("$outdir/matrix.heatmap.pdf") heatmap.2(centered_data, dendrogram='row', Rowv=as.dendrogram(hc_genes), Colv="NA",srtCol=30, col=bluered(75), RowSideColors=gene_colors, scale="none", density.info="none", trace="none", key=TRUE, keysize=1.2, cexCol=1, lmat=rbind(c(5,0,4,0),c(3,1,2,0)), lhei=c(1.5,5),lwid=c(1.5,0.2,2.5,2.5), margins=c(5,2)) #cexRow=0.1

#try(heatmap.2(cr, col = cm.colors(256), scale='none', symm=TRUE, key=TRUE,density.info='none', trace='none', symkey=FALSE, Colv=TRUE,margins=c(10,10), cexCol=1, cexRow=1)) dev.off() GeneName=heatmap.2(centered_data, dendrogram='both', Rowv=as.dendrogram(hc_genes), Colv=as.dendrogram(hc_samples),col=myheatcol, RowSideColors=gene_colors, scale="none", density.info="none", trace="none", key=TRUE, keysize=1.2, cexCol=1, lmat=rbind(c(5,0,4,0),c(3,1,2,0)), lhei=c(1.5,5),lwid=c(1.5,0.2,2.5,2.5), margins=c(5,2),labRow=NA) GeneName_list=rownames(centered_data)[GeneName\$rowInd] GeneCol<-gene_colors[GeneName\$rowInd] out <- cbind(GeneName_list,GeneCol) write.table(file="$outdir/heatmap_Gene.xls",out,sep="\t",quote=F,row.names=F) dev.off() str111111 } elsif($sample_sort eq "no" and $gene_sort eq "no"){ print R <<str; library(cluster) library(gplots) library(Biobase) library(ctc) library(ape) data = read.table("$outdir/matrix.heatmap.txt", header=T, com='', sep="\t",check.names=F) rownames(data) = data[,1] # set rownames to gene identifiers data = data[,2:length(data[1,])] # remove the gene column since its now the rowname value data = as.matrix(data) # convert to matrix cr = cor(data, method='spearman') data = log10(data+1) centered_data = t(scale(t(data), scale=F)) # center rows, mean substracted gene_dist = dist(centered_data, method='euclidean') hc_genes = hclust(gene_dist, method='complete') hc_samples = hclust(as.dist(1-cr), method="complete") # cluster conditions myheatcol = greenred(75) gene_partition_assignments <- cutree(as.hclust(hc_genes), k=3); partition_colors = rainbow(length(unique(gene_partition_assignments)), start=0.4, end=0.95) gene_colors = partition_colors[gene_partition_assignments] save(list=ls(all=TRUE), file="diffExpr.P0.05_C1.matrix.R.all.RData") pdf("$outdir/matrix.heatmap.pdf") heatmap.2(centered_data, dendrogram='none', Rowv="NA", Colv="NA",srtCol=30, col=bluered(75), RowSideColors=gene_colors, scale="none", density.info="none", trace="none", key=TRUE, keysize=1.2, cexCol=1, lmat=rbind(c(5,0,4,0),c(3,1,2,0)), lhei=c(1.5,5),lwid=c(1.5,0.2,2.5,2.5), margins=c(5,2))

#try(heatmap.2(cr, col = cm.colors(256), scale='none', symm=TRUE, key=TRUE,density.info='none', trace='none', symkey=FALSE, Colv=TRUE,margins=c(10,10), cexCol=1, cexRow=1)) dev.off() GeneName=heatmap.2(centered_data, dendrogram='both', Rowv=as.dendrogram(hc_genes), Colv=as.dendrogram(hc_samples),col=myheatcol, RowSideColors=gene_colors, scale="none", density.info="none", trace="none", key=TRUE, keysize=1.2, cexCol=1, lmat=rbind(c(5,0,4,0),c(3,1,2,0)), lhei=c(1.5,5),lwid=c(1.5,0.2,2.5,2.5), margins=c(5,2),labRow=NA) GeneName_list=rownames(centered_data)[GeneName\$rowInd] GeneCol<-gene_colors[GeneName\$rowInd] out <- cbind(GeneName_list,GeneCol) write.table(file="$outdir/heatmap_Gene.xls",out,sep="\t",quote=F,row.names=F) dev.off() str } else{ print R <<str; library(cluster) library(gplots) library(Biobase) library(ctc) library(ape) data = read.table("$outdir/matrix.heatmap.txt", header=T, com='', sep="\t",check.names=F) rownames(data) = data[,1] # set rownames to gene identifiers data = data[,2:length(data[1,])] # remove the gene column since its now the rowname value data = as.matrix(data) # convert to matrix cr = cor(data, method='spearman') data = log10(data+1) centered_data = t(scale(t(data), scale=F)) # center rows, mean substracted gene_dist = dist(centered_data, method='euclidean') hc_genes = hclust(gene_dist, method='complete') hc_samples = hclust(as.dist(1-cr), method="complete") # cluster conditions myheatcol = greenred(75) gene_partition_assignments <- cutree(as.hclust(hc_genes), k=3); partition_colors = rainbow(length(unique(gene_partition_assignments)), start=0.4, end=0.95) gene_colors = partition_colors[gene_partition_assignments] save(list=ls(all=TRUE), file="diffExpr.P0.05_C1.matrix.R.all.RData") pdf("$outdir/matrix.heatmap.pdf") heatmap.2(centered_data, dendrogram='column', Rowv="NA", Colv=as.dendrogram(hc_samples),srtCol=30, col=bluered(75), RowSideColors=gene_colors, scale="none", density.info="none", trace="none", key=TRUE, keysize=1.2, cexCol=1, lmat=rbind(c(5,0,4,0),c(3,1,2,0)), lhei=c(1.5,5),lwid=c(1.5,0.2,2.5,2.5), margins=c(5,2))

#try(heatmap.2(cr, col = cm.colors(256), scale='none', symm=TRUE, key=TRUE,density.info='none', trace='none', symkey=FALSE, Colv=TRUE,margins=c(10,10), cexCol=1, cexRow=1)) dev.off() GeneName=heatmap.2(centered_data, dendrogram='both', Rowv=as.dendrogram(hc_genes), Colv=as.dendrogram(hc_samples),col=myheatcol, RowSideColors=gene_colors, scale="none", density.info="none", trace="none", key=TRUE, keysize=1.2, cexCol=1, lmat=rbind(c(5,0,4,0),c(3,1,2,0)), lhei=c(1.5,5),lwid=c(1.5,0.2,2.5,2.5), margins=c(5,2)) GeneName_list=rownames(centered_data)[GeneName\$rowInd] GeneCol<-gene_colors[GeneName\$rowInd] out <- cbind(GeneName_list,GeneCol) write.table(file="$outdir/heatmap_Gene.xls",out,sep="\t",quote=F,row.names=F) dev.off() str }

system "Rscript $outdir/matrix.heatmap.R"; system "convert -density 1000 $outdir/matrix.heatmap.pdf $outdir/matrix.heatmap.png";

sub help{ print << "EOF";

Description:

Usage:

#################################################################################

##

-matrix rpkm file,normalize.rpkm

##

-samplelist samplelist,such as sample1,sample2,...

##

-genelist gene list file,the first col must be gene ID

##

-sample_sort yes or no

##

-gene_sort yes or no

-gene_anno

###################################################################################### # # EOF

          exit;
            }
ch7
JSRUN前端笔记, 是针对前端工程师开放的一个笔记分享平台,是前端工程师记录重点、分享经验的一个笔记本。JSRUN前端采用的 MarkDown 语法 (极客专用语法), 这里属于IT工程师。