#!/usr/bin/perl -w use strict; use File::Basename qw(basename dirname); use Getopt::Long; my ($matrix,$samplelist,$genelist,$sample_sort,$gene_sort,$gene_anno);
#my $outdir=dirname($matrix); GetOptions( 'matrix=s' => \$matrix, 'samplelist=s' => \$samplelist, 'genelist=s' => \$genelist, 'sample_sort=s' =>\$sample_sort, 'gene_sort=s' =>\$gene_sort, 'gene_anno=s' =>\$gene_anno, ) || &help; $sample_sort ||="yes"; $gene_sort ||="yes";
&help unless ($matrix && $samplelist && $genelist); my $outdir=dirname($matrix); my @sample=split/,/,$samplelist; my %anno; my %gene; open GENE,"$genelist" or die "can not open the $genelist$!";
#}
open ANNO,"$gene_anno";
while(
}
my %sample_site;
open MATRIX,$matrix or die "can not open the matrix $matrix file $!";
my $head=
}
#}
#}
#}
}
while(
} close OUT; open R,">$outdir/matrix.heatmap.R" or die "can not open the matrix.heatmap.R!"; if($sample_sort eq "yes" and $gene_sort eq "yes"){ print R <<str; library(cluster) library(gplots) library(Biobase) library(ctc) library(ape) data = read.table("$outdir/matrix.heatmap.txt", header=T, com='', sep="\t",check.names=F) rownames(data) = data[,1] # set rownames to gene identifiers data = data[,2:length(data[1,])] # remove the gene column since its now the rowname value data = as.matrix(data) # convert to matrix cr = cor(data, method='spearman') data = log10(data+1) centered_data = t(scale(t(data), scale=F)) # center rows, mean substracted gene_dist = dist(centered_data, method='euclidean') hc_genes = hclust(gene_dist, method='complete') hc_samples = hclust(as.dist(1-cr), method="complete") # cluster conditions myheatcol = greenred(75) gene_partition_assignments <- cutree(as.hclust(hc_genes), k=3); partition_colors = rainbow(length(unique(gene_partition_assignments)), start=0.4, end=0.95) gene_colors = partition_colors[gene_partition_assignments] save(list=ls(all=TRUE), file="diffExpr.P0.05_C1.matrix.R.all.RData") pdf("$outdir/matrix.heatmap.pdf") heatmap.2(centered_data, dendrogram='both', Rowv=as.dendrogram(hc_genes), Colv=as.dendrogram(hc_samples),srtCol=30, col=bluered(75), RowSideColors=gene_colors, scale="none", density.info="none", trace="none", key=TRUE, keysize=1.2, cexCol=1, lmat=rbind(c(5,0,4,0),c(3,1,2,0)), lhei=c(1.5,5),lwid=c(1.5,0.2,2.5,2.5), margins=c(5,2))
#try(heatmap.2(cr, col = cm.colors(256), scale='none', symm=TRUE, key=TRUE,density.info='none', trace='none', symkey=FALSE, Colv=TRUE,margins=c(10,10), cexCol=1, cexRow=1)) dev.off() GeneName=heatmap.2(centered_data, dendrogram='both', Rowv=as.dendrogram(hc_genes), Colv=as.dendrogram(hc_samples),col=myheatcol, RowSideColors=gene_colors, scale="none", density.info="none", trace="none", key=TRUE, keysize=1.2, cexCol=1, lmat=rbind(c(5,0,4,0),c(3,1,2,0)), lhei=c(1.5,5),lwid=c(1.5,0.2,2.5,2.5), margins=c(5,2)) GeneName_list=rownames(centered_data)[GeneName\$rowInd] GeneCol<-gene_colors[GeneName\$rowInd] out <- cbind(GeneName_list,GeneCol) write.table(file="$outdir/heatmap_Gene.xls",out,sep="\t",quote=F,row.names=F) dev.off() str } elsif($sample_sort eq "no" and $gene_sort eq "yes"){
print R <<str111111; library(cluster) library(gplots) library(Biobase) library(ctc) library(ape) data = read.table("$outdir/matrix.heatmap.txt", header=T, com='', sep="\t",check.names=F) rownames(data) = data[,1] # set rownames to gene identifiers data = data[,2:length(data[1,])] # remove the gene column since its now the rowname value data = as.matrix(data) # convert to matrix cr = cor(data, method='spearman') data = log10(data+1) centered_data = t(scale(t(data), scale=F)) # center rows, mean substracted gene_dist = dist(centered_data, method='euclidean') hc_genes = hclust(gene_dist, method='complete') hc_samples = hclust(as.dist(1-cr), method="complete") # cluster conditions myheatcol = greenred(75) gene_partition_assignments <- cutree(as.hclust(hc_genes), k=3); partition_colors = rainbow(length(unique(gene_partition_assignments)), start=0.4, end=0.95) gene_colors = partition_colors[gene_partition_assignments] save(list=ls(all=TRUE), file="diffExpr.P0.05_C1.matrix.R.all.RData") pdf("$outdir/matrix.heatmap.pdf") heatmap.2(centered_data, dendrogram='row', Rowv=as.dendrogram(hc_genes), Colv="NA",srtCol=30, col=bluered(75), RowSideColors=gene_colors, scale="none", density.info="none", trace="none", key=TRUE, keysize=1.2, cexCol=1, lmat=rbind(c(5,0,4,0),c(3,1,2,0)), lhei=c(1.5,5),lwid=c(1.5,0.2,2.5,2.5), margins=c(5,2)) #cexRow=0.1
#try(heatmap.2(cr, col = cm.colors(256), scale='none', symm=TRUE, key=TRUE,density.info='none', trace='none', symkey=FALSE, Colv=TRUE,margins=c(10,10), cexCol=1, cexRow=1)) dev.off() GeneName=heatmap.2(centered_data, dendrogram='both', Rowv=as.dendrogram(hc_genes), Colv=as.dendrogram(hc_samples),col=myheatcol, RowSideColors=gene_colors, scale="none", density.info="none", trace="none", key=TRUE, keysize=1.2, cexCol=1, lmat=rbind(c(5,0,4,0),c(3,1,2,0)), lhei=c(1.5,5),lwid=c(1.5,0.2,2.5,2.5), margins=c(5,2),labRow=NA) GeneName_list=rownames(centered_data)[GeneName\$rowInd] GeneCol<-gene_colors[GeneName\$rowInd] out <- cbind(GeneName_list,GeneCol) write.table(file="$outdir/heatmap_Gene.xls",out,sep="\t",quote=F,row.names=F) dev.off() str111111 } elsif($sample_sort eq "no" and $gene_sort eq "no"){ print R <<str; library(cluster) library(gplots) library(Biobase) library(ctc) library(ape) data = read.table("$outdir/matrix.heatmap.txt", header=T, com='', sep="\t",check.names=F) rownames(data) = data[,1] # set rownames to gene identifiers data = data[,2:length(data[1,])] # remove the gene column since its now the rowname value data = as.matrix(data) # convert to matrix cr = cor(data, method='spearman') data = log10(data+1) centered_data = t(scale(t(data), scale=F)) # center rows, mean substracted gene_dist = dist(centered_data, method='euclidean') hc_genes = hclust(gene_dist, method='complete') hc_samples = hclust(as.dist(1-cr), method="complete") # cluster conditions myheatcol = greenred(75) gene_partition_assignments <- cutree(as.hclust(hc_genes), k=3); partition_colors = rainbow(length(unique(gene_partition_assignments)), start=0.4, end=0.95) gene_colors = partition_colors[gene_partition_assignments] save(list=ls(all=TRUE), file="diffExpr.P0.05_C1.matrix.R.all.RData") pdf("$outdir/matrix.heatmap.pdf") heatmap.2(centered_data, dendrogram='none', Rowv="NA", Colv="NA",srtCol=30, col=bluered(75), RowSideColors=gene_colors, scale="none", density.info="none", trace="none", key=TRUE, keysize=1.2, cexCol=1, lmat=rbind(c(5,0,4,0),c(3,1,2,0)), lhei=c(1.5,5),lwid=c(1.5,0.2,2.5,2.5), margins=c(5,2))
#try(heatmap.2(cr, col = cm.colors(256), scale='none', symm=TRUE, key=TRUE,density.info='none', trace='none', symkey=FALSE, Colv=TRUE,margins=c(10,10), cexCol=1, cexRow=1)) dev.off() GeneName=heatmap.2(centered_data, dendrogram='both', Rowv=as.dendrogram(hc_genes), Colv=as.dendrogram(hc_samples),col=myheatcol, RowSideColors=gene_colors, scale="none", density.info="none", trace="none", key=TRUE, keysize=1.2, cexCol=1, lmat=rbind(c(5,0,4,0),c(3,1,2,0)), lhei=c(1.5,5),lwid=c(1.5,0.2,2.5,2.5), margins=c(5,2),labRow=NA) GeneName_list=rownames(centered_data)[GeneName\$rowInd] GeneCol<-gene_colors[GeneName\$rowInd] out <- cbind(GeneName_list,GeneCol) write.table(file="$outdir/heatmap_Gene.xls",out,sep="\t",quote=F,row.names=F) dev.off() str } else{ print R <<str; library(cluster) library(gplots) library(Biobase) library(ctc) library(ape) data = read.table("$outdir/matrix.heatmap.txt", header=T, com='', sep="\t",check.names=F) rownames(data) = data[,1] # set rownames to gene identifiers data = data[,2:length(data[1,])] # remove the gene column since its now the rowname value data = as.matrix(data) # convert to matrix cr = cor(data, method='spearman') data = log10(data+1) centered_data = t(scale(t(data), scale=F)) # center rows, mean substracted gene_dist = dist(centered_data, method='euclidean') hc_genes = hclust(gene_dist, method='complete') hc_samples = hclust(as.dist(1-cr), method="complete") # cluster conditions myheatcol = greenred(75) gene_partition_assignments <- cutree(as.hclust(hc_genes), k=3); partition_colors = rainbow(length(unique(gene_partition_assignments)), start=0.4, end=0.95) gene_colors = partition_colors[gene_partition_assignments] save(list=ls(all=TRUE), file="diffExpr.P0.05_C1.matrix.R.all.RData") pdf("$outdir/matrix.heatmap.pdf") heatmap.2(centered_data, dendrogram='column', Rowv="NA", Colv=as.dendrogram(hc_samples),srtCol=30, col=bluered(75), RowSideColors=gene_colors, scale="none", density.info="none", trace="none", key=TRUE, keysize=1.2, cexCol=1, lmat=rbind(c(5,0,4,0),c(3,1,2,0)), lhei=c(1.5,5),lwid=c(1.5,0.2,2.5,2.5), margins=c(5,2))
#try(heatmap.2(cr, col = cm.colors(256), scale='none', symm=TRUE, key=TRUE,density.info='none', trace='none', symkey=FALSE, Colv=TRUE,margins=c(10,10), cexCol=1, cexRow=1)) dev.off() GeneName=heatmap.2(centered_data, dendrogram='both', Rowv=as.dendrogram(hc_genes), Colv=as.dendrogram(hc_samples),col=myheatcol, RowSideColors=gene_colors, scale="none", density.info="none", trace="none", key=TRUE, keysize=1.2, cexCol=1, lmat=rbind(c(5,0,4,0),c(3,1,2,0)), lhei=c(1.5,5),lwid=c(1.5,0.2,2.5,2.5), margins=c(5,2)) GeneName_list=rownames(centered_data)[GeneName\$rowInd] GeneCol<-gene_colors[GeneName\$rowInd] out <- cbind(GeneName_list,GeneCol) write.table(file="$outdir/heatmap_Gene.xls",out,sep="\t",quote=F,row.names=F) dev.off() str }
system "Rscript $outdir/matrix.heatmap.R"; system "convert -density 1000 $outdir/matrix.heatmap.pdf $outdir/matrix.heatmap.png";
sub help{ print << "EOF";
Description:
Usage:
#################################################################################
##
##
##
##
##
###################################################################################### # # EOF
exit;
}