[Statistics] two-component mixture model (mclust)
#Gauusian modeling --> sample quality control
library(mclust)
a = t(p_dat2)
#for model selection
BIC = mclustBIC(a)
plot(BIC)
#model select(smallest BIC value)
mod1 = Mclust(a, x = BIC)
#mod1 = Mclust(a, modelNames=c("VEI"))
# The clustering vector:
cls <- mod1$classification
#the mean for each cluster,
m_dat = mod1$parameters$mean
a = apply(m_dat,2,mean)
names(a) = c("C1","C2","C3","C4")
barplot(a, main="Mean value of each gaussian component")
summary(mod1)
summary(mod1, parameters = TRUE)
write.table(c_dat,'/Volumes/Data1/part1/LP_data/20170901/protein/2_20170829_protein_clster.txt',sep='\t',quote=FALSE)
par(mfrow=c(1,4))
for(i in 1:length(table(cls)))
{
c1 = c_dat[,which(n_dat[,2]==i)]
plotDensity(log(c1,2), main=paste("cluster ",i,sep=""))
}
library(pheatmap)
library(RColorBrewer)
g_cnt = apply(p_dat2,1,function(x) length(which(x ==0)))
g_dat= p_dat2[which(g_cnt ==0),]
c_dat = c()
n_dat = c()
for(i in 1:length(table(cls)))
{
c_dat = cbind(c_dat,g_dat[,which(colnames(g_dat) %in% names(which(cls==i)))])
n_dat = rbind(n_dat,cbind(names(which(cls==i)),rep(i,length(names(which(cls==i))))))
}
annotdf <- data.frame(n_dat[,2])
rownames(annotdf) = colnames(g_dat)
colnames(annotdf) = c("cluster")
mycolors <- c("white","black","red","blue")
mycolors <- list(cluster = mycolors)
my_palette <- colorRampPalette(c("blue", "gray83", "red"))(n = 100)
pheatmap(log(g_dat,2),
color = my_palette,
# breaks=seq(-15, 30, length.out=101),
cluster_rows = FALSE,
cluster_cols = FALSE,
annotation_col= annotdf,
#annotation_colors = mycolors,
show_rownames = F
)