Statistics

[Statistics] two-component mixture model (mclust)

jjbang 2017. 8. 29. 14:13

#Gauusian modeling -->  sample quality control

library(mclust)

a = t(p_dat2)

#for model selection 

BIC = mclustBIC(a) 

plot(BIC)


#model select(smallest BIC value)

mod1 = Mclust(a, x = BIC)

#mod1 = Mclust(a, modelNames=c("VEI"))


# The clustering vector:

cls <- mod1$classification


#the mean for each cluster,

m_dat = mod1$parameters$mean 

a = apply(m_dat,2,mean)

names(a) = c("C1","C2","C3","C4")

barplot(a, main="Mean value of each gaussian component")



summary(mod1)

summary(mod1, parameters = TRUE)


write.table(c_dat,'/Volumes/Data1/part1/LP_data/20170901/protein/2_20170829_protein_clster.txt',sep='\t',quote=FALSE)

par(mfrow=c(1,4))

for(i in 1:length(table(cls)))

{

  c1 = c_dat[,which(n_dat[,2]==i)]

  plotDensity(log(c1,2), main=paste("cluster ",i,sep=""))

  

}





library(pheatmap) 

library(RColorBrewer)

g_cnt = apply(p_dat2,1,function(x) length(which(x ==0)))

g_dat= p_dat2[which(g_cnt ==0),]


c_dat = c()

n_dat = c()

for(i in 1:length(table(cls)))

{

  c_dat = cbind(c_dat,g_dat[,which(colnames(g_dat) %in% names(which(cls==i)))])

  n_dat = rbind(n_dat,cbind(names(which(cls==i)),rep(i,length(names(which(cls==i))))))

  

}



annotdf <- data.frame(n_dat[,2])

rownames(annotdf) = colnames(g_dat)

colnames(annotdf) = c("cluster")

mycolors <- c("white","black","red","blue")

mycolors <- list(cluster = mycolors)

my_palette <- colorRampPalette(c("blue", "gray83", "red"))(n = 100)

pheatmap(log(g_dat,2),

         color = my_palette,

        # breaks=seq(-15, 30, length.out=101),

         cluster_rows = FALSE,

         cluster_cols = FALSE,

         annotation_col= annotdf,

         #annotation_colors = mycolors,

        show_rownames = F

)