[Statistics] two-component mixture model (mclust)

Statistics

[Statistics] two-component mixture model (mclust)

jjbang 2017. 8. 29. 14:13

#Gauusian modeling --> sample quality control

library(mclust)

a = t(p_dat2)

#for model selection

BIC = mclustBIC(a)

plot(BIC)

#model select(smallest BIC value)

mod1 = Mclust(a, x = BIC)

#mod1 = Mclust(a, modelNames=c("VEI"))

# The clustering vector:

cls <- mod1$classification

#the mean for each cluster,

m_dat = mod1$parameters$mean

a = apply(m_dat,2,mean)

names(a) = c("C1","C2","C3","C4")

barplot(a, main="Mean value of each gaussian component")

summary(mod1)

summary(mod1, parameters = TRUE)

write.table(c_dat,'/Volumes/Data1/part1/LP_data/20170901/protein/2_20170829_protein_clster.txt',sep='\t',quote=FALSE)

par(mfrow=c(1,4))

for(i in 1:length(table(cls)))

{

c1 = c_dat[,which(n_dat[,2]==i)]

plotDensity(log(c1,2), main=paste("cluster ",i,sep=""))

}

library(pheatmap)

library(RColorBrewer)

g_cnt = apply(p_dat2,1,function(x) length(which(x ==0)))

g_dat= p_dat2[which(g_cnt ==0),]

c_dat = c()

n_dat = c()

for(i in 1:length(table(cls)))

{

c_dat = cbind(c_dat,g_dat[,which(colnames(g_dat) %in% names(which(cls==i)))])

n_dat = rbind(n_dat,cbind(names(which(cls==i)),rep(i,length(names(which(cls==i))))))

}

annotdf <- data.frame(n_dat[,2])

rownames(annotdf) = colnames(g_dat)

colnames(annotdf) = c("cluster")

mycolors <- c("white","black","red","blue")

mycolors <- list(cluster = mycolors)

my_palette <- colorRampPalette(c("blue", "gray83", "red"))(n = 100)

pheatmap(log(g_dat,2),

color = my_palette,

# breaks=seq(-15, 30, length.out=101),

cluster_rows = FALSE,

cluster_cols = FALSE,

annotation_col= annotdf,

#annotation_colors = mycolors,

show_rownames = F

)

저작자표시 비영리 변경금지