summary.kmeans_inference.Rd
Summarize the inferential result for k-means clustering
# S3 method for kmeans_inference
summary(object, ...)
object: Output from running kmeans_inference.
...: Additional arguments to be passed to methods.
A data frame with summarized results
library(CADET)
library(ggplot2)
# Reproducible simulation settings: n observations, 50 per true cluster.
set.seed(2022)
n <- 150
true_clusters <- rep(c(1, 2, 3), each = 50)
delta <- 10  # scales how far the cluster centres sit from the origin
q <- 2       # number of columns (features) in the simulated matrix
sig <- 1     # standard deviation of the Gaussian noise

# One cluster centre per row; every coordinate other than the single
# delta-dependent entry is zero.
zero_pad <- rep(0, q - 1)
mu <- rbind(
  c(delta / 2, zero_pad),
  c(zero_pad, sqrt(3) * delta / 2),
  c(-delta / 2, zero_pad)
)

# Generate a matrix normal sample: each row is its cluster centre plus noise.
X <- mu[true_clusters, ] + matrix(rnorm(n * q, sd = sig), nrow = n, ncol = q)
# Visualize the data: scatter plot of the two columns of X.
# No colour aesthetic is mapped here, so the manual colour scale and the
# legend settings do not change what is drawn.
ggplot(data.frame(X), aes(x = X1, y = X2)) +
  geom_point(size = 2) +
  labs(x = "Feature 1", y = "Feature 2") +
  scale_colour_manual(values = c("dodgerblue3", "rosybrown", "orange")) +
  theme_classic(base_size = 18) +
  theme(
    legend.position = "none",
    legend.title = element_blank(),
    plot.title = element_text(hjust = 0.5)
  )
# Run k-means clustering with K=3 and extract the `final_cluster` element.
k <- 3
kmeans_fit <- kmeans_estimation(X, k, iter.max = 20, seed = 2023)
estimated_clusters <- kmeans_fit$final_cluster

# Cross-tabulate the true labels against the estimated labels.
table(true_clusters, estimated_clusters)
#>              estimated_clusters
#> true_clusters  1  2  3
#>             1  0  0 50
#>             2  0 50  0
#>             3 50  0  0
# Visualize the clusters: same scatter plot, coloured by estimated cluster.
ggplot(
  data.frame(X),
  aes(x = X1, y = X2, colour = as.factor(estimated_clusters))
) +
  geom_point(size = 2) +
  labs(x = "Feature 1", y = "Feature 2") +
  scale_colour_manual(values = c("dodgerblue3", "rosybrown", "orange")) +
  theme_classic(base_size = 18) +
  theme(
    legend.position = "none",
    legend.title = element_blank(),
    plot.title = element_text(hjust = 0.5)
  )
# Test for a difference in the first feature between estimated clusters
# 1 and 2. The two unnamed arguments after `k` are the cluster labels
# being compared.
cl_1_2_feat_1 <- kmeans_inference_1f(
  X,
  k = 3,
  1,
  2,
  feat = 1,
  iso = TRUE,
  sig = sig,
  covMat = NULL,
  seed = 2023,
  iter.max = 30
)

# Print the one-row summary of the test.
summary(cl_1_2_feat_1)
#>   cluster_1 cluster_2 test_stat  p_selective       p_naive
#> 1         1         2 -5.445953 0.0007753861 2.886169e-163