Summarize the inferential result for k-means clustering

# S3 method for kmeans_inference
summary(object, ...)

Arguments

object

An object of class kmeans_inference, i.e. the output from running kmeans_inference.

...

Additional arguments to be passed to methods.

Value

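A data frame with summarized results: the two clusters being compared (cluster_1, cluster_2), the test statistic (test_stat), and the selective and naive p-values (p_selective, p_naive).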

Examples

library(KmeansInference)
library(ggplot2)

# Simulate n = 150 observations in q = 2 dimensions from three equally sized
# Gaussian clusters. The cluster means sit at the corners of an equilateral
# triangle with side length delta; the noise has standard deviation sig.
set.seed(2022)
n <- 150
true_clusters <- c(rep(1, 50), rep(2, 50), rep(3, 50))
delta <- 10
q <- 2
mu <- rbind(
  c(delta / 2, rep(0, q - 1)),
  c(rep(0, q - 1), sqrt(3) * delta / 2),
  c(-delta / 2, rep(0, q - 1))
)
sig <- 1
# Generate a matrix normal sample
X <- matrix(rnorm(n * q, sd = sig), n, q) + mu[true_clusters, ]

# Visualize the data (no colour mapping yet, so no colour scale or legend
# settings are needed here)
ggplot(data.frame(X), aes(x = X1, y = X2)) +
  geom_point(size = 2) +
  xlab("Feature 1") +
  ylab("Feature 2") +
  theme_classic(base_size = 18)

k <- 3
# Run k-means clustering with K = 3
estimated_clusters <- kmeans_estimation(
  X, k,
  iter.max = 20, seed = 2021
)$final_cluster
table(true_clusters, estimated_clusters)
#>              estimated_clusters
#> true_clusters  1  2  3
#>             1  0 50  0
#>             2  0  0 50
#>             3 50  0  0

# Visualize the clusters, coloured by their estimated label
ggplot(
  data.frame(X),
  aes(x = X1, y = X2, col = as.factor(estimated_clusters))
) +
  geom_point(size = 2) +
  xlab("Feature 1") +
  ylab("Feature 2") +
  theme_classic(base_size = 18) +
  theme(legend.position = "none") +
  scale_colour_manual(values = c("dodgerblue3", "rosybrown", "orange"))

### Run a test for a difference in means between estimated clusters 1 and 3
cluster_1 <- 1
cluster_2 <- 3
# Pass the same k, iter.max and seed that were used for the estimation step
# above instead of repeating the literal number of clusters.
cl_1_2_inference_demo <- kmeans_inference(
  X,
  k = k, cluster_1, cluster_2,
  sig = sig, iter.max = 20, seed = 2021
)
summary(cl_1_2_inference_demo)
#>   cluster_1 cluster_2 test_stat  p_selective p_naive
#> 1         1         3  10.44088 4.473563e-15       0