summary.kmeans_inference.Rd
Summarize the inferential result for k-means clustering
# S3 method for kmeans_inference
summary(object, ...)
object: Output from running kmeans_inference.
...: Further arguments to be passed to methods.
A data frame with summarized results; in the example below it has the columns cluster_1, cluster_2, test_stat, p_selective and p_naive.
library(KmeansInference)
library(ggplot2)
# Simulate n observations in q dimensions from three equally sized clusters.
# The seed is fixed so that the random draws are reproducible.
set.seed(2022)
n <- 150
q <- 2
delta <- 10
sig <- 1
true_clusters <- rep(c(1, 2, 3), each = 50)
# Cluster means, one row per cluster. With q = 2 the three means sit at the
# corners of an equilateral triangle with side length delta.
mu <- matrix(0, nrow = 3, ncol = q)
mu[1, 1] <- delta / 2
mu[2, q] <- sqrt(3) * delta / 2
mu[3, 1] <- -delta / 2
# Independent N(0, sig^2) noise, shifted by the mean of each row's true cluster
noise <- matrix(rnorm(n * q, sd = sig), nrow = n, ncol = q)
X <- noise + mu[true_clusters, ]
# Visualize the raw data as a scatter plot of the two features.
# No colour aesthetic is mapped in this plot, so no legend is drawn and a
# manual colour scale would never be used; only the settings that can affect
# this figure are kept. `size` is ggplot2's documented point-size aesthetic.
ggplot(data.frame(X), aes(x = X1, y = X2)) +
  geom_point(size = 2) +
  xlab("Feature 1") +
  ylab("Feature 2") +
  theme_classic(base_size = 18) +
  # Centres the plot title if one is added later
  theme(plot.title = element_text(hjust = 0.5))
# Run k-means clustering with K = 3 and keep the final cluster assignment
k <- 3
kmeans_fit <- kmeans_estimation(X, k, iter.max = 20, seed = 2021)
estimated_clusters <- kmeans_fit$final_cluster
# Cross-tabulate true against estimated labels; the estimated labels are a
# permutation of the true ones, with every observation recovered.
table(true_clusters, estimated_clusters)
#>              estimated_clusters
#> true_clusters  1  2  3
#>             1  0 50  0
#>             2  0  0 50
#>             3 50  0  0
# Visualize the clusters: the same scatter plot, with each point coloured by
# its estimated cluster label (one palette entry per cluster, legend hidden).
cluster_colours <- c("dodgerblue3", "rosybrown", "orange")
ggplot(
  data.frame(X),
  aes(x = X1, y = X2, col = as.factor(estimated_clusters))
) +
  geom_point(cex = 2) +
  labs(x = "Feature 1", y = "Feature 2") +
  theme_classic(base_size = 18) +
  scale_colour_manual(values = cluster_colours) +
  theme(
    legend.position = "none",
    legend.title = element_blank(),
    plot.title = element_text(hjust = 0.5)
  )
### Test for a difference in means between estimated clusters 1 and 3
cluster_1 <- 1
cluster_2 <- 3
# Same k, iter.max and seed as the clustering step above; sig is the noise
# standard deviation used to simulate the data.
cl_1_2_inference_demo <- kmeans_inference(
  X,
  k = k,
  cluster_1,
  cluster_2,
  sig = sig,
  iter.max = 20,
  seed = 2021
)
# The summary method returns the result as a one-row data frame
summary(cl_1_2_inference_demo)
#>   cluster_1 cluster_2 test_stat  p_selective p_naive
#> 1         1         3  10.44088 4.473563e-15       0