-
Notifications
You must be signed in to change notification settings - Fork 4
/
kmeanspp2.R
45 lines (42 loc) · 1.74 KB
/
kmeanspp2.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# kmeanspp <- function(x, k, iter.max = 10, nstart = 1, ...) {
# n <- nrow(x) # number of data points
# centers <- numeric(k) # IDs of centers
# distances <- matrix(numeric(n * (k - 1)), ncol = k - 1) # distances[i, j]: The distance between x[i,] and x[centers[j],]
# res.best <- list(tot.withinss = Inf) # the best result among <nstart> iterations
# for (rep in 1:nstart) {
# pr <- rep(1, n) # probability for sampling centers
# for (i in 1:(k - 1)) {
# centers[i] <- sample.int(n, 1, prob = pr) # Pick up the ith center
# distances[, i] <- colSums((t(x) - x[centers[i], ])^2) # Compute (the square of) distances to the center
# pr <- distances[cbind(1:n, max.col(-distances[, 1:i, drop = FALSE]))] # Compute probability for the next sampling
# }
# centers[k] <- sample.int(n, 1, prob = pr)
# data_centers <- as.matrix(x[centers,])
# variations <- sum(seq(from = (k-1), to = 1))
# indexes <- vector(length = variations)
# k_iter <- 1
# for(i in 1:(nrow(data_centers)-1)) {
# for(j in (i+1):nrow(data_centers)){
# indexes[k_iter] <- identical(data_centers[i,], data_centers[j,])
# k_iter <- k_iter + 1
# }
# }
# ## Perform k-means with the obtained centers
# if(sum(indexes) > 0) {
# res <- kmeans(x, k, iter.max = iter.max, nstart = 1, ...)
# } else {
# res <- kmeans(x, data_centers, iter.max = iter.max, nstart = 1, ...)
# res$inicial.centers <- data_centers
# }
#
# ## Store the best result
# if (res$tot.withinss < res.best$tot.withinss) {
# res.best <- res
# }
# }
# res.best
# }
# Cluster `x` by delegating directly to stats::kmeans().
#
# NOTE: despite the name, this active definition performs no k-means++
# seeding of its own; `k` is handed to kmeans() as its `centers` argument and
# kmeans() decides how to initialise. The seeding variant is the
# commented-out code kept elsewhere in this file.
#
# Arguments:
#   x        - data to cluster, passed unchanged to kmeans() as `x`.
#   k        - passed to kmeans() as `centers`.
#   iter.max - passed to kmeans() as `iter.max` (default 10).
#   nstart   - passed to kmeans() as `nstart` (default 1).
#   ...      - further arguments forwarded to kmeans(), e.g. `algorithm`
#              or `trace`.
#
# Returns:
#   The value returned by kmeans(), unmodified.
kmeanspp <- function(x, k, iter.max = 10, nstart = 1, ...) {
  # Forward `...` so caller-supplied options are honoured instead of being
  # silently discarded.
  stats::kmeans(x, centers = k, iter.max = iter.max, nstart = nstart, ...)
}