diff --git a/R/gmum.errors.R b/R/gmum.errors.R
new file mode 100644
index 0000000..5fbe781
--- /dev/null
+++ b/R/gmum.errors.R
@@ -0,0 +1,7 @@
+# Error codes: package-level string constants of the form "Error <number>".
+# NOTE(review): presumably meant to be passed as the first argument of
+# gmum.error(); the R/gng.R changes in this same patch call gmum.error()
+# with ERROR_BAD_PARAMS / ERROR, which are not defined in this file -
+# confirm which set of constants is the intended one.
+
+GMUM_WRONG_LIBRARY = "Error 20"
+GMUM_WRONG_KERNEL = "Error 21"
+GMUM_BAD_PREPROCESS = "Error 22"
+GMUM_NOT_SUPPORTED = "Error 23"
+GMUM_WRONG_PARAMS = "Error 24"
\ No newline at end of file
diff --git a/R/gng-visualize.R b/R/gng-visualize.R
index 0292ba1..6f4a0b4 100644
--- a/R/gng-visualize.R
+++ b/R/gng-visualize.R
@@ -2,7 +2,7 @@ library(igraph)
.gng.plot3d<-function(gngServer){
if("rgl" %in% rownames(installed.packages()) == TRUE){
- g <- .gng.construct_igraph(gngServer)
+ g <- convertToGraph(gngServer)
.visualizeIGraphRGL(g)
}else{
warning("Please install rgl package to plot 3d graphs")
@@ -81,9 +81,7 @@ library(igraph)
}
}
.gng.plot2d.errors<-function(gngServer, vertex.color, layout){
- tmp_name <- paste("tmp",sample(1:1000, 1),".graphml", sep="")
- gngServer$exportToGraphML(tmp_name)
- ig = .readFromGraphML(tmp_name )
+ ig <- convertToGraph(gngServer)
if(length(V(ig))==0) return
@@ -97,14 +95,12 @@ library(igraph)
}
.visualizeIGraph2dWithErrors(ig, vertex.color, layout, gngServer)
-
- file.remove(tmp_name)
}
.gng.plot2d<-function(gngServer, vertex.color, layout){
tmp_name <- paste("tmp",sample(1:1000, 1),".graphml", sep="")
- gngServer$exportToGraphML(tmp_name)
- ig = .readFromGraphML(tmp_name )
+
+ ig <- convertToGraph(gngServer)
if(length(V(ig))==0) return
@@ -118,7 +114,6 @@ library(igraph)
}
.visualizeIGraph2d(ig, vertex.color, layout)
- file.remove(tmp_name)
}
#' Visualize igraph using igraph plot
@@ -154,7 +149,9 @@ library(igraph)
.visualizeIGraph2d(ig, vertex.color, layout_2d)
title("Graph visualization")
errors_raw = gng$getErrorStatistics()
- errors = log((errors_raw+1)/min(errors_raw+1))
- plot(errors, type="l", lty=2, lwd=2, xlab="Time [s]", ylab="Mean error (log)", frame.plot=F)
+ # Skip the first four error entries (presumably warm-up values - TODO confirm),
+ # but only when at least five exist: otherwise 5:length(errors_raw) counts
+ # DOWN and indexes past the end, yielding NA and reversed values.
+ if(length(errors_raw) >= 5){
+   errors_raw = errors_raw[5:length(errors_raw)]
+ }
+ errors = errors_raw
+ #errors = log((errors_raw)/min(errors_raw+1e-4))
+ # NOTE(review): the title() call that follows still reads "Mean error (log)"
+ # although the plotted values are no longer log-transformed - confirm the title.
+ plot(errors, type="l", lty=2, lwd=2, xlab="Batch", ylab="Mean batch error", frame.plot=FALSE)
title("Mean error (log)")
}
diff --git a/R/gng.R b/R/gng.R
index 0639798..09695bd 100644
--- a/R/gng.R
+++ b/R/gng.R
@@ -1,5 +1,3 @@
-#dev note: I have no idea how to document S4 methods using roxygen, I will have to assign someone to this task
-
library(igraph)
library(methods)
@@ -49,7 +47,7 @@ gng.train.online <- function(dim){
.gng.dataset.bagging <- 2
.gng.dataset.sequential <-1
-gng.train.offline <- function(max.iter = 100, min.improvement = 1e-2){
+# Build the descriptor for offline training.
+# Returns a vector whose elements are, in order: the .gng.train.offline tag,
+# max.iter and min.improvement. The model constructor compares element 1
+# against .gng.train.offline and reads element 3 as the minimum relative
+# improvement of the error (min_relative_dif); element 2 is presumably the
+# iteration cap (max_iter) - TODO confirm.
+gng.train.offline <- function(max.iter = 100, min.improvement = 1e-3){
c(.gng.train.offline, max.iter , min.improvement)
}
@@ -336,6 +334,26 @@ errorStatistics.gng <- NULL
OptimizedGNG <- NULL
+#' @title clustering
+#'
+#' @description Gets vector with node indexes assigned to examples in the dataset
+#'
+#' @usage
+#' clustering(gng)
+#'
+#' @export
+#'
+#' @rdname clustering-methods
+#'
+#' @docType methods
+#'
+#' @examples
+#' clustering(gng)
+#'
+#' @aliases clustering
+#'
+clustering.gng <- NULL
+
#' @title errorStatistics
#'
#' @description Gets vector with errors for every second of execution
@@ -424,6 +442,11 @@ summary.gng <- NULL
convertToGraph.gng <- NULL
+
+
+
+
+
generateExamples <- NULL
#' @title insertExamples
@@ -474,6 +497,8 @@ evalqOnLoad({
){
+
+
config <- new(GNGConfiguration)
# Fill in configuration
@@ -481,36 +506,36 @@ evalqOnLoad({
config$dim = ncol(x)
}else{
- config$dim = training[2]
- print(config$dim)
+ config$dim = training[2]
}
if(type[1] == .gng.type.optimized){
- config$uniformgrid_optimization = TRUE
- config$lazyheap_optimization = TRUE
- config$set_bounding_box(type[2], type[3])
+ config$.uniformgrid_optimization = TRUE
+ config$.lazyheap_optimization = TRUE
+ config$.set_bounding_box(type[2], type[3])
if(training[1] == .gng.train.offline){
- if(!max(df) <= type[3] && !min(df) >= type[2]){
- gmum.error("Passed incorrect parameters. The dataset is not in the defined range")
+ # The bounding box was configured as [type[2], type[3]] above. Reject the
+ # dataset as soon as ANY value falls outside it. The previous condition
+ # parsed as !(max(x) <= hi) && !(min(x) >= lo), i.e. it only fired when BOTH
+ # bounds were violated, so data exceeding a single bound slipped through.
+ if(max(x) > type[3] || min(x) < type[2]){
+ gmum.error(ERROR_BAD_PARAMS, "Passed incorrect parameters. The dataset is not in the defined range")
}
}
}else{
- config$uniformgrid_optimization = FALSE
- config$lazyheap_optimization = FALSE
+ config$.uniformgrid_optimization = FALSE
+ config$.lazyheap_optimization = FALSE
}
if(type[1] == .gng.type.utility){
- config$experimental_utility_k = type[2]
- config$experimental_utility_option = 1
+ config$.experimental_utility_k = type[2]
+ config$.experimental_utility_option = 1
}
else{
- config$experimental_utility_option = 0
+ config$.experimental_utility_option = 0
}
- config$dataset_type=.gng.dataset.bagging
+
+ config$.dataset_type=.gng.dataset.bagging
config$beta = beta
config$max_edge_age = max.edge.age
config$alpha = alpha
@@ -521,7 +546,7 @@ evalqOnLoad({
config$lambda = lambda
config$verbosity = verbosity
- if(!config$check_correctness()){
+ if(!config$.check_correctness()){
gmum.error(ERROR_BAD_PARAMS, "Passed incorrect parameters.")
}
@@ -530,6 +555,7 @@ evalqOnLoad({
# Perform training on passed dataset
if(training[1] == .gng.train.offline){
+
print("Training offline")
if(is.null(x)){
gmum.error(ERROR, "Passed null data and requested training offline")
@@ -541,34 +567,63 @@ evalqOnLoad({
print(max_iter)
min_relative_dif = training[3]
iter = 0
- errors_calculated = 0
- while(iter < max_iter || errors_calculated == 0){
- Sys.sleep(0.1)
- iter = server$getCurrentIteration()
-
- if(iter %% (max_iter/100) == 0){
- print(paste("Iteration", iter))
- }
-
- # Iter 5 = 5 times passed whole dataset.
- if(iter > 5){
- errors_calculated = 1
- errors = server$getErrorStatistics()
- best_previously = min(errors[(length(errors)-5):length(errors)-1])
- current = errors[length(errors)]
- if(best_previously != 0){
- change = 1.0 - current/best_previously
- if(change < min_relative_dif){
- print(best_previously)
- print(errors[(length(errors)-5):length(errors)-1])
- print("Patience bailed out")
- break
+ previous_iter = -1
+ best_so_far = 1e10
+ initial_patience = 3
+ error_index = -1 # always bigger than 0
+ patience = initial_patience
+
+ # Poll the training server until max_iter is reached, the server stops
+ # running, or the error stops improving for `initial_patience` consecutive
+ # checks. A user interrupt terminates the server instead of leaving it running.
+ tryCatch({
+   while(iter < max_iter && server$isRunning()){
+     Sys.sleep(0.1)
+     iter = server$getCurrentIteration()
+
+     # Report progress once per distinct iteration value: the loop polls every
+     # 0.1 s, so the same iteration can be observed many times.
+     if(previous_iter != iter && iter %% (max_iter/100) == 0){
+       print(paste("Iteration", iter))
+     }
+     # Must be updated inside the loop; it used to be assigned only after the
+     # loop had finished, which made the de-duplication above a no-op.
+     previous_iter = iter
+
+     # Fetch the statistics once so the length check and the window below
+     # operate on the same vector.
+     errors = server$getErrorStatistics()
+     if(length(errors) > 5){
+
+       # Minimum over the six most recent error entries
+       best_previously = min(errors[(length(errors)-5):length(errors)])
+
+       #this is same as (best_so_far-best_previously)/best_so_far < min_relative_dif
+       #we get minimum of the recent window and look at the history
+       # NOTE(review): error_index holds the value of .getGNGErrorIndex() seen at
+       # the previous check. If that index only grows, (error_index - current)
+       # is never > 4 and patience would never decrease - confirm the intended sign.
+       if( (error_index - server$.getGNGErrorIndex()) > 4 &&
+           (best_so_far - best_previously) < best_so_far*min_relative_dif){
+         patience = patience - 1
+         if(patience <= 0){
+           print(sprintf("Best error during training: %f", best_so_far))
+           print(sprintf("Best error in 5 previous iterations %f", best_previously))
+           print(errors[(length(errors)-5):length(errors)])
+           print("Patience (which you can control) elapsed, bailing out")
+           break
+         }
+       }else{
+         # Any sufficiently large improvement resets the patience counter
+         patience = initial_patience
+       }
+
+
+       error_index = server$.getGNGErrorIndex()
+       best_so_far = min(best_previously, best_so_far)
+     }
+   }
- }
-
- terminate(server)
+
+   # A server that is still running finished (or bailed out) normally and is
+   # terminated here; one that already stopped by itself is reported as a failure.
+   if(server$isRunning()){
+     terminate(server)
+   }
+   else{
+     gmum.error(ERROR, "Training failed")
+   }
+ }, interrupt=
+ function(interrupt){
+   # User interrupt: make sure the server does not keep running
+   if(server$isRunning()){
+     terminate(server)
+   }
+
+ })
}
}
@@ -591,13 +646,21 @@ evalqOnLoad({
verbosity=0,
k=NULL
){
+
+ if(is(x, "data.frame")){
+ x = data.matrix(x);
+ }
+ gng <- NULL
+ call <- match.call(expand.dots = TRUE)
if(is.null(k)){
- .GNG(x=x, labels=labels, beta=beta, alpha=alpha, max.nodes=max.nodes,
+ gng <- .GNG(x=x, labels=labels, beta=beta, alpha=alpha, max.nodes=max.nodes,
eps.n=eps.n, eps.w=eps.w, max.edge.age=max.edge.age, type=gng.type.default(), training=training, lambda=lambda, verbosity=verbosity)
}else{
- .GNG(x=x, labels=labels, beta=beta, alpha=alpha, max.nodes=max.nodes,
+ gng <- .GNG(x=x, labels=labels, beta=beta, alpha=alpha, max.nodes=max.nodes,
eps.n=eps.n, eps.w=eps.w, max.edge.age=max.edge.age, type=gng.type.utility(k=k), training=training, lambda=lambda, verbosity=verbosity)
}
+ assign("call", call, gng)
+ gng
}
OptimizedGNG <<- function(x=NULL, labels=c(),
@@ -616,17 +679,28 @@ evalqOnLoad({
gmum.error(ERROR, "Incorrect range")
return
}
- .GNG(x=x, labels=labels, beta=beta, alpha=alpha, max.nodes=max.nodes,
-eps.n=eps.n, eps.w=eps.w, max.edge.age=max.edge.age, type=gng.type.optimized(min=value.range[1]*1.1, max=value.range[2]*1.1), training=training, lambda=lambda, verbosity=verbosity)
-
+ if(is(x, "data.frame")){
+ x = data.matrix(x);
+ }
+ call <- match.call(expand.dots = TRUE)
+ gng <- .GNG(x=x, labels=labels, beta=beta, alpha=alpha, max.nodes=max.nodes,
+eps.n=eps.n, eps.w=eps.w, max.edge.age=max.edge.age, type=gng.type.optimized(min=value.range[1], max=value.range[2]), training=training, lambda=lambda, verbosity=verbosity)
+ assign("call", call, gng)
+ gng
}
setGeneric("node",
function(x, gng_id, ...) standardGeneric("node"))
-
+
+ setGeneric("clustering",
+ function(object) standardGeneric("clustering"))
+
+
setGeneric("convertToGraph",
function(object, ...) standardGeneric("convertToGraph"))
-
+
+
+
setGeneric("run",
function(object, ...) standardGeneric("run"))
@@ -689,14 +763,34 @@ eps.n=eps.n, eps.w=eps.w, max.edge.age=max.edge.age, type=gng.type.optimized(min
summary.gng <<- function(object){
+ # Print a short textual summary of a GNG server object: number of nodes and
+ # mean error, the current iteration count, and the most recent error statistics.
print(sprintf("Growing Neural Gas, nodes %d with mean error %f",
object$getNumberNodes(), object$getMeanError()))
+ print(sprintf("Trained %d iterations", object$getCurrentIteration()))
print("Mean errors[s]: ")
- print(object$getErrorStatistics())
+ errors = object$getErrorStatistics()
+ # Only the tail of a long history is shown. Note that
+ # (length(errors)-10):length(errors) selects 11 entries, not 10.
+ if(length(errors) > 10){
+ errors = errors[(length(errors)-10):length(errors)]
+ }
+
+ # print() is the last expression, so the (possibly truncated) vector is also
+ # the function's return value.
+ print(errors)
+ }
+
+
+ # Autocompletion fix: completion candidates for `$` on Rcpp C++ objects are
+ # the names returned by Rcpp's internal complete() that match `pattern`,
+ # minus those starting with "." (dot-prefixed members stay hidden).
+
+ .GlobalEnv$`.DollarNames.C++Object` <- function( x, pattern ){
+   # Evaluate the match once instead of twice
+   matches <- grep(pattern, asNamespace("Rcpp")$complete(x), value = TRUE)
+   matches[substr(matches, 1, 1) != "."]
}
+ #.GlobalEnv$DollarNamesGmumr <- function( x, pattern ){
+ # asNamespace("Rcpp")$`.DollarNames.C++Object`(x, pattern)[! (substr(asNamespace("Rcpp")$`.DollarNames.C++Object`(x, pattern),1,1)==".")]
+ #}
+ #environment(.GlobalEnv$DollarNamesGmumr) <- .GlobalEnv
+ #setMethod( ".DollarNames", "C++Object", .GlobalEnv$DollarNamesGmumr )
+
setMethod("plot", "Rcpp_GNGServer", plot.gng)
setMethod("print", "Rcpp_GNGServer", print.gng)
setMethod("summary", "Rcpp_GNGServer", summary.gng)
+ setMethod("show", "Rcpp_GNGServer", summary.gng)
node.gng <<- function(x, gng_id){
x$getNode(gng_id)
@@ -708,6 +802,16 @@ eps.n=eps.n, eps.w=eps.w, max.edge.age=max.edge.age, type=gng.type.optimized(min
pause.gng <<- function(object){
object$pause()
+ # Wait until the server reports that it is no longer running, polling every
+ # `sleep` seconds and giving up after roughly 2 seconds in total.
+ n = 0.0
+ sleep = 0.1
+ while(object$isRunning()){
+   Sys.sleep(sleep)
+   n = n + 1
+   if(n > 2/sleep){
+     # Signal a real warning condition (instead of print()) so callers can
+     # catch or suppress it.
+     warning("GNG has not paused! Check status with gng$isRunning(). Something is wrong.", call. = FALSE)
+     return(invisible(NULL))
+   }
+ }
}
terminate.gng <<- function(object){
@@ -721,13 +825,19 @@ eps.n=eps.n, eps.w=eps.w, max.edge.age=max.edge.age, type=gng.type.optimized(min
errorStatistics.gng <<- function(object){
object$getErrorStatistics()
}
-
+
+ # Thin wrapper that delegates to the server object's clustering() method.
+ # The roxygen block for `clustering` in this file describes the result as a
+ # vector of node indexes assigned to the examples in the dataset.
+ clustering.gng <<- function(object){
+ object$clustering()
+ }
+
save.gng <<- function(object, filename){
+ # Always warn the caller that the training history is not written to the
+ # file, then delegate to the server object's save() method.
+ warning("Saving does not preserve currently training history")
object$save(filename)
}
load.gng <<- function(filename){
- new(GNGServer, filename)
+ # Restore a GNG server from `filename` via fromFileGNG().
+ # The warning used to repeat save.gng's "Saving does not preserve ..." text,
+ # which is confusing when loading; it now describes the loaded object.
+ warning("Loaded model does not include training history (it is not preserved when saving)")
+ fromFileGNG(filename)
}
@@ -750,7 +860,7 @@ eps.n=eps.n, eps.w=eps.w, max.edge.age=max.edge.age, type=gng.type.optimized(min
setMethod("meanError", "Rcpp_GNGServer", meanError.gng)
setMethod("errorStatistics", "Rcpp_GNGServer", errorStatistics.gng)
- #' Get number of nodes
+ #'Get number of nodes
setMethod("numberNodes" ,
"Rcpp_GNGServer",
function(object){
@@ -758,33 +868,114 @@ eps.n=eps.n, eps.w=eps.w, max.edge.age=max.edge.age, type=gng.type.optimized(min
})
-
+
convertToGraph.gng <- function(object){
- .gng.construct_igraph(object)
+ # Convert the current graph held by a GNG server object into an igraph graph.
+ # Steps: pause the server, map GNG node indexes to consecutive igraph vertex
+ # indexes, build an adjacency list, create the graph, then copy vertex
+ # attributes (v0, v1, v2, label, error, optionally utility) and the edge
+ # attribute `dists`. Returns the igraph object (an empty graph when the
+ # server has no nodes).
+ pause(object)
+
+ if(object$getNumberNodes() == 0){
+ return(graph.empty(n=0, directed=FALSE))
+ }
+
+ #Prepare index map. Rarely there is a difference in indexing
+ #due to a hole in memory representation of GNG graph (i.e.
+ #indexing in gng can be non-continuous)
+
+ # Warning: This is a hack. If there is a bug look for it here
+ # Both maps start as identity mappings; the GNG->igraph map is allocated with
+ # twice the last node index as its length.
+ indexesGNGToIGraph <- 1:(2*object$.getLastNodeIndex())
+ indexesIGraphToGNG <- 1:object$getNumberNodes()
+
+ # The maps are rebuilt only when the last node index differs from the node
+ # count; indexes for which node() returns a zero-length result are skipped.
+ if(object$.getLastNodeIndex() != object$getNumberNodes()){
+ igraph_index = 1
+ for(i in (1:object$.getLastNodeIndex())){
+ # The local variable `node` shadows the `node` generic by name; the call
+ # still resolves because R skips non-function bindings when looking up a
+ # function.
+ node <- node(object, i)
+ if(length(node) != 0){
+ indexesGNGToIGraph[i] = igraph_index
+ indexesIGraphToGNG[igraph_index] = i
+ igraph_index = igraph_index + 1
+ }
+ }
+ }
+
+ # Adjacency list in igraph indexing. Only neighbours with a GNG index greater
+ # than i are kept, so every undirected edge is listed once.
+ adjlist<-list()
+ for(i in 1:object$.getLastNodeIndex()){
+ node <- node(object, i)
+ if(length(node) != 0){
+
+ igraph_index = indexesGNGToIGraph[i]
+ #print(paste(object$.getLastNodeIndex(), length(indexesGNGToIGraph), object$isRunning()))
+ #print(paste(igraph_index, node$neighbours))
+ neighbours = node$neighbours[node$neighbours > i]
+ adjlist[[igraph_index]] <- sapply(neighbours, function(x){ indexesGNGToIGraph[x] })
+ } else{
+ #print("Empty node")
+ }
+ }
+
+ #print("Creating the graph")
+
+ g <- graph.adjlist(adjlist, mode = "all", duplicate=FALSE)
+ # Copy per-node attributes onto the matching igraph vertices
+ for(i in 1:object$.getLastNodeIndex()){
+ node <- node(object, i)
+ if(length(node) != 0){
+ igraph_index = indexesGNGToIGraph[i]
+ #TODO: it is more efficient to assign whole vectors
+ #TODO: refactor in whole code v0 v1 v2 to pos_1 pos_2 pos_3
+ # Only the first three position coordinates are exported
+ V(g)[igraph_index]$v0 <- node$pos[1]
+ V(g)[igraph_index]$v1 <- node$pos[2]
+ V(g)[igraph_index]$v2 <- node$pos[3]
+ V(g)[igraph_index]$label <- node$label
+ V(g)[igraph_index]$error <- node$error
+ if(!is.null(node$utility)){
+ V(g)[igraph_index]$utility = node$utility
+ }
+ }
+ }
+
+ # Add distance information
+ # For every edge, translate both igraph endpoints back to GNG indexes and ask
+ # the server for the distance between those two nodes.
+ dists <- apply(get.edges(g, E(g)), 1, function(x){
+ object$nodeDistance(indexesIGraphToGNG[x[1]], indexesIGraphToGNG[x[2]])
+ })
+ E(g)$dists = dists
+
+ g
}
+
-
- #' Get node descriptor from graph
- #'
- #' @note This function will dump graph to .graphml file on this first and then will remove
- #' the file. Be cautious with huge graphs!
- #'
- #' @param gng_id gng id of the node NOTE: nmight differ from one in exported igraph
+
setMethod("convertToGraph" ,
"Rcpp_GNGServer",
convertToGraph.gng)
-
-
- #' Find closest example
- #' @param x Vector of dimensionality of vertex
- #' @return gng_index of the closest example
+
+
+ setMethod("clustering" ,
+ "Rcpp_GNGServer",
+ clustering.gng)
+
setMethod("predict" ,
"Rcpp_GNGServer",
function(object, x){
- object$predict(x)
+ # Predict for a single example or for every row of a table.
+ #   object: the GNG server object
+ #   x: a vector (one example), or a matrix / data.frame with one example per row
+ # Returns the result of object$predict() for a vector, otherwise a vector
+ # with one prediction per row of x.
+ if( is.vector(x)){
+   object$predict(x)
+ }else{
+   if ( !is(x, "data.frame") && !is(x, "matrix") && !is(x,"numeric") ) {
+     gmum.error(ERROR_BAD_PARAMS, "Wrong target class, please provide data.frame, matrix or numeric vector")
+   }
+
+   if (!is(x, "matrix")) {
+     x <- data.matrix(x)
+   }
+
+   y <- rep(NA, nrow(x))
+
+   # seq_len() keeps the loop empty for zero-row input; 1:nrow(x) would
+   # iterate over c(1, 0) and index rows that do not exist.
+   for(i in seq_len(nrow(x))){
+     y[i] <- object$predict(x[i,])
+   }
+
+   y
+ }
})
-
-
+
+
insertExamples.gng <<- function(object, examples, labels=c()){
if(length(labels) == 0){
object$insertExamples(examples, vector(mode="numeric", length=0))
@@ -820,4 +1011,18 @@ eps.n=eps.n, eps.w=eps.w, max.edge.age=max.edge.age, type=gng.type.optimized(min
"Rcpp_GNGServer",
insertExamples.gng)
+
+ # Build a named list of forwarding functions, one per name found in
+ # GNGConfiguration@methods. substitute() replaces WHAT with the method name,
+ # so each entry is `function(...) .CppObject$<name>(...)`.
+ # NOTE(review): `methods` is not used in the lines visible in this hunk and
+ # shares its name with the `methods` package loaded at the top of the file -
+ # confirm the intended consumer.
+ methods = list()
+ for(name in names(GNGConfiguration@methods)){
+ methods[[name]] = eval(substitute(
+ function(...) .CppObject$WHAT(...), list(WHAT = as.name(name))))
+ }
+
+ # The "initialize" entry is replaced by a function with an empty body
+ methods[["initialize"]] <- function(...){
+
+ }
+
+
+
})
+
diff --git a/R/scripts/wine_dataset.R b/R/scripts/wine_dataset.R
index 117fc4e..3d924b8 100644
--- a/R/scripts/wine_dataset.R
+++ b/R/scripts/wine_dataset.R
@@ -1,14 +1,12 @@
-library("GrowingNeuralGas")
+library("gmum.r")
+
data(wine, package="rattle")
scaled.wine <- scale(wine[-1])
-#TODO: not 200 but 126
-
# Train in an offline manner
-gng <- GNG(scaled.wine, labels=wine$Type, max.nodes=200,
- training=gng.train.offline(max.iter=1000, min.improvement=0))
-devtools::install(".")
-devtools::load_all(".")
+gng <- GNG(scaled.wine, labels=wine$Type, max.nodes=20,
+ training=gng.train.offline(max.iter=10000, min.improvement=1e-1))
+
# Print number of nodes
numberNodes(gng)
@@ -20,3 +18,14 @@ mean(degree(ig))
# Plot using igraph layout
plot(gng, mode = gng.plot.2d,
vertex.color=gng.plot.color.label, layout=igraph::layout.fruchterman.reingold)
+
+# Print summary of trained object
+print(summary(gng))
+
+# Print contingency table of predicted vs. true labels
+labels <- as.vector(wine[,c("Type")], mode="double")
+# For every example: find the closest node with predict(), then read that
+# node's label. vapply() builds the result in one pass instead of growing a
+# vector with c() inside a loop, and seq_len() is safe for an empty dataset.
+# NOTE(review): the +1 presumably converts the index returned by predict()
+# into the id expected by node() - confirm.
+preds <- vapply(seq_len(nrow(scaled.wine)), function(i){
+  closest <- round(predict(gng, as.vector(scaled.wine[i,], mode="double")))
+  round(node(gng, closest + 1)$label)
+}, numeric(1))
+print(table(preds, labels))
diff --git a/README.md b/README.md
index ea5cd32..30a9ef8 100644
--- a/README.md
+++ b/README.md
@@ -53,17 +53,22 @@ You can also refer to R package documentation (pdf version
### Cluster wine dataset
+
+Clustering of the UCI wine dataset
+
+
+
In this example we will construct a clustering of UCI wine dataset using offline GNG.
```R
-library("GrowingNeuralGas")
+library(gmum.r)
# Load data
data(wine, package="rattle")
scaled_wine <- scale(wine[-1])
# Train in an offline manner
-gng <- GNG(scaled_wine, labels=wine$Type, max_nodes=20)
+gng <- GNG(scaled_wine, labels=wine$Type, max.nodes=20)
# Find closest node to vector [1,1,1]
predict(gng, c(1,1,1))
@@ -77,13 +82,9 @@ meanError(gng)
# Plot with first 2 coordinates as position
plot(gng, mode=gng.plot.2d.errors, vertex.color=gng.plot.color.cluster,
- layout=gng.plot.layout.v2d)
+ layout=gng.plot.layout.igraph.fruchterman)
```
-Reconstruction of the Buddha figure from Standford Repositories
-
-
-
##List of functions
This is not a full documentation. Please refer to R package documentation (pdf version
@@ -161,12 +162,3 @@ Feel free to contribute to the code. Contributions should be posted as pull requ
##Known issues
---------
* Package is not released for Windows yet.
-
-* Igraph plotting issues
-
- * Due to bug in R (https://bugs.r-project.org/bugzilla/show_bug.cgi?id=15327)
- on some OS you have to install liblzma-dev additionally.
-
- * Sometimes after installation of igraph you might have disabled graphml support
- (http://lists.gnu.org/archive/html/igraph-help/2011-03/msg00101.html). Try
- installing libxml2-dev package and reinstalling igraph.a
diff --git a/demo/extra/mnist.R b/demo/extra/mnist.R
index 0e5d61d..f4cded0 100644
--- a/demo/extra/mnist.R
+++ b/demo/extra/mnist.R
@@ -1,17 +1,12 @@
-####################################################################
-# Clustering MNIST dataset with GNG algorithm and running RF on it #
-# note: make sure you have in data mnist dataset #
- ####################################################################
+library(gmum.r)
+library(igraph)
+####################################################################
+# Clustering MNIST dataset with GNG algorithm #
+####################################################################
-# Load the MNIST digit recognition dataset into R
-# http://yann.lecun.com/exdb/mnist/
-# assume you have all 4 files and gunzip'd them
-# creates train$n, train$x, train$y and test$n, test$x, test$y
-# e.g. train$x is a 60000 x 784 matrix, each row is one digit (28x28)
-# call: show_digit(train$x[5,]) to see a digit.
-# brendan o'connor - gist.github.com/39760 - anyall.org
+### Helper functions ###
load_mnist <- function() {
load_image_file <- function(filename) {
ret = list()
@@ -35,155 +30,68 @@ load_mnist <- function() {
ret = y
ret
}
- train <<- load_image_file('./data/train-images.idx3-ubyte')
- test <<- load_image_file('./data/t10k-images.idx3-ubyte')
+ train <<- load_image_file('./data/train-images-idx3-ubyte')
+ test <<- load_image_file('./data/t10k-images-idx3-ubyte')
train <- train/255.0
test <- test/255.0
data = list()
- data$train = cbind(train, as.matrix(load_label_file('./data/train-labels.idx1-ubyte')))
- data$test = cbind(test, as.matrix(load_label_file('./data/t10k-labels.idx1-ubyte')))
-
+ data$train = cbind(train, as.matrix(load_label_file('./data/train-labels-idx1-ubyte')))
+ data$test = cbind(test, as.matrix(load_label_file('./data/t10k-labels-idx1-ubyte')))
data
}
-data <- load_mnist()
-
-#write.csv(data$train, 'mnist-train.csv')
-#write.csv(data$test, 'mnist-test.csv')
-
show_digit <- function(arr784, col=gray(12:1/12), ...) {
print(matrix(arr784, nrow=28, ncol=28)[1,])
image(matrix(arr784, nrow=28, ncol=28)[,28:1], col=col, ...)
}
-library("GrowingNeuralGas")
-library(igraph)
-library(testthat)
-
-max_nodes <- 1500
+### Configure and load examples ###
+train.examples <- 10000
+max.nodes <- 100
+max.iter = 500
+data <- load_mnist()
+X = data$train[1:train.examples,-785]
+Y = data$train[1:train.examples,785]
+X.test = data$test[,-785]
+Y.test = data$test[,785]
-# Construct gng object, NOTE: adding last column (target) as extra_data - this data won't be used
-# in training, but will be assigned to close vertex in the graph (technically speaking it WILL be used in training,
-# but will bear no effect on convergence)
-gng <- GNG(dataset_type=gng.dataset.bagging, max_nodes=max_nodes, dim=784, lazyheap_optimization=TRUE,
- experimental_vertex_extra_data=TRUE
- )
+### Train Optimized GNG ###
+gng <- OptimizedGNG(max.nodes=max.nodes, x=X, value.range=c(0,1),
+ labels=Y, training = gng.train.offline(max.iter, 1e-2))
-data <- load_mnist()
-data0 <- data$train
-data0[data0[,785]!=0.0,785] = 1.0
-gng$insert_examples(data0)
-### Run algorithm ###
-run(gng)
-
-number_nodes(gng)
-mean_error(gng)
-
-### Pause and dump ###
-pause(gng)
-GrowingNeuralGas::dump_model(gng, "mnist.trained.1500.bin")
+### Print some variables and save ###
+numberNodes(gng)
+meanError(gng)
+save.gng(gng, "mnist.trained.100.bin")
### Plot using igraph layout and coloring from extra vertex ###
plot(gng, mode=gng.plot.2d.errors,
- vertex.color=gng.plot.color.cluster, layout=gng.plot.layout.igraph.fruchterman.fast)
+ vertex.color=gng.plot.color.label, layout=gng.plot.layout.igraph.fruchterman.fast)
-# layout.fruchterman.reingold)
+### Show closest to some examples ###
+id=200
+show_digit(X.test[id,])
+show_digit(node(gng, predict(gng, X.test[id,])+1)$pos)
-### Test prediction of 0 ###
-for(i in 1:nrow(data$test)){
- if(data$test[i, 785]==0){
- predict(gng, data$test[i,-1])
- node(gng, predict(gng, data$test[i,-1]))
- print(node(gng, predict(gng, data$test[i,-1]))$extra_data)
- break
- }
-}
+id=300
+show_digit(X.test[id,])
+show_digit(node(gng, predict(gng, X.test[id,])+1)$pos)
+
+id=400
+show_digit(X.test[id,])
+show_digit(node(gng, predict(gng, X.test[id,])+1)$pos)
-### Test infomap community ###
-plot(gng, vertex.color=gng.plot.color.cluster,
- mode=gng.plot.2d, layout=igraph::layout.fruchterman.reingold)
-centr <- centroids2.gng(gng)
### Plot centroids ###
+centr <- centroids.gng(gng)
centroids_pos = lapply(centr, function(x){ node(gng, x)$pos})
par(mfrow=c(2,2))
show_digit(node(gng, centr[1])$pos)
show_digit(node(gng, centr[2])$pos)
show_digit(node(gng, centr[3])$pos)
-show_digit(node(gng, centr[4])$pos)
-
-
-#####################################################################
-# Code training classifier, not pertaining to Growing-Neural-Gas API#
-#####################################################################
-
-function train_classifier(){
- ### Transform data ###
- data <-load_mnist()
- data_transformed_train <- matrix(0, ncol=(length(centroids_pos) + 1), nrow=nrow(data$train))
- data_transformed_test <- matrix(0, ncol=(length(centroids_pos) + 1), nrow=nrow(data$test))
-
- for(i in 1:nrow(data$train)){
- data_transformed_train[i, 1:length(centroids_pos)] = unlist(lapply(centroids_pos,
- function(x){ sqrt(sum((x - data$train[i, 1:784]) ^ 2)) } ))
- data_transformed_train[i, length(centroids_pos)+1 ] = data$train[i, 785]
- }
- for(i in 1:nrow(data$test)){
- data_transformed_test[i, 1:length(centroids_pos)] = unlist(lapply(centroids_pos,
- function(x){ sqrt(sum((x - data$test[i, 1:784]) ^ 2)) } ))
- data_transformed_test[i, length(centroids_pos)+1 ] = data$test[i, 785]
- }
-
- write.csv(data_transformed_train, file='mnist.transformed.train.csv')
- write.csv(data_transformed_test, file='mnist.transformed.test.csv')
-
- ### Construct formula for nnet ###
- data_transformed_train = read.csv(file='mnist.transformed.train.csv')
- data_transformed_test = read.csv(file='mnist.transformed.test.csv')
-
- colnames(data_transformed_train) <- paste0("V", seq_len(ncol(data_transformed_train)))
- colnames(data_transformed_test) <- paste0("V", seq_len(ncol(data_transformed_test)))
- n <- colnames(data_transformed_train)
- last_col = paste("V", ncol(data_transformed_train), sep="")
- f <- as.formula(paste(paste(last_col, " ~", sep=""), paste(n[!n %in% last_col], collapse = " + ")))
- print(f)
-
-
- ### Train nnet ###
- install.packages("randomForest")
- library(randomForest)
- library(kernlab)
- library(klaR)
- library("nnet")
-
- rf <- randomForest(x=data_transformed_train[,1:26],
- y=as.factor(data_transformed_train[,27]), ntree=50)
-
- as.double(predict(rf, data_transformed_test[1,1:26]))-1
-
- k <- 0
- cor <- 0
- for(i in 1:nrow(data_transformed_test)){
-
- if(i%%100 == 0){
- print((cor+0.0)/k)
- print(i)
- }
-
- if((as.double(predict(rf, data_transformed_test[i,1:(ncol(data_transformed_test)-1)]))-1)
- == data_transformed_test[i,ncol(data_transformed_test)]){
- cor <- cor + 1
- }
-
- k <- k + 1
- }
-
-}
-
-
-
+show_digit(node(gng, centr[4])$pos)
\ No newline at end of file
diff --git a/doc/img/gng_readme.png b/doc/img/gng_readme.png
new file mode 100644
index 0000000..fd78680
Binary files /dev/null and b/doc/img/gng_readme.png differ
diff --git a/inst/include/gng/GNG.h b/inst/include/gng/GNG.h
deleted file mode 100644
index f65b4b7..0000000
--- a/inst/include/gng/GNG.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * File: GNGInclude.h
- * Author: staszek
- *
- * Created on 12 sierpień 2012, 11:56
- */
-#ifndef GNGINCLUDE_H
-#define GNGINCLUDE_H
-
-#include "utils/logger.h"
-
-
-#include "GNGGlobals.h"
-#include "UniformGrid.h"
-#include "GNGNode.h"
-#include "GNGGraph.h"
-#include "GNGDataset.h"
-#include "GNGAlgorithm.h"
-#include "GNGDefines.h"
-#include "GNGConfiguration.h"
-#include "GNGServer.h"
-
-#endif /* GNGINCLUDE_H */
-
diff --git a/inst/include/gng/GNGAlgorithm.h b/inst/include/gng/GNGAlgorithm.h
deleted file mode 100644
index cc6844d..0000000
--- a/inst/include/gng/GNGAlgorithm.h
+++ /dev/null
@@ -1,350 +0,0 @@
-/*
- * File: GNGAlgorithm.h
- * Author: Stanislaw "kudkudak" Jastrzebski
- *
- * Created on 11 sierpień 2012, 10:02
- */
-
-
-#ifndef GNGALGORITHM_H
-#define GNGALGORITHM_H
-
-#include
-
-#include "utils/threading.h"
-#include "utils/circular_buffer.h"
-
-#include "GNGGlobals.h"
-#include "GNGGraph.h"
-#include "GNGDataset.h"
-#include "UniformGrid.h"
-#include "GNGLazyErrorHeap.h"
-#include
-#include
-using namespace std;
-
-namespace gmum {
-
-/**
- * The main class of the implementation dealing with computations.
- * It should be agnostic of inner working (memory management etc.) of the graph and database.
- * Also should not be concerned with locking logic.
- */
-class GNGAlgorithm {
-public:
- typedef std::list Node;
-
- circular_buffer m_mean_error; //error of the network
- int m_lambda; //lambda parameter
- double m_eps_w, m_eps_n; //epsilon of the winner and of the neighbour
- int m_max_age;
- int m_max_nodes;
- int m_iteration;
-
- bool m_toggle_uniformgrid, m_toggle_lazyheap;
-
- double m_utility_k;
- int m_utility_option;
-
-
- double m_alpha, m_betha;
- double * m_betha_powers;
- int m_betha_powers_to_n_length;
- double * m_betha_powers_to_n;
- int m_betha_powers_size;
- double m_accumulated_error;
-
- int dim;
- boost::shared_ptr m_logger;
-
- std::map times;
-
- double m_density_threshold, m_grow_rate;
-
- /** Constants used by lazy heap implementation */
- int s, c;
-
- GNGGraph & m_g;
- GNGDataset * g_db;
- UniformGrid, Node, int> * ug;
- GNGLazyErrorHeap errorHeap;
-
- enum GngStatus {
- GNG_PREPARING, GNG_RUNNING, GNG_PAUSED, GNG_TERMINATED
- };
-
- GngStatus m_gng_status;
- bool running;
-
- enum UtilityOptions {
- None, BasicUtility
- };
-
-
- //For each iteration
- gmum::fast_mutex m_statistics_mutex;
-
-
- gmum::recursive_mutex status_change_mutex;
- gmum::gmum_condition status_change_condition;
-
- GngStatus gng_status() {
- return m_gng_status;
- }
-
-public:
- /** Run main loop of the algorithm*/
- void runAlgorithm();
-
- /**Construct main algorithm object, that will hold mid-results
- * @param alg_memory_lock When locked algorithm is not running anything that is memory dangerous
- * @param g GNGGraph object implementing graph interface
- * @param db GNGDataset object
- * @param boundingbox_origin Starting point for reference system
- * @param boundingbox_axis Axis lengths for reference system
- * @param l Starting box size for uniform grid. Advised to be set to axis[0]/4 (TODO: move to the end of parameters list)
- * @param max_nodes Maximum number of nodes
- * @param max_age Maximum age of edge
- * @param alpha See original paper(TODO: add description)
- * @param betha See original paper (TODO: add description)
- * @param lambda Every lambda new vertex is added
- * @param eps_v See original paper(TODO: add description)
- * @param eps_n See original paper (TODO: add description)
- * @param dim Dimensionality
- */
- GNGAlgorithm(GNGGraph * g, GNGDataset * db, double * boundingbox_origin,
- double * boundingbox_axis, double l, int max_nodes = 1000,
- int max_age = 200, double alpha = 0.95, double betha = 0.9995,
- double lambda = 200, double eps_w = 0.05, double eps_n = 0.0006,
- int dim = 3, bool uniformgrid_optimization = true,
- bool lazyheap_optimization = true, unsigned int utility_option =
- GNGConfiguration::UtilityOff, double utility_k = -1,
- boost::shared_ptr logger = boost::shared_ptr());
-
- ///Retrieve closest node's gng_index to the example
- int predict(const std::vector &);
-
- /** Start algorithm loop */
- void run() {
- this->m_gng_status = GNG_RUNNING;
- this->status_change_condition.notify_all();
- }
-
- /** Pause algorithm loop */
- void pause() {
- this->m_gng_status = GNG_PAUSED;
- this->status_change_condition.notify_all();
- }
-
- /** Terminate the algorithm */
- void terminate() {
- this->m_gng_status = GNG_TERMINATED;
- this->status_change_condition.notify_all();
- }
-
- void setMaxNodes(int value) {
- m_max_nodes = value;
- }
-
- int getIteration() const{
- return m_iteration;
- }
-
- double getMeanError() {
-
- gmum::scoped_lock alg_lock(m_statistics_mutex);
- DBG(m_logger, 3, gmum::to_string(m_mean_error.size()));
- if(m_mean_error.size() == 0){
-
- return std::numeric_limits::max();
- }else{
-
- return m_mean_error[m_mean_error.size()-1];
- }
- }
-
- vector getMeanErrorStatistics() {
- gmum::scoped_lock alg_lock(m_statistics_mutex);
- if(m_mean_error.size() == 0){
- return vector(1, std::numeric_limits::max());
- }else{
- return vector(m_mean_error.begin(), m_mean_error.end());
- }
- }
-
- double calculateAccumulatedError();
-
- void testAgeCorrectness();
-
- virtual ~GNGAlgorithm() {
- delete[] m_betha_powers_to_n;
- delete[] m_betha_powers;
- }
-
-private:
-
- void resetUniformGrid(double * orig, double *axis, double l) {
- ug->purge(orig, axis, l);
- int maximum_index = m_g.get_maximum_index();
-
- REP(i, maximum_index + 1)
- {
- if (m_g.existsNode(i))
- ug->insert(m_g[i].position, m_g[i].nr);
- }
- }
-
- GNGNode ** LargestErrorNodesLazy();
-
- GNGNode ** LargestErrorNodes();
-
- GNGNode ** TwoNearestNodes(const double * position);
-
- void randomInit();
-
- void addNewNode();
-
- double adapt(const double * ex, const double * extra);
-
- void resizeUniformGrid();
-
- bool stoppingCriterion() {
- return m_g.get_number_nodes() > m_max_nodes;
- }
-
- void increaseErrorNew(GNGNode * node, double error) {
- fixErrorNew(node);
- assert(m_lambda - s <= m_betha_powers_size -1);
- node->error += m_betha_powers[m_lambda - s] * error;
- errorHeap.updateLazy(node->nr);
- }
-
- void fixErrorNew(GNGNode * node) {
-
- if (node->error_cycle == c)
- return;
-
- while(c - node->error_cycle > m_betha_powers_to_n_length - 1){
- DBG_2(m_logger, 5, "Recreating m_betha_powers_to_n");
- delete[] m_betha_powers_to_n;
- m_betha_powers_to_n_length *= 2;
- m_betha_powers_to_n = new double[m_betha_powers_to_n_length];
- REP(i, m_betha_powers_to_n_length)
- m_betha_powers_to_n[i] = std::pow(m_betha, m_lambda * (double) (i));
- }
-
- assert(c - node->error_cycle <= m_betha_powers_to_n_length -1);
-
- node->error = m_betha_powers_to_n[c - node->error_cycle] * node->error;
- node->error_cycle = c;
-
- }
-
- double getMaximumError() const {
- double max_error = 0;
- int maximum_index = m_g.get_maximum_index();
- REP(i,maximum_index+1)
- if (m_g.existsNode(i))
- max_error = std::max(max_error, m_g[i].error);
- return max_error;
- }
-
- void decreaseAllErrorsNew() {
- return;
- }
-
- void decreaseErrorNew(GNGNode * node) {
- fixErrorNew(node);
- node->error = m_alpha * node->error;
- errorHeap.updateLazy(node->nr);
- }
-
- void setErrorNew(GNGNode * node, double error) {
- node->error = error;
- node->error_cycle = c;
- errorHeap.insertLazy(node->nr);
- }
-
- void increaseError(GNGNode * node, double error) {
- node->error += error;
- }
-
- void decreaseAllErrors() {
- int maximum_index = m_g.get_maximum_index();
- REP(i,maximum_index+1)
- if (m_g.existsNode(i))
- m_g[i].error = m_betha * m_g[i].error;
- }
-
- void decreaseError(GNGNode * node) {
- node->error = m_alpha * node->error;
- }
-
- void setError(GNGNode * node, double error) {
- node->error = error;
- }
-
- // Note: this code is not optimal and is inserted only for research purposes
-
- double getUtility(int i) {
- return m_g[i].utility;
- }
-
- void setUtility(int i, double u) {
- m_g[i].utility = u;
- }
-
- void utilityCriterionCheck() {
-
- if (m_g.get_number_nodes() < 10)
- return; //just in case
-
- double max_error = this->getMaximumError();
- int maximum_index = m_g.get_maximum_index();
-
- double min_utility = 100000000;
- int min_utility_index = -1;
-
- for (int i = 0; i <= maximum_index; ++i)
- if (min_utility > getUtility(i)) {
- min_utility = getUtility(i);
- min_utility_index = i;
- }
-
- if (m_g.existsNode(min_utility_index) && max_error / getUtility(min_utility_index) > m_utility_k) {
-
- DBG(m_logger,2, "GNGAlgorithm:: removing node with utility "+gmum::to_string(getUtility(min_utility_index)) + " max error "+gmum::to_string(max_error));
-
- DBG(m_logger,2,gmum::to_string(max_error));
-
- GNGNode::EdgeIterator edg = m_g[min_utility_index].begin();
- while (edg != m_g[min_utility_index].end()) {
- int nr = (*edg)->nr;
- edg = m_g.removeUDEdge(min_utility_index, nr);
- }
-
- m_g.deleteNode(min_utility_index);
- setUtility(min_utility_index, 0);
- }
-
- }
- void decreaseAllUtility() {
- int maximum_index = m_g.get_maximum_index();
- for (int i = 0; i <= maximum_index; ++i)
- if (m_g.existsNode(i))
- setUtility(i, getUtility(i) * (m_betha));
- }
-};
-
-/**Design hack for passing distance function dist(index, position)*/
-struct GNGGraphAccessHack {
- static GNGGraph * pool;
- static double dist(int index, double *position) {
- return pool->get_euclidean_dist((*pool)[index].position, position);
- }
-};
-
-
-}
-
-#endif
diff --git a/inst/include/gng/GNGConfiguration.h b/inst/include/gng/GNGConfiguration.h
deleted file mode 100644
index 5fa28ba..0000000
--- a/inst/include/gng/GNGConfiguration.h
+++ /dev/null
@@ -1,290 +0,0 @@
-/*
- * File: GNGConfiguration.h
- * Author: staszek
- *
- * Created on October 17, 2013, 8:11 PM
- */
-
-#ifndef GNGCONFIGURATION_H
-#define GNGCONFIGURATION_H
-
-#ifdef RCPP_INTERFACE
-#include
-using namespace Rcpp;
-#endif
-
-#include "utils/utils.h"
-#include
-
-
-
- /**
- *
- * Configuration of GNG algorithm/server
- * TODO: add detailed description for parameters
- */
- class GNGConfiguration{
- public:
- enum GraphNodeStorage{
- NoneGraphNodeStorage,
- SharedMemory,
- RAMMemory
- } graph_storage;
-
-
- enum DatasetType{
- NoneDatasetTypeinit,
- DatasetSeq,
- DatasetSampling,
- DatasetSamplingProb
- };
-
- enum ExperimentalUtility{
- UtilityOff,
- UtilityBasicOn
- };
-
-
- /**Maximum number of nodes*/
- int max_nodes;//=1000;
- /**Uniform grid optimization*/
- bool uniformgrid_optimization;//=true,lazyheap=true;
- /**Lazy heap optimization*/
- bool lazyheap_optimization;
- /**Bounding box specification*/
-
-
- /**Dimensionality of examples*/
- int dim;
-
-
- std::vector orig;
- std::vector axis;
- /**Max edge age*/
- int max_age;//=200;
- /**Alpha coefficient*/
- double alpha;//=0.95;
- /**Beta coefficient*/
- double beta;//=0.9995;
- /**Lambda coefficient*/
- double lambda;//=200;
- /**Epsilon w (eps_w). How strongly to move the winning node*/
- double eps_w;//=0.05;
- /**Memory bound*/
- int graph_memory_bound;
- /**Epsilon n*/
- double eps_n;//=0.0006;
-
- int verbosity;
-
- /**Pseudodistance function used (might be non metric)*/
- int distance_function;
-
- /**Type of dataset used (see DatasetType); unsigned int for compatibility with Rcpp**/
- unsigned int datasetType;
-
- /**Initial reserve memory for nodes */
- int starting_nodes;
-
- ///Utility constant
- double experimental_utility_k;
-
- ///Utility option. Currently only the simple utility is supported
- int experimental_utility_option;
-
- public:
-
-
- /** Builds a configuration filled with the default parameter values. */
- GNGConfiguration(){
-     // Logging and initial pool size
-     verbosity = 10;
-     starting_nodes = 100;
-
-     // Experimental utility extension, switched off by default
-     experimental_utility_k = 1.5;
-     experimental_utility_option = (int)UtilityOff;
-
-     graph_storage = RAMMemory;
-
-     // dim is assigned ahead of setBoundingBox(), as in the original ordering
-     dim = 3;
-     setBoundingBox(0, 1);
-
-     // Dataset handling and memory limits
-     datasetType = DatasetSampling;
-     graph_memory_bound = 200000*sizeof(double);
-     distance_function = gmum::GNGGraph::Euclidean;
-
-     // Optional optimizations
-     uniformgrid_optimization=false;
-     lazyheap_optimization=false;
-
-     // Core algorithm parameters
-     max_nodes=1000;
-     max_age=200;
-     lambda=200;
-     alpha=0.95;
-     beta=0.9995;
-     eps_w=0.05;
-     eps_n=0.0006;
- }
-
-
- /**
-  * Reads the configuration fields from `in` with operator>>, one field after
-  * another in the fixed order listed below. The leading fields (up to dim)
-  * are read in the same order in which serialize() writes them.
-  */
- void deserialize(std::istream & in){
- ///Utility constant
- in >> experimental_utility_k;
-
- ///Utility option. Currently only the simple utility is supported
- in >> experimental_utility_option;
-
- /**Maximum number of nodes*/
- in >> max_nodes;//=1000;
- /**Uniform grid optimization*/
- in >> uniformgrid_optimization;//=true,lazyheap=true;
- /**Lazy heap optimization*/
- in >> lazyheap_optimization;
- /**Bounding box specification*/
-
- /**Dimensionality of examples*/
- in >> dim;
-
- REPORT(dim);
-
- // Both bounding-box vectors are re-created with dim entries set to 0
- orig = vector(dim, 0);
- axis = vector(dim, 0);
-
- // NOTE(review): the loop header below looks truncated in this copy (the
- // condition and the stream read are fused); presumably it reads axis[i]
- // and orig[i] for each of the dim dimensions - confirm against the original.
- for(int i=0;i>axis[i]>>orig[i];
- }
- /**Max edge age*/
- in >> max_age;//=200;
- /**Alpha coefficient*/
- in >> alpha;//=0.95;
- /**Beta coefficient*/
- in >> beta;//=0.9995;
- /**Lambda coefficient*/
- in >> lambda;//=200;
- /**Epsilon w. How strongly to move the winning node*/
- in >> eps_w;//=0.05;
- /**Memory bound*/
- in >> graph_memory_bound;
- /**Epsilon n*/
- in >> eps_n;//=0.0006;
-
- in >> verbosity;
-
- /**Pseudodistance function used (might be non metric)*/
- in >> distance_function;
-
-
- /**Type of dataset used; unsigned int for compatibility with Rcpp**/
- in >> datasetType;
-
- /**Initial reserve memory for nodes */
- in >> starting_nodes;
- }
-
- void serialize(std::ostream & out){
- ///Utility constant
- out << experimental_utility_k << endl;
-
- ///Utility option. Currently supported simples utility
- out << experimental_utility_option<< endl;
-
- /**Maximum number of nodes*/
- out << max_nodes<< endl;//=1000;
- /**Uniform grid optimization*/
- out << uniformgrid_optimization<< endl;//=true,lazyheap=true;
- /**Lazy heap optimization*/
- out << lazyheap_optimization<< endl;
- /**Bounding box specification*/
-
- /**Dimensionality of examples*/
- out << dim<< endl;
-
- REPORT(dim);
-
- for(int i=0;i();
- axis = vector();
- for(int i=0;i 3 or datasetType <= 0){
- cerr<<"ERROR: wrong database specified\n";
-
- return false;
- }
- if(! (dim < 20 || ! uniformgrid_optimization)){
-
- cerr<<"WARNING: It might be too big dimensionality for OptimizedGNG."
- "OptimizedGNG works best for smaller dimensionality dataset"
- "Consider using PCA or other dim. reduction technique"
- "\n";
-
- }
- if(! (distance_function==gmum::GNGGraph::Euclidean || ! uniformgrid_optimization)){
-
- cerr<<"ERROR: You can use only Euclidean distance function with uniformgrid optimization\n";
- return false;
- }
- if(! (!uniformgrid_optimization or (dim == axis.size() && dim == orig.size()))){
-
- cerr<<"ERROR: dimensionality doesn't agree with axis and orig"<
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-#include "utils/threading.h"
-#include "utils/utils.h"
-
-#include "GNGNode.h"
-#include "GNGGlobals.h"
-
-using namespace std;
-
-namespace gmum {
-/** Graph interface for GNGAlgorithm.
- *
- *
- */
-class GNGGraph {
-public:
-
- /** Distance functions selectable for get_dist() */
- enum GNGDistanceFunction {
- Euclidean, Cosine
- };
-
- virtual ~ GNGGraph() {
- }
- /** Lock from unsafe operations
- * @note It ensures that operations won't fail (in worst case block)
- * Mostly used for blocking regrowing
- */
- virtual void lock() {
- //Do nothing by default
- }
-
- /** Unlock for unsafe operations */
- virtual void unlock() {
- //Do nothing by default
- }
-
- /** Specific to the GNG graph: each node is assigned an index.
- * Returns the maximum node index.
- */
- virtual unsigned int get_maximum_index() const = 0;
-
- /*
- * @return True if a node with the given index exists in the graph
- */
- virtual bool existsNode(unsigned int) const = 0;
-
- /** @return dimensionality reported by the implementation (RAMGNGGraph returns the number of coordinates per node position) */
- virtual int get_dim() const = 0;
-
- /** Access to the node stored at index i */
- virtual GNGNode & operator[](int i) = 0;
-
- /** @return number of nodes as tracked by the implementation */
- virtual unsigned int get_number_nodes() const = 0;
-
- //TODO: move it to GNGNode
- /** Distance between the nodes with indices a and b */
- virtual double get_dist(int a, int b) = 0;
-
- //TODO: move it to GNGNode
- /** Euclidean variant of the distance between two positions (RAMGNGGraph returns the sum of squared differences, without a square root) */
- virtual double get_euclidean_dist(const double * pos_1, const double * pos_2) const= 0;
-
- //TODO: move it to GNGNode
- /** Distance between two positions */
- virtual double get_dist(const double *pos_a, const double *pos_b) const = 0;
-
- /* Initialize node with position attribute */
- virtual int newNode(const double *position) = 0;
-
- /** Deletes node x (RAMGNGGraph returns true only when the node existed) */
- virtual bool deleteNode(int x) = 0;
-
- /** @return whether an edge from a to b is present */
- virtual bool isEdge(int a, int b) const = 0;
-
- //Again:dependency on GNGNode::
- /** Removes the undirected edge between a and b and returns an edge iterator of node a */
- virtual typename GNGNode::EdgeIterator removeUDEdge(int a, int b) = 0;
-
- /** Adds an undirected edge between a and b */
- virtual void addUDEdge(int a, int b) = 0;
-
- /** Adds a directed edge from a to b */
- virtual void addDEdge(int a, int b) = 0;
-
- /** Textual dump of the node pool; the default implementation returns an empty string */
- virtual std::string reportPool() {
- return "";
- }
-
- /** Restores the graph from `in` */
- virtual void load(std::istream & in) = 0;
- /** Writes the graph to `out` */
- virtual void serialize(std::ostream & out) = 0;
-
-
-};
-
-/* @note: Not thread safe. To be used from one thread only!
- *
- * Can be used by an external thread, but it has to be locked. All operations moving
- * a whole memory sector will unlock. Elegant solution: locking interface
- *
- * It allows for easy erasing of nodes.
- *
- * Node: implements GNGNode interface
- * Edge: implements GNGEdge interface
- * Mutex: implements lock and unlock interface
- *
- * TODO: change GNGEdge* to GNGEdge (problems with rev)
- * TODO: edges ~ gng_dim - maybe use this for better efficiency?
- */
-template class RAMGNGGraph: public GNGGraph {
- /** Mutex provided externally for synchronization*/
- Mutex * mutex;
-
- std::vector g;
- std::vector occupied;
-
- //TODO: change to vector
- std::vector positions; //as continuous array for speed/caching purposes, could be vector
-
- int maximum_index;
- unsigned int nodes;
-
- unsigned int gng_dim;
-
- boost::shared_ptr m_logger;
-
-public:
- /** Indicates next free vertex */
- std::vector next_free; //TODO: has to be public : /
- int first_free;
-
- GNGDistanceFunction dist_fnc;
-
- typedef typename Node::EdgeIterator EdgeIterator;
-
- /**
-  * Creates a graph with a preallocated pool of node slots.
-  *
-  * @param mutex mutex used by lock() and unlock()
-  * @param dim number of coordinates per node position
-  * @param initial_pool_size number of slots to preallocate; values below 1 are raised to 1
-  * @param dist_fnc distance function used by get_dist()
-  * @param logger logger handed to the DBG macros
-  */
- RAMGNGGraph(Mutex * mutex, unsigned int dim, int initial_pool_size, GNGDistanceFunction dist_fnc = Euclidean,
-         boost::shared_ptr logger = boost::shared_ptr()) :
-         maximum_index(-1), mutex(mutex), gng_dim(dim), first_free(-1), nodes(0), dist_fnc(dist_fnc), m_logger(logger) {
-
-     // An empty pool would make the free-list set-up below write next_free[-1]
-     if (initial_pool_size < 1)
-         initial_pool_size = 1;
-
-     positions.resize(initial_pool_size * gng_dim);
-
-     //Initialize graph data structures
-     g.resize(initial_pool_size);
-     occupied.resize(initial_pool_size);
-     next_free.resize(initial_pool_size);
-
-     for (int i = 0; i < initial_pool_size; ++i) {
-         g[i].reserve(gng_dim);
-         occupied[i] = false;
-         // Each free slot points at its successor; the last one ends the list
-         next_free[i] = (i + 1 < initial_pool_size) ? i + 1 : -1;
-     }
-     first_free = 0;
-
- }
-
- /** This is specific for GNG Graph - e
- * each node is assigned index. It fetches maximum node index
- */
-
- /** Returns the stored maximum index converted to unsigned int (the constructor initialises it to -1). */
- virtual unsigned int get_maximum_index() const {
-     return static_cast<unsigned int>(maximum_index);
- }
-
- /* @note NOT THREAD SAFE - USE ONLY FROM ALGORITHM THREAD OR LOCK
-  * @return True if index i lies inside the pool and its slot is occupied
-  *
-  * The bound is the pool size rather than the node count: once a node has
-  * been deleted, an occupied slot can have an index >= nodes.
-  */
- virtual bool existsNode(unsigned i) const {
-     return i < occupied.size() && occupied[i];
- }
-
- ///NOT THREAD SAFE - USE ONLY FROM ALGORITHM THREAD OR LOCK
- /// Returns true when one of the edges stored at node a has nr equal to b.
- bool isEdge(int a, int b) const {
-
- // Walk the edges of node a and stop at the first one that targets b
- FOREACH(edg, g[a])
- {
- if ((*edg)->nr == b)
- return true;
- }
- return false;
- }
-
- ///NOT THREAD SAFE - USE ONLY FROM ALGORITHM THREAD OR LOCK
- /// Returns the position pointer stored in the node with index nr.
- const double *getPosition(int nr) const {
-     const Node & node = g[nr];
-     return node.position;
- }
-
- /** Returns the node counter maintained by newNode() and deleteNode(). */
- unsigned int get_number_nodes() const {
-     return nodes;
- }
-
- ///NOT THREAD SAFE - USE ONLY FROM ALGORITHM THREAD OR LOCK
- /// Unchecked access to the node slot with index i.
- Node &
- operator[](int i) {
-     Node & node = g[i];
-     return node;
- }
-
- ///NOT THREAD SAFE - USE ONLY FROM ALGORITHM THREAD OR LOCK
- /// Distance between the positions of the nodes with indices a and b.
- double get_dist(int a, int b) {
-     const double * from = g[a].position;
-     const double * to = g[b].position;
-     return get_dist(from, to);
- }
-
- /** Sum of squared coordinate differences of pos_a and pos_b over gng_dim coordinates (no square root is taken). */
- double get_euclidean_dist(const double *pos_a, const double *pos_b) const {
-     double squared_sum = 0;
-     for (int k = 0; k < this->gng_dim; ++k) {
-         const double diff = pos_a[k] - pos_b[k];
-         squared_sum += diff * diff;
-     }
-     return squared_sum;
- }
-
- ///NOT THREAD SAFE - USE ONLY FROM ALGORITHM THREAD OR LOCK
- /**
-  * Distance between two positions according to dist_fnc.
-  *
-  * Euclidean: sum of squared coordinate differences (no square root).
-  * Cosine: 1 - dot(a, b) / (|a| * |b|).
-  *
-  * @throws BasicException when dist_fnc is neither Euclidean nor Cosine
-  */
- double get_dist(const double *pos_a, const double *pos_b) const {
-     if (dist_fnc == Euclidean)
-         return get_euclidean_dist(pos_a, pos_b);
-
-     if (dist_fnc == Cosine) {
-         double norm_1 = 0, norm_2 = 0, distance = 0;
-
-         for (unsigned int i = 0; i < this->gng_dim; ++i) {
-             norm_1 += (pos_a[i]) * (pos_a[i]);
-             norm_2 += (pos_b[i]) * (pos_b[i]);
-             distance += pos_a[i] * pos_b[i];
-         }
-
-         norm_1 = sqrt(norm_1);
-         norm_2 = sqrt(norm_2);
-         return 1.0 - distance / (norm_1 * norm_2);
-     }
-
-     // Control used to fall off the end of this non-void function here
-     throw BasicException("Unknown distance function");
- }
-
- ///NOT THREAD SAFE - USE ONLY FROM ALGORITHM THREAD OR LOCK
- /// Takes the first free slot (growing the pool when none is left), initialises
- /// it with a copy of `position` and returns the slot index.
- int newNode(const double *position) {
-     if (first_free == -1) {
-         DBG(m_logger,10, "RAMGNGGraph::newNode() growing pool");
-         this->resizeGraph();
-     }
-
-     const int slot = first_free; //the same as in g_node_pool
-
-     if (slot > maximum_index)
-         maximum_index = slot;
-
-     //Assuming it is clear here
-#ifdef GMUM_DEBUG
-     assert(g[slot].size() == 0);
-#endif
-
-     // Initialize node
-     Node & node = g[slot];
-     node.position = &positions[slot * gng_dim];
-     node.nr = slot;
-     node.dim = gng_dim;
-     node._position_owner = false;
-     node.edgesCount = 0;
-     node.utility = 0.0;
-     node.extra_data = 0.0;
-     occupied[slot] = true;
-
-     // Pop the slot from the free list
-     first_free = next_free[slot];
-
-     //the node counter is incremented late so that it is not left wrong by an accidental failure
-     ++this->nodes;
-     memcpy(&(node.position[0]), position,
-             sizeof(double) * (this->gng_dim)); //param
-
-     //TODO: this should be tracked by GNGAlgorithm
-     node.error = 0.0;
-     node.error_cycle = 0;
-
-     return slot;
- }
-
- ///NOT THREAD SAFE - USE ONLY FROM ALGORITHM THREAD OR LOCK
- /// Frees slot x under the lock; returns true when the node existed, false otherwise.
- bool deleteNode(int x) {
-     bool removed = false;
-
-     this->lock();
-     if (existsNode(x)) {
-         //TODO: add automatic erasing edges
-         assert(g[x].size() == 0);
-
-         --nodes;
-         if (x == maximum_index)
-             --maximum_index;
-
-         // Push the slot onto the head of the free list
-         occupied[x] = false;
-         next_free[x] = first_free;
-         first_free = x;
-
-         removed = true;
-     }
-     this->unlock();
-
-     return removed;
- }
-
- ///NOT THREAD SAFE - USE ONLY FROM ALGORITHM THREAD OR LOCK
- /// Removes the undirected edge between a and b: erases the edge stored at a,
- /// its reverse stored at b, and deletes both Edge objects.
- /// Returns the iterator produced by erasing from g[a], or g[a].end() when
- /// no edge from a to b is found.
- EdgeIterator removeUDEdge(int a, int b) {
-
- this->lock();
-
- FOREACH(edg, g[a])
- {
- if ((*edg)->nr == b) {
- // Keep raw pointers to both halves; the list entries are erased before deletion
- Edge *ptr_rev = (Edge *) ((**edg).rev);
- Edge *ptr = (Edge *) (&(**edg));
-
- // The reverse edge is located in g[b] by pointer value
- g[b].erase(find(g[b].begin(), g[b].end(), (*edg)->rev));
- edg = g[a].erase(edg);
-
- delete ptr;
- delete ptr_rev;
-
- g[a].edgesCount--;
- g[b].edgesCount--;
- this->unlock();
- return edg;
- }
- }
-
- this->unlock();
- DBG(m_logger,10, "ExtGraphNodeManager()::removeEdge Not found edge!");
- return g[a].end();
-
- }
-
- ///NOT THREAD SAFE - USE ONLY FROM ALGORITHM THREAD OR LOCK
- /**
-  * Adds an undirected edge between a and b, stored as two Edge objects that
-  * reference each other through rev.
-  *
-  * @throws const char* ("Added loop to the graph") when a == b
-  */
- void addUDEdge(int a, int b) {
-
-     // Validate before taking the lock so that the throw cannot leave it held
-     if (a == b)
-         throw "Added loop to the graph";
-
-     this->lock();
-
-     g[a].push_back(new Edge(b));
-     g[b].push_back(new Edge(a));
-
-     // Link the two halves of the undirected edge to each other
-     g[a].back()->rev = g[b].back();
-     g[b].back()->rev = g[a].back();
-
-     g[a].edgesCount++;
-     g[b].edgesCount++;
-     this->unlock();
-
- }
-
- ///NOT THREAD SAFE - USE ONLY FROM ALGORITHM THREAD OR LOCK
- /// Directed edges are not supported: always throws BasicException.
- void addDEdge(int a, int b) {
-     (void) a; // unused
-     (void) b; // unused
-     throw BasicException("Not implemented");
- }
-
- ///NOT THREAD SAFE - USE ONLY FROM ALGORITHM THREAD OR LOCK
- /// Builds one text line per occupied slot: the node, a colon, then every
- /// edge as "target[reverse target]," and a newline.
- std::string reportPool() {
-     std::stringstream report;
-     for (unsigned int slot = 0; slot < g.size(); ++slot) {
-         if (!occupied[slot])
-             continue;
-
-         report << to_str(g[slot]) << ":";
-         FOREACH(it2, g[slot])
-         {
-             report << to_str((*it2)->nr) << "["
-                    << to_str((((*it2)->rev))->nr) << "],";
-         }
-         report << "\n";
-     }
-     return report.str();
- }
-
- /** Deletes the Edge objects owned by every occupied slot. */
- ~RAMGNGGraph() {
-     for (int slot = 0; slot < g.size(); ++slot) {
-         if (!occupied[slot])
-             continue;
-         FOREACH(edg, g[slot])
-         {
-             delete *edg;
-         }
-     }
- }
-
- /** Returns gng_dim, the number of coordinates per node position, as int. */
- virtual int get_dim() const {
-     return static_cast<int>(gng_dim);
- }
-
- /** Locks the externally provided mutex. */
- virtual void lock() {
-     this->mutex->lock();
- }
-
- /** Unlocks the externally provided mutex. */
- virtual void unlock() {
-     this->mutex->unlock();
- }
-
-
- /*
- * Writes the graph as one flat vector of doubles, under the lock.
- * Layout: header [pool size] [maximum_index + 1] [gng_dim] [first_free] [nodes],
- * then per slot a 0/1 flag followed by the vertex data when the node exists,
- * then per slot the edge dump (a single 0 for a missing node),
- * then next_free for every slot.
- * Original format note: [N] [gng_dim] N* [0/1 + vertex] N*[ [l] l*[gng_idx]]
- */
- void serialize(std::ostream & output) {
- this->lock();
-
-
- vector S;
- S.reserve(10000);
-
- //Header
- S.push_back((double) (g.size()));
- S.push_back((double) (maximum_index + 1));
- S.push_back((double) gng_dim);
- S.push_back((double) first_free);
- S.push_back((double) nodes);
-
- DBG(m_logger,7, "GNGGraph::Serializing nodes");
- //Nodes
- for (int i = 0; i < g.size(); ++i) {
- if (existsNode(i)) {
- // Flag 1 marks an existing node; its vertex data follows
- S.push_back((double) 1);
- vector serialized_node = g[i].dumpVertexData();
-
- std::copy(serialized_node.begin(), serialized_node.end(),
- std::back_inserter(S));
- } else {
- S.push_back((double) 0);
- }
- } DBG(m_logger,7, "GNGGraph::Serializing edges");
- //Edges
- for (int i = 0; i < g.size(); ++i) {
- if (existsNode(i)) {
- vector serialized_node = g[i].dumpEdges();
- std::copy(serialized_node.begin(), serialized_node.end(),
- std::back_inserter(S));
- } else {
- // load() reads this value as an edge count, so 0 means "no edges"
- S.push_back((double) 0);
- }
- } DBG(m_logger,7, "GNGGraph::Serializing nextFree");
- //NextFree
- for (int i = 0; i < g.size(); ++i) {
- S.push_back((double) next_free[i]);
- } DBG(m_logger,7, "GNGGraph::Serialize;:writing out");
-
- _write_bin_vect(output, S);
-
-
- this->unlock();
- }
- /*
- * Restores the graph from the flat vector of doubles written by serialize():
- * reads the five header values, clears and re-sizes all pools, then reads the
- * nodes, the edges and the next_free list, all under the lock.
- */
- void load(std::istream & input) {
- this->lock();
-
- DBG(m_logger,7, "GNGGraph:: loading ");
-
- vector S = _load_bin_vector(input);
- vector::iterator itr = S.begin();
- //Header
- unsigned int bufor_size = (int) *itr;
- // serialize() stores maximum_index + 1, hence the -1 here
- maximum_index = (int) *(++itr) - 1;
- gng_dim = (int) *(++itr);
- first_free = (int) *(++itr);
- nodes = (int) *(++itr);
-
- DBG(m_logger,5, "Read in "+to_str(bufor_size) +" sized graph with "+
- " max_index="+to_str(maximum_index)+" gng_dim="+to_str(gng_dim)+" "+
- "first_free="+to_str(first_free)+" nodes="+to_str(nodes)
- );
-
- // Drop the current contents before re-sizing to the stored pool size
- positions.clear();
- g.clear();
- next_free.clear();
- occupied.clear();
-
- occupied.resize(bufor_size);
- g.resize(bufor_size);
- next_free.resize(bufor_size);
- positions.resize((bufor_size + 1) * gng_dim);
-
- for (int i = 0; i < bufor_size; ++i) {
- occupied[i] = false;
- g[i].reserve(gng_dim + 2);
- }
-
- //Deserialize nodes
- for (int i = 0; i < g.size(); ++i) {
- // 0/1 flag written by serialize(): non-zero means the slot holds a node
- int tmp = (int) *(++itr);
- occupied[i] = (bool) tmp;
- if (occupied[i])
- g[i].loadVertexData(itr, gng_dim, &positions[i * gng_dim]);
-
- }
-
- //Deserialize edges
- for (int i = 0; i < g.size(); ++i) {
- int edges_length = (int) *(++itr);
-
- for (int j = 0; j < edges_length; ++j) {
- int gng_endpoint_index = (int) *(++itr);
- // addUDEdge() inserts both directions, so each pair is added only once (from its lower index)
- // NOTE(review): addUDEdge() calls lock() while this method already holds the lock;
- // assumes Mutex tolerates nested locking - confirm
- if (gng_endpoint_index > i)
- this->addUDEdge(i, gng_endpoint_index);
- }
- }
-
- //Deserialize nextFree
- for (int i = 0; i < g.size(); ++i) {
- next_free[i] = (int) *(++itr);
- }
-
-
- this->unlock();
- }
-
-private:
- ///NOT THREAD SAFE - USE ONLY FROM ALGORITHM THREAD OR LOCK
- /// Doubles the capacity of every pool and chains the new slots into the free list.
- void resizeGraph() {
-     //DBG(m_logger,5, "GNGGraph::resizing graph from "+to_string(g.size()));
-     DBG_2(m_logger,5, "GNGGraph::resizing");
-     const unsigned int old_size = g.size();
-     const unsigned int new_size = 2 * old_size;
-
-     //Grow positions pool, then re-point the existing nodes at it
-     positions.resize(new_size * gng_dim);
-     for (int i = 0; i < old_size; ++i)
-         g[i].position = &positions[i * gng_dim];
-
-     // The pointers are assigned once more after the node vector itself has grown
-     g.resize(new_size);
-     for (int i = 0; i < old_size; ++i)
-         g[i].position = &positions[i * gng_dim];
-
-     // New slots start out unoccupied
-     occupied.resize(new_size);
-     for (int i = old_size; i < new_size; ++i)
-         occupied[i] = false;
-
-     // Slots old_size-1 .. new_size-2 point at their successor; the last slot ends the list
-     next_free.resize(new_size);
-     for (int i = old_size - 1; i < new_size - 1; ++i)
-         next_free[i] = i + 1;
-     next_free[g.size() - 1] = -1;
-     first_free = old_size;
-
-     DBG_2(m_logger,5, "GNGGraph::resizing done");
-     DBG(m_logger,5, to_str(first_free));
-     DBG(m_logger,5, to_str(next_free[old_size]));
-     //DBG(m_logger,5, "GNGGraph::resizing graph from "+to_string(g.size())+" done");
- }
-};
-
-
-std::string writeToGraphML(GNGGraph &g, string filename = "");
-
-}
-#endif
diff --git a/inst/include/gng/GNGServer.h b/inst/include/gng/GNGServer.h
deleted file mode 100644
index f38c7d9..0000000
--- a/inst/include/gng/GNGServer.h
+++ /dev/null
@@ -1,334 +0,0 @@
-/*
- * File: GNGServer.h
- * Author: staszek
- *
- * Created on October 17, 2013, 8:12 PM
- */
-#ifndef GNGSERVER_H
-#define GNGSERVER_H
-
-
-#include
-#include
-#include